Here's a Python translation of your code (minus the sign flip) that seems to converge:
import numpy as np
# Starting value of the averaging window `tau` in adasecant, and the value
# it is reset to when a gradient (or gradient change) is flagged as an outlier.
tau_init = 2.2
# Small constant added to the denominators of the ratios computed in adasecant.
eps = 1e-5
# First iteration index at which adasecant uses its estimated `gamma` and
# `eta`; before that it uses gamma = 0 and eta = 1e-3.
warm_up = 10
def sq(x):
    """Return ``x`` multiplied by itself."""
    return x * x
def aver(a, x, tau):
    """Return the running average ``a`` updated with the new sample ``x``.

    The previous average keeps a weight of ``1 - 1/tau`` and the new sample
    gets a weight of ``1/tau``, so a larger ``tau`` means a slower-moving
    average.

    Args:
        a: Current value of the running average.
        x: New sample to blend in.
        tau: Averaging window; must be non-zero.

    Returns:
        The updated running average.
    """
    # Operation order is kept as-is so floating-point results do not change.
    return a * (1 - 1 / tau) + x / tau
def gradient(x):
    """Return ``2 * x``, i.e. the derivative of ``x ** 2`` at ``x``.

    This is the gradient that adasecant follows.
    """
    # NOTE(review): the original line ended with a commented-out
    # `np.random.uniform(-0.5,0.5)` -- presumably an optional noise term for
    # testing with stochastic gradients; confirm before re-enabling.
    return 2 * x
def adasecant(x, t_max):
    """Run ``t_max`` adaptive-step descent iterations starting from ``x``.

    Each iteration evaluates ``gradient(x)``, updates a set of running
    averages (via ``aver``) of the gradient ``g``, the gradient change
    ``alpha`` and the previous step ``delta``, derives a step size ``eta``
    from those averages, and moves ``x`` by ``-eta * g_wave`` where
    ``g_wave`` blends the current gradient with its running mean.

    One line ``"t tau gamma eta x"`` is printed per iteration.

    NOTE(review): the text this function was recovered from had lost its
    indentation.  It is reconstructed with the ``tau`` update applied on
    every iteration (not only after warm-up) and the trace printed inside
    the loop -- confirm against the reference implementation.

    Args:
        x: Starting point.
        t_max: Number of iterations to run (integer); values <= 0 run none.

    Returns:
        The value of ``x`` after the last iteration.
    """
    # Running averages used for the correction factor gamma.
    gamma_numer = 0.
    gamma_denom = 0.
    # Running first and second moments of the gradient.
    g_aver = 0.
    g_sq_aver = 0.
    g_prev = 0.
    # Averaging window passed to aver(); adapted on every iteration.
    tau = tau_init
    # Running moments of the gradient change (alpha) and of the step (delta).
    alpha_aver = 0.
    alpha_sq_aver = 0.
    alpha_delta_aver = 0.
    delta_sq_aver = 0.
    delta_aver = 0.
    delta = 0.

    for t in range(1, t_max + 1):
        g = gradient(x)
        alpha = g - g_prev

        # Outlier test: a squared deviation from the running mean larger than
        # 4x the running variance estimate (i.e. more than two standard
        # deviations) resets the averaging window to its initial value.
        g_var = g_sq_aver - sq(g_aver)
        alpha_var = alpha_sq_aver - sq(alpha_aver)
        if (sq(g - g_aver) > 4 * g_var
                or sq(alpha - alpha_aver) > 4 * alpha_var):
            tau = tau_init

        # These two use g_aver *before* it is refreshed below.
        gamma_numer = aver(gamma_numer, alpha * (g - g_aver), tau)
        gamma_denom = aver(gamma_denom, (g - g_aver) * (g_prev - g_aver), tau)

        # gamma stays 0 (plain gradient) until the warm-up period is over.
        if t >= warm_up:
            gamma = gamma_numer / (gamma_denom + eps)
        else:
            gamma = 0
        g_wave = (g + gamma * g_aver) / (1 + gamma)

        # Refresh the running moments; alpha_delta / delta averages still see
        # the step taken on the previous iteration.
        g_aver = aver(g_aver, g, tau)
        g_sq_aver = aver(g_sq_aver, sq(g), tau)
        alpha_aver = aver(alpha_aver, alpha, tau)
        alpha_sq_aver = aver(alpha_sq_aver, sq(alpha), tau)
        alpha_delta_aver = aver(alpha_delta_aver, alpha * delta, tau)
        delta_sq_aver = aver(delta_sq_aver, sq(delta), tau)
        delta_aver = aver(delta_aver, delta, tau)

        # Fixed small step during warm-up, estimated step size afterwards.
        if t >= warm_up:
            alpha_sq = alpha_sq_aver + eps
            eta = np.sqrt(delta_sq_aver / alpha_sq) - alpha_delta_aver / alpha_sq
        else:
            eta = 1e-3

        # Adapt the averaging window from the running step statistics.
        tau = (1 - sq(delta_aver) / (delta_sq_aver + eps)) * tau + 1

        delta = -eta * g_wave
        x += delta
        g_prev = g
        print('{0} {1} {2} {3} {4}'.format(t,tau,gamma,eta,x))

    return x
if __name__ == "__main__":
    # Demo run: start at x = 10 and take 1000 iterations, printing the trace.
    adasecant(x=10, t_max=1000)