Ask ML: has anyone tried to reproduce ADASECANT?

Here's a Python translation of your code (minus the sign flip) that seems to converge:

import numpy as np

# ADASECANT hyperparameters (shared by adasecant() below).
warm_up = 10  # iterations before the adaptive gamma and eta kick in
eps = 1e-5  # numerical-stability constant added to denominators
tau_init = 2.2  # initial (and reset) value of the moving-average time scale tau

def sq(x):
    """Return the square of *x*."""
    return x ** 2

def aver(a, x, tau):
    """Exponentially decaying moving average: blend old value *a* with
    new sample *x* using time scale *tau* (larger tau = slower update)."""
    decay = 1 - 1 / tau
    return decay * a + x / tau

def gradient(x):
    """Gradient of the test objective f(x) = x**2.

    The original experiment optionally added uniform noise
    (np.random.uniform(-0.5, 0.5)); it is left disabled here.
    """
    return x * 2

def adasecant(x, t_max):
    """Minimize f(x) = x**2 with the ADASECANT adaptive secant method.

    Runs ``t_max`` iterations starting from ``x``, printing per-iteration
    diagnostics (``t tau gamma eta x``) to stdout.

    Parameters
    ----------
    x : float
        Initial point.
    t_max : int
        Number of iterations to run.

    Returns
    -------
    float
        The final iterate after ``t_max`` updates.  (The original code
        returned None, which made the result unusable programmatically.)
    """
    # Moving-average accumulators, all initialized to zero.
    gamma_numer = 0.       # running avg of (g - g_prev)(g - g_aver)
    gamma_denom = 0.       # running avg of (g - g_aver)(g_prev - g_aver)
    g_aver = 0.            # running avg of g
    g_sq_aver = 0.         # running avg of g^2
    g_prev = 0.            # gradient from the previous step
    tau = tau_init         # adaptive time scale of the moving averages
    alpha_aver = 0.        # running avg of alpha = g - g_prev
    alpha_sq_aver = 0.     # running avg of alpha^2
    alpha_delta_aver = 0.  # running avg of alpha * delta
    delta_sq_aver = 0.     # running avg of delta^2
    delta_aver = 0.        # running avg of delta
    delta = 0.             # parameter update from the previous step

    t = 0
    while t < t_max:
        t += 1
        g = gradient(x)

        # Secant difference of successive gradients.
        alpha = g - g_prev

        # Outlier reset: if g or alpha lands more than 2 standard
        # deviations from its running mean (sq(dev) > 4 * variance),
        # reset tau so the averages re-adapt quickly.
        if ((sq(g - g_aver) > 4 * (g_sq_aver - sq(g_aver))) or
           (sq(alpha - alpha_aver) > 4 * (alpha_sq_aver - sq(alpha_aver)))):
            tau = tau_init

        gamma_numer = aver(gamma_numer, (g - g_prev) * (g - g_aver), tau)
        gamma_denom = aver(gamma_denom, (g - g_aver) * (g_prev - g_aver), tau)

        # Variance-reduction coefficient; held at 0 during warm-up.
        gamma = 0
        if t >= warm_up:
            gamma = gamma_numer / (gamma_denom + eps)

        # Variance-reduced ("corrected") gradient, computed from the
        # averages BEFORE they absorb the current sample.
        g_wave = (g + gamma * g_aver) / (1 + gamma)
        g_aver = aver(g_aver, g, tau)

        # Update the moving averages.  Note: `delta` here is still the
        # update from the PREVIOUS iteration, by design.
        g_sq_aver = aver(g_sq_aver, sq(g), tau)
        alpha_aver = aver(alpha_aver, alpha, tau)
        alpha_sq_aver = aver(alpha_sq_aver, sq(alpha), tau)
        alpha_delta_aver = aver(alpha_delta_aver, alpha * delta, tau)
        delta_sq_aver = aver(delta_sq_aver, sq(delta), tau)
        delta_aver = aver(delta_aver, delta, tau)

        # Secant step size; a fixed small rate during warm-up.
        eta = 1e-3
        if t >= warm_up:
            eta = np.sqrt(delta_sq_aver / (alpha_sq_aver + eps)) - alpha_delta_aver / (alpha_sq_aver + eps)

        # Adapt the time scale: tau grows by 1 each step, shrunk by the
        # (normalized) consistency of recent updates.
        tau = (1 - sq(delta_aver) / (delta_sq_aver + eps)) * tau + 1

        delta = -eta * g_wave

        x += delta

        g_prev = g

        print('{0} {1} {2} {3} {4}'.format(t, tau, gamma, eta, x))

    return x

if __name__ == "__main__":
    adasecant(10, 1000)
/r/MachineLearning Thread