58 virtual void apply(arma::Cube<DNN_Dtype>& W,
59 arma::Mat<DNN_Dtype>& B,
60 const arma::Cube<DNN_Dtype>& Wgrad,
61 const arma::Mat<DNN_Dtype>& Bgrad ) = 0;
103 lr = lr_0/(1+lr_a*
it);
106 lr = lr_0*std::pow(lr_a,std::floor(it/lr_b));
109 lr = lr_0*std::exp(-lr_a*it);
159 void apply(arma::Cube<DNN_Dtype>& W,
160 arma::Mat<DNN_Dtype>& B,
161 const arma::Cube<DNN_Dtype>& Wgrad,
162 const arma::Mat<DNN_Dtype>& Bgrad )
182 std::snprintf(p,100,
"(%g)",
lr);
197 arma::Cube<DNN_Dtype>
v;
198 arma::Mat<DNN_Dtype>
vB;
225 void apply(arma::Cube<DNN_Dtype>& W,
226 arma::Mat<DNN_Dtype>& B,
227 const arma::Cube<DNN_Dtype>& Wgrad,
228 const arma::Mat<DNN_Dtype>& Bgrad )
232 v.set_size(arma::size(W));
234 vB.set_size(arma::size(B));
244 vB = mom*vB -
lr*Bgrad ;
259 std::snprintf(p,100,
"(%g,%g)",
lr,mom);
274 arma::Cube<DNN_Dtype>
v;
275 arma::Cube<DNN_Dtype>
vp;
276 arma::Mat<DNN_Dtype>
vB;
304 void apply(arma::Cube<DNN_Dtype>& W,
305 arma::Mat<DNN_Dtype>& B,
306 const arma::Cube<DNN_Dtype>& Wgrad,
307 const arma::Mat<DNN_Dtype>& Bgrad )
311 v.set_size(arma::size(W));
313 vp.set_size(arma::size(W));
315 vB.set_size(arma::size(B));
317 vBp.set_size(arma::size(B));
327 W = W - mom*vp + (1+mom)*v;
330 vB = mom*vB -
lr*Bgrad ;
331 B = B -mom*vBp +(1+mom)*vB;
344 std::snprintf(p,100,
"(%g,%g)",
lr,mom);
359 arma::Cube<DNN_Dtype>
v;
360 arma::Cube<DNN_Dtype>
m;
361 arma::Mat<DNN_Dtype>
vB;
362 arma::Mat<DNN_Dtype>
mB;
396 void apply(arma::Cube<DNN_Dtype>& W,
397 arma::Mat<DNN_Dtype>& B,
398 const arma::Cube<DNN_Dtype>& Wgrad,
399 const arma::Mat<DNN_Dtype>& Bgrad )
403 v.set_size(arma::size(W));
405 m.set_size(arma::size(W));
407 vB.set_size(arma::size(B));
409 mB.set_size(arma::size(B));
417 for (arma::uword k=0;k<v.n_elem;k++ )
419 m(k) = beta1*m(k) +(1-beta1)*dw(k);
420 v(k) = beta2*v(k) +(1-beta2)*dw(k)*dw(k);
421 W(k) = W(k) -
lr*(std::sqrt(1-beta2)/(1-beta1))*m(k)/(std::sqrt(v(k))+eps);
423 for (arma::uword k=0;k<vB.n_elem;k++ )
425 mB(k) = beta1*mB(k) +(1-beta1)*Bgrad(k);
426 vB(k) = beta2*vB(k) +(1-beta2)*Bgrad(k)*Bgrad(k);
427 B(k) = B(k) -
lr*(std::sqrt(1-beta2)/(1-beta1))*mB(k)/(std::sqrt(vB(k))+eps);
441 std::snprintf(p,100,
"(%g)",
lr);
456 arma::Cube<DNN_Dtype>
v;
457 arma::Cube<DNN_Dtype>
m;
458 arma::Mat<DNN_Dtype>
vB;
459 arma::Mat<DNN_Dtype>
mB;
493 void apply(arma::Cube<DNN_Dtype>& W,
494 arma::Mat<DNN_Dtype>& B,
495 const arma::Cube<DNN_Dtype>& Wgrad,
496 const arma::Mat<DNN_Dtype>& Bgrad )
500 v.set_size(arma::size(W));
502 m.set_size(arma::size(W));
504 vB.set_size(arma::size(B));
506 mB.set_size(arma::size(B));
514 for (arma::uword k=0;k<v.n_elem;k++ )
516 m(k) = beta1*m(k) +(1-beta1)*dw(k);
517 v(k) = (beta2*v(k)> std::abs(dw(k))) ? beta2*v(k) : std::abs(dw(k));
518 W(k) = W(k) -
lr*(std::sqrt(1-beta2)/(1-beta1))*m(k)/(v(k)+eps);
520 for (arma::uword k=0;k<vB.n_elem;k++ )
522 mB(k) = beta1*mB(k) +(1-beta1)*Bgrad(k);
523 vB(k) = (beta2*vB(k)> std::abs(Bgrad(k))) ? beta2*vB(k) : std::abs(Bgrad(k));
524 B(k) = B(k) -
lr*(std::sqrt(1-beta2)/(1-beta1))*mB(k)/(vB(k)+eps);
539 std::snprintf(p,100,
"(%g)",
lr);
554 arma::Cube<DNN_Dtype>
Ew;
555 arma::Cube<DNN_Dtype>
dW;
556 arma::Mat<DNN_Dtype>
Eb;
557 arma::Mat<DNN_Dtype>
dB;
587 void apply(arma::Cube<DNN_Dtype>& W,
588 arma::Mat<DNN_Dtype>& B,
589 const arma::Cube<DNN_Dtype>& Wgrad,
590 const arma::Mat<DNN_Dtype>& Bgrad )
594 Ew.set_size(arma::size(W));
596 dW.set_size(arma::size(W));
598 Eb.set_size(arma::size(B));
600 dB.set_size(arma::size(B));
608 for (arma::uword k=0 ;k<W.n_elem ; k++ )
610 Ew(k) = rho*Ew(k)+(1-rho)*w(k)*w(k);
612 DNN_Dtype upd =
lr*w(k)*sqrt(dW(k)+eps)/sqrt(Ew(k)+eps);
614 dW(k) = rho*dW(k)+(1-rho)*upd*upd;
616 for (arma::uword k=0 ;k<B.n_elem ; k++ )
618 Eb(k) = rho*Eb(k)+(1-rho)*Bgrad(k)*Bgrad(k);
620 DNN_Dtype upd =
lr*Bgrad(k)*sqrt(dB(k)+eps)/sqrt(Eb(k)+eps);;
622 dB(k) = rho*dB(k)+(1-rho)*upd*upd;
636 std::snprintf(p,100,
"(%g)",rho);
651 arma::Cube<DNN_Dtype>
v;
652 arma::Mat<DNN_Dtype>
vB;
680 void apply(arma::Cube<DNN_Dtype>& W,
681 arma::Mat<DNN_Dtype>& B,
682 const arma::Cube<DNN_Dtype>& Wgrad,
683 const arma::Mat<DNN_Dtype>& Bgrad )
687 v.set_size(arma::size(W));
689 vB.set_size(arma::size(B));
697 for (arma::uword k=0 ;k<W.n_elem ; k++ )
699 v(k) = v(k)+dw(k)*dw(k);
700 W(k) = W(k)-
lr*dw(k)/std::sqrt(v(k)+eps);
702 for (arma::uword k=0 ;k<B.n_elem ; k++ )
704 vB(k) = vB(k)+Bgrad(k)*Bgrad(k);
705 B(k) = B(k)-
lr*Bgrad(k)/std::sqrt(vB(k)+eps);
719 std::snprintf(p,100,
"(%g)",
lr);
734 arma::Cube<DNN_Dtype>
v;
735 arma::Mat<DNN_Dtype>
vB;
765 void apply(arma::Cube<DNN_Dtype>& W,
766 arma::Mat<DNN_Dtype>& B,
767 const arma::Cube<DNN_Dtype>& Wgrad,
768 const arma::Mat<DNN_Dtype>& Bgrad )
772 v.set_size(arma::size(W));
774 vB.set_size(arma::size(B));
782 for (arma::uword k=0 ;k<W.n_elem ; k++ )
784 v(k) = beta*v(k)+(1-beta)*dw(k)*dw(k);
785 W(k) = W(k)-
lr*dw(k)/std::sqrt(v(k)+eps);
787 for (arma::uword k=0 ;k<B.n_elem ; k++ )
789 vB(k) = beta*vB(k)+(1-beta)*Bgrad(k)*Bgrad(k);
790 B(k) = B(k)-
lr*Bgrad(k)/std::sqrt(vB(k)+eps);
804 std::snprintf(p,100,
"(%g,%g)",
lr,beta);
arma::Cube< DNN_Dtype > m
opt_SGD_momentum(DNN_Dtype s, DNN_Dtype m, DNN_Dtype l=0.0, DNN_Dtype a=0.0)
SGD with momentum constructor.
opt_SGD(DNN_Dtype s, DNN_Dtype l=0.0, DNN_Dtype a=0.0)
SGD constructor.
arma::Mat< DNN_Dtype > vB
arma::Cube< DNN_Dtype > v
arma::Mat< DNN_Dtype > mB
arma::Mat< DNN_Dtype > vB
opt_adagrad(DNN_Dtype s, DNN_Dtype l=0.0, DNN_Dtype a=0.0, DNN_Dtype e=1e-8)
ADAgrad constructor.
void apply(arma::Cube< DNN_Dtype > &W, arma::Mat< DNN_Dtype > &B, const arma::Cube< DNN_Dtype > &Wgrad, const arma::Mat< DNN_Dtype > &Bgrad)
Apply the optimizer to the layer parameters.
opt_adadelta(DNN_Dtype r, DNN_Dtype s=1.0, DNN_Dtype l=0.0, DNN_Dtype a=0.0, DNN_Dtype e=1e-6)
ADAdelta constructor.
arma::Cube< DNN_Dtype > Ew
void apply(arma::Cube< DNN_Dtype > &W, arma::Mat< DNN_Dtype > &B, const arma::Cube< DNN_Dtype > &Wgrad, const arma::Mat< DNN_Dtype > &Bgrad)
Apply the optimizer to the layer parameters.
arma::uword it
Iteration counter.
arma::Cube< DNN_Dtype > v
Velocity internal variable for weight.
std::string get_algorithm(void)
Get the optimizer algorithm information.
DNN_Dtype lr_a
Internal parameter a.
arma::Mat< DNN_Dtype > vB
Velocity internal variable for bias.
arma::Mat< DNN_Dtype > mB
arma::Cube< DNN_Dtype > v
void apply(arma::Cube< DNN_Dtype > &W, arma::Mat< DNN_Dtype > &B, const arma::Cube< DNN_Dtype > &Wgrad, const arma::Mat< DNN_Dtype > &Bgrad)
Apply the optimizer to the layer parameters.
void apply(arma::Cube< DNN_Dtype > &W, arma::Mat< DNN_Dtype > &B, const arma::Cube< DNN_Dtype > &Wgrad, const arma::Mat< DNN_Dtype > &Bgrad)
Apply the optimizer to the layer parameters.
DNN_Dtype lr
Learning rate.
void apply(arma::Cube< DNN_Dtype > &W, arma::Mat< DNN_Dtype > &B, const arma::Cube< DNN_Dtype > &Wgrad, const arma::Mat< DNN_Dtype > &Bgrad)
Apply the optimizer to the layer parameters.
std::string get_algorithm(void)
Get the optimizer algorithm information.
void apply(arma::Cube< DNN_Dtype > &W, arma::Mat< DNN_Dtype > &B, const arma::Cube< DNN_Dtype > &Wgrad, const arma::Mat< DNN_Dtype > &Bgrad)
Apply the optimizer to the layer parameters.
void set_learn_rate_alg(LR_ALG alg, DNN_Dtype a=0.0, DNN_Dtype b=10.0)
Set learning rate algorithm.
Stochastic Gradient Descent with momentum optimizer class.
opt_adamax(DNN_Dtype s, DNN_Dtype l=0.0, DNN_Dtype a=0.0, DNN_Dtype b1=0.9, DNN_Dtype b2=0.999, DNN_Dtype e=1e-8)
ADAMax constructor.
void apply(arma::Cube< DNN_Dtype > &W, arma::Mat< DNN_Dtype > &B, const arma::Cube< DNN_Dtype > &Wgrad, const arma::Mat< DNN_Dtype > &Bgrad)
Apply the optimizer to the layer parameters.
ADAdelta optimizer class.
DNN_Dtype reg_lambda
Regularisation parameter lambda.
DNN_Dtype get_learn_rate(void)
Get the learning rate.
float DNN_Dtype
Data type used in the network (float or double)
Stochastic Gradient Descent optimizer class.
std::string get_algorithm(void)
Get the optimizer algorithm information.
arma::Cube< DNN_Dtype > m
arma::Cube< DNN_Dtype > vp
DNN_Dtype lr_0
Init value for lr.
virtual void apply(arma::Cube< DNN_Dtype > &W, arma::Mat< DNN_Dtype > &B, const arma::Cube< DNN_Dtype > &Wgrad, const arma::Mat< DNN_Dtype > &Bgrad)=0
Apply the optimizer to the layer parameters.
opt_adam(DNN_Dtype s, DNN_Dtype l=0.0, DNN_Dtype a=0.0, DNN_Dtype b1=0.9, DNN_Dtype b2=0.999, DNN_Dtype e=1e-8)
ADAM constructor.
arma::Cube< DNN_Dtype > v
std::string get_algorithm(void)
Get the optimizer algorithm information.
void apply(arma::Cube< DNN_Dtype > &W, arma::Mat< DNN_Dtype > &B, const arma::Cube< DNN_Dtype > &Wgrad, const arma::Mat< DNN_Dtype > &Bgrad)
Apply the optimizer to the layer parameters.
std::string get_algorithm(void)
Get the optimizer algorithm information.
arma::Cube< DNN_Dtype > dW
LR_ALG lr_alg
Learning rate schedule algorithm.
arma::Mat< DNN_Dtype > vB
std::string get_algorithm(void)
Get the optimizer algorithm information.
virtual std::string get_algorithm(void)
Get the optimizer algorithm information.
std::string get_algorithm(void)
Get the optimizer algorithm information.
arma::Cube< DNN_Dtype > v
std::string get_algorithm(void)
Get the optimizer algorithm information.
DNN_Dtype reg_alpha
Elastic net mix parameter - 0=ridge (L2) .. 1=LASSO (L1)
arma::Cube< DNN_Dtype > v
DNN_Dtype lr_b
Internal parameter b.
arma::Mat< DNN_Dtype > Eb
Stochastic Gradient Descent with Nesterov momentum optimizer class.
opt_SGD_nesterov(DNN_Dtype s, DNN_Dtype m, DNN_Dtype l=0.0, DNN_Dtype a=0.0)
SGD with Nesterov momentum constructor.
arma::Mat< DNN_Dtype > dB
opt_rmsprop(DNN_Dtype s, DNN_Dtype l=0.0, DNN_Dtype a=0.0, const DNN_Dtype b=0.9, DNN_Dtype e=1e-8)
RMSprop constructor.
void update_learn_rate(void)
Update learning rate.
arma::Mat< DNN_Dtype > vB
arma::Mat< DNN_Dtype > vBp
arma::Mat< DNN_Dtype > vB