% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kerasOptimizer.R
\name{optimizer_nadam}
\alias{optimizer_nadam}
\title{Nesterov Adam optimizer}
\usage{
optimizer_nadam(
  learning_rate = 0.002,
  beta_1 = 0.9,
  beta_2 = 0.999,
  epsilon = NULL,
  schedule_decay = 0.004,
  clipnorm = NULL,
  clipvalue = NULL,
  ...
)
}
\arguments{
\item{learning_rate}{float >= 0. Learning rate.}

\item{beta_1}{The exponential decay rate for the 1st moment estimates. float,
0 < beta < 1. Generally close to 1.}

\item{beta_2}{The exponential decay rate for the 2nd moment estimates. float,
0 < beta < 1. Generally close to 1.}

\item{epsilon}{float >= 0. Fuzz factor. If \code{NULL}, defaults to \code{k_epsilon()}.}

\item{schedule_decay}{Schedule decay.}

\item{clipnorm}{Gradients will be clipped when their L2 norm exceeds this
value.}

\item{clipvalue}{Gradients will be clipped when their absolute value exceeds
this value.}

\item{...}{Unused, present only for backwards compatibility.}
}
\description{
Much like Adam is essentially RMSprop with momentum, Nadam is Adam
with Nesterov momentum.
}
\details{
Default parameters follow those provided in the paper. It is
  recommended to leave the parameters of this optimizer at their default
  values.
}
\note{
To enable compatibility with the ranges of the learning rates
of the other optimizers, the learning rate \code{learning_rate}
is internally mapped to \code{2 * learning_rate}. That is,
a learning rate of 0.001 will be mapped to 0.002 (which is the default).
}
\seealso{
\href{https://www.cs.toronto.edu/~fritz/absps/momentum.pdf}{On the importance of initialization and momentum in deep
  learning}.

Other optimizers: 
\code{\link{optimizer_adadelta}()},
\code{\link{optimizer_adagrad}()},
\code{\link{optimizer_adamax}()},
\code{\link{optimizer_adam}()},
\code{\link{optimizer_rmsprop}()},
\code{\link{optimizer_sgd}()}
}
\concept{optimizers}
