Pawian commit 51dde1fb
authored 9 months ago by fhoelzken

added gradient clipping and adaptive learning rate to Adam

Parent: 92bea08a
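For reference, the update rule implemented by the modified AdamMinimizer in the diff below can be summarized as follows. This is a sketch using the symbols from the code (g_i the gradient component, theta_i the parameter, eta the learning rate); the 0.2 clipping factor and the t+1 bias-correction exponent are taken directly from the new code:

$$ g_i \leftarrow \min\bigl(0.2\,|\theta_i|,\ \max(-0.2\,|\theta_i|,\ g_i)\bigr) $$
$$ s_i \leftarrow \beta_1 s_i + (1-\beta_1)\,g_i, \qquad v_i \leftarrow \beta_2 v_i + (1-\beta_2)\,g_i^2 $$
$$ \hat{s}_i = \frac{s_i}{1-\beta_1^{\,t+1}}, \qquad \hat{v}_i = \frac{v_i}{1-\beta_2^{\,t+1}}, \qquad \theta_i \leftarrow \theta_i - \eta\,\frac{\hat{s}_i}{\sqrt{\hat{v}_i}+\epsilon} $$

with beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8, and eta = _learning_rate (0.001 initially, multiplied by 0.999 every iteration).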
Changes: 2 changed files with 92 additions and 57 deletions

  MinFunctions/AdamMinimizer.cc  +89 −56
  MinFunctions/AdamMinimizer.hh  +3 −1
MinFunctions/AdamMinimizer.cc  +89 −56

@@ -26,81 +26,114 @@
 #include <string>
 #include <math.h>
 #include <iomanip>
 #include "MinFunctions/AdamMinimizer.hh"
 #include "PwaUtils/GlobalEnv.hh"
 #include "ErrLogger/ErrLogger.hh"
 #include "PwaUtils/GlobalEnv.hh"
 #include "ConfigParser/ParserBase.hh"
 AdamMinimizer::AdamMinimizer(std::shared_ptr<AbsFcn<FCNGradientBase>> theAbsFcnPtr, std::shared_ptr<AbsPawianParameters> upar) :
   AbsPawianMinimizer<FCNGradientBase>(theAbsFcnPtr, upar)
-  ,_max_iterations(1000)
-  ,_s(upar->Params().size(), 0.)
-  ,_v(upar->Params().size(), 0.)
-  ,_learning_rate(0.2)
+  ,_max_iterations(5000)
+  ,_s(upar->Params().size(), 0.0)
+  ,_v(upar->Params().size(), 0.0)
+  ,_learning_rate(0.001)
+  ,_initial_lr(0.001)
+  ,_decay_rate(0.01)
   ,_iterations(0)
   ,_currentPawianParams(std::shared_ptr<AbsPawianParameters>(upar->Clone()))
   ,_bestLH(1.e20)
   ,_bestPawianParams(std::shared_ptr<AbsPawianParameters>(upar->Clone()))
   ,_noItersWoImprovement(0)
+  ,_patience(10)
 {
 }
 AdamMinimizer::~AdamMinimizer()
 {}
-void AdamMinimizer::minimize(){
-  if (_iterations == 0){
-    double currentLH = _absFcn->operator()(_bestPawianParams->Params());
-    if (currentLH < _bestLH) _bestLH = currentLH;
-    InfoMsg << "best LH: " << _bestLH << "\t noIters: " << _iterations << "\t noItersWoImprovement: " << _noItersWoImprovement << endmsg;
-  }
-
-  while (_iterations <= _max_iterations && _noItersWoImprovement < 100){
-    if (_noItersWoImprovement > 0 && _noItersWoImprovement % 10 == 0){
-      //continue with parameters of the best fit
-      _currentPawianParams = std::shared_ptr<AbsPawianParameters>(_bestPawianParams->Clone());
-    }
-    std::vector<double> derivatives = _absFcn->Gradient(_currentPawianParams->Params());
-    updateParameters(_currentPawianParams, derivatives, _s, _v, _iterations);
-    double currentLH = _absFcn->operator()(_currentPawianParams->Params());
-    if (currentLH < _bestLH){
-      _bestLH = currentLH;
-      _bestPawianParams = std::shared_ptr<AbsPawianParameters>(_currentPawianParams->Clone());
-      _noItersWoImprovement = 0;
-    }
-    else ++_noItersWoImprovement;
-    ++_iterations;
-    _learning_rate *= 0.999;
-    InfoMsg << "best LH: " << _bestLH << "\t noIters: " << _iterations << "\t noItersWoImprovement: " << _noItersWoImprovement << endmsg;
-  }
-}
+void AdamMinimizer::minimize()
+{
+  const double gradient_tolerance = 1e-8;
+
+  if (_iterations == 0)
+  {
+    double currentLH = _absFcn->operator()(_bestPawianParams->Params());
+    if (currentLH < _bestLH) _bestLH = currentLH;
+    std::cout << "Initial best LH: " << _bestLH << "\t noIters: " << _iterations << "\t noItersWoImprovement: " << _noItersWoImprovement << std::endl;
+  }
+
+  while (_iterations <= _max_iterations && _noItersWoImprovement < 1000)
+  {
+    if (_noItersWoImprovement > 0 && _noItersWoImprovement >= _patience)
+    {
+      _currentPawianParams = std::shared_ptr<AbsPawianParameters>(_bestPawianParams->Clone());
+      _patience = std::min(_patience * 1.05, 30.0);
+      _noItersWoImprovement = 0;
+    }
+
+    std::vector<double> derivatives = _absFcn->Gradient(_currentPawianParams->Params());
+    updateParameters(_currentPawianParams, derivatives, _s, _v, _iterations);
+    double currentLH = _absFcn->operator()(_currentPawianParams->Params());
+
+    double gradientNorm = 0.0;
+    for (const auto& grad : derivatives)
+    {
+      gradientNorm += grad * grad;
+    }
+    gradientNorm = sqrt(gradientNorm);
+
+    if (currentLH < _bestLH)
+    {
+      _bestLH = currentLH;
+      _bestPawianParams = std::shared_ptr<AbsPawianParameters>(_currentPawianParams->Clone());
+      _noItersWoImprovement = 0;
+    }
+    else ++_noItersWoImprovement;
+
+    if (gradientNorm < gradient_tolerance && std::abs(currentLH - _bestLH) / std::abs(_bestLH) < 1e-4)
+    {
+      std::cout << "Stopping early: Gradient norm below tolerance and no significant likelihood improvement." << std::endl;
+      break;
+    }
+
+    ++_iterations;
+    _learning_rate *= 0.999;
+    std::cout << "best LH: " << _bestLH << "\t noIters: " << _iterations << "\t noItersWoImprovement: " << _noItersWoImprovement << std::endl;
+  }
+}
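The new minimize() restarts from the best-so-far parameters once _noItersWoImprovement reaches _patience, grows the patience by 5% per restart (capped at 30), and stops early when the gradient norm falls below 1e-8. Below is a minimal, self-contained sketch of that control flow, using a toy quadratic objective and plain std::vector parameters as stand-ins for the Pawian classes; none of these names come from the repository.

  #include <algorithm>
  #include <cmath>
  #include <cstdio>
  #include <vector>

  // Toy illustration of the loop control in AdamMinimizer::minimize():
  // restart from the best point after `patience` bad iterations, grow the
  // patience (capped at 30), and stop once the gradient norm is tiny.
  int main() {
    std::vector<double> params = {3.0, -2.0};   // current point
    std::vector<double> best   = params;        // best point so far
    double bestLH = 1.e20;                      // best objective value so far
    double patience = 10.0;
    int noItersWoImprovement = 0;
    double lr = 0.1;
    const double gradient_tolerance = 1e-8;

    for (int iter = 0; iter <= 5000 && noItersWoImprovement < 1000; ++iter) {
      // After `patience` iterations without improvement, continue from the best fit.
      if (noItersWoImprovement > 0 && noItersWoImprovement >= patience) {
        params = best;
        patience = std::min(patience * 1.05, 30.0);
        noItersWoImprovement = 0;
      }

      // Gradient of f(x,y) = x^2 + y^2 and a plain gradient step.
      std::vector<double> grad = {2.0 * params[0], 2.0 * params[1]};
      double gradientNorm = 0.0;
      for (double g : grad) gradientNorm += g * g;
      gradientNorm = std::sqrt(gradientNorm);
      for (std::size_t i = 0; i < params.size(); ++i) params[i] -= lr * grad[i];

      double currentLH = params[0] * params[0] + params[1] * params[1];
      if (currentLH < bestLH) { bestLH = currentLH; best = params; noItersWoImprovement = 0; }
      else ++noItersWoImprovement;

      if (gradientNorm < gradient_tolerance) {
        std::printf("Stopping early at iteration %d\n", iter);
        break;
      }
      lr *= 0.999;  // same flat decay factor as in the committed code
    }
    std::printf("best value: %g\n", bestLH);
    return 0;
  }

In this toy run every step improves the objective, so the patience branch is never taken; it only fires when the objective stalls, which is the situation the committed code is guarding against.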
-void AdamMinimizer::updateParameters(std::shared_ptr<AbsPawianParameters> pawianParams, std::vector<double>& gradients, std::vector<double>& s, std::vector<double>& v, int t){
-  double beta1 = 0.9;
-  double beta2 = 0.99;
-  double epsilon = 1. * pow(10., -8);
-
-  for (unsigned int i = 0; i < pawianParams->Params().size(); ++i){
-    if (pawianParams->IsFixed(i)) continue;
-    s.at(i) = beta1 * s.at(i) + (1.0 - beta1) * gradients.at(i);
-    v.at(i) = beta2 * v.at(i) + (1.0 - beta2) * gradients.at(i) * gradients.at(i);
-    double s_hat = s.at(i) / (1.0 - pow(beta1, (t + 1)));
-    double v_hat = v.at(i) / (1.0 - pow(beta2, (t + 1)));
-    double newVal = pawianParams->Value(i) - _learning_rate * s_hat / (std::sqrt(v_hat) + epsilon);
-    if (pawianParams->HasLimits(i)){
-      if (newVal > pawianParams->UpperLimit(i)) newVal = pawianParams->UpperLimit(i);
-      else if (newVal < pawianParams->LowerLimit(i)) newVal = pawianParams->LowerLimit(i);
-    }
-    pawianParams->SetValue(i, newVal);
-  }
-}
+void AdamMinimizer::updateParameters(std::shared_ptr<AbsPawianParameters> pawianParams, std::vector<double>& gradients, std::vector<double>& s, std::vector<double>& v, int t)
+{
+  double beta1 = 0.9;
+  double beta2 = 0.999;
+  double epsilon = 1e-8;
+
+  for (unsigned int i = 0; i < pawianParams->Params().size(); ++i)
+  {
+    if (pawianParams->IsFixed(i)) continue;
+
+    double clip_value = 0.2 * std::abs(pawianParams->Value(i));
+    gradients[i] = std::min(clip_value, std::max(-clip_value, gradients[i]));
+
+    s.at(i) = beta1 * s.at(i) + (1.0 - beta1) * gradients.at(i);
+    v.at(i) = beta2 * v.at(i) + (1.0 - beta2) * gradients.at(i) * gradients.at(i);
+
+    double s_hat = s.at(i) / (1.0 - pow(beta1, (t + 1)));
+    double v_hat = v.at(i) / (1.0 - pow(beta2, (t + 1)));
+
+    //double learning_rate_t = _initial_lr / (1.0 + _decay_rate * t);
+    //double adaptive_lr = learning_rate_t / (std::abs(pawianParams->Value(i)) + epsilon);
+    //double gradient_update = adaptive_lr * s_hat / (std::sqrt(v_hat) + epsilon);
+
+    double newVal = pawianParams->Value(i) - _learning_rate * s_hat / (std::sqrt(v_hat) + epsilon);
+
+    if (pawianParams->HasLimits(i))
+    {
+      if (newVal > pawianParams->UpperLimit(i)) newVal = pawianParams->UpperLimit(i);
+      else if (newVal < pawianParams->LowerLimit(i)) newVal = pawianParams->LowerLimit(i);
+    }
+    pawianParams->SetValue(i, newVal);
+  }
+}
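The clipping added above bounds each gradient component to plus or minus 0.2 times the current parameter magnitude before the Adam moments are updated. A minimal sketch of one such update step, decoupled from the Pawian classes (the free function and its arguments are illustrative, not part of the repository; gradients is taken by value because the clipped values are only needed locally):

  #include <algorithm>
  #include <cmath>
  #include <cstddef>
  #include <vector>

  // One Adam step with relative gradient clipping: each gradient component
  // is limited to +/- 0.2 * |parameter| before the first and second moments
  // (s, v) are updated and bias-corrected.
  void adamStepWithClipping(std::vector<double>& params,
                            std::vector<double> gradients,
                            std::vector<double>& s,
                            std::vector<double>& v,
                            int t,
                            double learning_rate) {
    const double beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8;
    for (std::size_t i = 0; i < params.size(); ++i) {
      const double clip_value = 0.2 * std::abs(params[i]);
      gradients[i] = std::min(clip_value, std::max(-clip_value, gradients[i]));

      s[i] = beta1 * s[i] + (1.0 - beta1) * gradients[i];
      v[i] = beta2 * v[i] + (1.0 - beta2) * gradients[i] * gradients[i];

      const double s_hat = s[i] / (1.0 - std::pow(beta1, t + 1));  // bias correction
      const double v_hat = v[i] / (1.0 - std::pow(beta2, t + 1));

      params[i] -= learning_rate * s_hat / (std::sqrt(v_hat) + epsilon);
    }
  }

One consequence of the relative clip worth noting: a parameter sitting exactly at zero gets a zero clipping window, so its incoming gradient is zeroed and it can only move through momentum already accumulated in s.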
 void AdamMinimizer::printFitResult(double evtWeightSumData){
   InfoMsg << "\n\n********************** Final fit parameters *************************" << endmsg;
   _bestPawianParams->print(std::cout, true);
 ...
(remainder of this file's diff collapsed)
MinFunctions/AdamMinimizer.hh  +3 −1

@@ -61,12 +61,14 @@ protected:
   std::vector<double> _s;
   std::vector<double> _v;
   double _learning_rate;
+  double _initial_lr;
+  double _decay_rate;
   unsigned int _iterations;
   std::shared_ptr<AbsPawianParameters> _currentPawianParams;
   double _bestLH;
   std::shared_ptr<AbsPawianParameters> _bestPawianParams;
   unsigned int _noItersWoImprovement;
+  double _patience;
   void updateParameters(std::shared_ptr<AbsPawianParameters> pawianParams, std::vector<double>& gradients, std::vector<double>& s, std::vector<double>& v, int t);
 ...
(remainder of this file's diff collapsed)
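The new members _initial_lr and _decay_rate back a time-based learning-rate schedule that the committed .cc keeps commented out; the active code still applies the flat _learning_rate *= 0.999 decay per iteration. A minimal sketch of that schedule, assuming the 1/(1 + decay_rate * t) form from the commented lines (the function name and the main() driver are illustrative only):

  #include <cmath>
  #include <cstdio>
  #include <initializer_list>

  // Time-based decay corresponding to _initial_lr and _decay_rate
  // (disabled in this commit's .cc, which multiplies the rate by 0.999 instead).
  double decayedLearningRate(double initial_lr, double decay_rate, int t) {
    return initial_lr / (1.0 + decay_rate * t);   // lr_t = lr_0 / (1 + d*t)
  }

  int main() {
    const double initial_lr = 0.001;  // matches _initial_lr(0.001)
    const double decay_rate = 0.01;   // matches _decay_rate(0.01)
    for (int t : {0, 100, 1000, 5000})
      std::printf("t=%4d  decayed lr=%.6f  flat-decay lr=%.6f\n",
                  t, decayedLearningRate(initial_lr, decay_rate, t),
                  initial_lr * std::pow(0.999, t));
    return 0;
  }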