/**
* @file uesnet.hpp
* @brief This file contains the implementation of the UESMANN network itself - at least, those
* parts which are different from a standard Rumelhart/Hinton/Williams MLP.
*
*/
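/*
 * A sketch of the mechanism as implemented below: the network carries a
 * single "hormone" value h (the modulator). In update(), each node's
 * weighted input sum is scaled by (h+1) before the sigmoid is applied,
 * while the biases are left unmodulated, so at h=0 the network behaves as
 * a plain MLP and at h=1 it runs with its weights effectively doubled.
 * During training, trainBatch() sets h per example from the ExampleSet, so
 * a single weight set is trained to produce different behaviour at
 * different hormone levels.
 */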
#ifndef __UESNET_HPP
#define __UESNET_HPP
/**
 * \brief The UESMANN network, which is itself based on the BPNet code, since
 * it has the same architecture as a plain MLP.
 */
class UESNet: public BPNet {
    /**
     * \brief the modulator (hormone) value h, initially 0
     */
    double modulator;
public:
    /**
     * \brief The constructor is mostly identical to the BPNet constructor
     */
    UESNet(int nlayers,const int *layerCounts) : BPNet(nlayers,layerCounts),
                                                 modulator(0) {
    }
    /**
     * \brief set the hormone (modulator) value
     */
    virtual void setH(double h){
        modulator = h;
    }
    /**
     * \brief get the current hormone (modulator) value
     */
    virtual double getH() const {
        return modulator;
    }
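    /*
     * A minimal usage sketch (hypothetical: the public training entry point
     * is assumed to come from BPNet, since trainBatch() below is protected):
     *
     *   int counts[] = {2,2,1};
     *   UESNet net(3,counts);  // a 2-2-1 network; the hormone starts at 0
     *   net.setH(0.0);         // behave as the base function
     *   net.setH(1.0);         // behave as the fully modulated function
     */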
protected:
    void calcError(double *in,double *out){
        // first run the network forwards
        setInputs(in);
        update();
        // then calculate the error in the output layer.
        // This does the THIRD of the backprop equations, Eq. 4.15,
        // giving \delta^L_j:
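        //
        //   \delta^L_j = o_j (1 - o_j) (o_j - t_j)
        //
        // (the sigmoid derivative o(1-o) times the derivative (o-t) of the
        // half-squared error; this restates the loop below in equation form)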
        int ol = numLayers-1;
        for(int i=0;i<layerSizes[ol];i++){
            double o = outputs[ol][i];
            errors[ol][i] = o*(1-o)*(o-out[i]);
        }
        // then work out the errors in all the other layers, working backwards
        // from the last hidden layer (each layer's error depends on the layer
        // above it) and factoring in (rather inefficiently) the hormone.
        // This is the FOURTH backprop equation, Eq. 4.16:
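        //
        //   \delta^l_j = (h+1) o^l_j (1 - o^l_j) \sum_i \delta^{l+1}_i w^{l+1}_{ij}
        //
        // (restating the loop below in equation form)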
        for(int l=numLayers-2;l>=1;l--){
            for(int j=0;j<layerSizes[l];j++){
                double e = 0;
                for(int i=0;i<layerSizes[l+1];i++)
                    e += errors[l+1][i]*getw(l+1,i,j);
                // produce the \delta^l_j term, where l is the layer and j
                // the index of the node. Here is where we factor in the
                // modulator.
                errors[l][j] = e * (modulator+1.0) * outputs[l][j] * (1-outputs[l][j]);
            }
        }
    }
    virtual void update(){
        double hfactor = modulator+1.0;
        for(int i=1;i<numLayers;i++){
            for(int j=0;j<layerSizes[i];j++){
                double v = 0.0;
                for(int k=0;k<layerSizes[i-1];k++){
                    v += getw(i,j,k) * outputs[i-1][k];
                }
                // factor in the hormone here:
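                //
                //   o^i_j = sigmoid( (h+1) * sum_k w_{jk} o^{i-1}_k + b^i_j )
                //
                // (restating the line below; note that the bias is NOT
                // scaled by the hormone)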
                outputs[i][j]=sigmoid(v*hfactor+biases[i][j]);
            }
        }
    }
    virtual double trainBatch(ExampleSet& ex,int start,int num,double eta){
        // zero the average gradients
        for(int j=0;j<numLayers;j++){
            for(int k=0;k<layerSizes[j];k++)
                gradAvgsBiases[j][k]=0;
            for(int i=0;i<largestLayerSize*largestLayerSize;i++)
                gradAvgsWeights[j][i]=0;
        }
        // reset total error
        double totalError=0;
        // iterate over examples
        for(int nn=0;nn<num;nn++){
            int exampleIndex = nn+start;
            // set the modulator from this example's hormone value
            setH(ex.getH(exampleIndex));
            // get the desired outputs for this example
            double *outs = ex.getOutputs(exampleIndex);
            // build the errors for this example
            calcError(ex.getInputs(exampleIndex),outs);
            // accumulate the gradients
            for(int l=1;l<numLayers;l++){
                for(int i=0;i<layerSizes[l];i++){
                    // this does the FIRST of the backprop equations,
                    // Eq. 4.13, calculating dC/dw; its (h+1) factor
                    // is dealt with below:
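                    //
                    //   dC/dw^l_{ij} += \delta^l_i o^{l-1}_j
                    //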
                    for(int j=0;j<layerSizes[l-1];j++)
                        getavggradw(l,i,j) += errors[l][i]*outputs[l-1][j];
                    // this does the SECOND of the backprop equations,
                    // Eq. 4.14:
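                    //
                    //   dC/db^l_i += \delta^l_i
                    //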
                    gradAvgsBiases[l][i] += errors[l][i];
                }
            }
            // add this example's contribution to the total error
            int ol = numLayers-1;
            for(int i=0;i<layerSizes[ol];i++){
                double o = outputs[ol][i];
                double e = (o-outs[i]);
                totalError += e*e;
            }
        }
        // get the modulator factor; note that this uses the hormone of the
        // LAST example processed, which matches per-example modulation only
        // when all the examples in the batch share the same h
        double hfactor = modulator+1.0;
        // for calculating the averages - 1/(number of examples trained)
        double factor = 1.0/(double)num;
        // we now have a full set of running averages. Time to apply them.
        for(int l=1;l<numLayers;l++){
            for(int i=0;i<layerSizes[l];i++){
                for(int j=0;j<layerSizes[l-1];j++){
                    // this does the modulation part of Eq. 4.13, but a
                    // little later than in the thesis
                    double wdelta = eta*getavggradw(l,i,j)*factor*hfactor;
                    // printf("WCORR: %f factor %f\n",wdelta,getavggradw(l,i,j));
                    getw(l,i,j) -= wdelta;
                }
                // biases are not modulated
                double bdelta = eta*gradAvgsBiases[l][i]*factor;
                biases[l][i] -= bdelta;
            }
        }
        // return the total error: the sum over outputs of each output's
        // mean squared error across the batch
        return totalError*factor;
    }
};
#endif /* __UESNET_HPP */