@@ -167,7 +167,7 @@ np.mean(keras.losses.mean_squared_error(y_valid, y_pred))
167167
168168
169169
170- 0.004301323
170+ 0.0040804083
171171
172172
173173
@@ -194,7 +194,7 @@ simple_rnn.compile(
194194
195195history = simple_rnn.fit(
196196 X_train, y_train,
197- epochs = 20 ,
197+ epochs = 10 ,
198198 validation_data = (X_valid, y_valid),
199199 verbose = 0
200200)
@@ -219,7 +219,7 @@ np.mean(keras.losses.mean_squared_error(y_valid, y_pred))
219219
220220
221221
222- 0.014256743
222+ 0.011384344
223223
224224
225225
@@ -272,7 +272,7 @@ np.mean(keras.losses.mean_squared_error(y_valid, y_pred))
272272
273273
274274
275- 0.0032862981
275+ 0.0028538946
276276
277277
278278
@@ -285,7 +285,7 @@ One way to do this is to use the model trained above to predict the next step, t
285285
286286
287287``` python
288- np.random.seed(2 )
288+ np.random.seed(23 )
289289series = generate_time_series(1 , n_steps + 10 )
290290X_new, Y_new = series[:, :n_steps], series[:, n_steps:]
291291X = X_new
@@ -349,7 +349,7 @@ deep_rnn_10steps.compile(
349349
350350history = deep_rnn_10steps.fit(
351351 X_train, Y_train,
352- epochs = 20 ,
352+ epochs = 10 ,
353353 validation_data = (X_valid, Y_valid),
354354 verbose = 0
355355)
@@ -374,7 +374,7 @@ np.mean(keras.losses.mean_squared_error(Y_valid, Y_pred))
374374
375375
376376
377- 0.010989715
377+ 0.008615565
378378
379379
380380
@@ -462,7 +462,7 @@ s2s_rnn.compile(
462462
463463history = s2s_rnn.fit(
464464 X_train, Y_train,
465- epochs = 20 ,
465+ epochs = 13 ,
466466 validation_data = (X_valid, Y_valid),
467467 verbose = 0
468468)
@@ -487,7 +487,7 @@ np.mean(keras.losses.mean_squared_error(Y_valid, Y_pred))
487487
488488
489489
490- 0.02611927
490+ 0.024657644
491491
492492
493493
@@ -567,7 +567,7 @@ ln_rnn.compile(
567567
568568history = ln_rnn.fit(
569569 X_train, Y_train,
570- epochs = 20 ,
570+ epochs = 10 ,
571571 validation_data = (X_valid, Y_valid),
572572 verbose = 0
573573)
@@ -592,7 +592,7 @@ np.mean(keras.losses.mean_squared_error(Y_valid, Y_pred))
592592
593593
594594
595- 0.026189726
595+ 0.02566188
596596
597597
598598
@@ -618,6 +618,163 @@ plt.show()
618618![ png] ( homl_ch15_Processing-sequences-using-RNNs-and-CNNs_files/homl_ch15_Processing-sequences-using-RNNs-and-CNNs_46_0.png )
619619
620620
621+ ### Tackling the short-term memory problem
622+
623+ Some information is lost at each time-step of an RNN to the point where, by the end of the sequence, there is no trace of the initial inputs.
624+ Various types of cells with long-term memory have been created to tackle this problem.
625+
626+ #### Long Short-Term Memory (LSTM) cells
627+
628+ The * Long Short-Term Memory* (LSTM) cell was created in 1997 and has been steadily improved since.
629+ With Keras, it can be used as a drop-in replacement for the ` SimpleRNN ` layers we have been using thus far.
630+ This will lead to better performance and faster training as well as retaining long-term dependencies in the data.
631+
632+
633+ ``` python
634+ lstm_rnn = keras.models.Sequential([
635+ keras.layers.LSTM(20 , return_sequences = True , input_shape = [None , 1 ]),
636+ keras.layers.LSTM(20 , return_sequences = True ),
637+ keras.layers.TimeDistributed(keras.layers.Dense(10 ))
638+ ])
639+
640+ lstm_rnn.compile(
641+ optimizer = keras.optimizers.Nadam(),
642+ loss = keras.losses.MeanSquaredError(),
643+ metrics = [last_time_step_mse]
644+ )
645+
646+ history = lstm_rnn.fit(
647+ X_train, Y_train,
648+ epochs = 10 ,
649+ validation_data = (X_valid, Y_valid),
650+ verbose = 0
651+ )
652+ ```
653+
654+
655+ ``` python
656+ pd.DataFrame(history.history).plot(figsize = (8 , 6 ))
657+ plt.show()
658+ ```
659+
660+
661+ ![ png] ( homl_ch15_Processing-sequences-using-RNNs-and-CNNs_files/homl_ch15_Processing-sequences-using-RNNs-and-CNNs_49_0.png )
662+
663+
664+
665+ ``` python
666+ Y_pred = lstm_rnn.predict(X_valid)
667+ np.mean(keras.losses.mean_squared_error(Y_valid, Y_pred))
668+ ```
669+
670+
671+
672+
673+ 0.026746394
674+
675+
676+
677+
678+ ``` python
679+ Y_pred = lstm_rnn.predict(X_new)
680+
681+ fig = plt.figure(figsize = (8 , 5 ))
682+ plt.plot(range (X.shape[1 ]), X[0 , :, 0 ], ' k-' )
683+
684+ for i in range (1 , Y_pred.shape[2 ] + 1 ):
685+ plt.plot(range (i, X_new.shape[1 ] + i),
686+ Y_pred[0 , :, i- 1 ],
687+ ' r--' , label = i, alpha = 0.5 )
688+
689+ plt.xlabel(' time step' , fontsize = 14 )
690+ plt.ylabel(' value' , fontsize = 14 )
691+ plt.title(' Forecasting several times steps ahead' , fontsize = 18 )
692+ plt.show()
693+ ```
694+
695+
696+ ![ png] ( homl_ch15_Processing-sequences-using-RNNs-and-CNNs_files/homl_ch15_Processing-sequences-using-RNNs-and-CNNs_51_0.png )
697+
698+
699+ The author continued on to explain LSTM in more detail, followed by explaining some of the popular variants including those with * peephole connections* and * Gated Recurrent Unit* (GRU) cells.
700+ He also provides explanations for implementing them both in Keras.
701+
702+ #### Using 1D convolutional layers to process sequences
703+
704+ It is also possible to use a convolutional layer to reduce the length of the sequence.
705+ Further, the filters are able to learn more subtle short-term patterns.
706+ Below is an example where the first layer is now a convolutional layer.
707+ Notice that the model learns much faster and performs better, too.
708+
709+
710+ ``` python
711+ conv_rnn = keras.models.Sequential([
712+ keras.layers.Conv1D(filters = 20 , kernel_size = 4 , strides = 2 , padding = " valid" ,
713+ input_shape = [None , 1 ]),
714+ keras.layers.LSTM(20 , return_sequences = True ),
715+ keras.layers.LSTM(20 , return_sequences = True ),
716+ keras.layers.TimeDistributed(keras.layers.Dense(10 ))
717+ ])
718+
719+ conv_rnn.compile(
720+ optimizer = keras.optimizers.Nadam(),
721+ loss = keras.losses.MeanSquaredError(),
722+ metrics = [last_time_step_mse]
723+ )
724+
725+ history = conv_rnn.fit(
726+ X_train, Y_train[:, 3 ::2 ],
727+ epochs = 10 ,
728+ validation_data = (X_valid, Y_valid[:, 3 ::2 ]),
729+ verbose = 0
730+ )
731+ ```
732+
733+
734+ ``` python
735+ pd.DataFrame(history.history).plot(figsize = (8 , 6 ))
736+ plt.show()
737+ ```
738+
739+
740+ ![ png] ( homl_ch15_Processing-sequences-using-RNNs-and-CNNs_files/homl_ch15_Processing-sequences-using-RNNs-and-CNNs_54_0.png )
741+
742+
743+
744+ ``` python
745+ Y_pred = conv_rnn.predict(X_valid)
746+ np.mean(keras.losses.mean_squared_error(Y_valid[:, 3 ::2 ], Y_pred))
747+ ```
748+
749+
750+
751+
752+ 0.021750728
753+
754+
755+
756+
757+ ``` python
758+ Y_pred = conv_rnn.predict(X_new)
759+
760+ fig = plt.figure(figsize = (8 , 5 ))
761+ plt.plot(range (X.shape[1 ]), X[0 , :, 0 ], ' k-' )
762+
763+ for i in range (1 , Y_pred.shape[2 ] + 1 ):
764+ plt.plot([x * 2 - i + 2 for x in range (i, Y_pred.shape[1 ] + i)],
765+ Y_pred[0 , :, i- 1 ],
766+ ' r--' , label = i, alpha = 0.5 )
767+
768+ plt.xlabel(' time step' , fontsize = 14 )
769+ plt.ylabel(' value' , fontsize = 14 )
770+ plt.title(' Forecasting several times steps ahead' , fontsize = 18 )
771+ plt.show()
772+ ```
773+
774+
775+ ![ png] ( homl_ch15_Processing-sequences-using-RNNs-and-CNNs_files/homl_ch15_Processing-sequences-using-RNNs-and-CNNs_56_0.png )
776+
777+
621778
622779``` python
623780
0 commit comments