From a68233fb5752e16e36c90ceb41e8f5d4b5e03921 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 9 Aug 2021 10:59:40 +0800 Subject: [PATCH 1/2] rm load_ckpt --- doc/use-deepmd-kit.md | 4 +- examples/fparam/train/input.json | 2 +- examples/fparam/train/input_aparam.json | 2 +- examples/water/train/input_v2_compat.json | 75 +++++++++++++++ examples/water/train/out.json | 92 +++++++++++++++++++ examples/water/train/polar.json | 2 +- examples/water/train/polar_se_a.json | 2 +- examples/water/train/wannier.json | 2 +- examples/water/train/water.json | 2 +- examples/water/train/water_se_a.json | 2 +- examples/water/train/water_se_ar.json | 2 +- examples/water/train/water_se_r.json | 2 +- examples/water/train/water_srtab_example.json | 2 +- 13 files changed, 179 insertions(+), 12 deletions(-) create mode 100644 examples/water/train/input_v2_compat.json create mode 100644 examples/water/train/out.json diff --git a/doc/use-deepmd-kit.md b/doc/use-deepmd-kit.md index cbf7a91cbc..a6e3915b3b 100644 --- a/doc/use-deepmd-kit.md +++ b/doc/use-deepmd-kit.md @@ -156,7 +156,7 @@ An example of `training` is "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "profiling": false, @@ -189,7 +189,7 @@ During the training, the error of the model is tested every **`disp_freq`** batc ``` The first column displays the number of batches. The second and third columns display the loss function evaluated by `numb_test` frames randomly chosen from the test set and that evaluated by the current training batch, respectively. The fourth and fifth columns display the RMS energy error (normalized by number of atoms) evaluated by `numb_test` frames randomly chosen from the test set and that evaluated by the current training batch, respectively. The sixth and seventh columns display the RMS force error (component-wise) evaluated by `numb_test` frames randomly chosen from the test set and that evaluated by the current training batch, respectively. The last column displays the current learning rate. -Checkpoints will be written to files with prefix **`save_ckpt`** every **`save_freq`** batches. If **`restart`** is set to `true`, then the training will start from the checkpoint named **`load_ckpt`**, rather than from scratch. +Checkpoints will be written to files with prefix **`save_ckpt`** every **`save_freq`** batches. Several command line options can be passed to `dp train`, which can be checked with ```bash diff --git a/examples/fparam/train/input.json b/examples/fparam/train/input.json index c57afdfb7f..0455f11b42 100644 --- a/examples/fparam/train/input.json +++ b/examples/fparam/train/input.json @@ -51,7 +51,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "profiling": false, diff --git a/examples/fparam/train/input_aparam.json b/examples/fparam/train/input_aparam.json index 86be27ef29..5774130fe1 100644 --- a/examples/fparam/train/input_aparam.json +++ b/examples/fparam/train/input_aparam.json @@ -51,7 +51,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "profiling": false, diff --git a/examples/water/train/input_v2_compat.json b/examples/water/train/input_v2_compat.json new file mode 100644 index 0000000000..382ac9ce3c --- /dev/null +++ b/examples/water/train/input_v2_compat.json @@ -0,0 +1,75 @@ +{ + "_comment": "that's all", + "model": { + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "se_a", + "sel": [ + 46, + 92 + ], + "rcut_smth": 5.8, + "rcut": 6.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1, + "_comment": " that's all" + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "_comment": " that's all" + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-08, + "_comment": "that's all" + }, + "loss": { + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + }, + "training": { + "stop_batch": 1000000, + "seed": 1, + "_comment": "that's all", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 10, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + "training_data": { + "systems": [ + "../data/" + ], + "set_prefix": "set", + "batch_size": 1 + } + } +} \ No newline at end of file diff --git a/examples/water/train/out.json b/examples/water/train/out.json new file mode 100644 index 0000000000..ee867d9db5 --- /dev/null +++ b/examples/water/train/out.json @@ -0,0 +1,92 @@ +{ + "model": { + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "se_e2_a", + "sel": [ + 46, + 92 + ], + "rcut_smth": 5.8, + "rcut": 6.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1, + "activation_function": "tanh", + "type_one_side": false, + "precision": "float64", + "trainable": true, + "exclude_types": [], + "set_davg_zero": false + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "type": "ener", + "numb_fparam": 0, + "numb_aparam": 0, + "activation_function": "tanh", + "precision": "float64", + "trainable": true, + "rcond": 0.001, + "atom_ener": [] + }, + "data_stat_nbatch": 10, + "data_stat_protect": 0.01 + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-08 + }, + "loss": { + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "type": "ener", + "start_pref_ae": 0.0, + "limit_pref_ae": 0.0 + }, + "training": { + "seed": 1, + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 10, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + "training_data": { + "systems": [ + "../data/" + ], + "set_prefix": "set", + "batch_size": 1, + "auto_prob": "prob_sys_size", + "sys_probs": null + }, + "numb_steps": 1000000, + "validation_data": null, + "tensorboard": false, + "tensorboard_log_dir": "log" + } +} \ No newline at end of file diff --git a/examples/water/train/polar.json b/examples/water/train/polar.json index 60e3fa3494..3437747e49 100644 --- a/examples/water/train/polar.json +++ b/examples/water/train/polar.json @@ -53,7 +53,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "_comment": "that's all" diff --git a/examples/water/train/polar_se_a.json b/examples/water/train/polar_se_a.json index dc90e481ce..e0c2939722 100644 --- a/examples/water/train/polar_se_a.json +++ b/examples/water/train/polar_se_a.json @@ -51,7 +51,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "_comment": "that's all" diff --git a/examples/water/train/wannier.json b/examples/water/train/wannier.json index f23f5e0d62..06b3bc906d 100644 --- a/examples/water/train/wannier.json +++ b/examples/water/train/wannier.json @@ -54,7 +54,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "_comment": "that's all" diff --git a/examples/water/train/water.json b/examples/water/train/water.json index 23ba559aed..ccc47a5d3a 100644 --- a/examples/water/train/water.json +++ b/examples/water/train/water.json @@ -61,7 +61,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "_comment": "that's all" diff --git a/examples/water/train/water_se_a.json b/examples/water/train/water_se_a.json index cb005530c1..5c9b2d8e70 100644 --- a/examples/water/train/water_se_a.json +++ b/examples/water/train/water_se_a.json @@ -56,7 +56,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "profiling": false, diff --git a/examples/water/train/water_se_ar.json b/examples/water/train/water_se_ar.json index 2173f2e1d9..c97b3d1f70 100644 --- a/examples/water/train/water_se_ar.json +++ b/examples/water/train/water_se_ar.json @@ -67,7 +67,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "profiling": false, diff --git a/examples/water/train/water_se_r.json b/examples/water/train/water_se_r.json index 7faf55a3c3..3954e0b9aa 100644 --- a/examples/water/train/water_se_r.json +++ b/examples/water/train/water_se_r.json @@ -56,7 +56,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "profiling": false, diff --git a/examples/water/train/water_srtab_example.json b/examples/water/train/water_srtab_example.json index f2a0a4a39c..3baabd44fd 100644 --- a/examples/water/train/water_srtab_example.json +++ b/examples/water/train/water_srtab_example.json @@ -64,7 +64,7 @@ "numb_test": 10, "save_freq": 1000, "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", + "disp_training":true, "time_training":true, "_comment": "that's all" From bb492d7cfabe0e05acf4f782855f2616ff5d6231 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 9 Aug 2021 11:05:04 +0800 Subject: [PATCH 2/2] rm wrong files --- examples/water/train/input_v2_compat.json | 75 ------------------ examples/water/train/out.json | 92 ----------------------- 2 files changed, 167 deletions(-) delete mode 100644 examples/water/train/input_v2_compat.json delete mode 100644 examples/water/train/out.json diff --git a/examples/water/train/input_v2_compat.json b/examples/water/train/input_v2_compat.json deleted file mode 100644 index 382ac9ce3c..0000000000 --- a/examples/water/train/input_v2_compat.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_comment": "that's all", - "model": { - "type_map": [ - "O", - "H" - ], - "descriptor": { - "type": "se_a", - "sel": [ - 46, - 92 - ], - "rcut_smth": 5.8, - "rcut": 6.0, - "neuron": [ - 25, - 50, - 100 - ], - "resnet_dt": false, - "axis_neuron": 16, - "seed": 1, - "_comment": " that's all" - }, - "fitting_net": { - "neuron": [ - 240, - 240, - 240 - ], - "resnet_dt": true, - "seed": 1, - "_comment": " that's all" - }, - "_comment": " that's all" - }, - "learning_rate": { - "type": "exp", - "decay_steps": 5000, - "start_lr": 0.001, - "stop_lr": 3.51e-08, - "_comment": "that's all" - }, - "loss": { - "start_pref_e": 0.02, - "limit_pref_e": 1, - "start_pref_f": 1000, - "limit_pref_f": 1, - "start_pref_v": 0, - "limit_pref_v": 0, - "_comment": " that's all" - }, - "training": { - "stop_batch": 1000000, - "seed": 1, - "_comment": "that's all", - "disp_file": "lcurve.out", - "disp_freq": 100, - "numb_test": 10, - "save_freq": 1000, - "save_ckpt": "model.ckpt", - "disp_training": true, - "time_training": true, - "profiling": false, - "profiling_file": "timeline.json", - "training_data": { - "systems": [ - "../data/" - ], - "set_prefix": "set", - "batch_size": 1 - } - } -} \ No newline at end of file diff --git a/examples/water/train/out.json b/examples/water/train/out.json deleted file mode 100644 index ee867d9db5..0000000000 --- a/examples/water/train/out.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "model": { - "type_map": [ - "O", - "H" - ], - "descriptor": { - "type": "se_e2_a", - "sel": [ - 46, - 92 - ], - "rcut_smth": 5.8, - "rcut": 6.0, - "neuron": [ - 25, - 50, - 100 - ], - "resnet_dt": false, - "axis_neuron": 16, - "seed": 1, - "activation_function": "tanh", - "type_one_side": false, - "precision": "float64", - "trainable": true, - "exclude_types": [], - "set_davg_zero": false - }, - "fitting_net": { - "neuron": [ - 240, - 240, - 240 - ], - "resnet_dt": true, - "seed": 1, - "type": "ener", - "numb_fparam": 0, - "numb_aparam": 0, - "activation_function": "tanh", - "precision": "float64", - "trainable": true, - "rcond": 0.001, - "atom_ener": [] - }, - "data_stat_nbatch": 10, - "data_stat_protect": 0.01 - }, - "learning_rate": { - "type": "exp", - "decay_steps": 5000, - "start_lr": 0.001, - "stop_lr": 3.51e-08 - }, - "loss": { - "start_pref_e": 0.02, - "limit_pref_e": 1, - "start_pref_f": 1000, - "limit_pref_f": 1, - "start_pref_v": 0, - "limit_pref_v": 0, - "type": "ener", - "start_pref_ae": 0.0, - "limit_pref_ae": 0.0 - }, - "training": { - "seed": 1, - "disp_file": "lcurve.out", - "disp_freq": 100, - "numb_test": 10, - "save_freq": 1000, - "save_ckpt": "model.ckpt", - "disp_training": true, - "time_training": true, - "profiling": false, - "profiling_file": "timeline.json", - "training_data": { - "systems": [ - "../data/" - ], - "set_prefix": "set", - "batch_size": 1, - "auto_prob": "prob_sys_size", - "sys_probs": null - }, - "numb_steps": 1000000, - "validation_data": null, - "tensorboard": false, - "tensorboard_log_dir": "log" - } -} \ No newline at end of file