diff --git a/deepmd/pd/train/training.py b/deepmd/pd/train/training.py index c914ee46a8..d72c270667 100644 --- a/deepmd/pd/train/training.py +++ b/deepmd/pd/train/training.py @@ -599,6 +599,49 @@ def warm_up_linear(step, warmup_steps): else: raise ValueError(f"Not supported optimizer type '{self.opt_type}'") + # NOTE: to_static + compiler should be before distributed wrapper + if CINN: + from paddle import ( + jit, + static, + ) + + backend = "CINN" if CINN else None + self.wrapper.forward = jit.to_static( + backend=backend, + input_spec=[ + static.InputSpec([1, -1, 3], "float64", name="coord"), # coord + static.InputSpec([1, -1], "int32", name="atype"), # atype + None, # spin + static.InputSpec([1, 9], "float64", name="box"), # box + static.InputSpec([], "float64", name="cur_lr"), # cur_lr + { + "find_box": np.float32(1.0), + "find_coord": np.float32(1.0), + "find_numb_copy": np.float32(0.0), + "numb_copy": static.InputSpec( + [1, 1], "int64", name="numb_copy" + ), + "find_energy": np.float32(1.0), + "energy": static.InputSpec([1, 1], "float64", name="energy"), + "find_force": np.float32(1.0), + "force": static.InputSpec([1, -1, 3], "float64", name="force"), + "natoms": static.InputSpec([1, -1], "int32", name="natoms"), + }, # label, + # None, # task_key + # False, # inference_only + # False, # do_atomic_virial + # None, # fparam + # None, # aparam + ], + full_graph=True, + )(self.wrapper.forward) + + log.info( + "Enable CINN during training, there may be some additional " + "compilation time in the first traning step." + ) + if dist.is_available() and dist.is_initialized(): # DDP will guarantee the model parameters are identical across all processes self.wrapper = fleet.distributed_model( @@ -631,20 +674,6 @@ def warm_up_linear(step, warmup_steps): self.profiling_file = training_params.get("profiling_file", "timeline.json") def run(self) -> None: - if CINN: - from paddle import ( - jit, - ) - - backend = "CINN" if CINN else None - self.wrapper.forward = jit.to_static(full_graph=True, backend=backend)( - self.wrapper.forward - ) - log.info( - "Enable CINN during training, there may be some additional " - "compilation time in the first traning step." - ) - fout = ( open( self.disp_file, diff --git a/source/api_cc/src/DeepPotPD.cc b/source/api_cc/src/DeepPotPD.cc index c5f9391ca9..3a3d880c4b 100644 --- a/source/api_cc/src/DeepPotPD.cc +++ b/source/api_cc/src/DeepPotPD.cc @@ -120,6 +120,24 @@ void DeepPotPD::init(const std::string& model, std::cout << "load model from: " << model << " to gpu:" << gpu_id << std::endl; } + if (config->cinn_enabled()) { + std::cout << "model.forward will be compiled with cinn." << std::endl; + } else { + std::cout << "NOTE: You can try: \n'export FLAGS_prim_all=true" + " FLAGS_enable_pir_in_executor=1" + " FLAGS_prim_enable_dynamic=true FLAGS_use_cinn=true'\n" + "to speed up C++ inference with paddle backend" + << std::endl; + } + if (config_fl->cinn_enabled()) { + std::cout << "model.forward_lower will be compiled with cinn." << std::endl; + } else { + std::cout << "NOTE: You can try: \n'export FLAGS_prim_all=true" + " FLAGS_enable_pir_in_executor=1" + " FLAGS_prim_enable_dynamic=true FLAGS_use_cinn=true'\n" + "to speed up C++ inference with paddle backend" + << std::endl; + } // NOTE: Both set to 1 now. // get_env_nthreads(num_intra_nthreads,