WIP - reproduce train script; ran into a known issue

google · yuktakul04 · Jun 11, 2025 · Jun 12, 2025 · Jun 12, 2025 · Jun 23, 2025
commit 43490b45e16583e81a9bf382467a8c9f8e8fc620
diff --git a/.gitignore b/.gitignore
@@ -39,7 +39,10 @@ smart_control/simulator/videos
 smart_control/reinforcement_learning/data/starter_buffers/*
 !smart_control/reinforcement_learning/data/starter_buffers/.gitkeep
 
-smart_control/reinforcement_learning/experiment_results/
+smart_control/reinforcement_learning/experiment_results/*
+
+smart_control/reinforcement_learning/data/experiment_results/*
+!smart_control/reinforcement_learning/data/experiment_results/.gitkeep
 
 # jupyter notebook checkpoints:
 smart_control/notebooks/.ipynb_checkpoints/

diff --git a/docs/guides/reinforcement_learning/scripts.md b/docs/guides/reinforcement_learning/scripts.md
@@ -53,6 +53,14 @@ python -m smart_control.reinforcement_learning.scripts.populate_starter_buffer \
 
 ## Training
 
+Train a reinforcement learning agent.
+
+Using default configuration:
+
+```sh
+python -m smart_control.reinforcement_learning.scripts.train --experiment_name my-experiment-1
+```
+
 ```sh
 python -m smart_control.reinforcement_learning.scripts.train \
     --starter-buffer-path path/to/the/starter/buffer

diff --git a/smart_control/reinforcement_learning/scripts/populate_starter_buffer.py b/smart_control/reinforcement_learning/scripts/populate_starter_buffer.py
@@ -4,7 +4,7 @@
 bootstrap the training process.
 """
 
-# from datetime import datetime
+from datetime import datetime
 import logging
 import os
 from typing import Sequence
@@ -23,7 +23,7 @@
 from smart_control.reinforcement_learning.observers.print_status_observer import PrintStatusObserver
 from smart_control.reinforcement_learning.policies.schedule_policy import create_baseline_schedule_policy
 from smart_control.reinforcement_learning.replay_buffer.replay_buffer import ReplayBufferManager
-from smart_control.reinforcement_learning.utils.constants import DEFAULT_CONFIG_FILEPATH
+from smart_control.reinforcement_learning.utils.constants import ONE_DAY_CONFIG_FILEPATH
 from smart_control.reinforcement_learning.utils.constants import RL_STARTER_BUFFERS_DIR
 from smart_control.reinforcement_learning.utils.environment import create_and_setup_environment
 from smart_control.utils.constants import ROOT_DIR
@@ -75,7 +75,7 @@
 )
 CONFIG_FILEPATH = flags.DEFINE_string(
     name='config_filepath',
-    default=DEFAULT_CONFIG_FILEPATH,
+    default=ONE_DAY_CONFIG_FILEPATH,
     help='Environment config file',
 )
 CAPACITY = flags.DEFINE_integer(
@@ -260,10 +260,9 @@ def main(argv: Sequence[str]):
     config_filepath = os.path.join(ROOT_DIR, config_filepath)
 
   buffer_name = FLAGS.buffer_name
-  # if buffer_filename is None:
-  #  buffer_filename = 'buffer_' + datetime.now().strftime('%Y%m%d_%H%M%S')
-  if not os.path.isabs(buffer_name):
-    buffer_dirpath = os.path.join(RL_STARTER_BUFFERS_DIR, buffer_name)
+  if buffer_name is None:
+    buffer_name = 'buffer_' + datetime.now().strftime('%Y%m%d_%H%M%S')
+  buffer_dirpath = os.path.join(RL_STARTER_BUFFERS_DIR, buffer_name)
 
   populate_replay_buffer(
       buffer_dirpath=buffer_dirpath,  # pylint:disable=possibly-used-before-assignment

diff --git a/smart_control/reinforcement_learning/scripts/populate_starter_buffer_test.py b/smart_control/reinforcement_learning/scripts/populate_starter_buffer_test.py
@@ -12,7 +12,7 @@
 from tf_agents.trajectories.trajectory import Trajectory
 
 from smart_control.reinforcement_learning.scripts.populate_starter_buffer import populate_replay_buffer
-from smart_control.reinforcement_learning.utils.constants import DEFAULT_CONFIG_FILEPATH
+from smart_control.reinforcement_learning.utils.constants import ONE_DAY_CONFIG_FILEPATH
 
 
 class StarterBufferPopulationTest(absltest.TestCase):
@@ -34,7 +34,7 @@ def test_starter_buffer_population(self):
     steps_per_run = 5  # default:100
     replay_buffer = populate_replay_buffer(
         buffer_dirpath=self.buffer_dirpath,
-        config_filepath=DEFAULT_CONFIG_FILEPATH,
+        config_filepath=ONE_DAY_CONFIG_FILEPATH,
         buffer_capacity=capacity,
         steps_per_run=steps_per_run,
         num_runs=1,  # default:5