Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
f5e64e8
feat: reinforcement learning PR#2; several additions/improvements to …
gabriel-trigo Jun 11, 2025
625a807
Update pyproject.toml
s2t2 Jun 12, 2025
931a1cf
fix: fix linting errors of previous commit
gabriel-trigo Jun 12, 2025
e637727
Update PR Template
s2t2 Jun 23, 2025
e3494ce
Update PR Template
s2t2 Jun 23, 2025
9bcd483
Restore original formatting
s2t2 Jun 23, 2025
ee3aa54
Restore original formatting
s2t2 Jun 23, 2025
aed2490
Clean top of files
s2t2 Jun 24, 2025
365f5bb
Refactor filepaths
s2t2 Jun 24, 2025
2d7eb23
Refactor filepaths
s2t2 Jun 24, 2025
737ada8
Refactor and test temp conversion functions; closes #25
s2t2 Jun 24, 2025
fec51ad
Refactor temp conversion tests
s2t2 Jun 24, 2025
59115bc
Review eval script
s2t2 Jun 24, 2025
8ab99ea
Remove redundant variable setting
s2t2 Jun 24, 2025
1b5a354
Fix failing test
s2t2 Jun 24, 2025
667157c
Repro generate configs script; use absl flags because argparse not wo…
s2t2 Jun 26, 2025
649600d
Update gitignore
s2t2 Jun 26, 2025
5da4ccd
Test config file generation
s2t2 Jun 26, 2025
4b3f27e
Test read config file
s2t2 Jun 26, 2025
b9ae207
Fix file names - remove quote
s2t2 Jul 10, 2025
5d12c7c
Describe the config generation script
s2t2 Jul 10, 2025
b47ef32
Flags WIP
s2t2 Jul 11, 2025
94762ef
Attempt to reproduce starter buffer script; fix #115
s2t2 Jul 28, 2025
5173c3e
Test starter buffer population
s2t2 Jul 29, 2025
4edffd8
Refactor test: use setup, teardown, and temp dir
s2t2 Jul 29, 2025
43490b4
WIP - reproduce train script, run into known issue
s2t2 Aug 11, 2025
9426717
Hotfix known issue
s2t2 Aug 11, 2025
9238d38
Generate example starter buffers for training and testing
s2t2 Aug 12, 2025
f4fb406
WIP - refactor and test RL agent trainer
s2t2 Aug 12, 2025
58dceef
Regenerate starter buffer for testing
s2t2 Aug 13, 2025
c1d92a0
Decrease number of training steps when testing
s2t2 Aug 13, 2025
51992ff
WIP - reproducing eval script - encounter env config errors
s2t2 Aug 15, 2025
9027045
Reproduce eval script
s2t2 Aug 22, 2025
752dbf7
WIP - refactor eval script; need to save schedule policy results char…
s2t2 Aug 22, 2025
15b9a81
feat(rl): fix replay buffer integration, add seeding & tests, and har…
yuktakul04 Oct 14, 2025
dc09ccf
fix(replay): make dm-reverb optional; fallback to TFUniform on macOS/…
yuktakul04 Oct 14, 2025
097acf9
fix(replay): TFUniform fallback with batched observer; dm-reverb opti…
yuktakul04 Oct 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
WIP - reproduce train script, run into known issue
  • Loading branch information
s2t2 authored and yuktakul04 committed Oct 17, 2025
commit 43490b45e16583e81a9bf382467a8c9f8e8fc620
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ smart_control/simulator/videos
smart_control/reinforcement_learning/data/starter_buffers/*
!smart_control/reinforcement_learning/data/starter_buffers/.gitkeep

smart_control/reinforcement_learning/experiment_results/
smart_control/reinforcement_learning/experiment_results/*

smart_control/reinforcement_learning/data/experiment_results/*
!smart_control/reinforcement_learning/data/experiment_results/.gitkeep

# jupyter notebook checkpoints:
smart_control/notebooks/.ipynb_checkpoints/
Expand Down
8 changes: 8 additions & 0 deletions docs/guides/reinforcement_learning/scripts.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ python -m smart_control.reinforcement_learning.scripts.populate_starter_buffer \

## Training

Train a reinforcement learning agent.

Using default configuration:

```sh
python -m smart_control.reinforcement_learning.scripts.train --experiment_name my-experiment-1
```

Specifying a starter buffer path:

```sh
python -m smart_control.reinforcement_learning.scripts.train \
--starter-buffer-path path/to/the/starter/buffer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
bootstrap the training process.
"""

# from datetime import datetime
from datetime import datetime
import logging
import os
from typing import Sequence
Expand All @@ -23,7 +23,7 @@
from smart_control.reinforcement_learning.observers.print_status_observer import PrintStatusObserver
from smart_control.reinforcement_learning.policies.schedule_policy import create_baseline_schedule_policy
from smart_control.reinforcement_learning.replay_buffer.replay_buffer import ReplayBufferManager
from smart_control.reinforcement_learning.utils.constants import DEFAULT_CONFIG_FILEPATH
from smart_control.reinforcement_learning.utils.constants import ONE_DAY_CONFIG_FILEPATH
from smart_control.reinforcement_learning.utils.constants import RL_STARTER_BUFFERS_DIR
from smart_control.reinforcement_learning.utils.environment import create_and_setup_environment
from smart_control.utils.constants import ROOT_DIR
Expand Down Expand Up @@ -75,7 +75,7 @@
)
CONFIG_FILEPATH = flags.DEFINE_string(
name='config_filepath',
default=DEFAULT_CONFIG_FILEPATH,
default=ONE_DAY_CONFIG_FILEPATH,
help='Environment config file',
)
CAPACITY = flags.DEFINE_integer(
Expand Down Expand Up @@ -260,10 +260,9 @@ def main(argv: Sequence[str]):
config_filepath = os.path.join(ROOT_DIR, config_filepath)

buffer_name = FLAGS.buffer_name
# if buffer_filename is None:
# buffer_filename = 'buffer_' + datetime.now().strftime('%Y%m%d_%H%M%S')
if not os.path.isabs(buffer_name):
buffer_dirpath = os.path.join(RL_STARTER_BUFFERS_DIR, buffer_name)
if buffer_name is None:
buffer_name = 'buffer_' + datetime.now().strftime('%Y%m%d_%H%M%S')
buffer_dirpath = os.path.join(RL_STARTER_BUFFERS_DIR, buffer_name)

populate_replay_buffer(
buffer_dirpath=buffer_dirpath, # pylint:disable=possibly-used-before-assignment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from tf_agents.trajectories.trajectory import Trajectory

from smart_control.reinforcement_learning.scripts.populate_starter_buffer import populate_replay_buffer
from smart_control.reinforcement_learning.utils.constants import DEFAULT_CONFIG_FILEPATH
from smart_control.reinforcement_learning.utils.constants import ONE_DAY_CONFIG_FILEPATH


class StarterBufferPopulationTest(absltest.TestCase):
Expand All @@ -34,7 +34,7 @@ def test_starter_buffer_population(self):
steps_per_run = 5 # default:100
replay_buffer = populate_replay_buffer(
buffer_dirpath=self.buffer_dirpath,
config_filepath=DEFAULT_CONFIG_FILEPATH,
config_filepath=ONE_DAY_CONFIG_FILEPATH,
buffer_capacity=capacity,
steps_per_run=steps_per_run,
num_runs=1, # default:5
Expand Down
Loading