optimize codecov

BalaBalaYi · BalaBalaYi · commit 8b5acb24546f · 2025-05-13T15:25:11.000+08:00
diff --git a/codecov.yml b/codecov.yml
@@ -4,7 +4,6 @@ coverage:
   range: 60..90
   round: down
   ignore:
-    - "dlrover/python/tests"  # test codes
     - "dlrover/python/rl/tests"  # test codes
     - "dlrover/trainer/tests"  # test codes
     - "dlrover/trainer/torch/flash_checkpoint/megatron_dist_ckpt.py"
diff --git a/dlrover/python/rl/common/constant.py b/dlrover/python/rl/common/constant.py
@@ -14,7 +14,7 @@
 
 class RLMasterConstant(object):
     JOB_CONTEXT_STATE_KEY = "job-context"
-    SCHEDULING_TIMEOUT_MIN_SECS = 30
+    SCHEDULING_TIMEOUT_MIN_SECS = 60
     SCHEDULING_TIMEOUT_PER_ACTOR_SECS = 2
     SETUP_TIMEOUT_MIN_SECS = 10
     SETUP_TIMEOUT_PER_ACTOR_SECS = 1
diff --git a/dlrover/python/rl/master/scheduler.py b/dlrover/python/rl/master/scheduler.py
@@ -176,6 +176,7 @@ def __create_actor_by_vertex(
                 num_cpus=vertex.resource.cpu,
                 memory=vertex.resource.memory,
                 num_gpus=vertex.resource.gpu,
+                max_restarts=-1,
                 runtime_env=self.__get_runtime_env(vertex),
             )
 
diff --git a/dlrover/python/rl/tests/api/test_api.py b/dlrover/python/rl/tests/api/test_api.py
@@ -88,20 +88,49 @@ def test_basic(self):
         self.assertEqual(len(rl_config["workload_group"]), 1)
 
     def test_validation(self):
+        # without node num
         with self.assertRaises(InvalidRLConfiguration):
             RLJobBuilder().build()
+
+        # without device per node
+        with self.assertRaises(InvalidRLConfiguration):
             RLJobBuilder().node_num(1).build()
+
+        # invalid device type
+        with self.assertRaises(InvalidRLConfiguration):
             RLJobBuilder().node_num(1).device_per_node(1).device_type(
                 "mem"
             ).build()
+
+        # without config
+        with self.assertRaises(InvalidRLConfiguration):
             RLJobBuilder().node_num(1).device_per_node(1).build()
+
+        # without trainer
+        with self.assertRaises(InvalidRLConfiguration):
             RLJobBuilder().node_num(1).device_per_node(1).config(
                 {"k1": "v1"}
             ).build()
+
+        # without actor
+        with self.assertRaises(InvalidRLConfiguration):
             RLJobBuilder().node_num(1).device_per_node(1).config(
                 {"k1": "v1"}
             ).trainer("m0", "c0").build()
 
+        # invalid collocation
+        with self.assertRaises(InvalidRLConfiguration):
+            RLJobBuilder().node_num(1).device_per_node(1).config(
+                {"k1": "v1"}
+            ).trainer("m0", "c0").actor("m1", "c1").rollout("m2", "c2").reward(
+                "m3", "c3"
+            ).with_collocation(
+                "actor", "rollout"
+            ).with_collocation(
+                "rollout", "reward"
+            ).build()
+
+        # a minimal valid configuration
         RLJobBuilder().node_num(1).device_per_node(1).config(
             {"k1": "v1"}
         ).trainer("m0", "c0").actor("m1", "c1").build()
diff --git a/dlrover/python/rl/tests/master/test_graph.py b/dlrover/python/rl/tests/master/test_graph.py
@@ -21,7 +21,12 @@
 from dlrover.python.rl.common.constant import RLMasterConstant
 from dlrover.python.rl.common.enums import RLRoleType
 from dlrover.python.rl.common.rl_context import RLContext
-from dlrover.python.rl.master.graph import RLExecutionGraph
+from dlrover.python.rl.master.graph import (
+    FunctionInfo,
+    RLExecutionEdge,
+    RLExecutionGraph,
+    VertexInvocationMeta,
+)
 from dlrover.python.rl.master.scheduler import GroupOrderedScheduler
 from dlrover.python.rl.tests.test_class import TestActor, TestRollout
 from dlrover.python.rl.tests.test_data import TestData
@@ -53,6 +58,8 @@ def test_basic(self):
         self.assertEqual(len(graph.get_all_vertices()), 1 + 1 + 1 + 1)
         self.assertEqual(len(graph.name_vertex_mapping), 1 + 1 + 1 + 1)
         self.assertEqual(len(graph.name_actor_mapping), 0)
+        # not used for now
+        self.assertEqual(len(graph.execution_edges), 0)
 
         actor_vertices = graph.get_vertices_by_role_type(RLRoleType.ACTOR)
         self.assertEqual(len(actor_vertices), 1)
@@ -69,6 +76,8 @@ def test_basic(self):
         self.assertEqual(rollout_vertex_0.rank, 0)
         self.assertEqual(rollout_vertex_0.world_size, 1)
 
+        self.assertIsNotNone(graph.get_unit_resource_by_role(RLRoleType.ACTOR))
+
         now = int(time.time())
         rollout_vertex_0.update_runtime_info(
             create_time=now, hostname="test.com", restart_count=2
@@ -199,3 +208,28 @@ def test_serialization(self):
                 vertex.pg_bundle_index,
                 graph.name_vertex_mapping[name].pg_bundle_index,
             )
+
+    def test_vertex_invocation_meta(self):
+        def test_input():
+            pass
+
+        function_info = FunctionInfo("test", test_input)
+        self.assertIsNotNone(function_info)
+        self.assertEqual(function_info.name, "test")
+
+        vertex_invocation_meta = VertexInvocationMeta(
+            {function_info.name: function_info}
+        )
+        self.assertIsNotNone(vertex_invocation_meta)
+        self.assertEqual(
+            vertex_invocation_meta.get_func("test"), function_info
+        )
+
+    def test_edge_basic(self):
+        edge = RLExecutionEdge(0, RLRoleType.ACTOR, RLRoleType.ROLLOUT, "test")
+        self.assertIsNotNone(edge)
+        self.assertEqual(edge.index, 0)
+        self.assertEqual(edge.from_role, RLRoleType.ACTOR)
+        self.assertEqual(edge.to_role, RLRoleType.ROLLOUT)
+        self.assertEqual(edge.invocation_name, "test")
+        self.assertIsNone(edge.async_group)
diff --git a/dlrover/python/rl/tests/trainer/test_trainer.py b/dlrover/python/rl/tests/trainer/test_trainer.py
@@ -11,11 +11,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest
+from unittest import mock
 
 from dlrover.python.rl.common.enums import RLRoleType
 from dlrover.python.rl.tests.test_class import (
     TestActor,
     TestInteractiveTrainer,
+    TestRollout,
 )
 from dlrover.python.rl.trainer.trainer import RoleGroupProxy
 
@@ -57,7 +59,32 @@ def test_role_group_proxy(self):
         self.assertTrue(role_group._can_shard_invocation())
         with self.assertRaises(AttributeError):
             role_group.test0()
+        with self.assertRaises(AttributeError):
             role_group.test1()
+        with self.assertRaises(AttributeError):
             role_group.test2()
+        with self.assertRaises(AttributeError):
             role_group.test3()
+        with self.assertRaises(AttributeError):
             role_group.test4()
+
+        trainer = TestInteractiveTrainer(
+            {RLRoleType.ACTOR: [], RLRoleType.ROLLOUT: []},
+            {
+                RLRoleType.ACTOR: (TestActor, 1),
+                RLRoleType.ROLLOUT: (TestRollout, 1),
+            },
+            {},
+        )
+        self.assertIsNotNone(trainer)
+
+        trainer.RG_ACTOR._actor_handles = mock.MagicMock(
+            return_value=[mock.Mock()]
+        )
+        trainer.RG_ACTOR.test0()
+        trainer.RG_ACTOR.test1()
+        trainer.RG_ACTOR.test2()
+        with self.assertRaises(Exception):
+            trainer.RG_ACTOR.test3()
+        with self.assertRaises(Exception):
+            trainer.RG_ACTOR.test4()
diff --git a/dlrover/python/util/common_util.py b/dlrover/python/util/common_util.py
@@ -193,7 +193,7 @@ def get_methods_by_class(clz: type, with_protect=False):
 
     result = []
     for name, method in inspect.getmembers(clz):
-        if not inspect.isfunction(method):
+        if not inspect.ismethod(method) and not inspect.isfunction(method):
             continue
         if not with_protect and name.startswith("_"):
             continue

Original file line number	Diff line number	Diff line change
`@@ -176,6 +176,7 @@ def __create_actor_by_vertex(`
`176`	`176`	`num_cpus=vertex.resource.cpu,`
`177`	`177`	`memory=vertex.resource.memory,`
`178`	`178`	`num_gpus=vertex.resource.gpu,`
	`179`	`+ max_restarts=-1,`
`179`	`180`	`runtime_env=self.__get_runtime_env(vertex),`
`180`	`181`	`)`
`181`	`182`