Make fast mode an option in cfg

Jianhao-zheng · Jianhao-zheng · commit f45ba24d226c · 2025-03-06T13:38:49.000-08:00
diff --git a/.gitignore b/.gitignore
@@ -9,7 +9,7 @@ __pycache__/
 
 pretrained/
 
-output/
+output*/
 
 .vscode/
 
diff --git a/configs/Dynamic/Wild_SLAM_Mocap/ball.yaml b/configs/Dynamic/Wild_SLAM_Mocap/ball.yaml
@@ -2,4 +2,4 @@ inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml
 scene: our_basketball
 
 data:
-  input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/basketball
+  input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/ball
diff --git a/configs/Dynamic/Wild_SLAM_Mocap/person_tracking.yaml b/configs/Dynamic/Wild_SLAM_Mocap/person_tracking.yaml
@@ -1,8 +1,8 @@
 inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml
-scene: person_tracking2
+scene: person_tracking
 
 data:
-  input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/person_tracking2
+  input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/person_tracking
   
 cam: 
   fx: 647.5684814453125
diff --git a/configs/wildgs_slam.yaml b/configs/wildgs_slam.yaml
@@ -3,13 +3,14 @@ gui: False
 stride: 1            # use every X image from the dataset 
 max_frames: -1       # use the first X images from the dataset, -1 means using all
 setup_seed: 43
+fast_mode: False
 device: "cuda:0"
 
 mapping:
   online_plotting: False # render and save images online
   full_resolution: False # if using the full resolution for mapping, but we always keep downsampled size for tracking
-  final_refine_iters: 3000 # iterations of final refinement
-  eval_before_final_ba: True
+  final_refine_iters: 20000 # iterations of final refinement; forced to 3000 when fast_mode is on
+  eval_before_final_ba: False
   deform_gaussians: True # apply transformation on Gaussians to account for loop closure and BA
   pcd_downsample: 32 # downsamples the unprojected depth map --> point cloud
   pcd_downsample_init: 16 # first frame downsampling factor is smaller
@@ -140,7 +141,7 @@ cam:
 mono_prior:
   # Metric depth model, only support: 
   #     metric3d_vit_small, metric3d_vit_large and metric3d_vit_giant2
-  #     dpt2_{vits,vitb,vitl}_{hypersim,vkitti}_{20,80}  (see src/mono_estimator.py for detail)
+  #     dpt2_{vits,vitb,vitl}_{hypersim,vkitti}_{20,80}  (see src/utils/mono_priors/metric_depth_estimators.py for detail)
   #             e.g. dpt2_vitl_hypersim_20, dpt2_vitl_vkitti_80
   depth: 'metric3d_vit_large'
 
diff --git a/run.py b/run.py
@@ -26,6 +26,9 @@ def setup_seed(seed):
 
     cfg = config.load_config(args.config)
     setup_seed(cfg['setup_seed'])
+    if cfg['fast_mode']:
+        # Force the final refine iterations to be 3000 if in fast mode
+        cfg['mapping']['final_refine_iters'] = 3000
 
     output_dir = cfg['data']['output']
     output_dir = output_dir+f"/{cfg['scene']}"
diff --git a/src/frontend.py b/src/frontend.py
@@ -61,8 +61,9 @@ def __update(self, force_to_add_keyframe):
         for itr in range(self.iters1):
             self.graph.update(None, None, use_inactive=True)
 
-            # if itr == 1 and self.video.metric_depth_reg and self.cfg['tracking']["uncertainty_params"]['activate']:
-            #     self.video.filter_high_err_mono_depth(self.t1-1,self.graph.ii,self.graph.jj)
+            if not self.cfg['fast_mode']:
+                if itr == 1 and self.video.metric_depth_reg and self.cfg['tracking']["uncertainty_params"]['activate']:
+                    self.video.filter_high_err_mono_depth(self.t1-1,self.graph.ii,self.graph.jj)
 
         d = self.video.distance([self.t1-2], [self.t1-1], beta=self.beta, bidirectional=True)
         # Ssee self.max_consecutive_drop_of_keyframes in initi for explanation of the following process
diff --git a/src/mapper.py b/src/mapper.py
@@ -241,12 +241,20 @@ def run(self):
             self.keyframe_optimizers = torch.optim.Adam(opt_params)
 
             with Lock():
-                if video_idx % 4 == 0:
+                if self.config['fast_mode']:
+                    # We are in fast mode,
+                    # update map and uncertainty MLP every 4 key frames
+                    if video_idx % 4 == 0:
+                        gaussian_split = self.map_opt_online(
+                            self.current_window, iters=self.mapping_itr_num
+                        )
+                    else:
+                        self._update_occ_aware_visibility(self.current_window)
+                else:
                     gaussian_split = self.map_opt_online(
                         self.current_window, iters=self.mapping_itr_num
                     )
-                else:
-                    self._update_occ_aware_visibility(self.current_window)
+
                 if gaussian_split:
                     # do one more iteration after densify and prune
                     self.map_opt_online(self.current_window, iters=1)
diff --git a/src/slam.py b/src/slam.py
@@ -209,6 +209,7 @@ def terminate(self):
             self.stream,
             self.logger,
             self.printer,
+            self.cfg['fast_mode'],
         )
 
         self.mapper.gaussians.save_ply(f"{self.save_dir}/final_gs.ply")
diff --git a/src/utils/eval_traj.py b/src/utils/eval_traj.py
@@ -140,18 +140,20 @@ def kf_traj_eval(npz_path, plot_parent_dir,plot_name, stream, logger,printer):
     return ape_statistics, s, r_a, t_a
 
 
-def full_traj_eval(traj_filler, mapper, plot_parent_dir, plot_name, stream,logger,printer):
+def full_traj_eval(traj_filler, mapper, plot_parent_dir, plot_name, stream, logger, printer, fast_mode=False):
     traj_est_inv, dino_feats = traj_filler(stream)
     traj_est_lietorch = traj_est_inv.inv()
     traj_est = traj_est_lietorch.matrix().data.cpu().numpy()
 
-    # ## refine non-keyframe-traj from the mapping
-    # for i in tqdm(range(traj_est.shape[0])):
-    #     img_feat = dino_feats[i]
-    #     w2c_refined = mapper.refine_pose_non_key_frame(i,
-    #                                                 torch.tensor(np.linalg.inv(traj_est[i])),
-    #                                                 features=img_feat)
-    #     traj_est[i] = np.linalg.inv(w2c_refined.cpu().numpy())
+    if not fast_mode:
+        # refine non-keyframe-traj from the mapping
+        # this is time-consuming with minimal tracking improvement
+        for i in tqdm(range(traj_est.shape[0])):
+            img_feat = dino_feats[i]
+            w2c_refined = mapper.refine_pose_non_key_frame(i,
+                                                        torch.tensor(np.linalg.inv(traj_est[i])),
+                                                        features=img_feat)
+            traj_est[i] = np.linalg.inv(w2c_refined.cpu().numpy())
 
     kf_num = traj_filler.video.counter.value
     kf_timestamps = traj_filler.video.timestamp[:kf_num].cpu().int().numpy()

Original file line number	Diff line number	Diff line change
`@@ -209,6 +209,7 @@ def terminate(self):`
`209`	`209`	`self.stream,`
`210`	`210`	`self.logger,`
`211`	`211`	`self.printer,`
	`212`	`+ self.cfg['fast_mode'],`
`212`	`213`	`)`
`213`	`214`
`214`	`215`	`self.mapper.gaussians.save_ply(f"{self.save_dir}/final_gs.ply")`