upload nocs evaluation code

qq456cvb · qq456cvb · commit 177936fa705a · 2022-03-11T10:40:14.000+08:00
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 checkpoints/
-**/__pycache__
+**/__pycache__
+data/nocs*
diff --git a/README.md b/README.md
@@ -20,7 +20,7 @@ CVPR 2022
 <a href='https://qq456cvb.github.io/projects/cppf'>
   <img src='https://img.shields.io/badge/Project-Page-green?style=flat&logo=googlechrome&logoColor=green' alt='Project Page'>
 </a>
-  <a href='https://colab.research.google.com/'>
+  <a href='#'>
     <img src='https://colab.research.google.com/assets/colab-badge.svg' alt='Google Colab'>
   </a>
 <br>
@@ -86,24 +86,62 @@ python train.py category=bathtub,bed,bookshelf,chair,sofa,table -m
 
 For Laptops, geometry alone cannot determine the pose unambiguously, we rely on an auxiliary segmentation network that segments out the lid and the keyboard base.
 
-To train the segmenter network, first download our Blender physically rendered laptop images from [Google Drive]() and place it under ``data/laptop``. Then run the following command:
+To train the segmenter network, first download our Blender physically rendered laptop images from [Google Drive](https://drive.google.com/file/d/1gRHGt47nP9arDAu3hwnDNgfwJMxJYtCa/view?usp=sharing) and place them under ``data/laptop``. Then run the following command:
 ```
 python train_laptop_aux.py
 ```
 </details>
 
 
 # Pretrained Models
+Pretrained models for various ShapeNet categories can be downloaded from [Google Drive](https://drive.google.com/drive/folders/11wm5WHDjmSBfhng6emxCBBYZexmLoxLk?usp=sharing).
 # Test on NOCS REAL275
 
 <details>
-<summary>With Instance Segmentation Mask</summary>
+<summary>Data Preparation</summary>
+
+First download the detection priors from [Google Drive](https://drive.google.com/file/d/1cvGiXG_2ya8CMHss1IDobdL81qeODOrE/view?usp=sharing), which are used for evaluation with instance segmentation or bounding box masks. Put the directory under ``data/nocs_seg``.
+
+Then download the RGB-D images from the [NOCS REAL275](http://download.cs.stanford.edu/orion/nocs/real_test.zip) dataset and put them under ``data/nocs``.
 
+Place (pre-)trained models under ``checkpoints``.
 </details>
 
-## With Bounding Box Mask
+<details>
+<summary>Evaluate with Instance Segmentation Mask</summary>
+
+First save inference outputs:
+```
+python nocs/inference.py
+``` 
 
-## Zero-Shot Instance Segmentation and Pose Estimation
+Then evaluate mAP: 
+```
+python nocs/eval.py | tee nocs/map.txt
+```
+</details>
+
+<details>
+<summary> Evaluate with Bounding Box Mask</summary>
+
+First save inference outputs with bounding box mask enabled:
+```
+python nocs/inference.py --bbox_mask
+``` 
+
+Then evaluate mAP: 
+```
+python nocs/eval.py | tee nocs/map_bbox.txt
+```
+</details>
+
+<details>
+<summary> Zero-Shot Instance Segmentation and Pose Estimation</summary>
+Coming soon.
+
+</details>
 # Test on SUN RGB-D
+Coming soon.
 
-# Train on Your Own Object Collections
+# Train on Your Own Object Collections
+Coming soon.
diff --git a/nocs/eval.py b/nocs/eval.py
@@ -27,7 +27,6 @@
             else:
                 assert len(result['gt_handle_visibility']) == len(result['gt_class_ids']), "{} {}".format(result['gt_handle_visibility'], result['gt_class_ids'])
 
-
         if type(result) is list:
             final_results += result
         elif type(result) is dict:
diff --git a/nocs/inference.py b/nocs/inference.py
@@ -31,11 +31,13 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--seg_dir', default='data/nocs_seg', help='Segmentation PKL files for NOCS')
+    parser.add_argument('--nocs_dir', default='data/nocs', help='NOCS real test image path')
     parser.add_argument('--out_dir', default='data/nocs_prediction', help='Output directory for predictions')
     parser.add_argument('--cp_device', type=int, default=0, help='GPU device number for custom voting algorithms')
     parser.add_argument('--ckpt_path', default='checkpoints', help='Model checkpoint path')
     parser.add_argument('--angle_prec', type=float, default=1.5, help='Angle precision in orientation voting')
     parser.add_argument('--num_rots', type=int, default=72, help='Number of candidate center votes generated for a given point pair')
+    parser.add_argument('--bbox_mask', action='store_true', help='Whether to use bbox mask instead of instance segmentations.')
     args = parser.parse_args()
 
     cp_device = args.cp_device
@@ -102,16 +104,19 @@
     bcelogits = torch.nn.BCEWithLogitsLoss()
 
     for res in tqdm(final_results):
-        img = cv2.imread(res['image_path'] + '_color.png')[:, :, ::-1]
-        depth = cv2.imread(res['image_path'] + '_depth.png', -1)
+        img = cv2.imread(os.path.join(args.nocs_dir, res['image_path'][5:] + '_color.png'))[:, :, ::-1]
+        depth = cv2.imread(os.path.join(args.nocs_dir, res['image_path'][5:] + '_depth.png'), -1)
 
         bboxs = res['pred_bboxes']
-        masks = res['pred_masks']
+        masks = res['pred_masks'].copy()
         RTs = np.zeros((len(bboxs), 4, 4), dtype=np.float32)
         scales = np.zeros((len(bboxs), 3), dtype=np.float32)
         cls_ids = res['pred_class_ids']
         
         for i, bbox in enumerate(bboxs):
+            if args.bbox_mask:
+                masks[:, :, i][bbox[0]:bbox[2], bbox[1]:bbox[3]] = True
+
             cls_id = cls_ids[i]
             cls_name = synset_names[cls_id]
             
@@ -297,10 +302,10 @@
             if cfg.regress_right:
                 right = final_directions[1]
                 right -= np.dot(up, right) * up
-                right /= np.linalg.norm(right)
+                right /= (np.linalg.norm(right) + 1e-9)
             else:
                 right = np.array([0, -up[2], up[1]])
-                right /= np.linalg.norm(right)
+                right /= (np.linalg.norm(right) + 1e-9)
             
             if (cls_name == 'laptop') and (laptop_up is not None):
                 if np.dot(up, laptop_up) + np.dot(right, laptop_up) < np.dot(up, -laptop_up) + np.dot(right, -laptop_up):
@@ -311,16 +316,23 @@
                     right = up
                     up = laptop_up
                     right -= np.dot(up, right) * up
-                    right /= np.linalg.norm(right)
-                    
+                    right /= (np.linalg.norm(right) + 1e-9)
+            
+            if np.linalg.norm(right) < 1e-7: # right is zero
+                right = np.random.randn(3)
+                right -= right.dot(up) * up
+                right /= np.linalg.norm(right)
+
             if cfg.z_right:
                 R_est = np.stack([np.cross(up, right), up, right], -1)
             else:
                 R_est = np.stack([right, up, np.cross(right, up)], -1)
 
             pred_scale = np.exp(preds_scale[0].mean(0).cpu().numpy()) * cfg.scale_mean * 2
             scale_norm = np.linalg.norm(pred_scale)
+            assert scale_norm > 0
             RTs[i][:3, :3] = R_est * scale_norm
+            RTs[i][3, 3] = 1.
             scales[i, :] = pred_scale / scale_norm
             
         res['pred_RTs'] = RTs
diff --git a/utils/util.py b/utils/util.py
@@ -799,7 +799,7 @@ def compute_degree_cm_mAP(final_results, synset_names, log_dir, degree_threshold
         pred_bboxes[...] = 1
         if len(pred_RTs) > 0:
             norm_pred_scales = np.stack([np.cbrt(np.linalg.det(pred_RT[:3, :3])) for pred_RT in pred_RTs])
-            pred_RTs[:, :3, :3] = pred_RTs[:, :3, :3] / norm_pred_scales[:, None, None]
+            pred_RTs[:, :3, :3] = pred_RTs[:, :3, :3] / (norm_pred_scales[:, None, None] + 1e-9)
             pred_scales = pred_scales * norm_pred_scales[:, None]
         #print(pred_bboxes.shape[0], pred_class_ids.shape[0], pred_scores.shape[0], pred_RTs.shape[0])