Skip to content

Commit d916015

Browse files
committed
Add monodepth2 model
1 parent 0f12e8f commit d916015

18 files changed

+396
-93
lines changed

README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,23 +42,23 @@ The setup and interface of the models is explained in [rmvd/models/README.md](rm
4242
### Evaluation script
4343
Evaluation is done with the script `eval.py`, for example on ETH3D:
4444
```bash
45-
python eval.py --model robust_mvd --dataset eth3d --eval_type mvd --input poses intrinsics --output /tmp/eval_output --input_size 768 1152
45+
python eval.py --model robust_mvd --dataset eth3d --eval_type mvd --inputs poses intrinsics --output /tmp/eval_output --input_size 768 1152
4646
```
4747
On KITTI:
4848
```bash
49-
python eval.py --model robust_mvd --dataset kitti --eval_type mvd --input poses intrinsics --output /tmp/eval_output --input_size 384 1280
49+
python eval.py --model robust_mvd --dataset kitti --eval_type mvd --inputs poses intrinsics --output /tmp/eval_output --input_size 384 1280
5050
```
5151
On DTU:
5252
```bash
53-
python eval.py --model robust_mvd --dataset dtu --eval_type mvd --input poses intrinsics --output /tmp/eval_output --input_size 896 1216
53+
python eval.py --model robust_mvd --dataset dtu --eval_type mvd --inputs poses intrinsics --output /tmp/eval_output --input_size 896 1216
5454
```
5555
On ScanNet:
5656
```bash
57-
python eval.py --model robust_mvd --dataset scannet --eval_type mvd --input poses intrinsics --output /tmp/eval_output --input_size 448 640
57+
python eval.py --model robust_mvd --dataset scannet --eval_type mvd --inputs poses intrinsics --output /tmp/eval_output --input_size 448 640
5858
```
5959
On Tanks and Temples:
6060
```bash
61-
python eval.py --model robust_mvd --dataset tanks_and_temples --eval_type mvd --input poses intrinsics --output /tmp/eval_output --input_size 704 1280
61+
python eval.py --model robust_mvd --dataset tanks_and_temples --eval_type mvd --inputs poses intrinsics --output /tmp/eval_output --input_size 704 1280
6262
```
6363

6464
The parameters `model`, `dataset` and `eval_type` are required.
@@ -133,7 +133,7 @@ The following describes how to evaluate on the benchmark.
133133
### Evaluation of models within the `rmvd` framework
134134
Evaluation on the benchmark is done with the script `eval.py`:
135135
```bash
136-
python eval.py --model robust_mvd --eval_type robustmvd --input poses intrinsics --output /tmp/eval_benchmark --eth3d_size 768 1152 --kitti_size 384 1280 --dtu_size 896 1216 --scannet_size 448 640 --tanks_and_temples_size 704 1280
136+
python eval.py --model robust_mvd --eval_type robustmvd --inputs poses intrinsics --output /tmp/eval_benchmark --eth3d_size 768 1152 --kitti_size 384 1280 --dtu_size 896 1216 --scannet_size 448 640 --tanks_and_temples_size 704 1280
137137
```
138138

139139
### Programmatic evaluation
@@ -171,9 +171,9 @@ format and to call the model. For details about these functions, see [rmvd/model
171171

172172
## Citation
173173
This is the official repository for the publication:
174-
> **A Benchmark and a Baseline for Robust Multi-view Depth Estimation**
174+
> **[A Benchmark and a Baseline for Robust Multi-view Depth Estimation](http://arxiv.org/abs/2209.06681)**
175175
>
176-
> [Philipp Schröppel](https://lmb.informatik.uni-freiburg.de/people/schroepp), [Jan Bechtold](https://lmb.informatik.uni-freiburg.de/people/bechtolj), [Artemij Amiranashvili](https://lmb.informatik.uni-freiburg.de/people/amiranas) and [Thomas Brox](https://lmb.informatik.uni-freiburg.de/people/brox)
176+
> [Philipp Schröppel](https://lmb.informatik.uni-freiburg.de/people/schroepp), [Jan Bechtold](https://lmb.informatik.uni-freiburg.de/people/bechtolj), [Artemij Amiranashvili](https://lmb.informatik.uni-freiburg.de/people/amiranas), [Thomas Brox](https://lmb.informatik.uni-freiburg.de/people/brox)
177177
>
178178
> **3DV 2022**
179179

eval.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env python3
2+
13
import argparse
24
import sys
35
import os.path as osp
@@ -36,6 +38,10 @@ def eval(args):
3638
eval = create_evaluation(evaluation_type=args.eval_type,
3739
out_dir=args.output,
3840
inputs=args.inputs,
41+
alignment=args.alignment,
42+
view_ordering=args.view_ordering,
43+
min_source_views=args.min_source_views,
44+
max_source_views=args.max_source_views,
3945
eval_uncertainty=args.eval_uncertainty)
4046

4147
with open(osp.join(args.output, "cmd.txt"), 'a') as f:
@@ -55,19 +61,29 @@ def eval(args):
5561
parser.add_argument('--weights', help="Path to weights of the model. Optional. If None, default weights are used.")
5662
parser.add_argument('--num_gpus', type=int, help="Number of GPUs. 0 means use CPU. Default: use 1 GPU.", default=1)
5763
parser.add_argument('--eval_type', help=f"Evaluation setting. Options are: {', '.join(list_evaluations())}")
58-
parser.add_argument('--input', nargs='*',
64+
parser.add_argument('--inputs', nargs='*',
5965
help=f"Model inputs. Images are always provided to the model. "
6066
f"It is possible to specify multiple additional inputs, "
61-
f"e.g. --input intrinsics --input poses. "
67+
f"e.g. --inputs intrinsics poses. "
6268
f"Options for additional model inputs are: intrinsics, poses, depth_range.",
63-
type=str, dest='inputs')
69+
type=str)
6470
parser.add_argument('--output', help="Path to folder for output data.")
6571

6672
parser.add_argument('--num_samples', type=int, help='Number of samples to be evaluated. Default: evaluate all.')
6773
parser.add_argument('--samples', type=int, nargs='*',
6874
help='Index of sample that should be evaluated. Ignored if num_samples is used. '
6975
'Default: evaluate all.')
7076

77+
parser.add_argument('--max_source_views', type=int, help='Maximum number of source views to use for evaluation. '
78+
'Default: use all available source views.')
79+
parser.add_argument('--min_source_views', type=int, default=1,
80+
help='Minimum number of source views to use for evaluation. Default: 1.')
81+
parser.add_argument('--view_ordering', default="quasi-optimal",
82+
help=f"Source view ordering. Options are: quasi-optimal (default), nearest.")
83+
parser.add_argument('--alignment',
84+
help=f"Alignment between predicted and ground truth depths. "
85+
f"Options are None, median, translation. Default: None")
86+
7187
parser.add_argument('--num_qualitatives', type=int, default=10,
7288
help='Number of qualitatives to be output. Negative values output all qualitatives. '
7389
'Ignored if --qualitative is used. Default: 10.')

eval_all.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
2+
3+
python eval.py --model robust_mvd --eval_type robustmvd --inputs poses intrinsics --output /tmp/eval_benchmark --eth3d_size 768 1152 --kitti_size 384 1280 --dtu_size 896 1216 --scannet_size 448 640 --tanks_and_temples_size 704 1280
4+
python eval.py --model robust_mvd_5M --eval_type robustmvd --inputs poses intrinsics --output /tmp/eval_benchmark --eth3d_size 768 1152 --kitti_size 384 1280 --dtu_size 896 1216 --scannet_size 448 640 --tanks_and_temples_size 704 1280
5+
python eval.py --model monodepth2_mono_stereo_1024x320_wrapped --eval_type robustmvd --output /tmp/eval_benchmark --max_source_views 0 --alignment median
6+
python eval.py --model monodepth2_mono_stereo_640x192_wrapped --eval_type robustmvd --output /tmp/eval_benchmark --max_source_views 0 --alignment median

inference.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env python3
2+
13
import argparse
24
import os
35
import os.path as osp

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
torch>=1.9.0
2+
torchvision
23
numpy
34
pillow
45
matplotlib

rmvd/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
from .data import list_datasets, list_base_datasets, list_dataset_types, list_splits, has_dataset, create_dataset, \
44
create_compound_dataset
55

6-
from .models import list_models, has_model, create_model
6+
from .models import list_models, has_model, create_model, prepare_custom_model
77
from .eval import list_evaluations, create_evaluation
88
from .train import list_trainings, create_training

rmvd/data/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ target directory to download the dataset:
2525
```bash
2626
./scripts/download_eth3d.sh /path/to/eth3d
2727
```
28-
Then specify the download directory (`/path/to/eth3d`) in the `paths.toml` file.
28+
Then specify the download directory `/path/to/eth3d` in the `paths.toml` file.
2929

3030
### KITTI
3131
Download the KITTI raw data from <https://www.cvlibs.net/datasets/kitti/raw_data.php> using

rmvd/data/transforms.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ def __call__(self, sample):
3232
sample["images"] = images
3333

3434
# resize intrinsics:
35-
scale_arr = np.array([[wd / orig_wd]*3, [ht / orig_ht]*3, [1.]*3], dtype=np.float32) # 3, 3
36-
sample["intrinsics"] = [intrinsic * scale_arr for intrinsic in sample["intrinsics"]]
35+
if "intrinsics" in sample:
36+
scale_arr = np.array([[wd / orig_wd]*3, [ht / orig_ht]*3, [1.]*3], dtype=np.float32) # 3, 3
37+
sample["intrinsics"] = [intrinsic * scale_arr for intrinsic in sample["intrinsics"]]
3738

3839
sample["orig_width"] = orig_wd
3940
sample["orig_height"] = orig_ht

rmvd/eval/multi_view_depth_evaluation.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
from .metrics import m_rel_ae, pointwise_rel_ae, thresh_inliers, sparsification
1515

1616

17-
# TODO: add tensorboard logging
18-
19-
2017
class MultiViewDepthEvaluation:
2118
"""Multi-view depth evaluation.
2219
@@ -30,6 +27,9 @@ class MultiViewDepthEvaluation:
3027
A typical depth-from-video model would set
3128
inputs=["images", "intrinsics"], alignment="median".
3229
30+
A typical depth-from-single-view model would set
31+
inputs=["images"], max_source_views=0, alignment="median".
32+
3333
Args:
3434
out_dir: Directory where results will be written. If None, results are not written to disk.
3535
inputs: List of input modalities that are supplied to the algorithm.
@@ -39,9 +39,12 @@ class MultiViewDepthEvaluation:
3939
None evaluates predictions without any alignment.
4040
"median" scales predicted depth maps with the ratio of medians of predicted and ground truth depth maps.
4141
"translation" scales predicted depth maps with the ratio of the predicted and ground truth translation.
42+
max_source_views: Maximum number of source views to be considered. None means all available source views are
43+
considered. Default: None.
44+
min_source_views: Minimum number of source views provided to the model.
45+
If max_source_views is not None, is set to min(min_source_views, max_source_views). Default: 1.
4246
view_ordering: Ordering of source views during the evaluation.
43-
Options are "quasi-optimal", "nearest" and None. Default: "quasi-optimal".
44-
None: supply all source views to the model and evaluate predicted depth map.
47+
Options are "quasi-optimal" and "nearest". Default: "quasi-optimal".
4548
"quasi-optimal": evaluate predicted depth maps for all (keyview, sourceview) pairs.
4649
Order source views according to the prediction accuracy. Increase source view set based on
4750
the obtained ordering and re-evaluate for each additional source view.
@@ -50,8 +53,6 @@ class MultiViewDepthEvaluation:
5053
view set based on the ordering of views in the sample, i.e. based on the distance between source
5154
view indices and the keyview index. Log results based on the number of source views.
5255
Log best results as overall results.
53-
max_source_views: Maximum number of source views to be considered in case view_ordering is
54-
"quasi-optimal" or "nearest". None means all available source views are considered.
5556
eval_uncertainty: Evaluate predicted uncertainty (pred_depth_uncertainty) if available.
5657
Increases evaluation time.
5758
clip_pred_depth: Clip model predictions before evaluation to a reasonable range. This makes sense to reduce
@@ -64,12 +65,14 @@ def __init__(self,
6465
out_dir: Optional[str] = None,
6566
inputs: Sequence[str] = None,
6667
alignment: Optional[str] = None,
67-
view_ordering: str = "quasi-optimal",
6868
max_source_views: Optional[int] = None,
69+
min_source_views: int = 1,
70+
view_ordering: str = "quasi-optimal",
6971
eval_uncertainty: bool = True,
7072
clip_pred_depth: Union[bool, Tuple[float, float]] = True,
7173
sparse_pred: bool = False,
7274
verbose: bool = True,
75+
**_
7376
):
7477

7578
self.verbose = verbose
@@ -89,8 +92,9 @@ def __init__(self,
8992

9093
self.inputs = list(set(inputs + ["images"])) if inputs is not None else ["images"]
9194
self.alignment = alignment
92-
self.view_ordering = view_ordering
9395
self.max_source_views = max_source_views
96+
self.min_source_views = min_source_views if max_source_views is None else min(min_source_views, max_source_views)
97+
self.view_ordering = view_ordering if (self.max_source_views is None) or (self.max_source_views > 0) else None
9498
self.eval_uncertainty = eval_uncertainty
9599
self.clip_pred_depth = clip_pred_depth
96100
self.sparse_pred = sparse_pred
@@ -120,8 +124,9 @@ def __str__(self):
120124
ret = f"{self.name} with settings:"
121125
ret += f"\n\tInputs: {self.inputs}"
122126
ret += f"\n\tAlignment: {self.alignment}"
123-
ret += f"\n\tView ordering: {self.view_ordering}"
127+
ret += f"\n\tMin source views: {self.min_source_views}"
124128
ret += f"\n\tMax source views: {self.max_source_views}"
129+
ret += f"\n\tView ordering: {self.view_ordering}"
125130
ret += f"\n\tEvaluate uncertainty: {self.eval_uncertainty}"
126131
ret += f"\n\tClip predicted depth: {self.clip_pred_depth}"
127132
ret += f"\n\tPredicted depth is sparse: {self.sparse_pred}"
@@ -222,7 +227,7 @@ def _evaluate(self):
222227
ordered_source_indices = self._get_source_view_ordering(sample_inputs=sample_inputs, sample_gt=sample_gt)
223228
max_source_views = min(len(ordered_source_indices), self.max_source_views) \
224229
if self.max_source_views is not None else len(ordered_source_indices)
225-
min_source_views = 1 if self.view_ordering is not None else max_source_views
230+
min_source_views = self.min_source_views
226231

227232
best_metrics = None
228233
best_num_source_views = np.nan
@@ -234,7 +239,7 @@ def _evaluate(self):
234239
cur_keyview_idx = cur_view_indices.index(keyview_idx)
235240

236241
if self.verbose:
237-
print(f"\tEvaluating with {num_source_views} / {len(ordered_source_indices)} source views:")
242+
print(f"\tEvaluating with {num_source_views} / {max_source_views} source views:")
238243
print(f"\t\tSource view indices: {cur_source_indices}.")
239244

240245
self._reset_memory_stats()
@@ -371,7 +376,7 @@ def _init_results(self):
371376
def _get_source_view_ordering(self, sample_inputs, sample_gt):
372377
if self.view_ordering == 'quasi-optimal':
373378
return self._get_quasi_optimal_source_view_ordering(sample_inputs=sample_inputs, sample_gt=sample_gt)
374-
else:
379+
elif (self.view_ordering == 'nearest') or (self.view_ordering is None):
375380
return self._get_nearest_source_view_ordering(sample_inputs=sample_inputs, sample_gt=sample_gt)
376381

377382
def _get_nearest_source_view_ordering(self, sample_inputs, sample_gt):
@@ -389,13 +394,17 @@ def _get_quasi_optimal_source_view_ordering(self, sample_inputs, sample_gt):
389394
# construct temporary sample with a single source view:
390395
cur_sample_inputs = deepcopy(sample_inputs)
391396
cur_sample_gt = deepcopy(sample_gt)
392-
cur_sample_inputs['images'] = [cur_sample_inputs['images'][keyview_idx],
393-
cur_sample_inputs['images'][source_idx]]
394-
cur_sample_inputs['poses'] = [cur_sample_inputs['poses'][keyview_idx],
395-
cur_sample_inputs['poses'][source_idx]]
396-
cur_sample_inputs['intrinsics'] = [cur_sample_inputs['intrinsics'][keyview_idx],
397-
cur_sample_inputs['intrinsics'][source_idx]]
397+
if "images" in self.inputs:
398+
cur_sample_inputs['images'] = [cur_sample_inputs['images'][keyview_idx],
399+
cur_sample_inputs['images'][source_idx]]
400+
if "poses" in self.inputs:
401+
cur_sample_inputs['poses'] = [cur_sample_inputs['poses'][keyview_idx],
402+
cur_sample_inputs['poses'][source_idx]]
403+
if "intrinsics" in self.inputs:
404+
cur_sample_inputs['intrinsics'] = [cur_sample_inputs['intrinsics'][keyview_idx],
405+
cur_sample_inputs['intrinsics'][source_idx]]
398406
cur_sample_inputs['keyview_idx'] = np.array([0])
407+
# depth_range is not changed
399408

400409
# run model:
401410
pred, _, _ = self._run_model(cur_sample_inputs)

rmvd/eval/robust_mvd_benchmark.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ class RobustMultiViewDepthBenchmark:
3232
None evaluates predictions without any alignment.
3333
"median" scales predicted depth maps with the ratio of medians of predicted and ground truth depth maps.
3434
"translation" scales predicted depth maps with the ratio of the predicted and ground truth translation.
35-
max_source_views: Maximum number of source views to be considered in case view_ordering is
36-
"quasi-optimal" or "nearest". None means all available source views are considered.
35+
max_source_views: Maximum number of source views to be considered. None means all available source views are
36+
considered. Default: None.
37+
min_source_views: Minimum number of source views provided to the model.
38+
If max_source_views is not None, is set to min(min_source_views, max_source_views). Default: 1.
3739
eval_uncertainty: Evaluate predicted uncertainty (pred_depth_uncertainty) if available.
3840
Increases evaluation time.
3941
sparse_pred: Predicted depth is sparse. Invalid predictions are indicated by 0 values and ignored in
@@ -45,9 +47,11 @@ def __init__(self,
4547
inputs: Sequence[str] = None,
4648
alignment: Optional[str] = None,
4749
max_source_views: Optional[int] = None,
50+
min_source_views: int = 1,
4851
eval_uncertainty: bool = True,
4952
sparse_pred: bool = False,
5053
verbose: bool = True,
54+
**_
5155
):
5256

5357
self.verbose = verbose
@@ -59,9 +63,10 @@ def __init__(self,
5963
if self.out_dir is not None:
6064
os.makedirs(self.out_dir, exist_ok=True)
6165

62-
self.inputs = inputs
66+
self.inputs = list(set(inputs + ["images"])) if inputs is not None else ["images"]
6367
self.alignment = alignment
6468
self.max_source_views = max_source_views
69+
self.min_source_views = min_source_views if max_source_views is None else min(min_source_views, max_source_views)
6570
self.eval_uncertainty = eval_uncertainty
6671
self.sparse_pred = sparse_pred
6772

@@ -79,6 +84,7 @@ def __str__(self):
7984
ret += f"\n\tInputs: {self.inputs}"
8085
ret += f"\n\tAlignment: {self.alignment}"
8186
ret += f"\n\tMax source views: {self.max_source_views}"
87+
ret += f"\n\tMin source views: {self.min_source_views}"
8288
ret += f"\n\tEvaluate uncertainty: {self.eval_uncertainty}"
8389
ret += f"\n\tPredicted depth is sparse: {self.sparse_pred}"
8490
if self.out_dir is not None:
@@ -145,6 +151,7 @@ def __call__(self,
145151

146152
eval = MultiViewDepthEvaluation(out_dir=out_dir, inputs=self.inputs, alignment=self.alignment,
147153
view_ordering="quasi-optimal", max_source_views=self.max_source_views,
154+
min_source_views=self.min_source_views,
148155
eval_uncertainty=self.eval_uncertainty, clip_pred_depth=True,
149156
sparse_pred=self.sparse_pred, verbose=self.verbose)
150157
# TODO: pass tqdm progress bar and set verbose to False

0 commit comments

Comments
 (0)