Skip to content

Commit 7234a93

Browse files
committed
stereo + rgbd eval code
1 parent 8207c9f commit 7234a93

19 files changed

+499
-120
lines changed

README.md

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ Zachary Teed and Jia Deng
1515
}
1616
```
1717

18-
**Initial Code Release:** This repo currently provides a single GPU implementation of our monocular SLAM system. It also contains demos, training, and evaluation scripts. Stereo, RGB-D, and multi-GPU code will be added on **September 7**.
18+
**Initial Code Release:** This repo currently provides a single GPU implementation of our monocular SLAM system. It currently contains demos, training, and evaluation scripts.
1919

2020

2121
## Requirements
@@ -83,25 +83,33 @@ fx fy cx cy [k1 k2 p1 p2 [ k3 [ k4 k5 k6 ]]]
8383
```
8484
with parameters in brackets optional.
8585

86-
## Evaluation (Monocular)
87-
We provide evaluation scripts for TartanAir, EuRoC, and TUM. EuRoC and TUM can be run on a 1080Ti. The TartanAir validation script will require 24G of memory.
86+
## Evaluation
87+
We provide evaluation scripts for TartanAir, EuRoC, and TUM. EuRoC and TUM can be run on a 1080Ti. The TartanAir and ETH3D evaluations will require 24G of memory.
8888

89-
### EuRoC
89+
### TartanAir (Mono + Stereo)
90+
Download the [TartanAir](https://theairlab.org/tartanair-dataset/) dataset using the script `thirdparty/tartanair_tools/download_training.py` and put them in `datasets/TartanAir`
91+
```Bash
92+
./tools/validate_tartanair.sh --plot_curve # monocular eval
93+
./tools/validate_tartanair.sh --plot_curve --stereo # stereo eval
94+
```
95+
96+
### EuRoC (Mono + Stereo)
9097
Download the [EuRoC](https://projects.asl.ethz.ch/datasets/doku.php?id=kmavvisualinertialdatasets) sequences (ASL format) and put them in `datasets/EuRoC`
9198
```Bash
92-
./tools/evaluate_euroc.sh
99+
./tools/evaluate_euroc.sh # monocular eval
100+
./tools/evaluate_euroc.sh --stereo # stereo eval
93101
```
94102

95-
### TUM-RGBD
103+
### TUM-RGBD (Mono)
96104
Download the fr1 sequences from [TUM-RGBD](https://vision.in.tum.de/data/datasets/rgbd-dataset/download) and put them in `datasets/TUM-RGBD`
97105
```Bash
98-
./tools/evaluate_tum.sh
106+
./tools/evaluate_tum.sh # monocular eval
99107
```
100108

101-
### TartanAir
102-
Download the [TartanAir](https://theairlab.org/tartanair-dataset/) dataset using the script `thirdparty/tartanair_tools/download_training.py` and put them in `datasets/TartanAir`
109+
### ETH3D (RGB-D)
110+
Download the [ETH3D](https://www.eth3d.net/slam_datasets) dataset
103111
```Bash
104-
./tools/validate_tartanair.sh
112+
./tools/evaluate_eth3d.sh # RGB-D eval
105113
```
106114

107115
## Training

droid_slam/depth_video.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import geom.projective_ops as pops
1111

1212
class DepthVideo:
13-
def __init__(self, image_size=[480, 640], buffer=1024, device="cuda:0"):
13+
def __init__(self, image_size=[480, 640], buffer=1024, stereo=False, device="cuda:0"):
1414

1515
# current keyframe count
1616
self.counter = Value('i', 0)
@@ -25,11 +25,15 @@ def __init__(self, image_size=[480, 640], buffer=1024, device="cuda:0"):
2525
self.red = torch.zeros(buffer, device="cuda", dtype=torch.bool).share_memory_()
2626
self.poses = torch.zeros(buffer, 7, device="cuda", dtype=torch.float).share_memory_()
2727
self.disps = torch.ones(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
28+
self.disps_sens = torch.zeros(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
2829
self.disps_up = torch.zeros(buffer, ht, wd, device="cuda", dtype=torch.float).share_memory_()
2930
self.intrinsics = torch.zeros(buffer, 4, device="cuda", dtype=torch.float).share_memory_()
3031

32+
self.stereo = stereo
33+
c = 1 if not self.stereo else 2
34+
3135
### feature attributes ###
32-
self.fmaps = torch.zeros(buffer, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
36+
self.fmaps = torch.zeros(buffer, c, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
3337
self.nets = torch.zeros(buffer, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
3438
self.inps = torch.zeros(buffer, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
3539

@@ -57,16 +61,20 @@ def __item_setter(self, index, item):
5761
self.disps[index] = item[3]
5862

5963
if item[4] is not None:
60-
self.intrinsics[index] = item[4]
64+
depth = item[4][3::8,3::8]
65+
self.disps_sens[index] = torch.where(depth>0, 1.0/depth, depth)
6166

62-
if len(item) > 5:
63-
self.fmaps[index] = item[5]
67+
if item[5] is not None:
68+
self.intrinsics[index] = item[5]
6469

6570
if len(item) > 6:
66-
self.nets[index] = item[6]
71+
self.fmaps[index] = item[6]
6772

6873
if len(item) > 7:
69-
self.inps[index] = item[7]
74+
self.nets[index] = item[7]
75+
76+
if len(item) > 8:
77+
self.inps[index] = item[8]
7078

7179
def __setitem__(self, index, item):
7280
with self.get_lock():
@@ -179,11 +187,7 @@ def ba(self, target, weight, eta, ii, jj, t0=1, t1=None, itrs=2, lm=1e-4, ep=0.1
179187
if t1 is None:
180188
t1 = max(ii.max().item(), jj.max().item()) + 1
181189

182-
if eta is None:
183-
k = torch.unique(torch.cat([ii, jj], 0)).shape[0]
184-
eta = 1e-7 * torch.ones([k, self.ht//8, self.wd//8], device="cuda")
185-
186-
droid_backends.ba(self.poses, self.disps, self.intrinsics[0],
190+
droid_backends.ba(self.poses, self.disps, self.intrinsics[0], self.disps_sens,
187191
target, weight, eta, ii, jj, t0, t1, itrs, lm, ep, motion_only)
188192

189193
self.disps.clamp_(min=0.001)

droid_slam/droid.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def __init__(self, args):
2121
self.disable_vis = args.disable_vis
2222

2323
# store images, depth, poses, intrinsics (shared between processes)
24-
self.video = DepthVideo(args.image_size, args.buffer)
24+
self.video = DepthVideo(args.image_size, args.buffer, stereo=args.stereo)
2525

2626
# filter incoming frames so that there is enough motion
2727
self.filterx = MotionFilter(self.net, self.video, thresh=args.filter_thresh)

droid_slam/droid_backend.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,10 @@ def __call__(self, steps=12):
2525
""" main update """
2626

2727
t = self.video.counter.value
28-
self.video.normalize()
28+
if not self.video.stereo and not torch.any(self.video.disps_sens):
29+
self.video.normalize()
2930

30-
graph = FactorGraph(self.video, self.update_op, corr_impl="alt", max_factors=100000)
31+
graph = FactorGraph(self.video, self.update_op, corr_impl="alt", max_factors=16*t)
3132

3233
graph.add_proximity_factors(rad=self.backend_radius,
3334
nms=self.backend_nms,
@@ -37,4 +38,3 @@ def __call__(self, steps=12):
3738
graph.update_lowmem(steps=steps)
3839
graph.clear_edges()
3940
self.video.dirty[:t] = True
40-

droid_slam/droid_frontend.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ def __update(self):
4444
self.graph.add_proximity_factors(self.t1-5, max(self.t1-self.frontend_window, 0),
4545
rad=self.frontend_radius, nms=self.frontend_nms, thresh=self.frontend_thresh, beta=self.beta, remove=True)
4646

47+
self.video.disps[self.t1-1] = torch.where(self.video.disps_sens[self.t1-1] > 0,
48+
self.video.disps_sens[self.t1-1], self.video.disps[self.t1-1])
49+
4750
for itr in range(self.iters1):
4851
self.graph.update(None, None, use_inactive=True)
4952

@@ -80,11 +83,12 @@ def __initialize(self):
8083
for itr in range(8):
8184
self.graph.update(1, use_inactive=True)
8285

83-
self.graph.add_proximity_factors(0, 0, rad=2, nms=2, thresh=self.frontend_thresh)
86+
self.graph.add_proximity_factors(0, 0, rad=2, nms=2, thresh=self.frontend_thresh, remove=False)
8487

85-
for itr in range(12):
88+
for itr in range(8):
8689
self.graph.update(1, use_inactive=True)
8790

91+
8892
# self.video.normalize()
8993
self.video.poses[self.t1] = self.video.poses[self.t1-1].clone()
9094
self.video.disps[self.t1] = self.video.disps[self.t1-4:self.t1].mean()
@@ -99,6 +103,8 @@ def __initialize(self):
99103
self.video.ready.value = 1
100104
self.video.dirty[:self.t1] = True
101105

106+
self.graph.rm_factors(self.graph.ii < self.warmup-4, store=True)
107+
102108
def __call__(self):
103109
""" main update """
104110

droid_slam/factor_graph.py

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,9 @@ def add_factors(self, ii, jj, remove=False):
109109

110110
# correlation volume for new edges
111111
if self.corr_impl == "volume":
112-
fmap1 = self.video.fmaps[ii].to(self.device).unsqueeze(0)
113-
fmap2 = self.video.fmaps[jj].to(self.device).unsqueeze(0)
112+
c = (ii == jj).long()
113+
fmap1 = self.video.fmaps[ii,0].to(self.device).unsqueeze(0)
114+
fmap2 = self.video.fmaps[jj,c].to(self.device).unsqueeze(0)
114115
corr = CorrBlock(fmap1, fmap2)
115116
self.corr = corr if self.corr is None else self.corr.cat(corr)
116117

@@ -167,20 +168,27 @@ def rm_keyframe(self, ix):
167168
with self.video.get_lock():
168169
self.video.poses[ix] = self.video.poses[ix+1]
169170
self.video.disps[ix] = self.video.disps[ix+1]
171+
self.video.disps_sens[ix] = self.video.disps_sens[ix+1]
170172
self.video.intrinsics[ix] = self.video.intrinsics[ix+1]
171173

172174
self.video.nets[ix] = self.video.nets[ix+1]
173175
self.video.inps[ix] = self.video.inps[ix+1]
174176
self.video.fmaps[ix] = self.video.fmaps[ix+1]
175177

178+
m = (self.ii_inac == ix) | (self.jj_inac == ix)
179+
self.ii_inac[self.ii_inac >= ix] -= 1
180+
self.jj_inac[self.jj_inac >= ix] -= 1
181+
182+
if torch.any(m):
183+
self.ii_inac = self.ii_inac[~m]
184+
self.jj_inac = self.jj_inac[~m]
185+
self.target_inac = self.target_inac[:,~m]
186+
self.weight_inac = self.weight_inac[:,~m]
187+
176188
m = (self.ii == ix) | (self.jj == ix)
177189

178190
self.ii[self.ii >= ix] -= 1
179191
self.jj[self.jj >= ix] -= 1
180-
181-
self.ii_inac[self.ii_inac >= ix] -= 1
182-
self.jj_inac[self.jj_inac >= ix] -= 1
183-
184192
self.rm_factors(m, store=False)
185193

186194

@@ -239,7 +247,9 @@ def update_lowmem(self, t0=None, t1=None, itrs=2, use_inactive=False, EP=1e-7, s
239247

240248
# alternate corr implementation
241249
t = self.video.counter.value
242-
corr_op = AltCorrBlock(self.video.fmaps[None,:t])
250+
251+
num, rig, ch, ht, wd = self.video.fmaps.shape
252+
corr_op = AltCorrBlock(self.video.fmaps.view(1, num*rig, ch, ht, wd))
243253

244254
for step in range(steps):
245255
print("Global BA Iteration #{}".format(step+1))
@@ -253,11 +263,12 @@ def update_lowmem(self, t0=None, t1=None, itrs=2, use_inactive=False, EP=1e-7, s
253263
v = (self.ii >= i) & (self.ii < i + s)
254264
iis = self.ii[v]
255265
jjs = self.jj[v]
256-
266+
257267
ht, wd = self.coords0.shape[0:2]
258-
corr1 = corr_op(coords1[:,v], iis, jjs)
268+
corr1 = corr_op(coords1[:,v], rig * iis, rig * jjs + (iis == jjs).long())
259269

260-
with torch.cuda.amp.autocast(enabled=True):
270+
with torch.cuda.amp.autocast(enabled=True):
271+
261272
net, delta, weight, damping, _ = \
262273
self.update_op(self.net[:,v], self.video.inps[None,iis], corr1, motn[:,v], iis, jjs)
263274

@@ -267,7 +278,7 @@ def update_lowmem(self, t0=None, t1=None, itrs=2, use_inactive=False, EP=1e-7, s
267278
self.weight[:,v] = weight.float()
268279
self.damping[torch.unique(iis)] = damping
269280

270-
damping = self.damping[torch.unique(self.ii)].contiguous() + EP
281+
damping = .2 * self.damping[torch.unique(self.ii)].contiguous() + EP
271282
target = self.target.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
272283
weight = self.weight.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
273284

@@ -277,15 +288,16 @@ def update_lowmem(self, t0=None, t1=None, itrs=2, use_inactive=False, EP=1e-7, s
277288

278289
self.video.dirty[:t] = True
279290

280-
281291
def add_neighborhood_factors(self, t0, t1, r=3):
282292
""" add edges between neighboring frames within radius r """
283293

284294
ii, jj = torch.meshgrid(torch.arange(t0,t1), torch.arange(t0,t1))
285295
ii = ii.reshape(-1).to(dtype=torch.long, device=self.device)
286296
jj = jj.reshape(-1).to(dtype=torch.long, device=self.device)
287297

288-
keep = ((ii - jj).abs() > 0) & ((ii - jj).abs() <= r)
298+
c = 1 if self.video.stereo else 0
299+
300+
keep = ((ii - jj).abs() > c) & ((ii - jj).abs() <= r)
289301
self.add_factors(ii[keep], jj[keep])
290302

291303

@@ -307,8 +319,6 @@ def add_proximity_factors(self, t0=0, t1=0, rad=2, nms=2, beta=0.25, thresh=16.0
307319
ii1 = torch.cat([self.ii, self.ii_bad, self.ii_inac], 0)
308320
jj1 = torch.cat([self.jj, self.jj_bad, self.jj_inac], 0)
309321
for i, j in zip(ii1.cpu().numpy(), jj1.cpu().numpy()):
310-
if abs(i - j) <= 2:
311-
continue
312322
for di in range(-nms, nms+1):
313323
for dj in range(-nms, nms+1):
314324
if abs(di) + abs(dj) <= max(min(abs(i-j)-2, nms), 0):
@@ -318,17 +328,26 @@ def add_proximity_factors(self, t0=0, t1=0, rad=2, nms=2, beta=0.25, thresh=16.0
318328
if (t0 <= i1 < t) and (t1 <= j1 < t):
319329
d[(i1-t0)*(t-t1) + (j1-t1)] = np.inf
320330

331+
321332
es = []
322333
for i in range(t0, t):
323-
for j in range(i+1, min(i+rad+1, t)):
334+
if self.video.stereo:
335+
es.append((i, i))
336+
d[(i-t0)*(t-t1) + (i-t1)] = np.inf
337+
338+
for j in range(max(i-rad-1,0), i):
324339
es.append((i,j))
325340
es.append((j,i))
341+
d[(i-t0)*(t-t1) + (j-t1)] = np.inf
326342

327343
ix = torch.argsort(d)
328344
for k in ix:
329345
if d[k].item() > thresh:
330346
continue
331347

348+
if len(es) > self.max_factors:
349+
break
350+
332351
i = ii[k]
333352
j = jj[k]
334353

droid_slam/geom/projective_ops.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ def projective_transform(poses, depths, intrinsics, ii, jj, jacobian=False, retu
101101

102102
# transform
103103
Gij = poses[:,jj] * poses[:,ii].inv()
104+
105+
Gij.data[:,ii==jj] = torch.as_tensor([-0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], device="cuda")
104106
X1, Ja = actp(Gij, X0, jacobian=jacobian)
105107

106108
# project (pinhole)

0 commit comments

Comments (0)