kp-forks
diff --git a/‎README.md‎
Lines changed: 18 additions & 4 deletions b/‎README.md‎
Lines changed: 18 additions & 4 deletions
diff --git a/‎data_preprocessing/RAFT/core/__init__.py‎ b/‎data_preprocessing/RAFT/core/__init__.py‎
diff --git a/‎data_preprocessing/RAFT/core/corr.py‎
Lines changed: 91 additions & 0 deletions b/‎data_preprocessing/RAFT/core/corr.py‎
Lines changed: 91 additions & 0 deletions
diff --git a/‎data_preprocessing/RAFT/core/datasets.py‎
Lines changed: 235 additions & 0 deletions b/‎data_preprocessing/RAFT/core/datasets.py‎
Lines changed: 235 additions & 0 deletions
@@ -43,9 +43,23 @@ We have provided some videos [here](https://drive.google.com/file/d/1cKZF6ILeokC
 
 ### Customize your own data
 
-*Stay tuned for data preparation scripts.*
+We segement video sequences using [SAM-Track](https://github.com/z-x-yang/Segment-and-Track-Anything). Once you obtain the mask files, place them in the folder `all_sequences/{YOUR_SEQUENCE_NAME}/{YOUR_SEQUENCE_NAME}_masks`. Next, execute the following command:
 
-Please organize your own data as follows:
+```shell
+cd data_preprocessing
+python preproc_mask.py
+```
+
+We extract optical flows of video sequences using [RAFT](https://github.com/princeton-vl/RAFT). To get started, please follow the instructions provided [here](https://github.com/princeton-vl/RAFT#demos) to download their pretrained model. Once downloaded, place the model in the `data_preprocessing/RAFT/models` folder. After that, you can execute the following command:
+
+```shell
+cd data_preprocessing/RAFT
+./run_raft.sh
+```
+
+Remember to update the sequence name and root directory in both `data_preprocessing/preproc_mask.py` and `data_preprocessing/RAFT/run_raft.sh` accordingly.
+
+After obtaining the files, please organize your own data as follows:
 
 ```
 CoDeF
@@ -131,7 +145,7 @@ After running the script, the reconstructed videos can be found in `results/all_
 ## Test video translation
 
 After obtaining the canonical image through [this step](#anchor), use your preferred text prompts to transfer it using [ControlNet](https://github.com/lllyasviel/ControlNet).
-Once you have the transferred canonical image, place it in `all_sequences/${NAME}/${EXP_NAME}_control` (i.e. `CANONICAL_DIR` in `scripts/test_canonical.sh`). 
+Once you have the transferred canonical image, place it in `all_sequences/${NAME}/${EXP_NAME}_control` (i.e. `CANONICAL_DIR` in `scripts/test_canonical.sh`).
 
 Then run
 
@@ -147,7 +161,7 @@ The transferred results can be seen in `results/all_sequences/{NAME}/{EXP_NAME}_
 
 ```bibtex
 @article{ouyang2023codef,
-      title={CoDeF: Content Deformation Fields for Temporally Consistent Video Processing}, 
+      title={CoDeF: Content Deformation Fields for Temporally Consistent Video Processing},
       author={Hao Ouyang and Qiuyu Wang and Yuxi Xiao and Qingyan Bai and Juntao Zhang and Kecheng Zheng and Xiaowei Zhou and Qifeng Chen and Yujun Shen},
       journal={arXiv preprint arXiv:2308.07926},
       year={2023}
 
@@ -0,0 +1,91 @@
+import torch
+import torch.nn.functional as F
+from utils.utils import bilinear_sampler, coords_grid
+
+try:
+    import alt_cuda_corr
+except:
+    # alt_cuda_corr is not compiled
+    pass
+
+
+class CorrBlock:
+    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
+        self.num_levels = num_levels
+        self.radius = radius
+        self.corr_pyramid = []
+
+        # all pairs correlation
+        corr = CorrBlock.corr(fmap1, fmap2)
+
+        batch, h1, w1, dim, h2, w2 = corr.shape
+        corr = corr.reshape(batch*h1*w1, dim, h2, w2)
+        
+        self.corr_pyramid.append(corr)
+        for i in range(self.num_levels-1):
+            corr = F.avg_pool2d(corr, 2, stride=2)
+            self.corr_pyramid.append(corr)
+
+    def __call__(self, coords):
+        r = self.radius
+        coords = coords.permute(0, 2, 3, 1)
+        batch, h1, w1, _ = coords.shape
+
+        out_pyramid = []
+        for i in range(self.num_levels):
+            corr = self.corr_pyramid[i]
+            dx = torch.linspace(-r, r, 2*r+1, device=coords.device)
+            dy = torch.linspace(-r, r, 2*r+1, device=coords.device)
+            delta = torch.stack(torch.meshgrid(dy, dx), axis=-1)
+
+            centroid_lvl = coords.reshape(batch*h1*w1, 1, 1, 2) / 2**i
+            delta_lvl = delta.view(1, 2*r+1, 2*r+1, 2)
+            coords_lvl = centroid_lvl + delta_lvl
+
+            corr = bilinear_sampler(corr, coords_lvl)
+            corr = corr.view(batch, h1, w1, -1)
+            out_pyramid.append(corr)
+
+        out = torch.cat(out_pyramid, dim=-1)
+        return out.permute(0, 3, 1, 2).contiguous().float()
+
+    @staticmethod
+    def corr(fmap1, fmap2):
+        batch, dim, ht, wd = fmap1.shape
+        fmap1 = fmap1.view(batch, dim, ht*wd)
+        fmap2 = fmap2.view(batch, dim, ht*wd) 
+        
+        corr = torch.matmul(fmap1.transpose(1,2), fmap2)
+        corr = corr.view(batch, ht, wd, 1, ht, wd)
+        return corr  / torch.sqrt(torch.tensor(dim).float())
+
+
+class AlternateCorrBlock:
+    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
+        self.num_levels = num_levels
+        self.radius = radius
+
+        self.pyramid = [(fmap1, fmap2)]
+        for i in range(self.num_levels):
+            fmap1 = F.avg_pool2d(fmap1, 2, stride=2)
+            fmap2 = F.avg_pool2d(fmap2, 2, stride=2)
+            self.pyramid.append((fmap1, fmap2))
+
+    def __call__(self, coords):
+        coords = coords.permute(0, 2, 3, 1)
+        B, H, W, _ = coords.shape
+        dim = self.pyramid[0][0].shape[1]
+
+        corr_list = []
+        for i in range(self.num_levels):
+            r = self.radius
+            fmap1_i = self.pyramid[0][0].permute(0, 2, 3, 1).contiguous()
+            fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous()
+
+            coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous()
+            corr, = alt_cuda_corr.forward(fmap1_i, fmap2_i, coords_i, r)
+            corr_list.append(corr.squeeze(1))
+
+        corr = torch.stack(corr_list, dim=1)
+        corr = corr.reshape(B, -1, H, W)
+        return corr / torch.sqrt(torch.tensor(dim).float())
@@ -0,0 +1,235 @@
+# Data loading based on https://github.com/NVIDIA/flownet2-pytorch
+
+import numpy as np
+import torch
+import torch.utils.data as data
+import torch.nn.functional as F
+
+import os
+import math
+import random
+from glob import glob
+import os.path as osp
+
+from utils import frame_utils
+from utils.augmentor import FlowAugmentor, SparseFlowAugmentor
+
+
+class FlowDataset(data.Dataset):
+    def __init__(self, aug_params=None, sparse=False):
+        self.augmentor = None
+        self.sparse = sparse
+        if aug_params is not None:
+            if sparse:
+                self.augmentor = SparseFlowAugmentor(**aug_params)
+            else:
+                self.augmentor = FlowAugmentor(**aug_params)
+
+        self.is_test = False
+        self.init_seed = False
+        self.flow_list = []
+        self.image_list = []
+        self.extra_info = []
+
+    def __getitem__(self, index):
+
+        if self.is_test:
+            img1 = frame_utils.read_gen(self.image_list[index][0])
+            img2 = frame_utils.read_gen(self.image_list[index][1])
+            img1 = np.array(img1).astype(np.uint8)[..., :3]
+            img2 = np.array(img2).astype(np.uint8)[..., :3]
+            img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
+            img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
+            return img1, img2, self.extra_info[index]
+
+        if not self.init_seed:
+            worker_info = torch.utils.data.get_worker_info()
+            if worker_info is not None:
+                torch.manual_seed(worker_info.id)
+                np.random.seed(worker_info.id)
+                random.seed(worker_info.id)
+                self.init_seed = True
+
+        index = index % len(self.image_list)
+        valid = None
+        if self.sparse:
+            flow, valid = frame_utils.readFlowKITTI(self.flow_list[index])
+        else:
+            flow = frame_utils.read_gen(self.flow_list[index])
+
+        img1 = frame_utils.read_gen(self.image_list[index][0])
+        img2 = frame_utils.read_gen(self.image_list[index][1])
+
+        flow = np.array(flow).astype(np.float32)
+        img1 = np.array(img1).astype(np.uint8)
+        img2 = np.array(img2).astype(np.uint8)
+
+        # grayscale images
+        if len(img1.shape) == 2:
+            img1 = np.tile(img1[...,None], (1, 1, 3))
+            img2 = np.tile(img2[...,None], (1, 1, 3))
+        else:
+            img1 = img1[..., :3]
+            img2 = img2[..., :3]
+
+        if self.augmentor is not None:
+            if self.sparse:
+                img1, img2, flow, valid = self.augmentor(img1, img2, flow, valid)
+            else:
+                img1, img2, flow = self.augmentor(img1, img2, flow)
+
+        img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
+        img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
+        flow = torch.from_numpy(flow).permute(2, 0, 1).float()
+
+        if valid is not None:
+            valid = torch.from_numpy(valid)
+        else:
+            valid = (flow[0].abs() < 1000) & (flow[1].abs() < 1000)
+
+        return img1, img2, flow, valid.float()
+
+
+    def __rmul__(self, v):
+        self.flow_list = v * self.flow_list
+        self.image_list = v * self.image_list
+        return self
+        
+    def __len__(self):
+        return len(self.image_list)
+        
+
+class MpiSintel(FlowDataset):
+    def __init__(self, aug_params=None, split='training', root='datasets/Sintel', dstype='clean'):
+        super(MpiSintel, self).__init__(aug_params)
+        flow_root = osp.join(root, split, 'flow')
+        image_root = osp.join(root, split, dstype)
+
+        if split == 'test':
+            self.is_test = True
+
+        for scene in os.listdir(image_root):
+            image_list = sorted(glob(osp.join(image_root, scene, '*.png')))
+            for i in range(len(image_list)-1):
+                self.image_list += [ [image_list[i], image_list[i+1]] ]
+                self.extra_info += [ (scene, i) ] # scene and frame_id
+
+            if split != 'test':
+                self.flow_list += sorted(glob(osp.join(flow_root, scene, '*.flo')))
+
+
+class FlyingChairs(FlowDataset):
+    def __init__(self, aug_params=None, split='train', root='datasets/FlyingChairs_release/data'):
+        super(FlyingChairs, self).__init__(aug_params)
+
+        images = sorted(glob(osp.join(root, '*.ppm')))
+        flows = sorted(glob(osp.join(root, '*.flo')))
+        assert (len(images)//2 == len(flows))
+
+        split_list = np.loadtxt('chairs_split.txt', dtype=np.int32)
+        for i in range(len(flows)):
+            xid = split_list[i]
+            if (split=='training' and xid==1) or (split=='validation' and xid==2):
+                self.flow_list += [ flows[i] ]
+                self.image_list += [ [images[2*i], images[2*i+1]] ]
+
+
+class FlyingThings3D(FlowDataset):
+    def __init__(self, aug_params=None, root='datasets/FlyingThings3D', dstype='frames_cleanpass'):
+        super(FlyingThings3D, self).__init__(aug_params)
+
+        for cam in ['left']:
+            for direction in ['into_future', 'into_past']:
+                image_dirs = sorted(glob(osp.join(root, dstype, 'TRAIN/*/*')))
+                image_dirs = sorted([osp.join(f, cam) for f in image_dirs])
+
+                flow_dirs = sorted(glob(osp.join(root, 'optical_flow/TRAIN/*/*')))
+                flow_dirs = sorted([osp.join(f, direction, cam) for f in flow_dirs])
+
+                for idir, fdir in zip(image_dirs, flow_dirs):
+                    images = sorted(glob(osp.join(idir, '*.png')) )
+                    flows = sorted(glob(osp.join(fdir, '*.pfm')) )
+                    for i in range(len(flows)-1):
+                        if direction == 'into_future':
+                            self.image_list += [ [images[i], images[i+1]] ]
+                            self.flow_list += [ flows[i] ]
+                        elif direction == 'into_past':
+                            self.image_list += [ [images[i+1], images[i]] ]
+                            self.flow_list += [ flows[i+1] ]
+      
+
+class KITTI(FlowDataset):
+    def __init__(self, aug_params=None, split='training', root='datasets/KITTI'):
+        super(KITTI, self).__init__(aug_params, sparse=True)
+        if split == 'testing':
+            self.is_test = True
+
+        root = osp.join(root, split)
+        images1 = sorted(glob(osp.join(root, 'image_2/*_10.png')))
+        images2 = sorted(glob(osp.join(root, 'image_2/*_11.png')))
+
+        for img1, img2 in zip(images1, images2):
+            frame_id = img1.split('/')[-1]
+            self.extra_info += [ [frame_id] ]
+            self.image_list += [ [img1, img2] ]
+
+        if split == 'training':
+            self.flow_list = sorted(glob(osp.join(root, 'flow_occ/*_10.png')))
+
+
+class HD1K(FlowDataset):
+    def __init__(self, aug_params=None, root='datasets/HD1k'):
+        super(HD1K, self).__init__(aug_params, sparse=True)
+
+        seq_ix = 0
+        while 1:
+            flows = sorted(glob(os.path.join(root, 'hd1k_flow_gt', 'flow_occ/%06d_*.png' % seq_ix)))
+            images = sorted(glob(os.path.join(root, 'hd1k_input', 'image_2/%06d_*.png' % seq_ix)))
+
+            if len(flows) == 0:
+                break
+
+            for i in range(len(flows)-1):
+                self.flow_list += [flows[i]]
+                self.image_list += [ [images[i], images[i+1]] ]
+
+            seq_ix += 1
+
+
+def fetch_dataloader(args, TRAIN_DS='C+T+K+S+H'):
+    """ Create the data loader for the corresponding trainign set """
+
+    if args.stage == 'chairs':
+        aug_params = {'crop_size': args.image_size, 'min_scale': -0.1, 'max_scale': 1.0, 'do_flip': True}
+        train_dataset = FlyingChairs(aug_params, split='training')
+    
+    elif args.stage == 'things':
+        aug_params = {'crop_size': args.image_size, 'min_scale': -0.4, 'max_scale': 0.8, 'do_flip': True}
+        clean_dataset = FlyingThings3D(aug_params, dstype='frames_cleanpass')
+        final_dataset = FlyingThings3D(aug_params, dstype='frames_finalpass')
+        train_dataset = clean_dataset + final_dataset
+
+    elif args.stage == 'sintel':
+        aug_params = {'crop_size': args.image_size, 'min_scale': -0.2, 'max_scale': 0.6, 'do_flip': True}
+        things = FlyingThings3D(aug_params, dstype='frames_cleanpass')
+        sintel_clean = MpiSintel(aug_params, split='training', dstype='clean')
+        sintel_final = MpiSintel(aug_params, split='training', dstype='final')        
+
+        if TRAIN_DS == 'C+T+K+S+H':
+            kitti = KITTI({'crop_size': args.image_size, 'min_scale': -0.3, 'max_scale': 0.5, 'do_flip': True})
+            hd1k = HD1K({'crop_size': args.image_size, 'min_scale': -0.5, 'max_scale': 0.2, 'do_flip': True})
+            train_dataset = 100*sintel_clean + 100*sintel_final + 200*kitti + 5*hd1k + things
+
+        elif TRAIN_DS == 'C+T+K/S':
+            train_dataset = 100*sintel_clean + 100*sintel_final + things
+
+    elif args.stage == 'kitti':
+        aug_params = {'crop_size': args.image_size, 'min_scale': -0.2, 'max_scale': 0.4, 'do_flip': False}
+        train_dataset = KITTI(aug_params, split='training')
+
+    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, 
+        pin_memory=False, shuffle=True, num_workers=4, drop_last=True)
+
+    print('Training with %d image pairs' % len(train_dataset))
+    return train_loader
+