diff --git a/README.md b/README.md
index 098d2e5..e04282c 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,13 @@ The default hyperparameters may not be optimal. Refer to the below tips for repr
 * One key to leanring a policy that actually works is to adjust the dataset distribution via resampling. Finding the right resampling scheme requires trials and errors. The procedure is dataset-dependent and our implementation is in the ```SeqDataset``` class in ```dataset.py```.
 * Using a small batch size (> 8 samples per GPU) may bring troubles. If you observe the test loss increases while training loss decreases, try using a larger batch size or commenting out ```model.eval()``` before evaluating. If this helps, the problem is the incorrect batch statistics tracked by the norm layers. Solutions: (1) Use a larger batch size. (2) Implement batch norm layers that synchronize statistics across GPUs (```nn.SyncBatchNorm``` might be useful, not tested yet). (3) Use [Group Norm](https://arxiv.org/abs/1803.08494) (```nn.GroupNorm```) for all layers and tune the hyperparam ```num_groups```.
 
+## Examples
+To train the original I-Net, run:
+`python main.py --dataset-path sample_dataset --num-frames 1 --batch-size 128 --modes 3 --num-modes 3 --model inet --gpu 0,1`
+
+To train DECISION, make sure `--num-frames` is greater than 1 so that the ConvLSTM has a history of frames to process, for example:
+`python main.py --dataset-path sample_dataset --num-frames 35 --batch-size 32 --modes 3 --num-modes 3 --model decision --gpu 0,1`
+
 ## Citation
 ```bibtex
 @INPROCEEDINGS{9811598,
diff --git a/dataset.py b/dataset.py
index 4bf57de..2373c67 100644
--- a/dataset.py
+++ b/dataset.py
@@ -354,17 +354,18 @@
     def __iter__(self):
         # self.train_set.init_dataset()  # bugs here? Batch intention messed up. Manual init preferred.
         self.forward, self.left, self.right, self.elevator = self.group_samples()
         if self.shuffle_on:
-            self.shuffle()
-        all_groups = []
+            self.shuffle()
+        batch_lists = []
         for group in [self.forward, self.left, self.right, self.elevator]:  # for each group. easy samples at first when no shuffle
             for value in group.values():
-                all_groups.append(chunk_by_max_len(value, self.batch_size, drop_last=self.drop_last))
-        all = sum(all_groups, [])
+                batch_by_seq_len = chunk_by_max_len(value, self.batch_size, drop_last=self.drop_last)
+                for batch_list in batch_by_seq_len:
+                    batch_lists.append(batch_list)
         if self.shuffle_on:
-            random.shuffle(all)
-        all = sum(all, [])
-        return iter(all)
+            random.shuffle(batch_lists)
+        flattened = [idx for batch_list in batch_lists for idx in batch_list]
+        return iter(flattened)
 
     def __len__(self):
         return self.length
diff --git a/main.py b/main.py
index b7a046c..94eca27 100644
--- a/main.py
+++ b/main.py
@@ -14,7 +14,7 @@
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description=f'Training INet models')
     parser.add_argument('--model', type=str, help='the cuda devices used for training',
-                        choices=['inet, lstm, decision'], default='decision')
+                        choices=['inet', 'lstm', 'decision'], default='decision')
     parser.add_argument('--modes', type=int, help='number of modes', default=4)
     parser.add_argument('--k1', type=int, help='value of k1 for TBPTT', default=2)
     parser.add_argument('--k2-n', type=int, help='the multiplicative factor of k1 to obtain k2 in TBPTT', default=5)
@@ -27,7 +27,7 @@
     parser.add_argument('--frame-interval', help='sample 1 frame every x frames', type=int, default=1)
     parser.add_argument('--dropout', type=int, default=0.7)
     parser.add_argument('--intent-feat', help='whether or not to use intention features', type=bool, default=True)
-    parser.add_argument('--num-modes', type=bool, default=4)
+    parser.add_argument('--num-modes', type=int, default=4)
     parser.add_argument('--exp-log-path', help='path to log experiment data', type=str, default='exp/inet')
     parser.add_argument('--dataset-path', help='path to dataset', type=str, default='sample_dataset')
     parser.add_argument('--downsample-ratio', help='the ratio by which to downsample particular samples in the dataset',
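For context on the `dataset.py` hunk above, here is a minimal, self-contained sketch of the batching scheme the refactored `__iter__` implements: sample indices are collected per group, each group is chunked into batches, the batches are shuffled as whole units, and a flattened index stream is returned. The `chunk_by_max_len` body, the `iter_batched_indices` wrapper, and the toy groups below are stand-ins I wrote for illustration (the stand-in treats the second argument simply as a maximum number of indices per batch, which may differ from the repository's actual helper); only the overall control flow mirrors the diff.

```python
import random

def chunk_by_max_len(indices, max_len, drop_last=False):
    """Simplified stand-in: split a list of sample indices into batches of at most `max_len` indices."""
    batches = [indices[i:i + max_len] for i in range(0, len(indices), max_len)]
    if drop_last and batches and len(batches[-1]) < max_len:
        batches.pop()
    return batches

def iter_batched_indices(groups, batch_size, shuffle_on=True, drop_last=False):
    """Mirror the refactored sampler logic: collect per-group batches, shuffle whole batches, then flatten."""
    batch_lists = []
    for group in groups:                  # e.g. forward / left / right / elevator
        for value in group.values():      # indices of samples that share a key (e.g. sequence length)
            for batch in chunk_by_max_len(value, batch_size, drop_last=drop_last):
                batch_lists.append(batch)
    if shuffle_on:
        random.shuffle(batch_lists)       # shuffle batches as units, not individual samples
    return iter([idx for batch in batch_lists for idx in batch])

# Toy usage with two hypothetical intention groups keyed by sequence length.
forward = {5: [0, 1, 2, 3], 7: [4, 5]}
left = {5: [6, 7, 8]}
print(list(iter_batched_indices([forward, left], batch_size=2)))
```

Shuffling whole batches rather than individual indices keeps each batch homogeneous within its group, which both the old and new versions rely on; the refactor mainly replaces the repeated `sum(list_of_lists, [])` flattening with explicit loops and a comprehension.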