diff --git a/README.md b/README.md
index 098d2e5..e04282c 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,13 @@ The default hyperparameters may not be optimal. Refer to the below tips for repr
 * One key to leanring a policy that actually works is to adjust the dataset distribution via resampling. Finding the right resampling scheme requires trials and errors. The procedure is dataset-dependent and our implementation is in the ```SeqDataset``` class in ```dataset.py```.
 * Using a small batch size (> 8 samples per GPU) may bring troubles. If you observe the test loss increases while training loss decreases, try using a larger batch size or commenting out ```model.eval()``` before evaluating. If this helps, the problem is the incorrect batch statistics tracked by the norm layers. Solutions: (1) Use a larger batch size. (2) Implement batch norm layers that synchronize statistics across GPUs (```nn.SyncBatchNorm``` might be useful, not tested yet). (3) Use [Group Norm](https://arxiv.org/abs/1803.08494) (```nn.GroupNorm```) for all layers and tune the hyperparam ```num_groups```.
 
+## Examples
+To train the original I-Net, run:
+`python main.py --dataset-path sample_dataset --num-frames 1 --batch-size 128 --modes 3 --num-modes 3 --model inet --gpu 0,1`
+
+To train DECISION, make sure `--num-frames` is greater than 1 so that the ConvLSTM has a history of frames to process, for example:
+`python main.py --dataset-path sample_dataset --num-frames 35 --batch-size 32 --modes 3 --num-modes 3 --model decision --gpu 0,1`
+
 ## Citation
 ```bibtex
 @INPROCEEDINGS{9811598,
diff --git a/dataset.py b/dataset.py
index 4bf57de..2373c67 100644
--- a/dataset.py
+++ b/dataset.py
@@ -354,17 +354,18 @@
     def __iter__(self):
         # self.train_set.init_dataset()  # bugs here? Batch intention messed up. Manual init preferred.
         self.forward, self.left, self.right, self.elevator = self.group_samples()
         if self.shuffle_on:
-            self.shuffle()
-        all_groups = []
+            self.shuffle()
+        batch_lists = []
         for group in [self.forward, self.left, self.right, self.elevator]:  # for each group. easy samples at first when no shuffle
             for value in group.values():
-                all_groups.append(chunk_by_max_len(value, self.batch_size, drop_last=self.drop_last))
-        all = sum(all_groups, [])
+                batch_by_seq_len = chunk_by_max_len(value, self.batch_size, drop_last=self.drop_last)
+                for batch_list in batch_by_seq_len:
+                    batch_lists.append(batch_list)
         if self.shuffle_on:
-            random.shuffle(all)
-        all = sum(all, [])
-        return iter(all)
+            random.shuffle(batch_lists)
+        flattened = [idx for batch_list in batch_lists for idx in batch_list]
+        return iter(flattened)
 
     def __len__(self):
         return self.length
diff --git a/main.py b/main.py
index b7a046c..94eca27 100644
--- a/main.py
+++ b/main.py
@@ -14,7 +14,7 @@
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description=f'Training INet models')
     parser.add_argument('--model', type=str, help='the cuda devices used for training',
-                        choices=['inet, lstm, decision'], default='decision')
+                        choices=['inet', 'lstm', 'decision'], default='decision')
     parser.add_argument('--modes', type=int, help='number of modes', default=4)
     parser.add_argument('--k1', type=int, help='value of k1 for TBPTT', default=2)
     parser.add_argument('--k2-n', type=int, help='the multiplicative factor of k1 to obtain k2 in TBPTT', default=5)
@@ -27,7 +27,7 @@
     parser.add_argument('--frame-interval', help='sample 1 frame every x frames', type=int, default=1)
     parser.add_argument('--dropout', type=int, default=0.7)
     parser.add_argument('--intent-feat', help='whether or not to use intention features', type=bool, default=True)
-    parser.add_argument('--num-modes', type=bool, default=4)
+    parser.add_argument('--num-modes', type=int, default=4)
     parser.add_argument('--exp-log-path', help='path to log experiment data', type=str, default='exp/inet')
     parser.add_argument('--dataset-path', help='path to dataset', type=str, default='sample_dataset')
     parser.add_argument('--downsample-ratio', help='the ratio by which to downsample particular samples in the dataset',
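For context on the `dataset.py` hunk above, here is a minimal, self-contained sketch of the batching scheme the refactored `__iter__` implements: sample indices are collected per group, each group is chunked into batches, the batches are shuffled as whole units, and a flattened index stream is returned. The `chunk_by_max_len` body, the `iter_batched_indices` wrapper, and the toy groups below are stand-ins I wrote for illustration (the stand-in treats the second argument simply as a maximum number of indices per batch, which may differ from the repository's actual helper); only the overall control flow mirrors the diff.

```python
import random

def chunk_by_max_len(indices, max_len, drop_last=False):
    """Simplified stand-in: split a list of sample indices into batches of at most `max_len` indices."""
    batches = [indices[i:i + max_len] for i in range(0, len(indices), max_len)]
    if drop_last and batches and len(batches[-1]) < max_len:
        batches.pop()
    return batches

def iter_batched_indices(groups, batch_size, shuffle_on=True, drop_last=False):
    """Mirror the refactored sampler logic: collect per-group batches, shuffle whole batches, then flatten."""
    batch_lists = []
    for group in groups:                  # e.g. forward / left / right / elevator
        for value in group.values():      # indices of samples that share a key (e.g. sequence length)
            for batch in chunk_by_max_len(value, batch_size, drop_last=drop_last):
                batch_lists.append(batch)
    if shuffle_on:
        random.shuffle(batch_lists)       # shuffle batches as units, not individual samples
    return iter([idx for batch in batch_lists for idx in batch])

# Toy usage with two hypothetical intention groups keyed by sequence length.
forward = {5: [0, 1, 2, 3], 7: [4, 5]}
left = {5: [6, 7, 8]}
print(list(iter_batched_indices([forward, left], batch_size=2)))
```

Shuffling whole batches rather than individual indices keeps each batch homogeneous within its group, which both the old and new versions rely on; the refactor mainly replaces the repeated `sum(list_of_lists, [])` flattening with explicit loops and a comprehension.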