
Commit

unify train scripts
felixdittrich92 committed Oct 8, 2024
1 parent 2ba9e80 commit 1b2f311
Showing 14 changed files with 27 additions and 43 deletions.
.github/workflows/references.yml (12 changes: 6 additions & 6 deletions)
@@ -114,16 +114,16 @@ jobs:
unzip toy_recogition_set-036a4d80.zip -d reco_set
- if: matrix.framework == 'tensorflow'
name: Train for a short epoch (TF) (document orientation)
-run: python references/classification/train_tensorflow_orientation.py ./det_set ./det_set resnet18 page -b 2 --epochs 1
+run: python references/classification/train_tensorflow_orientation.py resnet18 --type page --train_path ./det_set --val_path ./det_set -b 2 --epochs 1
- if: matrix.framework == 'pytorch'
name: Train for a short epoch (PT) (document orientation)
-run: python references/classification/train_pytorch_orientation.py ./det_set ./det_set resnet18 page -b 2 --epochs 1
+run: python references/classification/train_pytorch_orientation.py resnet18 --type page --train_path ./det_set --val_path ./det_set -b 2 --epochs 1
- if: matrix.framework == 'tensorflow'
name: Train for a short epoch (TF) (crop orientation)
-run: python references/classification/train_tensorflow_orientation.py ./reco_set ./reco_set resnet18 crop -b 4 --epochs 1
+run: python references/classification/train_tensorflow_orientation.py resnet18 --type crop --train_path ./reco_set --val_path ./reco_set -b 4 --epochs 1
- if: matrix.framework == 'pytorch'
name: Train for a short epoch (PT) (crop orientation)
-run: python references/classification/train_pytorch_orientation.py ./reco_set ./reco_set resnet18 crop -b 4 --epochs 1
+run: python references/classification/train_pytorch_orientation.py resnet18 --type crop --train_path ./reco_set --val_path ./reco_set -b 4 --epochs 1

train-text-recognition:
runs-on: ${{ matrix.os }}
@@ -318,10 +318,10 @@ jobs:
unzip toy_detection_set-bbbb4243.zip -d det_set
- if: matrix.framework == 'tensorflow'
name: Train for a short epoch (TF)
-run: python references/detection/train_tensorflow.py --train_path ./det_set --val_path ./det_set linknet_resnet18 -b 2 --epochs 1
+run: python references/detection/train_tensorflow.py linknet_resnet18 --train_path ./det_set --val_path ./det_set -b 2 --epochs 1
- if: matrix.framework == 'pytorch'
name: Train for a short epoch (PT)
-run: python references/detection/train_pytorch.py ./det_set ./det_set db_mobilenet_v3_large -b 2 --epochs 1
+run: python references/detection/train_pytorch.py db_mobilenet_v3_large --train_path ./det_set --val_path ./det_set -b 2 --epochs 1

evaluate-text-detection:
runs-on: ${{ matrix.os }}
doctr/models/classification/magc_resnet/tensorflow.py (2 changes: 1 addition & 1 deletion)
@@ -115,7 +115,7 @@ def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
# Context modeling: B, H, W, C -> B, 1, 1, C
context = self.context_modeling(inputs)
# Transform: B, 1, 1, C -> B, 1, 1, C
-transformed = self.transform(context)
+transformed = self.transform(context, **kwargs)
return inputs + transformed


doctr/models/detection/linknet/tensorflow.py (4 changes: 2 additions & 2 deletions)
@@ -85,10 +85,10 @@ def __init__(
for in_chan, out_chan, s, in_shape in zip(i_chans, o_chans, strides, in_shapes[::-1])
]

-def call(self, x: List[tf.Tensor]) -> tf.Tensor:
+def call(self, x: List[tf.Tensor], **kwargs: Any) -> tf.Tensor:
out = 0
for decoder, fmap in zip(self.decoders, x[::-1]):
-out = decoder(out + fmap)
+out = decoder(out + fmap, **kwargs)
return out

def extra_repr(self) -> str:
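Both TensorFlow changes above (the MAGC transform and the LinkNet decoder loop) apply the same fix: `call()` now forwards `**kwargs` so that Keras flags such as `training=...` reach the wrapped sublayers instead of stopping at the outer layer. A minimal sketch of the pattern, not the doctr code, with illustrative layer names and shapes:

```python
import tensorflow as tf


class TransformBlock(tf.keras.layers.Layer):
    """Toy layer showing why **kwargs is forwarded to sublayers."""

    def __init__(self) -> None:
        super().__init__()
        self.transform = tf.keras.Sequential([
            tf.keras.layers.Conv2D(8, 1),
            tf.keras.layers.BatchNormalization(),
        ])

    def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
        # Forwarding **kwargs lets training=True/False reach BatchNormalization;
        # without it, the sublayer always runs in its default mode.
        return inputs + self.transform(inputs, **kwargs)


out = TransformBlock()(tf.random.normal((2, 4, 4, 8)), training=False)
```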
doctr/models/factory/hub.py (2 changes: 0 additions & 2 deletions)
@@ -27,8 +27,6 @@

if is_torch_available():
import torch
-elif is_tf_available():
-pass

__all__ = ["login_to_hub", "push_to_hf_hub", "from_hub", "_save_model_and_config_for_hf_hub"]

references/classification/README.md (4 changes: 2 additions & 2 deletions)
@@ -30,13 +30,13 @@ python references/classification/train_pytorch_character.py mobilenet_v3_large -
You can start your training in TensorFlow:

```shell
-python references/classification/train_tensorflow_orientation.py path/to/your/train_set path/to/your/val_set resnet18 page --epochs 5
+python references/classification/train_tensorflow_orientation.py resnet18 --type page --train_path path/to/your/train_set --val_path path/to/your/val_set --epochs 5
```

or PyTorch:

```shell
-python references/classification/train_pytorch_orientation.py path/to/your/train_set path/to/your/val_set resnet18 page --epochs 5
+python references/classification/train_pytorch_orientation.py resnet18 --type page --train_path path/to/your/train_set --val_path path/to/your/val_set --epochs 5
```

The type can be either `page` for document images or `crop` for word crops.
references/classification/train_pytorch_orientation.py (6 changes: 3 additions & 3 deletions)
@@ -375,10 +375,10 @@ def parse_args():
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

parser.add_argument("train_path", type=str, help="path to training data folder")
parser.add_argument("val_path", type=str, help="path to validation data folder")
parser.add_argument("arch", type=str, help="classification model to train")
parser.add_argument("type", type=str, choices=["page", "crop"], help="type of data to train on")
parser.add_argument("--type", type=str, required=True, choices=["page", "crop"], help="type of data to train on")
parser.add_argument("--train_path", type=str, required=True, help="path to training data folder")
parser.add_argument("--val_path", type=str, required=True, help="path to validation data folder")
parser.add_argument("--name", type=str, default=None, help="Name of your training experiment")
parser.add_argument("--epochs", type=int, default=10, help="number of epochs to train the model on")
parser.add_argument("-b", "--batch_size", type=int, default=2, help="batch size for training")
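With this change the classification, detection, and recognition trainers share one CLI shape: the architecture stays positional while the data type and dataset paths become named, required flags, matching the commands used in the workflow and READMEs above. A condensed sketch of the resulting parser, limited to the arguments visible in this diff (the real script defines many more options):

```python
import argparse


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description="Orientation training (sketch of the unified CLI)",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("arch", type=str, help="classification model to train")
    parser.add_argument("--type", type=str, required=True, choices=["page", "crop"], help="type of data to train on")
    parser.add_argument("--train_path", type=str, required=True, help="path to training data folder")
    parser.add_argument("--val_path", type=str, required=True, help="path to validation data folder")
    parser.add_argument("-b", "--batch_size", type=int, default=2, help="batch size for training")
    parser.add_argument("--epochs", type=int, default=10, help="number of epochs to train the model on")
    return parser.parse_args()


if __name__ == "__main__":
    # e.g. python train_pytorch_orientation.py resnet18 --type page --train_path ./det_set --val_path ./det_set
    print(parse_args())
```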
references/classification/train_tensorflow_orientation.py (8 changes: 4 additions & 4 deletions)
@@ -338,7 +338,7 @@ def main(args):

if args.export_onnx:
print("Exporting model to ONNX...")
-if args.arch == "vit_b":
+if args.arch in ["vit_s", "vit_b"]:
# fixed batch size for vit
dummy_input = [tf.TensorSpec([1, *(input_size), 3], tf.float32, name="input")]
else:
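For context, the branch above builds the ONNX export dummy input with a fixed batch dimension for ViT backbones; the non-ViT branch is cut off in this view, so the sketch below assumes it keeps the batch dimension dynamic. Illustrative only, with a placeholder input size:

```python
import tensorflow as tf

input_size = (512, 512)  # placeholder; the script picks the size from the data type
arch = "vit_s"

if arch in ["vit_s", "vit_b"]:
    # fixed batch size for vit
    dummy_input = [tf.TensorSpec([1, *input_size, 3], tf.float32, name="input")]
else:
    # assumption: other architectures export with a dynamic (None) batch dimension
    dummy_input = [tf.TensorSpec([None, *input_size, 3], tf.float32, name="input")]
```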
@@ -356,10 +356,10 @@ def parse_args():
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

parser.add_argument("train_path", type=str, help="path to training data folder")
parser.add_argument("val_path", type=str, help="path to validation data folder")
parser.add_argument("arch", type=str, help="classification model to train")
parser.add_argument("type", type=str, choices=["page", "crop"], help="type of data to train on")
parser.add_argument("--type", type=str, required=True, choices=["page", "crop"], help="type of data to train on")
parser.add_argument("--train_path", type=str, help="path to training data folder")
parser.add_argument("--val_path", type=str, required=True, help="path to validation data folder")
parser.add_argument("--name", type=str, default=None, help="Name of your training experiment")
parser.add_argument("--epochs", type=int, default=10, help="number of epochs to train the model on")
parser.add_argument("-b", "--batch_size", type=int, default=2, help="batch size for training")
references/detection/README.md (4 changes: 2 additions & 2 deletions)
@@ -16,13 +16,13 @@ pip install -r references/requirements.txt
You can start your training in TensorFlow:

```shell
-python references/detection/train_tensorflow.py path/to/your/train_set path/to/your/val_set db_resnet50 --epochs 5
+python references/detection/train_tensorflow.py db_resnet50 --train_path path/to/your/train_set --val_path path/to/your/val_set --epochs 5
```

or PyTorch:

```shell
-python references/detection/train_pytorch.py path/to/your/train_set path/to/your/val_set db_resnet50 --epochs 5 --device 0
+python references/detection/train_pytorch.py db_resnet50 --train_path path/to/your/train_set --val_path path/to/your/val_set --epochs 5
```

## Data format
references/detection/evaluate_tensorflow.py (2 changes: 1 addition & 1 deletion)
@@ -40,7 +40,7 @@ def evaluate(model, val_loader, batch_transforms, val_metric):
for images, targets in tqdm(val_loader):
images = batch_transforms(images)
targets = [{CLASS_NAME: t} for t in targets]
-out = model(images, targets, training=False, return_preds=True)
+out = model(images, target=targets, training=False, return_preds=True)
# Compute metric
loc_preds = out["preds"]
for target, loc_pred in zip(targets, loc_preds):
references/detection/train_pytorch.py (4 changes: 2 additions & 2 deletions)
@@ -427,9 +427,9 @@ def parse_args():
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

parser.add_argument("train_path", type=str, help="path to training data folder")
parser.add_argument("val_path", type=str, help="path to validation data folder")
parser.add_argument("arch", type=str, help="text-detection model to train")
parser.add_argument("--train_path", type=str, required=True, help="path to training data folder")
parser.add_argument("--val_path", type=str, required=True, help="path to validation data folder")
parser.add_argument("--name", type=str, default=None, help="Name of your training experiment")
parser.add_argument("--epochs", type=int, default=10, help="number of epochs to train the model on")
parser.add_argument("-b", "--batch_size", type=int, default=2, help="batch size for training")
references/detection/train_tensorflow.py (10 changes: 2 additions & 8 deletions)
@@ -31,7 +31,7 @@
from doctr.datasets import DataLoader, DetectionDataset
from doctr.models import detection
from doctr.utils.metrics import LocalizationConfusion
-from utils import EarlyStopper, load_backbone, plot_recorder, plot_samples
+from utils import EarlyStopper, plot_recorder, plot_samples


def record_lr(
@@ -195,11 +195,6 @@ def main(args):
if isinstance(args.resume, str):
model.load_weights(args.resume)

-if isinstance(args.pretrained_backbone, str):
-print("Loading backbone weights.")
-model = load_backbone(model, args.pretrained_backbone)
-print("Done.")

# Metrics
val_metric = LocalizationConfusion(use_polygons=args.rotation and not args.eval_straight)

@@ -409,7 +404,7 @@ def parse_args():

parser.add_argument("arch", type=str, help="text-detection model to train")
parser.add_argument("--train_path", type=str, required=True, help="path to training data folder")
parser.add_argument("--val_path", type=str, help="path to validation data folder")
parser.add_argument("--val_path", type=str, required=True, help="path to validation data folder")
parser.add_argument("--name", type=str, default=None, help="Name of your training experiment")
parser.add_argument("--epochs", type=int, default=10, help="number of epochs to train the model on")
parser.add_argument("-b", "--batch_size", type=int, default=2, help="batch size for training")
@@ -419,7 +414,6 @@
parser.add_argument("--input_size", type=int, default=1024, help="model input size, H = W")
parser.add_argument("--lr", type=float, default=0.001, help="learning rate for the optimizer (Adam)")
parser.add_argument("--resume", type=str, default=None, help="Path to your checkpoint")
parser.add_argument("--pretrained-backbone", type=str, default=None, help="Path to your backbone weights")
parser.add_argument("--test-only", dest="test_only", action="store_true", help="Run the validation loop")
parser.add_argument(
"--freeze-backbone", dest="freeze_backbone", action="store_true", help="freeze model backbone for fine-tuning"
references/detection/utils.py (8 changes: 0 additions & 8 deletions)
@@ -3,7 +3,6 @@
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

-import pickle
from typing import Dict, List

import cv2
@@ -86,13 +85,6 @@ def plot_recorder(lr_recorder, loss_recorder, beta: float = 0.95, **kwargs) -> N
plt.show(**kwargs)


-def load_backbone(model, weights_path):
-pretrained_backbone_weights = pickle.load(open(weights_path, "rb"))
-model.feat_extractor.set_weights(pretrained_backbone_weights[0])
-model.fpn.set_weights(pretrained_backbone_weights[1])
-return model


class EarlyStopper:
def __init__(self, patience: int = 5, min_delta: float = 0.01):
self.patience = patience
references/recognition/README.md (2 changes: 1 addition & 1 deletion)
@@ -22,7 +22,7 @@ python references/recognition/train_tensorflow.py crnn_vgg16_bn --train_path pat
or PyTorch:

```shell
-python references/recognition/train_pytorch.py crnn_vgg16_bn --train_path path/to/your/train_set --val_path path/to/your/val_set --epochs 5 --device 0
+python references/recognition/train_pytorch.py crnn_vgg16_bn --train_path path/to/your/train_set --val_path path/to/your/val_set --epochs 5
```

### Multi-GPU support (PyTorch only - Experimental)
references/recognition/evaluate_tensorflow.py (2 changes: 1 addition & 1 deletion)
@@ -38,7 +38,7 @@ def evaluate(model, val_loader, batch_transforms, val_metric):
for images, targets in tqdm(val_iter):
try:
images = batch_transforms(images)
-out = model(images, targets, return_preds=True, training=False)
+out = model(images, target=targets, return_preds=True, training=False)
# Compute metric
if len(out["preds"]):
words, _ = zip(*out["preds"])
