Merge pull request #435 from Living-with-machines/dev_text_spotting

Dev text spotting
maps-as-data · Jul 5, 2024 · add823a · add823a
2 parents 1ee4fdf + 1d76188
commit add823a
Show file tree

Hide file tree

Showing 9 changed files with 605 additions and 640 deletions.
diff --git a/docs/source/User-guide/Load.rst b/docs/source/User-guide/Load.rst
@@ -180,6 +180,22 @@ e.g. to slice your maps into 50 x 50 meter patches:
 This will save your patches as ``.png`` files in a directory called ``patches_50_meters``.
 As above, you can use the ``path_save`` argument to change where these patches are saved.
 
+MapReader also contains an option to create some overlap between your patches.
+This can be useful for text spotting tasks where text may be cut off at the edges of patches.
+
+To add overlap to your patches, use the ``overlap`` argument:
+
+.. code-block:: python
+
+    #EXAMPLE
+    my_files.patchify_all(patch_size=1024, overlap=0.1)
+
+This will create 1024 x 1024 pixel patches with 10% overlap between each patch.
+
+.. note::
+    Greater overlaps will create more patches and result in greater computational costs when running.
+    You should be aware of this when choosing your overlap size.
+
 .. admonition:: Advanced usage
     :class: dropdown
 

diff --git a/docs/source/User-guide/Spot-text.rst b/docs/source/User-guide/Spot-text.rst
@@ -34,8 +34,18 @@ To install, run the following commands in your terminal:
 
     Since both the DPText-DETR and DeepSolo repos are built on top of `AdelaiDet <https://github.com/aim-uofa/AdelaiDet>`__, you won't be able to install both at the same time. To get around this, you can set up two different conda environments, one for each.
 
+.. admonition:: Detectron2 issues for Windows users
+    :class: dropdown
+
+    If you are on a Windows machine and are having trouble installing Detectron2, you can try the following:
+
+    - Install `Visual Studio Build Tools <https://visualstudio.microsoft.com/downloads/?q=build+tools>`__.
+    - Follow the instructions `here <https://stackoverflow.com/questions/64261546/how-to-solve-error-microsoft-visual-c-14-0-or-greater-is-required-when-inst>`__ to install the required packages. (The format might be different in newer versions of Visual Studio Build Tools, so you might need to look up the specific package names.)
+
+    Once this is done, retry installing Detectron2.
+
 You should then pick one of the following to install:
 
+
 DPTextDETR
 ~~~~~~~~~~~
 
@@ -63,6 +73,20 @@ To install, run the following commands in your terminal:
     pip install .
 
 
+Advice for patch size
+---------------------
+
+When running the text spotting models, we recommend using a patch size of 1024x1024 pixels.
+This is the size used in the training of the models, and so should give the best results.
+
+You may also want to create some overlap between your patches, as this should minimise cut-off text at the edges of patches.
+You will need to experiment with the amount of overlap to find the best results for your maps.
+
+.. note::
+    Greater overlaps will create more patches and result in greater computational costs when running.
+
+See the :doc:`Load </User-guide/Load>` user guide for more information on how to create patches.
+
 Set-up the runner
 -----------------
 
@@ -133,11 +157,32 @@ If you'd like to return a dataframe instead, use the ``return_dataframe`` argume
 
     patch_preds_df = my_runner.run_all(return_dataframe=True)
 
+MapReader will automatically run a deduplication algorithm to remove overlapping bounding boxes, based on a minimum intersection over area (IoA) for each overlapping polygon.
+If two polygons overlap with an intersection over area greater than the minimum IoA, the one with the lower IoA will be kept (i.e. the larger of the two polygons).
+
+Below are two examples of this:
+
+.. image:: ../figures/IoA.png
+    :width: 400px
+
+.. image:: ../figures/IoA_0.9.png
+    :width: 400px
+
+By default, the minimum IoA is set to 0.7 so the deduplication algorithm will only remove the smaller polygon in the second example.
+
+You can adjust the minimum IoA by setting the ``min_ioa`` argument:
+
+.. code-block:: python
+
+    patch_preds_df = my_runner.run_all(return_dataframe=True, min_ioa=0.9)
+
+Higher ``min_ioa`` values will mean a tighter threshold for identifying two polygons as duplicates.
+
 If you'd like to run the runner on a single patch, you can also just run on one image:
 
 .. code-block:: python
 
-    patch_preds = my_runner.run_on_image("path/to/your/image.png")
+    patch_preds = my_runner.run_on_image("path/to/your/image.png", min_ioa=0.7)
 
 Again, this will return a dictionary by default but you can use the ``return_dataframe`` argument to return a dataframe instead.
 
@@ -181,6 +226,15 @@ If you'd like to return a dataframe instead, use the ``return_dataframe`` argume
 
     parent_preds_df = my_runner.convert_to_parent_pixel_bounds(return_dataframe=True)
 
+If you have created patches with overlap, then you should deduplicate at the parent level as well.
+You can do this by setting the ``deduplicate`` argument and passing a ``min_ioa`` value:
+
+.. code-block:: python
+
+    parent_preds_df = my_runner.convert_to_parent_pixel_bounds(return_dataframe=True, deduplicate=True, min_ioa=0.7)
+
+This will help resolve any issues with predictions being cut off at the edges of patches, since the overlap should help find the full piece of text.
+
 Again, to view the predictions, you can use the ``show`` method.
 You should pass a parent image ID as the ``image_id`` argument:
 

diff --git a/docs/source/figures/IoA.png b/docs/source/figures/IoA.png
diff --git a/docs/source/figures/IoA_0.9.png b/docs/source/figures/IoA_0.9.png
diff --git a/mapreader/load/images.py b/mapreader/load/images.py
@@ -990,6 +990,7 @@ def patchify_all(
         output_format: str | None = "png",
         rewrite: bool | None = False,
         verbose: bool | None = False,
+        overlap: int = 0,
     ) -> None:
         """
         Patchify all images in the specified ``tree_level`` and (if ``add_to_parents=True``) add the patches to the MapImages instance's ``images`` dictionary.
@@ -1024,6 +1025,8 @@ def patchify_all(
         verbose : bool, optional
             If True, progress updates will be printed throughout, by default
             ``False``.
+        overlap : int, optional
+            Fractional overlap between patches, by default ``0``.
 
         Returns
         -------
@@ -1088,6 +1091,7 @@ def patchify_all(
                     output_format=output_format,
                     rewrite=rewrite,
                     verbose=verbose,
+                    overlap=overlap,
                 )
 
     def _patchify_by_pixel(
@@ -1100,6 +1104,7 @@ def _patchify_by_pixel(
         output_format: str | None = "png",
         rewrite: bool | None = False,
         verbose: bool | None = False,
+        overlap: int | None = 0,
     ):
         """Patchify one image and (if ``add_to_parents=True``) add the patch to the MapImages instance's ``images`` dictionary.
 
@@ -1123,6 +1128,8 @@ def _patchify_by_pixel(
         verbose : bool, optional
             If True, progress updates will be printed throughout, by default
             ``False``.
+        overlap : int, optional
+            Fractional overlap between patches, by default ``0``.
         """
         tree_level = self._get_tree_level(image_id)
 
@@ -1140,15 +1147,14 @@ def _patchify_by_pixel(
 
         height, width = img.height, img.width
 
-        for x in range(0, width, patch_size):
-            for y in range(0, height, patch_size):
+        x = 0
+        while x < width:
+            y = 0
+            while y < height:
                 max_x = min(x + patch_size, width)
                 max_y = min(y + patch_size, height)
 
-                min_x = x
-                min_y = y
-
-                patch_id = f"patch-{min_x}-{min_y}-{max_x}-{max_y}-#{image_id}#.{output_format}"
+                patch_id = f"patch-{x}-{y}-{max_x}-{max_y}-#{image_id}#.{output_format}"
                 patch_path = os.path.join(path_save, patch_id)
                 patch_path = os.path.abspath(patch_path)
 
@@ -1158,7 +1164,7 @@ def _patchify_by_pixel(
                     )
 
                 else:
-                    patch = img.crop((min_x, min_y, max_x, max_y))
+                    patch = img.crop((x, y, max_x, max_y))
                     if max_x == width:
                         patch = ImageOps.pad(
                             patch, (patch_size, patch.height), centering=(0, 0)
@@ -1181,11 +1187,15 @@ def _patchify_by_pixel(
                         image_path=patch_path,
                         parent_path=parent_path,
                         tree_level="patch",
-                        pixel_bounds=(min_x, min_y, max_x, max_y),
+                        pixel_bounds=(x, y, max_x, max_y),
                     )
                     self._add_patch_coords_id(patch_id)
                     self._add_patch_polygons_id(patch_id)
 
+                overlap_pixels = int(patch_size * overlap)
+                y = y + patch_size - overlap_pixels
+            x = x + patch_size - overlap_pixels
+
     def _patchify_by_pixel_square(
         self,
         image_id: str,