Commit 697286f

Creating testing workflow for github actions

bw4sz committed Aug 8, 2024
1 parent ff3cfad
Showing 29 changed files with 855 additions and 462 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/.readthedocs.yml
@@ -0,0 +1,12 @@
version: 2

build:
  os: ubuntu-22.04
  tools:
    python: "3.7"

python:
  install:
    - requirements: requirements.txt
submodules:
  include: all
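This pins the documentation build to Ubuntu 22.04 with Python 3.7 and installs the package requirements before the docs are built. One caveat worth noting: Read the Docs typically expects this configuration file at the repository root rather than under .github/workflows/, so the placement here is an assumption this commit makes.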
56 changes: 56 additions & 0 deletions .github/workflows/python-package.yml
@@ -0,0 +1,56 @@
name: Python package

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]


jobs:
  ci:
    name: "CI setup"

    strategy:
      fail-fast: false
      matrix:
        python-version:
          - "3.11"

        os:
          - "ubuntu-latest"

    runs-on: "${{ matrix.os }}"

    # use bash everywhere
    defaults:
      run:
        shell: "bash -l {0}"

    steps:
      - name: "Checkout code"
        uses: "actions/checkout@v4"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - name: Run pytest
        run: pytest -v

      - name: Check style
        run: yapf -d --recursive milliontrees/ --style=.style.yapf

      #- name: Check notebook style
      #  run: nbqa yapf --in-place docs/examples/*.ipynb --style=.style.yapf

      # - name: Check notebook build
      #   run: pytest --nbmake docs/*.ipynb

      #- name: Run docformatter
      #  run: docformatter --check --recursive milliontrees/

      #- name: Test Docs
      #  run: |
      #    cd docs && make clean && make html SPHINXOPTS="-W"
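Assuming requirements.txt provides pytest and yapf (the file itself is not shown in this diff), the same checks can be reproduced locally with `pip install -r requirements.txt`, then `pytest -v` and `yapf -d --recursive milliontrees/ --style=.style.yapf`; `-d` makes yapf print a diff and exit non-zero when files deviate from the configured style, which is what lets the step fail the build. Also note that, as written, nothing consumes matrix.python-version (there is no actions/setup-python step), so the job runs on the runner's default Python.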
39 changes: 39 additions & 0 deletions .github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v3
      with:
        python-version: '3.x'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build
    - name: Build package
      run: python -m build
    - name: Publish package
      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
      with:
        user: __token__
        password: ${{ secrets.PYPI_API_TOKEN }}
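To dry-run this job locally before tagging a release (a sketch, assuming the build and twine packages are installed): `python -m build` writes the sdist and wheel into dist/, and `twine check dist/*` validates the metadata that the pinned pypa/gh-action-pypi-publish step will later upload using the PYPI_API_TOKEN repository secret.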
4 changes: 4 additions & 0 deletions .style.yapf
@@ -0,0 +1,4 @@
[style]
# YAPF uses the google style
based_on_style = google
COLUMN_LIMIT = 80
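This is the file the CI step above consumes via --style=.style.yapf: yapf starts from its built-in google preset and caps lines at 80 columns. Running `yapf -d --recursive milliontrees/ --style=.style.yapf` reports violations as a diff (the CI mode), while swapping `-d` for `-i` rewrites the files in place.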
6 changes: 4 additions & 2 deletions README.md
@@ -1,7 +1,9 @@
+[![Github Actions](https://github.com/weecology/MillionTrees/actions/workflows/python-package.yml/badge.svg)](https://github.com/weecology/MillionTrees/actions/workflows/python-package.yml)
+
 [![Documentation Status](https://readthedocs.org/projects/MillionTrees/badge/?version=latest)](http://MillionTrees.readthedocs.io/en/latest/?badge=latest)
+[![Version](https://img.shields.io/pypi/v/MillionTrees.svg)](https://pypi.python.org/pypi/MillionTrees)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/MillionTrees)](https://pypi.python.org/pypi/MillionTrees)
 
-[![PyPI](https://img.shields.io/pypi/v/wilds)](https://pypi.org/project/wilds/)
-[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/p-lambda/wilds/blob/master/LICENSE)
 
 # Overview
 
12 changes: 7 additions & 5 deletions data_prep/ArcGISRestServer.py
@@ -4,19 +4,21 @@
 import sys
 import time
 
 #"usda":"https://gis.apfo.usda.gov/arcgis/rest/services/NAIP/USDA_CONUS_PRIME/ImageServer"
 
 sites = {
     "calgary": "https://gis.calgary.ca/arcgis/rest/services/pub_Orthophotos/CurrentOrthophoto/ImageServer/exportImage?bbox=-7403.92%2C5641545.49%2C-7236.74%2C5641645.36&bboxSR=&size=&imageSR=&time=&format=tif&pixelType=U8&noData=&noDataInterpretation=esriNoDataMatchAny&interpolation=+RSP_BilinearInterpolation&compression=&compressionQuality=&bandIds=&sliceId=&mosaicRule=&renderingRule=&adjustAspectRatio=true&validateExtent=false&lercVersion=1&compressionTolerance=&f=html",
     "charlottesville":"https://gismaps.vdem.virginia.gov/arcgis/rest/services/VBMP_Imagery/MostRecentImagery_WGS_Tile_Index/MapServer",
     "bloomington":"https://imageserver.gisdata.mn.gov/cgi-bin/wms?",
-    "new_york":"https://orthos.its.ny.gov/arcgis/rest/services/wms/Latest/MapServer",
-    "Washington_DC":"https://www.arcgis.com/apps/mapviewer/index.html?panel=gallery&layers=586a475ec2b74defa1ac2ec3221a6aaf", # Not clear if this is the right link
-    "Edmonton":"https://gis.edmonton.ca/site1/rest/services/Imagery_Public/2020_Spring_RGB_Pictometry_Tiled/MapServer", # Also downloadable latest from https://drive.google.com/drive/folders/1c8rapuHkDuec_HzQoK27Bl3EIKovtDut
+    "new_york":"https://orthos.its.ny.gov/arcgis/rest/services/2018_4Band/ImageServer",
+    "Washington_DC":"https://imagery.dcgis.dc.gov/dcgis/rest/services/Ortho/Ortho_2023/ImageServer",
+    "Edmonton":"https://gis.edmonton.ca/site1/rest/services/Imagery_Public/2019_RGB_Pictometry/ImageServer", # Also downloadable latest from https://drive.google.com/drive/folders/1c8rapuHkDuec_HzQoK27Bl3EIKovtDut
     "Pittsurgh":"https://imagery.pasda.psu.edu/arcgis/services/pasda/AlleghenyCountyImagery2017/MapServer/WMSServer?SERVICE=WMS&request=getcapabilities", # 3 inch but leaf off
     "Sioux_Falls":"https://siouxfalls-my.sharepoint.com/personal/cityofsfgis_siouxfalls_gov/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fcityofsfgis%5Fsiouxfalls%5Fgov%2FDocuments%2FOpen%20Data%2FImagery%2F2023%2Fthree%2Dinch", # 3 inch but leaf off
     "Vancouver":"https://opendata.vancouver.ca/explore/dataset/orthophoto-imagery-2015/table/?location=12,49.25627,-123.1388",
     "Minneapolis":"https://gis.hennepin.us/arcgis/rest/services/Imagery/UTM_Aerial_2018/MapServer",
-    "Tempe":"https://data.tempe.gov/maps/cea011e890c847d297678a3176869bf3/explore?location=33.395241%2C-111.926973%2C19.00",
+    "Tempe":"https://data.tempe.gov/maps/cea011e890c847d297678a3176869bf3/explore?location=33.395241%2C-111.926973%2C19.00", # Can't download
+    "Massachusetts":"https://tiles.arcgis.com/tiles/hGdibHYSPO59RG1h/arcgis/rest/services/orthos2021/MapServer",
+    "Hennepin":"https://gis.hennepin.us/arcgis/rest/services/Imagery/UTM_Aerial_2022/MapServer",
 }
 
 # Oregon server https://imagery.oregonexplorer.info/arcgis/rest/services/OSIP_2022
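The entries above mix ArcGIS REST endpoints with a few open-data portal pages. As a hedged sketch of how a script might pull one tile from an ImageServer entry — the endpoint and bbox come from the Calgary URL above, but the parameter values are illustrative and the server's availability is not guaranteed:

import requests

# exportImage endpoint taken from the "calgary" entry above (assumed still live)
url = ("https://gis.calgary.ca/arcgis/rest/services/pub_Orthophotos/"
       "CurrentOrthophoto/ImageServer/exportImage")
params = {
    "bbox": "-7403.92,5641545.49,-7236.74,5641645.36",  # extent from the URL above
    "format": "tif",
    "pixelType": "U8",
    "f": "image",  # return raw image bytes instead of the HTML viewer page
}
response = requests.get(url, params=params, timeout=60)
response.raise_for_status()
with open("calgary_tile.tif", "wb") as out:
    out.write(response.content)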
10 changes: 3 additions & 7 deletions milliontrees/__init__.py
@@ -7,8 +7,7 @@
     'TreePolygons',
 ]
 
-additional_datasets = [
-]
+additional_datasets = []
 
 supported_datasets = benchmark_datasets + additional_datasets
 
@@ -17,8 +16,5 @@
 ]
 
 unlabeled_splits = [
-    'train_unlabeled',
-    'val_unlabeled',
-    'test_unlabeled',
-    'extra_unlabeled'
-]
+    'train_unlabeled', 'val_unlabeled', 'test_unlabeled', 'extra_unlabeled'
+]
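A small usage sketch of these module-level lists (hypothetical consumer code; only the names visible in this diff are assumed):

from milliontrees import benchmark_datasets, supported_datasets, unlabeled_splits

def check_dataset_name(name):
    """Raise a helpful error for anything outside the supported list."""
    if name not in supported_datasets:
        raise ValueError(f"{name} is not supported; choose from {supported_datasets}")
    return name

check_dataset_name("TreePolygons")  # listed in benchmark_datasets above
print(unlabeled_splits)  # ['train_unlabeled', 'val_unlabeled', 'test_unlabeled', 'extra_unlabeled']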
79 changes: 49 additions & 30 deletions milliontrees/common/data_loaders.py
@@ -3,8 +3,15 @@
 from torch.utils.data.sampler import WeightedRandomSampler, SubsetRandomSampler
 from milliontrees.common.utils import get_counts, split_into_groups
 
-def get_train_loader(loader, dataset, batch_size,
-                     uniform_over_groups=None, grouper=None, distinct_groups=True, n_groups_per_batch=None, **loader_kwargs):
+
+def get_train_loader(loader,
+                     dataset,
+                     batch_size,
+                     uniform_over_groups=None,
+                     grouper=None,
+                     distinct_groups=True,
+                     n_groups_per_batch=None,
+                     **loader_kwargs):
     """
     Constructs and returns the data loader for training.
     Args:
@@ -29,24 +36,25 @@ def get_train_loader(loader, dataset, batch_size,
     if uniform_over_groups is None or not uniform_over_groups:
         return DataLoader(
             dataset,
-            shuffle=True, # Shuffle training dataset
+            shuffle=True,  # Shuffle training dataset
             sampler=None,
             collate_fn=dataset.collate,
             batch_size=batch_size,
             **loader_kwargs)
     else:
         assert grouper is not None
         groups, group_counts = grouper.metadata_to_group(
-            dataset.metadata_array,
-            return_counts=True)
+            dataset.metadata_array, return_counts=True)
         group_weights = 1 / group_counts
         weights = group_weights[groups]
 
         # Replacement needs to be set to True, otherwise we'll run out of minority samples
-        sampler = WeightedRandomSampler(weights, len(dataset), replacement=True)
+        sampler = WeightedRandomSampler(weights,
+                                        len(dataset),
+                                        replacement=True)
         return DataLoader(
             dataset,
-            shuffle=False, # The WeightedRandomSampler already shuffles
+            shuffle=False,  # The WeightedRandomSampler already shuffles
             sampler=sampler,
             collate_fn=dataset.collate,
             batch_size=batch_size,
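To see why these inverse-count weights sample groups uniformly, here is a minimal self-contained sketch (toy tensors standing in for the grouper's output):

import torch
from torch.utils.data.sampler import WeightedRandomSampler

groups = torch.tensor([0, 0, 0, 1])      # toy output of grouper.metadata_to_group
group_counts = torch.tensor([3., 1.])    # group 0 has three examples, group 1 has one
group_weights = 1 / group_counts         # tensor([0.3333, 1.0000])
weights = group_weights[groups]          # per-example: [0.33, 0.33, 0.33, 1.0]

# Total weight per group is now equal (3 * 1/3 == 1 * 1), so each draw picks
# either group with probability 1/2; replacement=True keeps the single
# minority example from running out.
sampler = WeightedRandomSampler(weights, num_samples=4, replacement=True)
print(list(sampler))  # e.g. [3, 0, 3, 2]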
@@ -58,23 +66,25 @@ def get_train_loader(loader, dataset, batch_size,
         assert grouper is not None
         assert n_groups_per_batch is not None
         if n_groups_per_batch > grouper.n_groups:
-            raise ValueError(f'n_groups_per_batch was set to {n_groups_per_batch} but there are only {grouper.n_groups} groups specified.')
+            raise ValueError(
+                f'n_groups_per_batch was set to {n_groups_per_batch} but there are only {grouper.n_groups} groups specified.'
+            )
 
         group_ids = grouper.metadata_to_group(dataset.metadata_array)
-        batch_sampler = GroupSampler(
-            group_ids=group_ids,
-            batch_size=batch_size,
-            n_groups_per_batch=n_groups_per_batch,
-            uniform_over_groups=uniform_over_groups,
-            distinct_groups=distinct_groups)
+        batch_sampler = GroupSampler(group_ids=group_ids,
+                                     batch_size=batch_size,
+                                     n_groups_per_batch=n_groups_per_batch,
+                                     uniform_over_groups=uniform_over_groups,
+                                     distinct_groups=distinct_groups)
 
         return DataLoader(dataset,
-                shuffle=None,
-                sampler=None,
-                collate_fn=dataset.collate,
-                batch_sampler=batch_sampler,
-                drop_last=False,
-                **loader_kwargs)
+                          shuffle=None,
+                          sampler=None,
+                          collate_fn=dataset.collate,
+                          batch_sampler=batch_sampler,
+                          drop_last=False,
+                          **loader_kwargs)
 
+
 def get_eval_loader(loader, dataset, batch_size, grouper=None, **loader_kwargs):
     """
@@ -90,28 +100,35 @@ def get_eval_loader(loader, dataset, batch_size, grouper=None, **loader_kwargs):
     if loader == 'standard':
         return DataLoader(
             dataset,
-            shuffle=False, # Do not shuffle eval datasets
+            shuffle=False,  # Do not shuffle eval datasets
             sampler=None,
             collate_fn=dataset.collate,
             batch_size=batch_size,
             **loader_kwargs)
 
+
 class GroupSampler:
     """
     Constructs batches by first sampling groups,
     then sampling data from those groups.
     It drops the last batch if it's incomplete.
     """
 
     def __init__(self, group_ids, batch_size, n_groups_per_batch,
                  uniform_over_groups, distinct_groups):
 
         if batch_size % n_groups_per_batch != 0:
-            raise ValueError(f'batch_size ({batch_size}) must be evenly divisible by n_groups_per_batch ({n_groups_per_batch}).')
+            raise ValueError(
+                f'batch_size ({batch_size}) must be evenly divisible by n_groups_per_batch ({n_groups_per_batch}).'
+            )
         if len(group_ids) < batch_size:
-            raise ValueError(f'The dataset has only {len(group_ids)} examples but the batch size is {batch_size}. There must be enough examples to form at least one complete batch.')
+            raise ValueError(
+                f'The dataset has only {len(group_ids)} examples but the batch size is {batch_size}. There must be enough examples to form at least one complete batch.'
+            )
 
         self.group_ids = group_ids
-        self.unique_groups, self.group_indices, unique_counts = split_into_groups(group_ids)
+        self.unique_groups, self.group_indices, unique_counts = split_into_groups(
+            group_ids)
 
         self.distinct_groups = distinct_groups
         self.n_groups_per_batch = n_groups_per_batch
@@ -120,10 +137,11 @@ def __init__(self, group_ids, batch_size, n_groups_per_batch,
         self.dataset_size = len(group_ids)
         self.num_batches = self.dataset_size // batch_size
 
-        if uniform_over_groups: # Sample uniformly over groups
+        if uniform_over_groups:  # Sample uniformly over groups
             self.group_prob = None
-        else: # Sample a group proportionately to its size
-            self.group_prob = unique_counts.numpy() / unique_counts.numpy().sum()
+        else:  # Sample a group proportionately to its size
+            self.group_prob = unique_counts.numpy() / unique_counts.numpy().sum(
+            )
@@ -137,9 +155,10 @@ def __iter__(self):
                 np.random.choice(
                     self.group_indices[group],
                     size=self.n_points_per_group,
-                    replace=len(self.group_indices[group]) <= self.n_points_per_group, # False if the group is larger than the sample size
-                    p=None)
-                for group in groups_for_batch]
+                    replace=len(self.group_indices[group]) <= self.
+                    n_points_per_group,  # False if the group is larger than the sample size
+                    p=None) for group in groups_for_batch
+            ]
 
             # Flatten
             sampled_ids = np.concatenate(sampled_ids)
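To close the loop, a hedged usage sketch of GroupSampler as a batch_sampler (toy tensors; this assumes GroupSampler behaves as shown above, including pieces of the class elided from this diff):

import torch
from torch.utils.data import DataLoader, TensorDataset
from milliontrees.common.data_loaders import GroupSampler

group_ids = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1])  # eight examples in two groups
dataset = TensorDataset(torch.arange(8))

# batch_size (4) is evenly divisible by n_groups_per_batch (2), as required above
batch_sampler = GroupSampler(group_ids=group_ids,
                             batch_size=4,
                             n_groups_per_batch=2,
                             uniform_over_groups=True,
                             distinct_groups=True)

for batch in DataLoader(dataset, batch_sampler=batch_sampler):
    print(batch)  # dataset_size // batch_size == 2 batches; any remainder is dropped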