Commit 697286f

Creating testing workflow for github actions

bw4sz committed Aug 8, 2024
1 parent ff3cfad
Showing 29 changed files with 855 additions and 462 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/.readthedocs.yml
@@ -0,0 +1,12 @@
version: 2

build:
  os: ubuntu-22.04
  tools:
    python: "3.7"

python:
  install:
    - requirements: requirements.txt
submodules:
  include: all
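This pins the documentation build to Ubuntu 22.04 with Python 3.7 and installs the package requirements before the docs are built. One caveat worth noting: Read the Docs typically expects this configuration file at the repository root rather than under .github/workflows/, so the placement here is an assumption this commit makes.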
56 changes: 56 additions & 0 deletions .github/workflows/python-package.yml
@@ -0,0 +1,56 @@
name: Python package

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]


jobs:
  ci:
    name: "CI setup"

    strategy:
      fail-fast: false
      matrix:
        python-version:
          - "3.11"

        os:
          - "ubuntu-latest"

    runs-on: "${{ matrix.os }}"

    # use bash everywhere
    defaults:
      run:
        shell: "bash -l {0}"

    steps:
      - name: "Checkout code"
        uses: "actions/checkout@v4"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - name: Run pytest
        run: pytest -v

      - name: Check style
        run: yapf -d --recursive milliontrees/ --style=.style.yapf

      #- name: Check notebook style
      #  run: nbqa yapf --in-place docs/examples/*.ipynb --style=.style.yapf

      # - name: Check notebook build
      #   run: pytest --nbmake docs/*.ipynb

      #- name: Run docformatter
      #  run: docformatter --check --recursive milliontrees/

      #- name: Test Docs
      #  run: |
      #    cd docs && make clean && make html SPHINXOPTS="-W"
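Assuming requirements.txt provides pytest and yapf (the file itself is not shown in this diff), the same checks can be reproduced locally with `pip install -r requirements.txt`, then `pytest -v` and `yapf -d --recursive milliontrees/ --style=.style.yapf`; `-d` makes yapf print a diff and exit non-zero when files deviate from the configured style, which is what lets the step fail the build. Also note that, as written, nothing consumes matrix.python-version (there is no actions/setup-python step), so the job runs on the runner's default Python.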
39 changes: 39 additions & 0 deletions .github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v3
      with:
        python-version: '3.x'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build
    - name: Build package
      run: python -m build
    - name: Publish package
      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
      with:
        user: __token__
        password: ${{ secrets.PYPI_API_TOKEN }}
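To dry-run this job locally before tagging a release (a sketch, assuming the build and twine packages are installed): `python -m build` writes the sdist and wheel into dist/, and `twine check dist/*` validates the metadata that the pinned pypa/gh-action-pypi-publish step will later upload using the PYPI_API_TOKEN repository secret.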
4 changes: 4 additions & 0 deletions .style.yapf
@@ -0,0 +1,4 @@
[style]
# YAPF uses the google style
based_on_style = google
COLUMN_LIMIT = 80
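This is the file the CI step above consumes via --style=.style.yapf: yapf starts from its built-in google preset and caps lines at 80 columns. Running `yapf -d --recursive milliontrees/ --style=.style.yapf` reports violations as a diff (the CI mode), while swapping `-d` for `-i` rewrites the files in place.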
6 changes: 4 additions & 2 deletions README.md
@@ -1,7 +1,9 @@
+[![Github Actions](https://github.com/weecology/MillionTrees/actions/workflows/python-package.yml/badge.svg)](https://github.com/weecology/MillionTrees/actions/workflows/python-package.yml)
+
 [![Documentation Status](https://readthedocs.org/projects/MillionTrees/badge/?version=latest)](http://MillionTrees.readthedocs.io/en/latest/?badge=latest)
+[![Version](https://img.shields.io/pypi/v/MillionTrees.svg)](https://pypi.python.org/pypi/MillionTrees)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/MillionTrees)](https://pypi.python.org/pypi/MillionTrees)
 
-[![PyPI](https://img.shields.io/pypi/v/wilds)](https://pypi.org/project/wilds/)
-[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/p-lambda/wilds/blob/master/LICENSE)
 
 # Overview
 
12 changes: 7 additions & 5 deletions data_prep/ArcGISRestServer.py
@@ -4,19 +4,21 @@
 import sys
 import time
 
 #"usda":"https://gis.apfo.usda.gov/arcgis/rest/services/NAIP/USDA_CONUS_PRIME/ImageServer"
 
 sites = {
     "calgary": "https://gis.calgary.ca/arcgis/rest/services/pub_Orthophotos/CurrentOrthophoto/ImageServer/exportImage?bbox=-7403.92%2C5641545.49%2C-7236.74%2C5641645.36&bboxSR=&size=&imageSR=&time=&format=tif&pixelType=U8&noData=&noDataInterpretation=esriNoDataMatchAny&interpolation=+RSP_BilinearInterpolation&compression=&compressionQuality=&bandIds=&sliceId=&mosaicRule=&renderingRule=&adjustAspectRatio=true&validateExtent=false&lercVersion=1&compressionTolerance=&f=html",
     "charlottesville":"https://gismaps.vdem.virginia.gov/arcgis/rest/services/VBMP_Imagery/MostRecentImagery_WGS_Tile_Index/MapServer",
     "bloomington":"https://imageserver.gisdata.mn.gov/cgi-bin/wms?",
-    "new_york":"https://orthos.its.ny.gov/arcgis/rest/services/wms/Latest/MapServer",
-    "Washington_DC":"https://www.arcgis.com/apps/mapviewer/index.html?panel=gallery&layers=586a475ec2b74defa1ac2ec3221a6aaf", # Not clear if this is the right link
-    "Edmonton":"https://gis.edmonton.ca/site1/rest/services/Imagery_Public/2020_Spring_RGB_Pictometry_Tiled/MapServer", # Also downloadable latest from https://drive.google.com/drive/folders/1c8rapuHkDuec_HzQoK27Bl3EIKovtDut
+    "new_york":"https://orthos.its.ny.gov/arcgis/rest/services/2018_4Band/ImageServer",
+    "Washington_DC":"https://imagery.dcgis.dc.gov/dcgis/rest/services/Ortho/Ortho_2023/ImageServer",
+    "Edmonton":"https://gis.edmonton.ca/site1/rest/services/Imagery_Public/2019_RGB_Pictometry/ImageServer", # Also downloadable latest from https://drive.google.com/drive/folders/1c8rapuHkDuec_HzQoK27Bl3EIKovtDut
     "Pittsurgh":"https://imagery.pasda.psu.edu/arcgis/services/pasda/AlleghenyCountyImagery2017/MapServer/WMSServer?SERVICE=WMS&request=getcapabilities", # 3 inch but leaf off
     "Sioux_Falls":"https://siouxfalls-my.sharepoint.com/personal/cityofsfgis_siouxfalls_gov/_layouts/15/onedrive.aspx?ga=1&id=%2Fpersonal%2Fcityofsfgis%5Fsiouxfalls%5Fgov%2FDocuments%2FOpen%20Data%2FImagery%2F2023%2Fthree%2Dinch", # 3 inch but leaf off
     "Vancouver":"https://opendata.vancouver.ca/explore/dataset/orthophoto-imagery-2015/table/?location=12,49.25627,-123.1388",
     "Minneapolis":"https://gis.hennepin.us/arcgis/rest/services/Imagery/UTM_Aerial_2018/MapServer",
-    "Tempe":"https://data.tempe.gov/maps/cea011e890c847d297678a3176869bf3/explore?location=33.395241%2C-111.926973%2C19.00",
+    "Tempe":"https://data.tempe.gov/maps/cea011e890c847d297678a3176869bf3/explore?location=33.395241%2C-111.926973%2C19.00", # Can't download
+    "Massachusetts":"https://tiles.arcgis.com/tiles/hGdibHYSPO59RG1h/arcgis/rest/services/orthos2021/MapServer",
+    "Hennepin":"https://gis.hennepin.us/arcgis/rest/services/Imagery/UTM_Aerial_2022/MapServer",
 }
 
 # Oregon server https://imagery.oregonexplorer.info/arcgis/rest/services/OSIP_2022
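The entries above mix ArcGIS REST endpoints with a few open-data portal pages. As a hedged sketch of how a script might pull one tile from an ImageServer entry — the endpoint and bbox come from the Calgary URL above, but the parameter values are illustrative and the server's availability is not guaranteed:

import requests

# exportImage endpoint taken from the "calgary" entry above (assumed still live)
url = ("https://gis.calgary.ca/arcgis/rest/services/pub_Orthophotos/"
       "CurrentOrthophoto/ImageServer/exportImage")
params = {
    "bbox": "-7403.92,5641545.49,-7236.74,5641645.36",  # extent from the URL above
    "format": "tif",
    "pixelType": "U8",
    "f": "image",  # return raw image bytes instead of the HTML viewer page
}
response = requests.get(url, params=params, timeout=60)
response.raise_for_status()
with open("calgary_tile.tif", "wb") as out:
    out.write(response.content)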
10 changes: 3 additions & 7 deletions milliontrees/__init__.py
@@ -7,8 +7,7 @@
     'TreePolygons',
 ]
 
-additional_datasets = [
-]
+additional_datasets = []
 
 supported_datasets = benchmark_datasets + additional_datasets
 
@@ -17,8 +16,5 @@
 ]
 
 unlabeled_splits = [
-    'train_unlabeled',
-    'val_unlabeled',
-    'test_unlabeled',
-    'extra_unlabeled'
-]
+    'train_unlabeled', 'val_unlabeled', 'test_unlabeled', 'extra_unlabeled'
+]
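A small usage sketch of these module-level lists (hypothetical consumer code; only the names visible in this diff are assumed):

from milliontrees import benchmark_datasets, supported_datasets, unlabeled_splits

def check_dataset_name(name):
    """Raise a helpful error for anything outside the supported list."""
    if name not in supported_datasets:
        raise ValueError(f"{name} is not supported; choose from {supported_datasets}")
    return name

check_dataset_name("TreePolygons")  # listed in benchmark_datasets above
print(unlabeled_splits)  # ['train_unlabeled', 'val_unlabeled', 'test_unlabeled', 'extra_unlabeled']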
79 changes: 49 additions & 30 deletions milliontrees/common/data_loaders.py
@@ -3,8 +3,15 @@
 from torch.utils.data.sampler import WeightedRandomSampler, SubsetRandomSampler
 from milliontrees.common.utils import get_counts, split_into_groups
 
-def get_train_loader(loader, dataset, batch_size,
-                     uniform_over_groups=None, grouper=None, distinct_groups=True, n_groups_per_batch=None, **loader_kwargs):
+
+def get_train_loader(loader,
+                     dataset,
+                     batch_size,
+                     uniform_over_groups=None,
+                     grouper=None,
+                     distinct_groups=True,
+                     n_groups_per_batch=None,
+                     **loader_kwargs):
     """
     Constructs and returns the data loader for training.
     Args:
@@ -29,24 +36,25 @@ def get_train_loader(loader, dataset, batch_size,
     if uniform_over_groups is None or not uniform_over_groups:
         return DataLoader(
             dataset,
-            shuffle=True, # Shuffle training dataset
+            shuffle=True,  # Shuffle training dataset
             sampler=None,
             collate_fn=dataset.collate,
             batch_size=batch_size,
             **loader_kwargs)
     else:
         assert grouper is not None
         groups, group_counts = grouper.metadata_to_group(
-            dataset.metadata_array,
-            return_counts=True)
+            dataset.metadata_array, return_counts=True)
         group_weights = 1 / group_counts
         weights = group_weights[groups]
 
         # Replacement needs to be set to True, otherwise we'll run out of minority samples
-        sampler = WeightedRandomSampler(weights, len(dataset), replacement=True)
+        sampler = WeightedRandomSampler(weights,
+                                        len(dataset),
+                                        replacement=True)
         return DataLoader(
             dataset,
-            shuffle=False, # The WeightedRandomSampler already shuffles
+            shuffle=False,  # The WeightedRandomSampler already shuffles
             sampler=sampler,
             collate_fn=dataset.collate,
             batch_size=batch_size,
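To see why these inverse-count weights sample groups uniformly, here is a minimal self-contained sketch (toy tensors standing in for the grouper's output):

import torch
from torch.utils.data.sampler import WeightedRandomSampler

groups = torch.tensor([0, 0, 0, 1])      # toy output of grouper.metadata_to_group
group_counts = torch.tensor([3., 1.])    # group 0 has three examples, group 1 has one
group_weights = 1 / group_counts         # tensor([0.3333, 1.0000])
weights = group_weights[groups]          # per-example: [0.33, 0.33, 0.33, 1.0]

# Total weight per group is now equal (3 * 1/3 == 1 * 1), so each draw picks
# either group with probability 1/2; replacement=True keeps the single
# minority example from running out.
sampler = WeightedRandomSampler(weights, num_samples=4, replacement=True)
print(list(sampler))  # e.g. [3, 0, 3, 2]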
@@ -58,23 +66,25 @@ def get_train_loader(loader, dataset, batch_size,
         assert grouper is not None
         assert n_groups_per_batch is not None
         if n_groups_per_batch > grouper.n_groups:
-            raise ValueError(f'n_groups_per_batch was set to {n_groups_per_batch} but there are only {grouper.n_groups} groups specified.')
+            raise ValueError(
+                f'n_groups_per_batch was set to {n_groups_per_batch} but there are only {grouper.n_groups} groups specified.'
+            )
 
         group_ids = grouper.metadata_to_group(dataset.metadata_array)
-        batch_sampler = GroupSampler(
-            group_ids=group_ids,
-            batch_size=batch_size,
-            n_groups_per_batch=n_groups_per_batch,
-            uniform_over_groups=uniform_over_groups,
-            distinct_groups=distinct_groups)
+        batch_sampler = GroupSampler(group_ids=group_ids,
+                                     batch_size=batch_size,
+                                     n_groups_per_batch=n_groups_per_batch,
+                                     uniform_over_groups=uniform_over_groups,
+                                     distinct_groups=distinct_groups)
 
         return DataLoader(dataset,
-                shuffle=None,
-                sampler=None,
-                collate_fn=dataset.collate,
-                batch_sampler=batch_sampler,
-                drop_last=False,
-                **loader_kwargs)
+                          shuffle=None,
+                          sampler=None,
+                          collate_fn=dataset.collate,
+                          batch_sampler=batch_sampler,
+                          drop_last=False,
+                          **loader_kwargs)
 
+
 def get_eval_loader(loader, dataset, batch_size, grouper=None, **loader_kwargs):
     """
@@ -90,28 +100,35 @@ def get_eval_loader(loader, dataset, batch_size, grouper=None, **loader_kwargs):
     if loader == 'standard':
         return DataLoader(
             dataset,
-            shuffle=False, # Do not shuffle eval datasets
+            shuffle=False,  # Do not shuffle eval datasets
             sampler=None,
             collate_fn=dataset.collate,
             batch_size=batch_size,
             **loader_kwargs)
 
+
 class GroupSampler:
     """
     Constructs batches by first sampling groups,
     then sampling data from those groups.
     It drops the last batch if it's incomplete.
     """
 
     def __init__(self, group_ids, batch_size, n_groups_per_batch,
                  uniform_over_groups, distinct_groups):
 
         if batch_size % n_groups_per_batch != 0:
-            raise ValueError(f'batch_size ({batch_size}) must be evenly divisible by n_groups_per_batch ({n_groups_per_batch}).')
+            raise ValueError(
+                f'batch_size ({batch_size}) must be evenly divisible by n_groups_per_batch ({n_groups_per_batch}).'
+            )
         if len(group_ids) < batch_size:
-            raise ValueError(f'The dataset has only {len(group_ids)} examples but the batch size is {batch_size}. There must be enough examples to form at least one complete batch.')
+            raise ValueError(
+                f'The dataset has only {len(group_ids)} examples but the batch size is {batch_size}. There must be enough examples to form at least one complete batch.'
+            )
 
         self.group_ids = group_ids
-        self.unique_groups, self.group_indices, unique_counts = split_into_groups(group_ids)
+        self.unique_groups, self.group_indices, unique_counts = split_into_groups(
+            group_ids)
 
         self.distinct_groups = distinct_groups
         self.n_groups_per_batch = n_groups_per_batch
@@ -120,10 +137,11 @@ def __init__(self, group_ids, batch_size, n_groups_per_batch,
         self.dataset_size = len(group_ids)
         self.num_batches = self.dataset_size // batch_size
 
-        if uniform_over_groups: # Sample uniformly over groups
+        if uniform_over_groups:  # Sample uniformly over groups
             self.group_prob = None
-        else: # Sample a group proportionately to its size
-            self.group_prob = unique_counts.numpy() / unique_counts.numpy().sum()
+        else:  # Sample a group proportionately to its size
+            self.group_prob = unique_counts.numpy() / unique_counts.numpy().sum(
+            )
@@ -137,9 +155,10 @@ def __iter__(self):
                 np.random.choice(
                     self.group_indices[group],
                     size=self.n_points_per_group,
-                    replace=len(self.group_indices[group]) <= self.n_points_per_group, # False if the group is larger than the sample size
-                    p=None)
-                for group in groups_for_batch]
+                    replace=len(self.group_indices[group]) <= self.
+                    n_points_per_group,  # False if the group is larger than the sample size
+                    p=None) for group in groups_for_batch
+            ]
 
             # Flatten
             sampled_ids = np.concatenate(sampled_ids)
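To close the loop, a hedged usage sketch of GroupSampler as a batch_sampler (toy tensors; this assumes GroupSampler behaves as shown above, including pieces of the class elided from this diff):

import torch
from torch.utils.data import DataLoader, TensorDataset
from milliontrees.common.data_loaders import GroupSampler

group_ids = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1])  # eight examples in two groups
dataset = TensorDataset(torch.arange(8))

# batch_size (4) is evenly divisible by n_groups_per_batch (2), as required above
batch_sampler = GroupSampler(group_ids=group_ids,
                             batch_size=4,
                             n_groups_per_batch=2,
                             uniform_over_groups=True,
                             distinct_groups=True)

for batch in DataLoader(dataset, batch_sampler=batch_sampler):
    print(batch)  # dataset_size // batch_size == 2 batches; any remainder is dropped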