diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
new file mode 100644
index 00000000..ae3f6fe7
--- /dev/null
+++ b/.github/workflows/unittests.yml
@@ -0,0 +1,37 @@
+name: Run Unit Tests
+
+# Trigger the action on push or pull request
+on:
+  # TODO: Specify appropriate branches
+  push:
+    branches:
+      - js/implement_unit_tests
+  pull_request:
+    branches:
+      - dev_push
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      # Check out repository
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      # Set up Python
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      # Install dependencies
+      - name: Install dependencies
+        run: |
+          # $CONDA is an environment variable pointing to the root of the miniconda directory
+          $CONDA/bin/conda env update --file tools/code/rdl-tools.yml --name base
+
+      - name: Test with pytest
+        run: |
+          conda install pytest
+          $CONDA/bin/pytest
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..a72eb44c
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+pythonpath = tools/code/
\ No newline at end of file
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/code/__init__.py b/tools/code/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/code/common.py b/tools/code/common.py
index 442c4d6a..cc3e902e 100644
--- a/tools/code/common.py
+++ b/tools/code/common.py
@@ -1,10 +1,9 @@
 # This file includes common configuration elements used by the script.
 
-import os
-# Load env vars from dotenv file
-from dotenv import dotenv_values
+import os
+from dotenv import dotenv_values, find_dotenv
 
-config = dotenv_values(".env")
+config = dotenv_values(find_dotenv())
 DATA_DIR = config["DATA_DIR"]
 OUTPUT_DIR = config["OUTPUT_DIR"]
 CACHE_DIR = config["CACHE_DIR"]
@@ -32,7 +31,7 @@
     'MENA': 'AFRICA', # Middle East and North Africa
     'EAP': 'ASIA', # East Asia and Pacific
     'SAR': 'ASIA', # South Asia
-    'LCR': 'LAC', # Latin America and Caribbean
     'ECA': 'ASIA', # East Europe and Central Asia
+    'LCR': 'LAC', # Latin America and Caribbean
     'Other': 'GLOBAL', # North America, Europe, Japan, Korea, Australia and New Zealand
 }
\ No newline at end of file
diff --git a/tools/code/damageFunctions.py b/tools/code/damageFunctions.py
index 3f83cf40..63340d50 100644
--- a/tools/code/damageFunctions.py
+++ b/tools/code/damageFunctions.py
@@ -36,10 +36,8 @@ def damage_factor_builtup(x: np.array, wb_region: str):
         'LAC': np.maximum(0.0, np.minimum(1.0, 1.04578 + (0.001490579 - 1.04578)/(1 + (x/0.5619431)**1.509554))),
         'GLOBAL': np.maximum(0.0, np.minimum(1.0, 2.100049 + (-0.00003530885 - 2.100049)/(1 + (x/6.632485)**0.559315))),
     }
-    region = wb_to_region.get(wb_region)
-    if region not in function_mapping.keys():
-        return np.maximum(0.0, np.minimum(1.0, 2.100049 + (-0.00003530885 - 2.100049)/(1 + (x/6.632485)**0.559315)))
-    return function_mapping.get(wb_to_region.get(wb_region))
+    region = wb_to_region.get(wb_region, 'GLOBAL')
+    return function_mapping.get(region)
 
 
 # Floods (river and coastal) impact function over Agricultural areas
@@ -60,9 +58,6 @@ def damage_factor_agri(x: np.array, wb_region: str):
         'LAC': np.maximum(0.0, np.minimum(1.0, 1.876076 + (0.01855393 - 1.876076)/(1 + (x/5.08262)**0.7629432))),
         'GLOBAL': np.maximum(0.0, np.minimum(1.0, 1.167022 + (-0.002602531 - 1.167022)/(1 + (x/1.398796)**1.246833))),
     }
-
-    region = wb_to_region.get(wb_region)
-    if region not in function_mapping.keys():
-        return np.maximum(0.0, np.minimum(1.0, 1.167022 + (-0.002602531 - 1.167022)/(1 + (x/1.398796)**1.246833)))
-    return function_mapping.get(wb_to_region.get(wb_region))
+    region = wb_to_region.get(wb_region, 'GLOBAL')
+    return function_mapping.get(region)
 
diff --git a/tools/code/rdl-tools.yml b/tools/code/rdl-tools.yml
index a3c9b946..ac289856 100644
--- a/tools/code/rdl-tools.yml
+++ b/tools/code/rdl-tools.yml
@@ -1,5 +1,5 @@
 # This file may be used to create an environment using:
-# conda create --name rdl-tools --file rdl-tools.yml
+# conda env create --name rdl-tools --file rdl-tools.yml
 
 name: rdl-tools
 channels:
@@ -34,6 +34,7 @@ dependencies:
   - scipy
   - xarray
   - contextily
+  - pytest
   - pip
   - pip:
     - geopy
\ No newline at end of file
diff --git a/tools/code/runAnalysis.py b/tools/code/runAnalysis.py
index 5bd5485c..5e2b6825 100644
--- a/tools/code/runAnalysis.py
+++ b/tools/code/runAnalysis.py
@@ -9,11 +9,10 @@
 import rasterio
 import rioxarray as rxr
 from rasterstats import gen_zonal_stats, zonal_stats
-from shapely.geometry import shape
 
 # Importing internal libraries
 import common
-from input_utils import *
+import input_utils
 from damageFunctions import mortality_factor, damage_factor_builtup, damage_factor_agri
 
 # Importing the libraries for parallel processing
@@ -21,12 +20,14 @@
 from functools import partial
 import multiprocess as mp
 
+DATA_DIR = common.DATA_DIR
+OUTPUT_DIR = common.OUTPUT_DIR
 warnings.filterwarnings("ignore", message="'GeoSeries.swapaxes' is deprecated", category=FutureWarning)
 
 # Defining functions for parallel processing of zonal_stats
 def chunks(iterable_data, n):
-    it_data = it.iter(iterable_data)
-    for chunk in it.iter(lambda: list(it.islice(it_data, n)), []):
+    it_data = iter(iterable_data)
+    for chunk in iter(lambda: list(it.islice(it_data, n)), []):
         yield chunk
 
 def zonal_stats_partial(feats, raster, stats="*", affine=None, nodata=None, all_touched=True):
@@ -65,7 +66,7 @@ def process_exposure_data(country, exp_cat, exp_nam, exp_year, exp_folder, wb_re
         exp_ras = f"{exp_folder}/{country}_POP.tif"
         if not os.path.exists(exp_ras):
             print(f"Population data not found. Fetching data for {country}...")
-            fetch_population_data(country, exp_year)
+            input_utils.fetch_population_data(country, exp_year)
             if not os.path.exists(exp_ras):
                 raise FileNotFoundError(f"Failed to fetch population data for {country}")
         damage_factor = mortality_factor
@@ -74,16 +75,17 @@
         exp_ras = f"{exp_folder}/{country}_BU.tif"
         if not os.path.exists(exp_ras):
             print(f"Built-up data not found. Fetching data for {country}...")
-            fetch_built_up_data(country)
+            input_utils.fetch_built_up_data(country)
             if not os.path.exists(exp_ras):
                 raise FileNotFoundError(f"Failed to fetch built-up data for {country}")
         damage_factor = lambda x, region=wb_region: damage_factor_builtup(x, region)
     elif exp_cat == 'AGR':
         exp_ras = f"{exp_folder}/{country}_AGR.tif"
+
         if not os.path.exists(exp_ras):
             print(f"Agriculture data not found. Fetching data for {country}...")
-            fetch_agri_data(country)
+            input_utils.fetch_agri_data(country)
             if not os.path.exists(exp_ras):
                 raise FileNotFoundError(f"Failed to fetch agricultural data for {country}")
         damage_factor = lambda x, region=wb_region: damage_factor_agri(x, region)
 
@@ -145,7 +147,7 @@ def run_analysis(country: str, haz_cat: str, period: str, scenario: str, valid_R
 
     # Fetch the ADM data
     print(f"Fetching ADM data for {country}, level {adm_level}")
-    adm_data = get_adm_data(country, adm_level)
+    adm_data = input_utils.get_adm_data(country, adm_level)
     if adm_data is None:
         raise ValueError(f"ADM data not available for {country}, level {adm_level}")
 
@@ -204,7 +206,7 @@
                                 'prob_RPs_LB':prob_RPs_LB,
                                 'prob_RPs_UB':prob_RPs_UB,
                                 'prob_RPs_Mean':prob_RPs_Mean})
-    prob_RPs_df.to_csv(os.path.join(common.OUTPUT_DIR, f"{country}_{haz_cat}_prob_RPs.csv"), index=False)
+    prob_RPs_df.to_csv(os.path.join(OUTPUT_DIR, f"{country}_{haz_cat}_prob_RPs.csv"), index=False)
 
     # Computing the results for each RP
     n_valid_RPs_gt_1 = len(valid_RPs) > 1
@@ -297,7 +299,7 @@ def calc_imp_RPs(RPs, haz_folder, analysis_type, country, haz_cat, period, scena
             # Assign impact factor (this is F_i in the equations)
             haz_data = damage_factor(haz_data, wb_region)
             if save_check_raster:
-                haz_data.rio.to_raster(os.path.join(common.OUTPUT_DIR, f"{country}_{haz_cat}_{period}_{scenario}_{rp}_{exp_cat}_haz_imp_factor.tif"))
+                haz_data.rio.to_raster(os.path.join(OUTPUT_DIR, f"{country}_{haz_cat}_{period}_{scenario}_{rp}_{exp_cat}_haz_imp_factor.tif"))
         elif analysis_type == "Classes":
             # Assign bin values to raster data - Follows: x_{i-1} <= x_{i} < x_{i+1}
             bin_idx = np.digitize(haz_data, bin_seq)
@@ -307,7 +309,7 @@
 
         affected_exp = exp_data.where(haz_data.data > 0, np.nan)
         if save_check_raster:
-            affected_exp.rio.to_raster(os.path.join(common.OUTPUT_DIR, f"{country}_{haz_cat}_{period}_{scenario}_{rp}_{exp_cat}_affected.tif"))
+            affected_exp.rio.to_raster(os.path.join(OUTPUT_DIR, f"{country}_{haz_cat}_{period}_{scenario}_{rp}_{exp_cat}_affected.tif"))
 
         # Conduct analyses for classes
         if analysis_type == "Classes":
@@ -333,7 +335,7 @@
             impact_exp = affected_exp.data * haz_data
             # If save intermediate to disk is TRUE, then
            if save_check_raster:
-                impact_exp.rio.to_raster(os.path.join(common.OUTPUT_DIR, f"{country}_{period}_{scenario}_{rp}_{exp_cat}_impact.tif"))
+                impact_exp.rio.to_raster(os.path.join(OUTPUT_DIR, f"{country}_{period}_{scenario}_{rp}_{exp_cat}_impact.tif"))
 
             # Compute the impact per ADM level
             impact_exp_per_ADM = gen_zonal_stats(vectors=adm_data["geometry"], raster=impact_exp.data, stats=["sum"], affine=impact_exp.rio.transform(), nodata=np.nan)
@@ -351,12 +353,17 @@ def result_df_reorder_columns(result_df, RPs, analysis_type, exp_cat, adm_level,
     Reorders the columns of result_df.
""" # Re-ordering and dropping selected columns for better presentation of the results - if analysis_type == "Function": - all_RPs = ["RP" + str(rp) for rp in RPs] - all_exp = [x + f"_{exp_cat}_exp" for x in all_RPs] - all_imp = [x + f"_{exp_cat}_imp" for x in all_RPs] - col_order = all_adm_codes + all_adm_names + [f"ADM{adm_level}_{exp_cat}"] + all_exp + all_imp + ["geometry"] - result_df = result_df.loc[:, col_order] + + if analysis_type != "Function": + return result_df + + adm_column = f"ADM{adm_level}_{exp_cat}" + + all_RPs = ["RP" + str(rp) for rp in RPs] + all_exp = [x + f"_{exp_cat}_exp" for x in all_RPs] + all_imp = [x + f"_{exp_cat}_imp" for x in all_RPs] + col_order = all_adm_codes + all_adm_names + [adm_column] + all_exp + all_imp + ["geometry"] + result_df = result_df.loc[:, col_order] return result_df diff --git a/tools/tests/__init__.py b/tools/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tools/tests/code/__init__.py b/tools/tests/code/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tools/tests/code/test_damageFunctions.py b/tools/tests/code/test_damageFunctions.py new file mode 100644 index 00000000..38aa0298 --- /dev/null +++ b/tools/tests/code/test_damageFunctions.py @@ -0,0 +1,166 @@ +from tools.code.damageFunctions import mortality_factor, damage_factor_builtup, damage_factor_agri +import numpy as np +import pytest + + +def compare_outcomes(x,y): + return np.allclose(x, y, rtol=1e-5) + + +def test_mortality_factor(): + + # Case 1: Typical input values + x = np.array([0, 10, 50, 100, 200]) + expected = np.array([0.00176976, 0.0020376 , 0.00357871, 0.00722308, 0.02898486]) + result = mortality_factor(x) + compare_outcomes(result, expected) + + # TODO + # Case 2: Test edge cases + + # TODO + # Case 3: Test that clipping to 0 and 1 works out + + + # Case 4: Failing with Invalid input types + with pytest.raises(TypeError): + mortality_factor("invalid INPUT!") + + # Case 5: Failing with None passed + with pytest.raises(TypeError): + mortality_factor() + + with pytest.raises(TypeError): + mortality_factor(None) + + +def test_damage_factor_builtup(): + + # We will reuse this np array through the tests + x = np.array([0, 1, 5, 10, 15, 25, 50, 100, 1000]) + test_outcomes = lambda x, y: np.allclose(x, y, rtol=1e-5) + + + ## Case 1: Test with sample values for African region + expected_africa = np.array([ + 0.00440468, 0.00622345, 0.0177687 , 0.03561978, + 0.05527796,0.09712188, 0.20376833, 0.39173692, 1. + ]) + + # Case 1a: Passing 'AFR' + result_1a = damage_factor_builtup(x, 'AFR') + assert test_outcomes(result_1a, expected_africa) + + # Case 1b: Passing 'MENA' + result_1b = damage_factor_builtup(x, 'MENA') + assert test_outcomes(result_1b, expected_africa) + + + ## Case 2: Test with sample values for Asian region + expected_asia = np.array([ + 0.0025538 , 0.01040312, 0.0415447 , 0.07877379, + 0.11401381,0.17890422, 0.31396794, 0.50474888, 1. + ]) + + # Case 2a: Passing 'EAP' + result_2a = damage_factor_builtup(x, 'EAP') + assert test_outcomes(result_2a, expected_asia) + + # Case 2b: Passing 'SAR' + result_2b = damage_factor_builtup(x, 'SAR') + assert test_outcomes(result_2b, expected_asia) + + # Case 2c: Passing 'ECA' + result_2c = damage_factor_builtup(x, 'ECA') + assert test_outcomes(result_2c, expected_asia) + + + # Case 3: Test with sample values for LAC region + expected_lac = np.array([ + 0.00149058, 0.00387057, 0.02788907, 0.07329972, + 0.12665801, 0.23903597, 0.47772589, 0.73745742, 1. 
+    ])
+
+    # Only one LAC so far
+    result_3 = damage_factor_builtup(x, 'LCR')
+    assert test_outcomes(result_3, expected_lac)
+
+
+    # Case 4: Test with sample values for GLOBAL region
+    expected_global = np.array([
+        0., 0.05400394, 0.12809041, 0.18346507,
+        0.22516733, 0.2893803, 0.40031077, 0.54105391, 1.
+    ])
+
+    # Case 4a: Passing 'Other'
+    result_4a = damage_factor_builtup(x, 'Other')
+    assert test_outcomes(result_4a, expected_global)
+
+    # Case 4b: Passing a region that is not in the dict; it should default to GLOBAL
+    result_4b = damage_factor_builtup(x, 'ANY_OTHER_REGION')
+    assert test_outcomes(result_4b, expected_global)
+
+
+def test_damage_factor_agri():
+
+    # We will reuse this np array throughout the tests
+    x = np.array([0, 1, 5, 10, 15, 25, 50, 100, 1000])
+
+    # Case 1: Test AFRICA
+    expected_africa = np.array([
+        0.01417282, 0.01447255, 0.01860892, 0.02827435,
+        0.04179812, 0.07796918, 0.2039562, 0.50278286, 1.
+    ])
+
+    # Case 1a: Passing 'AFR'
+    result_1a = damage_factor_agri(x, 'AFR')
+    assert compare_outcomes(result_1a, expected_africa)
+
+    # Case 1b: Passing 'MENA'
+    result_1b = damage_factor_agri(x, 'MENA')
+    assert compare_outcomes(result_1b, expected_africa)
+
+
+    # Case 2: Test ASIA
+    expected_asia = np.array([
+        0., 0.00426, 0.02108523, 0.04164555,
+        0.06170034, 0.1003661, 0.1893709, 0.34022844, 1.
+    ])
+
+    # Case 2a: Passing 'EAP'
+    result_2a = damage_factor_agri(x, 'EAP')
+    assert compare_outcomes(result_2a, expected_asia)
+
+    # Case 2b: Passing 'SAR'
+    result_2b = damage_factor_agri(x, 'SAR')
+    assert compare_outcomes(result_2b, expected_asia)
+
+    # Case 2c: Passing 'ECA'
+    result_2c = damage_factor_agri(x, 'ECA')
+    assert compare_outcomes(result_2c, expected_asia)
+
+
+    # Case 3: Test with sample values for LAC region
+    expected_lac = np.array([
+        0.01855393, 0.03442536, 0.07164524, 0.10688757,
+        0.13687177, 0.18811284, 0.28907636, 0.43531503, 1.
+    ])
+
+    # Only one LAC so far
+    result_3 = damage_factor_agri(x, 'LCR')
+    assert compare_outcomes(result_3, expected_lac)
+
+
+    # Case 4: Test with sample values for GLOBAL region
+    expected_global = np.array([
+        0., 0., 0.015485, 0.03943019,
+        0.06547371, 0.11976183, 0.25131424, 0.46160653, 1.
+    ])
+
+    # Case 4a: Passing 'Other'
+    result_4a = damage_factor_agri(x, 'Other')
+    assert compare_outcomes(result_4a, expected_global)
+
+    # Case 4b: Passing a region that is not in the dict; it should default to GLOBAL
+    result_4b = damage_factor_agri(x, 'ANY_OTHER_REGION')
+    assert compare_outcomes(result_4b, expected_global)
diff --git a/tools/tests/code/test_input_utils.py b/tools/tests/code/test_input_utils.py
new file mode 100644
index 00000000..99dd86dd
--- /dev/null
+++ b/tools/tests/code/test_input_utils.py
@@ -0,0 +1,35 @@
+import pytest
+from tools.code.input_utils import get_layer_id_for_adm
+from tools.code.common import rest_api_url
+import requests
+
+
+def test_get_layer_id_for_adm():
+
+    # Getting actual data
+    response = requests.get(f"{rest_api_url}/layers", params={'f': 'json'})
+    layers_info = response.json().get('layers', [])
+
+    # Case 1: Test ADM Level 1
+    adm_level_1 = get_layer_id_for_adm(adm_level=1)
+    assert layers_info[adm_level_1]['name'] == 'WB_GAD_ADM1'
+
+    # Case 2: Test ADM Level 2
+    adm_level_2 = get_layer_id_for_adm(adm_level=2)
+    assert layers_info[adm_level_2]['name'] == 'WB_GAD_ADM2'
+
+    # Case 3: Test ADM Level 0
+    adm_level_0 = get_layer_id_for_adm(adm_level=0)
+    assert layers_info[adm_level_0]['name'] == 'WB_GAD_ADM0'
+
+
+    # Case 4: Testing 'incorrect' ADM levels
+    with pytest.raises(ValueError) as exc_info:
+        _ = get_layer_id_for_adm(adm_level='xyz')
+    assert str(exc_info.value) == 'Layer matching WB_GAD_ADMxyz not found.'
+
+    with pytest.raises(ValueError) as exc_info:
+        _ = get_layer_id_for_adm(adm_level=None)
+    assert str(exc_info.value) == 'Layer matching WB_GAD_ADMNone not found.'
+
+# TODO: Test get_adm_data function
\ No newline at end of file
diff --git a/tools/tests/code/test_runAnalysis.py b/tools/tests/code/test_runAnalysis.py
new file mode 100644
index 00000000..85b641e0
--- /dev/null
+++ b/tools/tests/code/test_runAnalysis.py
@@ -0,0 +1,69 @@
+import pytest
+import pandas as pd
+from tools.code.runAnalysis import chunks, result_df_reorder_columns
+
+
+def test_chunks():
+
+    # Test Case 1
+    iterable = list(range(1, 8))
+    result = list(chunks(iterable, 3))
+    expected = [[1, 2, 3], [4, 5, 6], [7]]
+    assert result == expected
+
+    # Test Case 2
+    iterable = list(range(1, 7))
+    result = list(chunks(iterable, 2))
+    expected = [[1, 2], [3, 4], [5, 6]]
+    assert result == expected
+
+
+def test_result_df_reorder_columns():
+
+    df = pd.DataFrame({
+        "geometry": ["geom1", "geom2"],
+        "ADM1_code": [1, 2],
+        "ADM1_name": ["Region1", "Region2"],
+        "ADM1_Category1": ["ADM1a", "ADM1b"],
+        "RP10_Category1_exp": [100, 200],
+        "RP10_Category1_imp": [50, 100],
+        "RP20_Category1_exp": [150, 250],
+        "RP20_Category1_imp": [75, 125],
+    })
+
+    # Test Case 1: Columns are reordered for the 'Function' analysis type
+    expected_df = pd.DataFrame({
+        "ADM1_code": [1, 2],
+        "ADM1_name": ["Region1", "Region2"],
+        "ADM1_Category1": ["ADM1a", "ADM1b"],
+        "RP10_Category1_exp": [100, 200],
+        "RP20_Category1_exp": [150, 250],
+        "RP10_Category1_imp": [50, 100],
+        "RP20_Category1_imp": [75, 125],
+        "geometry": ["geom1", "geom2"]
+    })
+
+    result_df = result_df_reorder_columns(
+        df, RPs=[10, 20], analysis_type='Function', exp_cat='Category1',
+        adm_level=1, all_adm_codes=['ADM1_code'], all_adm_names=['ADM1_name']
+    )
+
+    pd.testing.assert_frame_equal(result_df, expected_df)
+
+
+    # Test Case 2: No reordering because analysis_type is not 'Function'
+    result2_df = result_df_reorder_columns(
+        df, RPs=[10, 20], analysis_type='Classes', exp_cat='Category1',
+        adm_level=1, all_adm_codes=['ADM1_code'], all_adm_names=['ADM1_name']
+    )
+
+    pd.testing.assert_frame_equal(result2_df, df)
+
+    # Test Case 3: Error is thrown because of a missing column
+    df_missing_col = df.drop(columns=['RP20_Category1_exp'])
+    with pytest.raises(KeyError) as exc_inf:
+        _ = result_df_reorder_columns(
+            df_missing_col, RPs=[10, 20], analysis_type='Function', exp_cat='Category1',
+            adm_level=1, all_adm_codes=['ADM1_code'], all_adm_names=['ADM1_name']
+        )
+    assert str(exc_inf.value) == '"[\'RP20_Category1_exp\'] not in index"'
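
A possible follow-up for the clipping TODO (Case 3) in test_mortality_factor is sketched below. It is not part of the patch above: it assumes only that mortality_factor clips its output to the [0, 1] range, mirroring the np.maximum(0.0, np.minimum(1.0, ...)) pattern visible in damage_factor_builtup and damage_factor_agri, and the test name and sample intensities are illustrative.

import numpy as np

from tools.code.damageFunctions import mortality_factor


def test_mortality_factor_clipping():
    # Hypothetical sketch: zero and very large hazard intensities should still
    # yield factors inside [0, 1] if mortality_factor clips its output the way
    # the built-up and agriculture damage functions do.
    x = np.array([0.0, 1e3, 1e6, 1e9])
    result = mortality_factor(x)
    assert np.all(result >= 0.0)
    assert np.all(result <= 1.0)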