diff --git a/.github/actions/setup_env/action.yml b/.github/actions/setup_env/action.yml index bdb289d082..f9ab9d108d 100644 --- a/.github/actions/setup_env/action.yml +++ b/.github/actions/setup_env/action.yml @@ -60,13 +60,14 @@ runs: cat environment.yml - name: Setup conda environment - uses: mamba-org/provision-with-micromamba@main + uses: mamba-org/setup-micromamba@v1 with: environment-file: environment.yml environment-name: env channels: conda-forge - cache-env: true - cache-env-key: ${{ runner.os }}${{ runner.arch }}-${{ env.WEEK }}-${{ hashFiles('environment.yml') }} + init-shell: bash + cache-environment: true + cache-environment-key: ${{ runner.os }}${{ runner.arch }}-${{ env.WEEK }}-${{ hashFiles('environment.yml') }} - name: List conda environment shell: bash -l {0} diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 8dbd8d12da..c1996c55b7 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -4,4 +4,4 @@ Fixes # ## Checklist - [ ] Updated HISTORY.rst and link to any relevant issue (if these changes are user-facing) - [ ] Updated the user's guide (if needed) -- [ ] Tested the affected models' UIs (if relevant) +- [ ] Tested the Workbench UI (if relevant) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index a09ce209a0..b24eb51d61 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -317,7 +317,7 @@ jobs: run: make userguide - name: Build binaries - run: make CONDA=micromamba binaries + run: make CONDA="$MAMBA_EXE" binaries - name: Run invest-autotest with binaries if : | @@ -344,7 +344,18 @@ jobs: yarn config set network-timeout 600000 -g yarn install - - name: Build Workbench + - name: Authenticate GCP + if: github.event_name != 'pull_request' + uses: google-github-actions/auth@v0 + with: + credentials_json: ${{ secrets.GOOGLE_SERVICE_ACC_KEY }} + + - name: Set up GCP + if: github.event_name != 'pull_request' + uses: google-github-actions/setup-gcloud@v0 + + - name: Build Workbench (PRs) + if: github.event_name == 'pull_request' working-directory: workbench env: GH_TOKEN: env.GITHUB_TOKEN @@ -354,19 +365,35 @@ jobs: yarn run build yarn run dist - - name: Test electron app with puppeteer + - name: Build Workbench (macOS) + if: github.event_name != 'pull_request' && matrix.os == 'macos-latest' # secrets not available in PR working-directory: workbench - run: npx cross-env CI=true yarn run test-electron-app + env: + GH_TOKEN: env.GITHUB_TOKEN + DEBUG: electron-builder + CSC_LINK: 2025-01-16-Expiry-AppStore-App.p12 + CSC_KEY_PASSWORD: ${{ secrets.MACOS_CODESIGN_CERT_PASS }} + run: | + gsutil cp gs://stanford_cert/$CSC_LINK $CSC_LINK + yarn run build + yarn run dist - - name: Authenticate GCP - if: github.event_name != 'pull_request' - uses: google-github-actions/auth@v0 - with: - credentials_json: ${{ secrets.GOOGLE_SERVICE_ACC_KEY }} + - name: Build Workbench (Windows) + if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR + working-directory: workbench + env: + GH_TOKEN: env.GITHUB_TOKEN + DEBUG: electron-builder + CSC_LINK: Stanford-natcap-code-signing-cert-expires-2024-01-26.p12 + CSC_KEY_PASSWORD: ${{ secrets.WINDOWS_CODESIGN_CERT_PASS }} + run: | + gsutil cp gs://stanford_cert/$CSC_LINK $CSC_LINK + yarn run build + yarn run dist - - name: Set up GCP - if: github.event_name != 'pull_request' - uses: google-github-actions/setup-gcloud@v0 + - name: Test electron app with 
puppeteer + working-directory: workbench + run: npx cross-env CI=true yarn run test-electron-app - name: Sign binaries (macOS) if: github.event_name != 'pull_request' && matrix.os == 'macos-latest' # secrets not available in PR diff --git a/HISTORY.rst b/HISTORY.rst index 333b91975e..cc3d8a7391 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -35,12 +35,34 @@ .. :changelog: +3.14.0 (YYYY-MM-DD) +------------------- +* SDR + * We implemented two major functional changes to the InVEST LS Factor + that significantly affect most outputs of SDR and will bring the LS + factor output more in line with the outputs of SAGA-GIS's LS Factor. + A discussion of differences between these two implementations can be + viewed at https://github.com/natcap/invest/tree/main/doc/decision-records/ADR-0001-Update-SDR-LS-Factor.md. + The two specific changes implemented are: + + * The LS Factor's on-pixel aspect length is now calculated as + ``abs(sin(slope)) + abs(cos(slope))``. + * The LS Factor's upstream contributing area is now calculated as + an estimate for the specific catchment area, calculated by + ``sqrt(n_pixels_upstream * pixel_area)``. + Unreleased Changes ------------------ * General * Fixed a bug in the CLI where ``invest getspec --json`` failed on non-json-serializable objects such as ``pint.Unit``. https://github.com/natcap/invest/issues/1280 + * A new directory at `./doc/decision-records` has been created for + "Architecture/Any Decision Records", which will serve as a record of + nontrivial decisions that were made to InVEST and why. This is + intended for reference by our science and software teams, and also by + the community at large when inquiring about a nontrivial change. + https://github.com/natcap/invest/issues/1079 * Updated the package installation instructions in the API docs for clarity and also to highlight the ease of installation through ``conda-forge``. https://github.com/natcap/invest/issues/1256 @@ -48,10 +70,33 @@ Unreleased Changes has been merged into ``utils.read_csv_to_dataframe`` (`#1319 `_), (`#1327 `_) + * Improved the validation message that is returned when not all spatial + inputs overlap (`#502 `_) + * Standardized the name and location of the taskgraph cache directory for + all models. It is now called ``taskgraph_cache`` and located in the top + level of the workspace directory. + (`#1230 `_) * Workbench * Fixed a bug where sampledata downloads failed silently (and progress bar became innacurate) if the Workbench did not have write permission to the download location. https://github.com/natcap/invest/issues/1070 + * The workbench app is now distributed with a valid code signature + (`#727 `_) + * Changing the language setting will now cause the app to relaunch + (`#1168 `_) + * Closing the main window will now close any user's guide windows that are + open. Fixed a bug where the app could not be reopened after closing. + (`#1258 `_) + * Fixed a bug where invalid metadata for a recent run would result + in an uncaught exception. + (`#1286 `_) + * Middle clicking an InVEST model tab was opening a blank window. Now + middle clicking will close that tab as expected. + (`#1261 `_) +* Coastal Blue Carbon + * Added validation for the transition table, raising a validation error if + unexpected values are encountered. + (`#729 `_) * Forest Carbon * The biophysical table is now case-insensitive. * HRA @@ -59,7 +104,17 @@ Unreleased Changes consequence criteria were skipped for a single habitat. The model now correctly handles this case. 
https://github.com/natcap/invest/issues/1250 * Tables in the .xls format are no longer supported. This format was - deprecated by ``pandas``. (`#1271 `_) + deprecated by ``pandas``. + (`#1271 `_) + * Fixed a bug where vector inputs could be rasterized onto a grid that is + not exactly aligned with other raster inputs. + (`#1312 `_) +* NDR + * The contents of the output ``cache_dir`` have been consolidated into + ``intermediate_outputs``. + * Fixed a bug where results were calculated incorrectly if the runoff proxy + raster (or the DEM or LULC) had no nodata value + (`#1005 `_) * Pollination * Several exceptions have been tidied up so that only fieldnames are printed instead of the python data structures representing the whole @@ -85,6 +140,8 @@ Unreleased Changes * Fixed an issue with sediment deposition progress logging that was causing the "percent complete" indicator to not progress linearly. https://github.com/natcap/invest/issues/1262 + * The contents of the output ``churn_dir_not_for_humans`` have been + consolidated into ``intermediate_outputs``. * Seasonal Water Yield * Fixed a bug where monthy quickflow nodata pixels were not being passed on to the total quickflow raster, which could result in negative values @@ -96,18 +153,76 @@ Unreleased Changes set to 0. The old behavior was not well documented and caused some confusion when nodata pixels did not line up. It's safer not to fill in unknown data. (`#1317 `_) + * Negative monthly quickflow values will now be set to 0. This is because + very small negative values occasionally result from valid data, but they + should be interpreted as 0. + (`#1318 `_) + * In the monthly quickflow calculation, QF_im will be set to 0 on any pixel + where s_i / a_im > 100. This is done to avoid overflow errors when + calculating edge cases where the result would round down to 0 anyway. + (`#1318 `_) + * The contents of the output ``cache_dir`` have been consolidated into + ``intermediate_outputs``. * Urban Flood Risk * Fixed a bug where the model incorrectly raised an error if the biophysical table contained a row of all 0s. (`#1123 `_) + * The contents of the output ``temp_working_dir_not_for_humans`` have been + consolidated into ``intermediate_files``. + * Biophysical table Workbench validation now warns if there is a missing + curve number value. + (`#1346 `_) * Urban Nature Access + * Urban nature supply outputs have been renamed to add ``percapita`` to the + filename. + + * In uniform search radius mode, ``urban_nature_supply.tif`` has been + renamed to ``urban_nature_supply_percapita.tif``. + * When defining search radii by urban nature class, + ``urban_nature_supply_lucode_[LUCODE].tif`` has been renamed to + ``urban_nature_supply_percapita_lucode_[LUCODE].tif``. + * When defining search radii by population groups, + ``urban_nature_supply_to_[POP_GROUP].tif`` has been renamed to + ``urban_nature_supply_percapita_to_[POP_GROUP].tif``. + + * A new output for "Accessible Urban Nature" is created, indicating the + area of accessible greenspace available to people within the search + radius, weighted by the selected decay function. The outputs vary + slightly depending on the selected execution mode. + + * In uniform search radius mode, a single new output is created, + ``accessible_urban_nature.tif``. + * When defining search radii by urban nature class, one new + output raster is created for each class of urban nature. These files + are named ``accessible_urban_nature_lucode_[LUCODE].tif``. 
+ * When defining search radii for population groups, one new output + raster is created for each population group. These files are named + ``accessible_urban_nature_to_[POP_GROUP].tif``. + + * Urban nature classes can now be defined to occupy a proportion of a + pixel, such as a park that is semi-developed. This proportion is + provided through user input as a proportion (0-1) in the + ``urban_nature`` column of the LULC Attribute Table. A value of ``0`` + indicates that there is no urban nature in this class, ``0.333`` + indicates that a third of the area of this LULC class is urban nature, + and ``1`` would indicate that the entire LULC class's area is urban + nature. https://github.com/natcap/invest/issues/1180 * Fixed an issue where, under certain circumstances, the model would raise a cryptic ``TypeError`` when creating the summary vector. https://github.com/natcap/invest/issues/1350 * Visitation: Recreation and Tourism * Fixed a bug where overlapping predictor polygons would be double-counted - in ``polygon_area_coverage`` and ``polygon_percent_coverage`` calculations. - (`#1310 `_) + in ``polygon_area_coverage`` and ``polygon_percent_coverage`` + calculations. (`#1310 `_) + * Changed the calculation of ``point_nearest_distance`` metric to match + the description in the User's Guide. Values are now the distance to the + centroid of the AOI polygon instead of the distance to the nearest + edge of the AOI polygon. + (`#1347 `_) +* Wind Energy + * Updated a misleading error message that is raised when the AOI does + not spatially overlap another input. + (`#1054 `_) 3.13.0 (2023-03-17) ------------------- diff --git a/Makefile b/Makefile index 6aa6f39b02..80db38d491 100644 --- a/Makefile +++ b/Makefile @@ -2,11 +2,11 @@ DATA_DIR := data GIT_SAMPLE_DATA_REPO := https://bitbucket.org/natcap/invest-sample-data.git GIT_SAMPLE_DATA_REPO_PATH := $(DATA_DIR)/invest-sample-data -GIT_SAMPLE_DATA_REPO_REV := a58b9c7bdd8a31cab469ea919fe0ebf23a6c668e +GIT_SAMPLE_DATA_REPO_REV := 2e7cd618c661ec3f3b2a3bddfd2ce7d4704abc05 GIT_TEST_DATA_REPO := https://bitbucket.org/natcap/invest-test-data.git GIT_TEST_DATA_REPO_PATH := $(DATA_DIR)/invest-test-data -GIT_TEST_DATA_REPO_REV := a89253d83d5f70a8ea2d8a951b2d47d603505f14 +GIT_TEST_DATA_REPO_REV := e7d32d65612f4f3578a4fb57824af4e297c65283 GIT_UG_REPO := https://github.com/natcap/invest.users-guide GIT_UG_REPO_PATH := doc/users-guide diff --git a/doc/decision-records/ADR-0001-Update-SDR-LS-Factor.md b/doc/decision-records/ADR-0001-Update-SDR-LS-Factor.md new file mode 100644 index 0000000000..6c61fb35db --- /dev/null +++ b/doc/decision-records/ADR-0001-Update-SDR-LS-Factor.md @@ -0,0 +1,94 @@ +# ADR-0001: Update the InVEST SDR LS Factor + +Author: James + +Science Lead: Rafa + +## Context + +Since we released the updated InVEST SDR model in InVEST 3.1.0, we have seen a +common refrain of users and NatCap science staff noticing that the LS factor +output of SDR did not produce realistic results and that the LS factor produced +by SAGA was much more realistic. We have over the years made a couple of notable +changes to the model and to the LS factor that have altered the output including: + +1. The SDR model's underlying routing model was changed from d-infinity to MFD in 3.5.0 +2. The $x$ parameter was changed in InVEST 3.8.1 from the true on-pixel aspect + $|\sin \theta|+|\cos \theta|$ (described in Zevenbergen & Thorne 1987 and repeated + in Desmet & Govers 1996) to the weighted mean of proportional flow from the + current pixel to its neighbors. +3. 
A typo in a constant value in the LS factor was corrected in InVEST 3.9.1
+4. An `l_max` parameter was exposed to the user in InVEST 3.9.1
+
+Despite these changes to the LS factor, we still received occasional reports
+describing unrealistic LS factor outputs from SDR and noting that SAGA's LS
+factor was much more realistic.
+
+After diving into the SAGA source code, we found several important differences
+between the two implementations, despite both using Desmet & Govers (1996)
+for their LS factor equations:
+
+1. The contributing area $A_{i,j-in}$ is not strictly defined in Desmet &
+   Govers (1996); it is only referred to as "the contributing area at the inlet
+   of a grid cell with coordinates (i, j) (m^2)".
+   InVEST assumes that "contributing area" is $area_{pixel} \cdot n\\_upstream\\_pixels$.
+   SAGA refers to this as "specific catchment area" and allows the user to choose their
+   specific catchment area equation, where the available options are
+   "contour length simply as cell size", "contour length dependent on aspect", "square
+   root of catchment area", and "effective flow length".
+2. SAGA uses the on-pixel aspect, $|\sin \theta|+|\cos \theta|$, and does not consider
+   flow direction derived from a routing model when calculating the LS factor.
+3. The length exponent $m$ differs between the implementations. In SAGA,
+   $m = \beta / (1 + \beta)$. In InVEST, we use a discontinuous function where
+   $m$ depends on the slope of the current pixel; this approach is described as
+   "classical USLE" in the user's guide and discussed in Oliveira et al (2013).
+4. SAGA's flow accumulation function [`Get_Flow()`](https://github.com/saga-gis/saga-gis/blob/master/saga-gis/src/tools/terrain_analysis/ta_hydrology/Erosion_LS_Fields.cpp#L394)
+   considers a pixel downstream only if its elevation is strictly less
+   than the current pixel's elevation, which implies that flow accumulation will
+   not navigate plateaus. InVEST's flow accumulation handles plateaus well,
+   which can lead to larger flow accumulation values on the same DEM.
+5. SAGA's flow accumulation function `Get_Flow()` uses D8, while InVEST's flow
+   accumulation uses MFD.
+
+When evaluating differences between the SAGA and InVEST LS Factor
+implementations, it is _critical_ to use a hydrologically conditioned DEM (for
+example, one conditioned with the Wang & Liu method) so that we control for
+differences in output due to the presence of plateaus.
+
+Once we finally understood these discrepancies, James implemented several of the
+contributing area functions available in SAGA to see which might be most comparable
+to the real world. Source code and a docker container for these experiments are
+available at
+https://github.com/phargogh/invest-ls-factor-vs-saga/blob/main/src/natcap/invest/sdr/sdr.py#L901.
+Some additional discussion and notes can be viewed in the related GitHub issue:
+https://github.com/natcap/invest/issues/915.
+
+## Decision
+
+After inspecting the results, Rafa decided that we should make these changes to
+the LS Factor calculation:
+
+1. We will revert to using the on-pixel aspect, $|\sin \theta|+|\cos \theta|$.
+   This is in line with the published literature.
+2. We will convert the "contributing area" portion of the LS Factor to be
+   $\sqrt{ n\\_upstream\\_pixels \cdot area\_{pixel} }$. Rafa's opinion on this
+   is that the LS factor equations were designed for a 1-dimensional situation,
+   so our specific catchment area value should reflect this.
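+
+For clarity, here is a minimal, non-normative sketch of the two terms described
+above. It is illustrative only (it is not the SDR implementation), and the
+function and variable names are hypothetical:
+
+```python
+import numpy
+
+
+def aspect_length(theta_radians):
+    # On-pixel aspect term |sin(theta)| + |cos(theta)| described above.
+    return numpy.abs(numpy.sin(theta_radians)) + numpy.abs(numpy.cos(theta_radians))
+
+
+def specific_catchment_area(n_upstream_pixels, pixel_area):
+    # sqrt(n_upstream_pixels * pixel_area): the 1-dimensional estimate described above.
+    return numpy.sqrt(n_upstream_pixels * pixel_area)
+
+
+# Example: a 30 m pixel with 250 upstream pixels and an on-pixel angle of 12 degrees.
+print(aspect_length(numpy.radians(12.0)))         # ~1.19
+print(specific_catchment_area(250, 30.0 * 30.0))  # ~474.3 (meters)
+```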
+ +## Status + +## Consequences + +Once implemented and released, the LS factor outputs of SDR will be +significantly different, but they should more closely match reality. + +We hope that there will be fewer support requests about this once the change is +released. + +## References + +Zevenbergen & Thorne (1987): https://searchworks.stanford.edu/articles/edb__89861226 + +Desmet & Govers (1996): https://searchworks.stanford.edu/articles/edsgac__edsgac.A18832564 + +Oliveira et al (2013): http://dx.doi.org/10.5772/54439 diff --git a/doc/decision-records/README.md b/doc/decision-records/README.md new file mode 100644 index 0000000000..cfec6ff039 --- /dev/null +++ b/doc/decision-records/README.md @@ -0,0 +1,12 @@ +# Architecture/Any Decision Records + +An ADR is a way to track decisions and their rationale in a way that is tied to +the source code, easy to digest, and written in a way that future us will +understand. An ADR consists of several sections: + +1. The title and ADR number (for easier sorting) +2. Context about the problem +3. The decision that was made and why +4. The status of implementation +5. Consequences of the implementation +6. Any references (especially if describing a science/software issue) diff --git a/src/natcap/invest/annual_water_yield.py b/src/natcap/invest/annual_water_yield.py index 1fa5a9a1ee..8d9bafafae 100644 --- a/src/natcap/invest/annual_water_yield.py +++ b/src/natcap/invest/annual_water_yield.py @@ -87,10 +87,19 @@ } } SUBWATERSHED_OUTPUT_FIELDS = { + "subws_id": { + "type": "integer", + "about": gettext("Unique identifier for each subwatershed.") + }, **BASE_OUTPUT_FIELDS, - **SCARCITY_OUTPUT_FIELDS + **SCARCITY_OUTPUT_FIELDS, + } WATERSHED_OUTPUT_FIELDS = { + "ws_id": { + "type": "integer", + "about": gettext("Unique identifier for each watershed.") + }, **BASE_OUTPUT_FIELDS, **SCARCITY_OUTPUT_FIELDS, **VALUATION_OUTPUT_FIELDS @@ -209,6 +218,7 @@ "units": u.none, "about": gettext("Crop coefficient for this LULC class.")} }, + "index_col": "lucode", "about": gettext( "Table of biophysical parameters for each LULC class. All " "values in the LULC raster must have corresponding entries " @@ -239,6 +249,7 @@ "units": u.meter**3/u.year/u.pixel } }, + "index_col": "lucode", "required": False, "about": gettext( "A table of water demand for each LULC class. Each LULC code " @@ -310,6 +321,7 @@ "the time span.") } }, + "index_col": "ws_id", "required": False, "about": gettext( "A table mapping each watershed to the associated valuation " @@ -328,6 +340,7 @@ }, "watershed_results_wyield.csv": { "columns": {**WATERSHED_OUTPUT_FIELDS}, + "index_col": "ws_id", "about": "Table containing biophysical output values per watershed." }, "subwatershed_results_wyield.shp": { @@ -337,6 +350,7 @@ }, "subwatershed_results_wyield.csv": { "columns": {**SUBWATERSHED_OUTPUT_FIELDS}, + "index_col": "subws_id", "about": "Table containing biophysical output values per subwatershed." }, "per_pixel": { @@ -415,12 +429,12 @@ "veg.tif": { "about": "Map of vegetated state.", "bands": {1: {"type": "integer"}}, - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } } } - } + }, + "taskgraph_dir": spec_utils.TASKGRAPH_DIR } } @@ -509,23 +523,23 @@ def execute(args): if invalid_parameters: raise ValueError(f'Invalid parameters passed: {invalid_parameters}') - # valuation_params is passed to create_vector_output() - # which computes valuation if valuation_params is not None. 
- valuation_params = None + # valuation_df is passed to create_vector_output() + # which computes valuation if valuation_df is not None. + valuation_df = None if 'valuation_table_path' in args and args['valuation_table_path'] != '': LOGGER.info( 'Checking that watersheds have entries for every `ws_id` in the ' 'valuation table.') # Open/read in valuation parameters from CSV file - valuation_params = utils.read_csv_to_dataframe( - args['valuation_table_path'], 'ws_id').to_dict(orient='index') + valuation_df = utils.read_csv_to_dataframe( + args['valuation_table_path'], MODEL_SPEC['args']['valuation_table_path']) watershed_vector = gdal.OpenEx( args['watersheds_path'], gdal.OF_VECTOR) watershed_layer = watershed_vector.GetLayer() missing_ws_ids = [] for watershed_feature in watershed_layer: watershed_ws_id = watershed_feature.GetField('ws_id') - if watershed_ws_id not in valuation_params: + if watershed_ws_id not in valuation_df.index: missing_ws_ids.append(watershed_ws_id) watershed_feature = None watershed_layer = None @@ -587,7 +601,6 @@ def execute(args): seasonality_constant = float(args['seasonality_constant']) # Initialize a TaskGraph - work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -595,7 +608,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # single process mode. - graph = taskgraph.TaskGraph(work_token_dir, n_workers) + graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) base_raster_path_list = [ args['eto_path'], @@ -636,48 +650,43 @@ def execute(args): 'lulc': pygeoprocessing.get_raster_info(clipped_lulc_path)['nodata'][0]} # Open/read in the csv file into a dictionary and add to arguments - bio_dict = utils.read_csv_to_dataframe( - args['biophysical_table_path'], 'lucode').to_dict(orient='index') - bio_lucodes = set(bio_dict.keys()) + bio_df = utils.read_csv_to_dataframe(args['biophysical_table_path'], + MODEL_SPEC['args']['biophysical_table_path']) + bio_lucodes = set(bio_df.index.values) bio_lucodes.add(nodata_dict['lulc']) LOGGER.debug(f'bio_lucodes: {bio_lucodes}') if 'demand_table_path' in args and args['demand_table_path'] != '': - demand_dict = utils.read_csv_to_dataframe( - args['demand_table_path'], 'lucode').to_dict(orient='index') + demand_df = utils.read_csv_to_dataframe( + args['demand_table_path'], MODEL_SPEC['args']['demand_table_path']) demand_reclassify_dict = dict( - [(lucode, demand_dict[lucode]['demand']) - for lucode in demand_dict]) - demand_lucodes = set(demand_dict.keys()) + [(lucode, row['demand']) for lucode, row in demand_df.iterrows()]) + demand_lucodes = set(demand_df.index.values) demand_lucodes.add(nodata_dict['lulc']) LOGGER.debug(f'demand_lucodes: {demand_lucodes}', ) else: demand_lucodes = None - # Break the bio_dict into three separate dictionaries based on + # Break the bio_df into three separate dictionaries based on # Kc, root_depth, and LULC_veg fields to use for reclassifying Kc_dict = {} root_dict = {} vegetated_dict = {} - for lulc_code in bio_dict: - Kc_dict[lulc_code] = bio_dict[lulc_code]['kc'] + for lulc_code, row in bio_df.iterrows(): + Kc_dict[lulc_code] = row['kc'] # Catch invalid LULC_veg values with an informative error. 
- lulc_veg_value = bio_dict[lulc_code]['lulc_veg'] - try: - vegetated_dict[lulc_code] = int(lulc_veg_value) - if vegetated_dict[lulc_code] not in set([0, 1]): - raise ValueError() - except ValueError: + if row['lulc_veg'] not in set([0, 1]): # If the user provided an invalid LULC_veg value, raise an # informative error. raise ValueError( - f'LULC_veg value must be either 1 or 0, not {lulc_veg_value}') + f'LULC_veg value must be either 1 or 0, not {row["lulc_veg"]}') + vegetated_dict[lulc_code] = row['lulc_veg'] # If LULC_veg value is 1 get root depth value if vegetated_dict[lulc_code] == 1: - root_dict[lulc_code] = bio_dict[lulc_code]['root_depth'] + root_dict[lulc_code] = row['root_depth'] # If LULC_veg value is 0 then we do not care about root # depth value so will just substitute in a 1. This # value will not end up being used. @@ -843,7 +852,7 @@ def execute(args): write_output_vector_attributes_task = graph.add_task( func=write_output_vector_attributes, args=(target_ws_path, ws_id_name, zonal_stats_pickle_list, - valuation_params), + valuation_df), target_path_list=[target_ws_path], dependent_task_list=[ *zonal_stats_task_list, copy_watersheds_vector_task], @@ -879,7 +888,7 @@ def copy_vector(base_vector_path, target_vector_path): def write_output_vector_attributes(target_vector_path, ws_id_name, - stats_path_list, valuation_params): + stats_path_list, valuation_df): """Add data attributes to the vector outputs of this model. Join results of zonal stats to copies of the watershed shapefiles. @@ -893,7 +902,7 @@ def write_output_vector_attributes(target_vector_path, ws_id_name, represent watersheds or subwatersheds. stats_path_list (list): List of file paths to pickles storing the zonal stats results. - valuation_params (dict): The dictionary built from + valuation_df (pandas.DataFrame): dataframe built from args['valuation_table_path']. Or None if valuation table was not provided. @@ -929,10 +938,10 @@ def write_output_vector_attributes(target_vector_path, ws_id_name, _add_zonal_stats_dict_to_shape( target_vector_path, ws_stats_dict, key_name, 'mean') - if valuation_params: + if valuation_df is not None: # only do valuation for watersheds, not subwatersheds if ws_id_name == 'ws_id': - compute_watershed_valuation(target_vector_path, valuation_params) + compute_watershed_valuation(target_vector_path, valuation_df) def convert_vector_to_csv(base_vector_path, target_csv_path): @@ -1141,14 +1150,14 @@ def pet_op(eto_pix, Kc_pix, eto_nodata, output_nodata): return result -def compute_watershed_valuation(watershed_results_vector_path, val_dict): +def compute_watershed_valuation(watershed_results_vector_path, val_df): """Compute net present value and energy for the watersheds. Args: watershed_results_vector_path (string): Path to an OGR shapefile for the watershed results. Where the results will be added. - val_dict (dict): a python dictionary that has all the valuation + val_df (pandas.DataFrame): a dataframe that has all the valuation parameters for each watershed. Returns: @@ -1183,26 +1192,23 @@ def compute_watershed_valuation(watershed_results_vector_path, val_dict): # there won't be a rsupply_vl value if the polygon feature only # covers nodata raster values, so check before doing math. 
if rsupply_vl is not None: - # Get the valuation parameters for watershed 'ws_id' - val_row = val_dict[ws_id] - # Compute hydropower energy production (KWH) # This is from the equation given in the Users' Guide energy = ( - val_row['efficiency'] * val_row['fraction'] * - val_row['height'] * rsupply_vl * 0.00272) + val_df['efficiency'][ws_id] * val_df['fraction'][ws_id] * + val_df['height'][ws_id] * rsupply_vl * 0.00272) dsum = 0 # Divide by 100 because it is input at a percent and we need # decimal value - disc = val_row['discount'] / 100 + disc = val_df['discount'][ws_id] / 100 # To calculate the summation of the discount rate term over the life # span of the dam we can use a geometric series ratio = 1 / (1 + disc) if ratio != 1: - dsum = (1 - math.pow(ratio, val_row['time_span'])) / (1 - ratio) + dsum = (1 - math.pow(ratio, val_df['time_span'][ws_id])) / (1 - ratio) - npv = ((val_row['kw_price'] * energy) - val_row['cost']) * dsum + npv = ((val_df['kw_price'][ws_id] * energy) - val_df['cost'][ws_id]) * dsum # Get the volume field index and add value ws_feat.SetField(energy_field, energy) diff --git a/src/natcap/invest/carbon.py b/src/natcap/invest/carbon.py index b36a7b519c..624644c0b4 100644 --- a/src/natcap/invest/carbon.py +++ b/src/natcap/invest/carbon.py @@ -130,6 +130,7 @@ "units": u.metric_ton/u.hectare, "about": gettext("Carbon density of dead matter.")} }, + "index_col": "lucode", "about": gettext( "A table that maps each LULC code to carbon pool data for " "that LULC type."), @@ -254,10 +255,10 @@ "intermediate": { "type": "directory", "contents": { - **CARBON_OUTPUTS, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + **CARBON_OUTPUTS } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -366,11 +367,9 @@ def execute(args): (_INTERMEDIATE_BASE_FILES, intermediate_output_dir), (_TMP_BASE_FILES, output_dir)], file_suffix) - carbon_pool_table = utils.read_csv_to_dataframe( - args['carbon_pools_path'], 'lucode').to_dict(orient='index') + carbon_pool_df = utils.read_csv_to_dataframe( + args['carbon_pools_path'], MODEL_SPEC['args']['carbon_pools_path']) - work_token_dir = os.path.join( - intermediate_output_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -378,7 +377,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # Synchronous mode. 
- graph = taskgraph.TaskGraph(work_token_dir, n_workers) + graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) cell_size_set = set() raster_size_set = set() @@ -413,9 +413,7 @@ def execute(args): carbon_map_task_lookup[scenario_type] = [] storage_path_list = [] for pool_type in ['c_above', 'c_below', 'c_soil', 'c_dead']: - carbon_pool_by_type = dict([ - (lucode, float(carbon_pool_table[lucode][pool_type])) - for lucode in carbon_pool_table]) + carbon_pool_by_type = carbon_pool_df[pool_type].to_dict() lulc_key = 'lulc_%s_path' % scenario_type storage_key = '%s_%s' % (pool_type, scenario_type) diff --git a/src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py b/src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py index c8c1515d66..9ce6ef5536 100644 --- a/src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py +++ b/src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py @@ -97,6 +97,7 @@ import shutil import numpy +import pandas import pygeoprocessing import scipy.sparse import taskgraph @@ -117,6 +118,9 @@ "({latest_year})") INVALID_SNAPSHOT_RASTER_MSG = gettext( "Raster for snapshot {snapshot_year} could not be validated.") +INVALID_TRANSITION_VALUES_MSG = gettext( + "The transition table expects values of {model_transitions} but found " + "values of {transition_values}.") POOL_SOIL = 'soil' POOL_BIOMASS = 'biomass' @@ -154,7 +158,6 @@ CARBON_STOCK_AT_YEAR_RASTER_PATTERN = 'carbon-stock-at-{year}{suffix}.tif' INTERMEDIATE_DIR_NAME = 'intermediate' -TASKGRAPH_CACHE_DIR_NAME = 'task_cache' OUTPUT_DIR_NAME = 'output' MODEL_SPEC = { @@ -167,10 +170,10 @@ "n_workers": spec_utils.N_WORKERS, "landcover_snapshot_csv": { "type": "csv", + "index_col": "snapshot_year", "columns": { "snapshot_year": { - "type": "number", - "units": u.year_AD, + "type": "integer", "about": gettext( "The snapshot year that this row's LULC raster " "represents. Each year in this table must be unique.") @@ -204,6 +207,7 @@ "biophysical_table_path": { "name": gettext("biophysical table"), "type": "csv", + "index_col": "code", "columns": { "code": { "type": "integer", @@ -300,11 +304,12 @@ "landcover_transitions_table": { "name": gettext("landcover transitions table"), "type": "csv", + "index_col": "lulc-class", "columns": { "lulc-class": { - "type": "integer", + "type": "freestyle_string", "about": gettext( - "LULC codes matching the codes in the biophysical " + "LULC class names matching those in the biophysical " "table.")}, "[LULC CODE]": { "type": "option_string", @@ -382,6 +387,7 @@ "name": gettext("price table"), "type": "csv", "required": "use_price_table", + "index_col": "year", "columns": { "year": { "type": "number", @@ -517,7 +523,7 @@ } } }, - "task_cache": spec_utils.TASKGRAPH_DIR + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -564,7 +570,10 @@ def execute(args): task_graph, n_workers, intermediate_dir, output_dir, suffix = ( _set_up_workspace(args)) - snapshots = _extract_snapshots_from_table(args['landcover_snapshot_csv']) + snapshots = utils.read_csv_to_dataframe( + args['landcover_snapshot_csv'], + MODEL_SPEC['args']['landcover_snapshot_csv'] + )['raster_path'].to_dict() # Phase 1: alignment and preparation of inputs baseline_lulc_year = min(snapshots.keys()) @@ -584,15 +593,14 @@ def execute(args): # We're assuming that the LULC initial variables and the carbon pool # transient table are combined into a single lookup table. 
- biophysical_parameters = utils.read_csv_to_dataframe( - args['biophysical_table_path'], 'code').to_dict(orient='index') + biophysical_df = utils.read_csv_to_dataframe( + args['biophysical_table_path'], + MODEL_SPEC['args']['biophysical_table_path']) # LULC Classnames are critical to the transition mapping, so they must be # unique. This check is here in ``execute`` because it's possible that # someone might have a LOT of classes in their biophysical table. - unique_lulc_classnames = set( - params['lulc-class'] for params in biophysical_parameters.values()) - if len(unique_lulc_classnames) != len(biophysical_parameters): + if not biophysical_df['lulc-class'].is_unique: raise ValueError( "All values in `lulc-class` column must be unique, but " "duplicates were found.") @@ -630,7 +638,7 @@ def execute(args): task_name='Align input landcover rasters.') (disturbance_matrices, accumulation_matrices) = _read_transition_matrix( - args['landcover_transitions_table'], biophysical_parameters) + args['landcover_transitions_table'], biophysical_df) # Baseline stocks are simply reclassified. # Baseline accumulation are simply reclassified @@ -664,8 +672,7 @@ def execute(args): func=pygeoprocessing.reclassify_raster, args=( (aligned_lulc_paths[baseline_lulc_year], 1), - {lucode: values[f'{pool}-initial'] for (lucode, values) - in biophysical_parameters.items()}, + biophysical_df[f'{pool}-initial'].to_dict(), stock_rasters[baseline_lulc_year][pool], gdal.GDT_Float32, NODATA_FLOAT32_MIN), @@ -682,9 +689,7 @@ def execute(args): func=pygeoprocessing.reclassify_raster, args=( (aligned_lulc_paths[baseline_lulc_year], 1), - {lucode: values[f'{pool}-yearly-accumulation'] - for (lucode, values) - in biophysical_parameters.items()}, + biophysical_df[f'{pool}-yearly-accumulation'].to_dict(), yearly_accum_rasters[baseline_lulc_year][pool], gdal.GDT_Float32, NODATA_FLOAT32_MIN), @@ -805,9 +810,7 @@ def execute(args): func=pygeoprocessing.reclassify_raster, args=( (aligned_lulc_paths[prior_transition_year], 1), - {lucode: values[f'{pool}-half-life'] - for (lucode, values) - in biophysical_parameters.items()}, + biophysical_df[f'{pool}-half-life'].to_dict(), halflife_rasters[current_transition_year][pool], gdal.GDT_Float32, NODATA_FLOAT32_MIN), @@ -868,9 +871,7 @@ def execute(args): yearly_accum_tasks[current_transition_year][POOL_LITTER] = task_graph.add_task( func=pygeoprocessing.reclassify_raster, args=((aligned_lulc_paths[current_transition_year], 1), - {lucode: values[f'{POOL_LITTER}-yearly-accumulation'] - for (lucode, values) in - biophysical_parameters.items()}, + biophysical_df[f'{POOL_LITTER}-yearly-accumulation'].to_dict(), yearly_accum_rasters[current_transition_year][POOL_LITTER], gdal.GDT_Float32, NODATA_FLOAT32_MIN), @@ -962,11 +963,10 @@ def execute(args): prices = None if args.get('do_economic_analysis', False): # Do if truthy if args.get('use_price_table', False): - prices = { - year: values['price'] for (year, values) in - utils.read_csv_to_dataframe( - args['price_table_path'], 'year' - ).to_dict(orient='index').items()} + prices = utils.read_csv_to_dataframe( + args['price_table_path'], + MODEL_SPEC['args']['price_table_path'] + )['price'].to_dict() else: inflation_rate = float(args['inflation_rate']) * 0.01 annual_price = float(args['price']) @@ -1068,10 +1068,9 @@ def _set_up_workspace(args): # TypeError when n_workers is None. n_workers = -1 # Synchronous mode. 
- taskgraph_cache_dir = os.path.join( - args['workspace_dir'], TASKGRAPH_CACHE_DIR_NAME) task_graph = taskgraph.TaskGraph( - taskgraph_cache_dir, n_workers, reporting_interval=5.0) + os.path.join(args['workspace_dir'], 'taskgraph_cache'), + n_workers, reporting_interval=5.0) suffix = utils.make_suffix_string(args, 'results_suffix') intermediate_dir = os.path.join( @@ -1079,7 +1078,7 @@ def _set_up_workspace(args): output_dir = os.path.join( args['workspace_dir'], OUTPUT_DIR_NAME) - utils.make_directories([output_dir, intermediate_dir, taskgraph_cache_dir]) + utils.make_directories([output_dir, intermediate_dir]) return task_graph, n_workers, intermediate_dir, output_dir, suffix @@ -1957,7 +1956,7 @@ def _sum_n_rasters( target_raster = None -def _read_transition_matrix(transition_csv_path, biophysical_dict): +def _read_transition_matrix(transition_csv_path, biophysical_df): """Read a transition CSV table in to a series of sparse matrices. Args: @@ -1975,7 +1974,7 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict): * ``'high-impact-disturb'`` indicating a high-impact disturbance * ``''`` (blank), which is equivalent to no carbon change.o - biophysical_dict (dict): A ``dict`` mapping of integer landcover codes + biophysical_df (pandas.DataFrame): A table mapping integer landcover codes to biophysical values for disturbance and accumulation values for soil and biomass carbon pools. @@ -1987,14 +1986,13 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict): the pool for the landcover transition. """ table = utils.read_csv_to_dataframe( - transition_csv_path, convert_cols_to_lower=False, convert_vals_to_lower=False) + transition_csv_path, MODEL_SPEC['args']['landcover_transitions_table'] + ).reset_index() lulc_class_to_lucode = {} - max_lucode = 0 - for (lucode, values) in biophysical_dict.items(): - lulc_class_to_lucode[ - str(values['lulc-class']).strip().lower()] = lucode - max_lucode = max(max_lucode, lucode) + max_lucode = biophysical_df.index.max() + for lucode, row in biophysical_df.iterrows(): + lulc_class_to_lucode[row['lulc-class']] = lucode # Load up a sparse matrix with the transitions to save on memory usage. # The number of possible rows/cols is the value of the maximum possible @@ -2029,24 +2027,19 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict): "blank line encountered.") break - # Strip any whitespace to eliminate leading/trailing whitespace - row = row.str.strip() - # skip rows starting with a blank cell, these are part of the legend - if not row['lulc-class']: + if pandas.isna(row['lulc-class']): continue try: - from_colname = str(row['lulc-class']).lower() - from_lucode = lulc_class_to_lucode[from_colname] + from_lucode = lulc_class_to_lucode[row['lulc-class']] except KeyError: raise ValueError("The transition table's 'lulc-class' column has " - f"a value, '{from_colname}', that was expected " + f"a value, '{row['lulc-class']}', that was expected " "in the biophysical table but could not be " "found.") - for colname, field_value in row.items(): - to_colname = str(colname).strip().lower() + for to_colname, field_value in row.items(): # Skip the top row, only contains headers. if to_colname == 'lulc-class': @@ -2062,27 +2055,24 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict): # Only set values where the transition HAS a value. # Takes advantage of the sparse characteristic of the model. 
- if (isinstance(field_value, float) and - numpy.isnan(field_value)): + if pandas.isna(field_value): continue # When transition is a disturbance, we use the source landcover's # disturbance values. if field_value.endswith('disturb'): soil_disturbance_matrix[from_lucode, to_lucode] = ( - biophysical_dict[from_lucode][f'soil-{field_value}']) + biophysical_df[f'soil-{field_value}'][from_lucode]) biomass_disturbance_matrix[from_lucode, to_lucode] = ( - biophysical_dict[from_lucode][f'biomass-{field_value}']) + biophysical_df[f'biomass-{field_value}'][from_lucode]) # When we're transitioning to a landcover that accumulates, use the # target landcover's accumulation value. elif field_value == 'accum': soil_accumulation_matrix[from_lucode, to_lucode] = ( - biophysical_dict[to_lucode][ - 'soil-yearly-accumulation']) + biophysical_df['soil-yearly-accumulation'][to_lucode]) biomass_accumulation_matrix[from_lucode, to_lucode] = ( - biophysical_dict[to_lucode][ - 'biomass-yearly-accumulation']) + biophysical_df['biomass-yearly-accumulation'][to_lucode]) disturbance_matrices = { 'soil': soil_disturbance_matrix, @@ -2224,37 +2214,6 @@ def _reclassify_disturbance( target_raster_path, gdal.GDT_Float32, NODATA_FLOAT32_MIN) -def _extract_snapshots_from_table(csv_path): - """Extract the year/raster snapshot mapping from a CSV. - - No validation is performed on the years or raster paths. - - Args: - csv_path (string): The path to a CSV on disk containing snapshot - years and a corresponding transition raster path. Snapshot years - may be in any order in the CSV, but must be integers and no two - years may be the same. Snapshot raster paths must refer to a - raster file located on disk representing the landcover at that - transition. If the path is absolute, the path will be used as - given. If the path is relative, the path will be interpreted as - relative to the parent directory of this CSV file. - - Returns: - A ``dict`` mapping int snapshot years to their corresponding raster - paths. These raster paths will be absolute paths. - - """ - table = utils.read_csv_to_dataframe( - csv_path, convert_vals_to_lower=False, expand_path_cols=['raster_path']) - - output_dict = {} - table.set_index("snapshot_year", drop=False, inplace=True) - - for index, row in table.iterrows(): - output_dict[int(index)] = row['raster_path'] - return output_dict - - @validation.invest_validator def validate(args, limit_to=None): """Validate an input dictionary for Coastal Blue Carbon. 
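The table-handling changes in this file, and throughout this diff, follow one
pattern: calls to ``utils.read_csv_to_dataframe`` now pass the relevant
``MODEL_SPEC`` entry and keep the resulting indexed DataFrame, reading columns
from it directly instead of converting it to a nested dict with
``.to_dict(orient='index')``. A rough sketch of the before/after lookup pattern
using plain ``pandas`` (the column names come from the diff; the sample values
are invented):

```python
import io

import pandas

CSV_TEXT = "lucode,kc,root_depth\n1,0.65,1500\n2,1.0,300\n"

# Old pattern: collapse the table into a nested dict keyed by lucode.
old_table = pandas.read_csv(
    io.StringIO(CSV_TEXT)).set_index('lucode').to_dict(orient='index')
kc_old = old_table[1]['kc']   # 0.65

# New pattern: keep the indexed DataFrame and read columns from it directly.
df = pandas.read_csv(io.StringIO(CSV_TEXT)).set_index('lucode')
kc_new = df['kc'][1]          # 0.65, same lookup without the intermediate dict
kc_map = df['kc'].to_dict()   # e.g. a reclassification map built from one column

assert kc_old == kc_new == kc_map[1]
```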
@@ -2277,8 +2236,10 @@ def validate(args, limit_to=None): if ("landcover_snapshot_csv" not in invalid_keys and "landcover_snapshot_csv" in sufficient_keys): - snapshots = _extract_snapshots_from_table( - args['landcover_snapshot_csv']) + snapshots = utils.read_csv_to_dataframe( + args['landcover_snapshot_csv'], + MODEL_SPEC['args']['landcover_snapshot_csv'] + )['raster_path'].to_dict() for snapshot_year, snapshot_raster_path in snapshots.items(): raster_error_message = validation.check_raster( @@ -2299,4 +2260,26 @@ def validate(args, limit_to=None): analysis_year=args['analysis_year'], latest_year=max(snapshots.keys())))) + # check for invalid options in the translation table + if ("landcover_transitions_table" not in invalid_keys and + "landcover_transitions_table" in sufficient_keys): + transitions_spec = MODEL_SPEC['args']['landcover_transitions_table'] + transition_options = list( + transitions_spec['columns']['[LULC CODE]']['options'].keys()) + # lowercase options since utils call will lowercase table values + transition_options = [x.lower() for x in transition_options] + transitions_df = utils.read_csv_to_dataframe( + args['landcover_transitions_table'], transitions_spec) + transitions_mask = ~transitions_df.isin(transition_options) & ~transitions_df.isna() + if transitions_mask.any(axis=None): + transition_numpy_mask = transitions_mask.values + transition_numpy_values = transitions_df.to_numpy() + bad_transition_values = list( + numpy.unique(transition_numpy_values[transition_numpy_mask])) + validation_warnings.append(( + ['landcover_transitions_table'], + INVALID_TRANSITION_VALUES_MSG.format( + model_transitions=(transition_options), + transition_values=bad_transition_values))) + return validation_warnings diff --git a/src/natcap/invest/coastal_blue_carbon/preprocessor.py b/src/natcap/invest/coastal_blue_carbon/preprocessor.py index 8b590151de..8c395e6fba 100644 --- a/src/natcap/invest/coastal_blue_carbon/preprocessor.py +++ b/src/natcap/invest/coastal_blue_carbon/preprocessor.py @@ -36,6 +36,7 @@ "A table mapping LULC codes from the snapshot rasters to the " "corresponding LULC class names, and whether or not the " "class is a coastal blue carbon habitat."), + "index_col": "code", "columns": { "code": { "type": "integer", @@ -55,10 +56,10 @@ }, "landcover_snapshot_csv": { "type": "csv", + "index_col": "snapshot_year", "columns": { "snapshot_year": { - "type": "number", - "units": u.year_AD, + "type": "integer", "about": gettext("Year to snapshot.")}, "raster_path": { "type": "raster", @@ -82,6 +83,7 @@ "source LULC class, and the first row represents the " "destination LULC classes. Cells are populated with " "transition states, or left empty if no such transition occurs."), + "index_col": "lulc-class", "columns": { "lulc-class": { "type": "integer", @@ -112,6 +114,7 @@ "Table mapping each LULC type to impact and accumulation " "information. 
This is a template that you will fill out to " "create the biophysical table input to the main model."), + "index_col": "code", "columns": { **BIOPHYSICAL_COLUMNS_SPEC, # remove "expression" property which doesn't go in output spec @@ -131,7 +134,7 @@ "to match all the other LULC maps."), "bands": {1: {"type": "integer"}} }, - "task_cache": spec_utils.TASKGRAPH_DIR + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -164,8 +167,7 @@ def execute(args): """ suffix = utils.make_suffix_string(args, 'results_suffix') output_dir = os.path.join(args['workspace_dir'], 'outputs_preprocessor') - taskgraph_cache_dir = os.path.join(args['workspace_dir'], 'task_cache') - utils.make_directories([output_dir, taskgraph_cache_dir]) + utils.make_directories([output_dir]) try: n_workers = int(args['n_workers']) @@ -175,11 +177,13 @@ def execute(args): # TypeError when n_workers is None. n_workers = -1 # Synchronous mode. task_graph = taskgraph.TaskGraph( - taskgraph_cache_dir, n_workers, reporting_interval=5.0) + os.path.join(args['workspace_dir'], 'taskgraph_cache'), + n_workers, reporting_interval=5.0) - snapshots_dict = ( - coastal_blue_carbon._extract_snapshots_from_table( - args['landcover_snapshot_csv'])) + snapshots_dict = utils.read_csv_to_dataframe( + args['landcover_snapshot_csv'], + MODEL_SPEC['args']['landcover_snapshot_csv'] + )['raster_path'].to_dict() # Align the raster stack for analyzing the various transitions. min_pixel_size = float('inf') @@ -209,14 +213,15 @@ def execute(args): target_path_list=aligned_snapshot_paths, task_name='Align input landcover rasters') - landcover_table = utils.read_csv_to_dataframe( - args['lulc_lookup_table_path'], 'code').to_dict(orient='index') + landcover_df = utils.read_csv_to_dataframe( + args['lulc_lookup_table_path'], + MODEL_SPEC['args']['lulc_lookup_table_path']) target_transition_table = os.path.join( output_dir, TRANSITION_TABLE.format(suffix=suffix)) _ = task_graph.add_task( func=_create_transition_table, - args=(landcover_table, + args=(landcover_df, aligned_snapshot_paths, target_transition_table), target_path_list=[target_transition_table], @@ -227,7 +232,7 @@ def execute(args): output_dir, BIOPHYSICAL_TABLE.format(suffix=suffix)) _ = task_graph.add_task( func=_create_biophysical_table, - args=(landcover_table, target_biophysical_table_path), + args=(landcover_df, target_biophysical_table_path), target_path_list=[target_biophysical_table_path], task_name='Write biophysical table template') @@ -235,20 +240,20 @@ def execute(args): task_graph.join() -def _create_transition_table(landcover_table, lulc_snapshot_list, +def _create_transition_table(landcover_df, lulc_snapshot_list, target_table_path): """Create the transition table from a series of landcover snapshots. Args: - landcover_table (dict): A dict mapping integer landcover codes to dict - values indicating the landcover class name in the ``lulc-class`` - field and ``True`` or ``False`` under the - ``is_coastal_blue_carbon_habitat`` key. + landcover_df (pandas.DataFrame: A table mapping integer landcover + codes to values indicating the landcover class name in the + ``lulc-class`` column and ``True`` or ``False`` under the + ``is_coastal_blue_carbon_habitat`` column. lulc_snapshot_list (list): A list of string paths to GDAL rasters on disk. All rasters must have the same spatial reference, pixel size and dimensions and must also all be integer rasters, where all non-nodata pixel values must be represented in the - ``landcover_table`` dict. + ``landcover_df`` dataframe. 
target_table_path (string): A string path to where the target transition table should be written. @@ -317,13 +322,13 @@ def _create_transition_table(landcover_table, lulc_snapshot_list, sparse_transition_table = {} for from_lucode, to_lucode in transition_pairs: try: - from_is_cbc = landcover_table[ - from_lucode]['is_coastal_blue_carbon_habitat'] - to_is_cbc = landcover_table[ - to_lucode]['is_coastal_blue_carbon_habitat'] + from_is_cbc = landcover_df[ + 'is_coastal_blue_carbon_habitat'][from_lucode] + to_is_cbc = landcover_df[ + 'is_coastal_blue_carbon_habitat'][to_lucode] except KeyError: for variable in (from_lucode, to_lucode): - if variable not in landcover_table: + if variable not in landcover_df.index: raise ValueError( 'The landcover table is missing a row with the ' f'landuse code {variable}.') @@ -331,14 +336,14 @@ def _create_transition_table(landcover_table, lulc_snapshot_list, sparse_transition_table[(from_lucode, to_lucode)] = ( transition_types[(from_is_cbc, to_is_cbc)]) - code_list = sorted([code for code in landcover_table.keys()]) + code_list = sorted(landcover_df.index) lulc_class_list_sorted = [ - landcover_table[code]['lulc-class'] for code in code_list] + landcover_df['lulc-class'][code] for code in code_list] with open(target_table_path, 'w') as csv_file: fieldnames = ['lulc-class'] + lulc_class_list_sorted csv_file.write(f"{','.join(fieldnames)}\n") for row_code in code_list: - class_name = landcover_table[row_code]['lulc-class'] + class_name = landcover_df['lulc-class'][row_code] row = [class_name] for col_code in code_list: try: @@ -361,7 +366,7 @@ def _create_transition_table(landcover_table, lulc_snapshot_list, csv_file.write("\n,NCC (no-carbon-change)") -def _create_biophysical_table(landcover_table, target_biophysical_table_path): +def _create_biophysical_table(landcover_df, target_biophysical_table_path): """Write the biophysical table template to disk. The biophysical table templates contains all of the fields required by the @@ -370,8 +375,8 @@ def _create_biophysical_table(landcover_table, target_biophysical_table_path): table. Args: - landcover_table (dict): A dict mapping int landcover codes to a dict - with string keys that map to numeric or string column values. + landcover_df (pandas.DataFrame): A table mapping int landcover codes + to biophysical data target_biophysical_table_path (string): The path to where the biophysical table template will be stored on disk. @@ -384,16 +389,19 @@ def _create_biophysical_table(landcover_table, target_biophysical_table_path): with open(target_biophysical_table_path, 'w') as bio_table: bio_table.write(f"{','.join(target_column_names)}\n") - for lulc_code in sorted(landcover_table.keys()): + for lulc_code, row in landcover_df.sort_index().iterrows(): # 2 columns are defined below, and we need 1 less comma to only # have commas between fields. 
row = [] for colname in target_column_names: - try: - # Use the user's defined value if it exists - row.append(str(landcover_table[lulc_code][colname])) - except KeyError: - row.append('') + if colname == 'code': + row.append(str(lulc_code)) + else: + try: + # Use the user's defined value if it exists + row.append(str(landcover_df[colname][lulc_code])) + except KeyError: + row.append('') bio_table.write(f"{','.join(row)}\n") diff --git a/src/natcap/invest/coastal_vulnerability.py b/src/natcap/invest/coastal_vulnerability.py index 7ce14522ba..8924f6f451 100644 --- a/src/natcap/invest/coastal_vulnerability.py +++ b/src/natcap/invest/coastal_vulnerability.py @@ -247,17 +247,11 @@ "represented by any value and absence of the habitat " "can be represented by 0 and nodata values.")}, "rank": { - "type": "option_string", - "options": { - "1": {"description": gettext("very high protection")}, - "2": {"description": gettext("high protection")}, - "3": {"description": gettext("moderate protection")}, - "4": {"description": gettext("low protection")}, - "5": {"description": gettext("very low protection")} - }, + "type": "integer", "about": gettext( "Relative amount of coastline protection this habitat " - "provides.") + "provides, from 1 (very high protection) to 5 " + "(very low protection.") }, "protection distance (m)": { "type": "number", @@ -268,6 +262,7 @@ "no protection to the coastline.") }, }, + "index_col": "id", "about": gettext( "Table that specifies spatial habitat data and parameters."), "name": gettext("habitats table") @@ -365,6 +360,7 @@ }, "coastal_exposure.csv": { "about": "This is an identical copy of the attribute table of coastal_exposure.gpkg provided in csv format for convenience. Users may wish to modify or add to the columns of this table in order to calculate exposure indices for custom scenarios.", + "index_col": "shore_id", "columns": FINAL_OUTPUT_FIELDS }, "intermediate": { @@ -460,6 +456,7 @@ "habitat_protection.csv": { "about": ( "Shore points with associated habitat data"), + "index_col": "shore_id", "columns": { "shore_id": { "type": "integer", @@ -685,10 +682,10 @@ "fields": WWIII_FIELDS } } - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -800,8 +797,6 @@ def execute(args): geomorph_dir, wind_wave_dir, surge_dir, population_dir, slr_dir]) file_suffix = utils.make_suffix_string(args, 'results_suffix') - taskgraph_cache_dir = os.path.join( - intermediate_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -809,7 +804,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # Single process mode. 
- task_graph = taskgraph.TaskGraph(taskgraph_cache_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) model_resolution = float(args['model_resolution']) max_fetch_distance = float(args['max_fetch_distance']) @@ -2315,42 +2311,41 @@ def _schedule_habitat_tasks( """ habitat_dataframe = utils.read_csv_to_dataframe( - habitat_table_path, convert_vals_to_lower=False, expand_path_cols=['path']) - habitat_dataframe = habitat_dataframe.rename( - columns={'protection distance (m)': 'distance'}) + habitat_table_path, MODEL_SPEC['args']['habitat_table_path'] + ).rename(columns={'protection distance (m)': 'distance'}) habitat_task_list = [] habitat_pickles_list = [] - for habitat_row in habitat_dataframe.itertuples(): + for _id, habitat_row in habitat_dataframe.iterrows(): target_habitat_pickle_path = os.path.join( - working_dir, f'{habitat_row.id}{file_suffix}.pickle') + working_dir, f'{_id}{file_suffix}.pickle') habitat_pickles_list.append(target_habitat_pickle_path) gis_type = pygeoprocessing.get_gis_type(habitat_row.path) if gis_type == 2: habitat_task_list.append(task_graph.add_task( func=search_for_vector_habitat, args=(base_shore_point_vector_path, - habitat_row.distance, - habitat_row.rank, - habitat_row.id, - habitat_row.path, + habitat_row['distance'], + habitat_row['rank'], + _id, + habitat_row['path'], target_habitat_pickle_path), target_path_list=[target_habitat_pickle_path], - task_name=f'searching for {habitat_row.id}')) + task_name=f'searching for {_id}')) continue if gis_type == 1: habitat_task_list.append(task_graph.add_task( func=search_for_raster_habitat, args=(base_shore_point_vector_path, - habitat_row.distance, - habitat_row.rank, - habitat_row.id, - habitat_row.path, + habitat_row['distance'], + habitat_row['rank'], + _id, + habitat_row['path'], target_habitat_pickle_path, model_resolution, file_suffix), target_path_list=[target_habitat_pickle_path], - task_name=f'searching for {habitat_row.id}')) + task_name=f'searching for {_id}')) return habitat_task_list, habitat_pickles_list @@ -2835,12 +2830,14 @@ def assemble_results_and_calculate_exposure( final_values_dict[var_name] = pickle.load(file) habitat_df = utils.read_csv_to_dataframe( - habitat_protection_path, convert_cols_to_lower=False, convert_vals_to_lower=False) + habitat_protection_path, MODEL_SPEC['outputs']['intermediate'][ + 'contents']['habitats']['contents']['habitat_protection.csv'] + ).rename(columns={'r_hab': 'R_hab'}) output_layer.StartTransaction() for feature in output_layer: shore_id = feature.GetField(SHORE_ID_FIELD) # The R_hab ranks were stored in a CSV, now this dataframe: - rank = habitat_df[habitat_df[SHORE_ID_FIELD] == shore_id][R_hab_name] + rank = habitat_df.loc[shore_id, R_hab_name] feature.SetField(str(R_hab_name), float(rank)) # The other variables were stored in pickles, now this dict: for fieldname in final_values_dict: @@ -3235,7 +3232,6 @@ def _aggregate_raster_values_in_radius( kernel_mask &= ~utils.array_equals_nodata(array, nodata) result[shore_id] = aggregation_op(array, kernel_mask) - with open(target_pickle_path, 'wb') as pickle_file: pickle.dump(result, pickle_file) @@ -3465,8 +3461,7 @@ def _validate_habitat_table_paths(habitat_table_path): ValueError if any vector in the ``path`` column cannot be opened. 
""" habitat_dataframe = utils.read_csv_to_dataframe( - habitat_table_path, convert_cols_to_lower=False, convert_vals_to_lower=False, - expand_path_cols=['path']) + habitat_table_path, MODEL_SPEC['args']['habitat_table_path']) bad_paths = [] for habitat_row in habitat_dataframe.itertuples(): try: diff --git a/src/natcap/invest/crop_production_percentile.py b/src/natcap/invest/crop_production_percentile.py index 1cc5717ef3..36a56d89b3 100644 --- a/src/natcap/invest/crop_production_percentile.py +++ b/src/natcap/invest/crop_production_percentile.py @@ -22,6 +22,87 @@ LOGGER = logging.getLogger(__name__) +CROP_OPTIONS = { + # TODO: use human-readable translatable crop names (#614) + crop: {"description": crop} for crop in [ + "abaca", "agave", "alfalfa", "almond", "aniseetc", + "apple", "apricot", "areca", "artichoke", "asparagus", + "avocado", "bambara", "banana", "barley", "bean", + "beetfor", "berrynes", "blueberry", "brazil", + "canaryseed", "carob", "carrot", "carrotfor", "cashew", + "broadbean", "buckwheat", "cabbage", "cabbagefor", + "cashewapple", "cassava", "castor", "cauliflower", + "cerealnes", "cherry", "chestnut", "chickpea", + "chicory", "chilleetc", "cinnamon", "citrusnes", + "clove", "clover", "cocoa", "coconut", "coffee", + "cotton", "cowpea", "cranberry", "cucumberetc", + "currant", "date", "eggplant", "fibrenes", "fig", + "flax", "fonio", "fornes", "fruitnes", "garlic", + "ginger", "gooseberry", "grape", "grapefruitetc", + "grassnes", "greenbean", "greenbroadbean", "greencorn", + "greenonion", "greenpea", "groundnut", "hazelnut", + "hemp", "hempseed", "hop", "jute", "jutelikefiber", + "kapokfiber", "kapokseed", "karite", "kiwi", "kolanut", + "legumenes", "lemonlime", "lentil", "lettuce", + "linseed", "lupin", "maize", "maizefor", "mango", + "mate", "melonetc", "melonseed", "millet", + "mixedgrain", "mixedgrass", "mushroom", "mustard", + "nutmeg", "nutnes", "oats", "oilpalm", "oilseedfor", + "oilseednes", "okra", "olive", "onion", "orange", + "papaya", "pea", "peachetc", "pear", "pepper", + "peppermint", "persimmon", "pigeonpea", "pimento", + "pineapple", "pistachio", "plantain", "plum", "poppy", + "potato", "pulsenes", "pumpkinetc", "pyrethrum", + "quince", "quinoa", "ramie", "rapeseed", "rasberry", + "rice", "rootnes", "rubber", "rye", "ryefor", + "safflower", "sesame", "sisal", "sorghum", + "sorghumfor", "sourcherry, soybean", "spicenes", + "spinach", "stonefruitnes", "strawberry", "stringbean", + "sugarbeet", "sugarcane", "sugarnes", "sunflower", + "swedefor", "sweetpotato", "tangetc", "taro", "tea", + "tobacco", "tomato", "triticale", "tropicalnes", + "tung", "turnipfor", "vanilla", "vegetablenes", + "vegfor", "vetch", "walnut", "watermelon", "wheat", + "yam", "yautia" + ] +} + +nutrient_units = { + "protein": u.gram/u.hectogram, + "lipid": u.gram/u.hectogram, # total lipid + "energy": u.kilojoule/u.hectogram, + "ca": u.milligram/u.hectogram, # calcium + "fe": u.milligram/u.hectogram, # iron + "mg": u.milligram/u.hectogram, # magnesium + "ph": u.milligram/u.hectogram, # phosphorus + "k": u.milligram/u.hectogram, # potassium + "na": u.milligram/u.hectogram, # sodium + "zn": u.milligram/u.hectogram, # zinc + "cu": u.milligram/u.hectogram, # copper + "fl": u.microgram/u.hectogram, # fluoride + "mn": u.milligram/u.hectogram, # manganese + "se": u.microgram/u.hectogram, # selenium + "vita": u.IU/u.hectogram, # vitamin A + "betac": u.microgram/u.hectogram, # beta carotene + "alphac": u.microgram/u.hectogram, # alpha carotene + "vite": u.milligram/u.hectogram, # vitamin e + 
"crypto": u.microgram/u.hectogram, # cryptoxanthin + "lycopene": u.microgram/u.hectogram, # lycopene + "lutein": u.microgram/u.hectogram, # lutein + zeaxanthin + "betat": u.milligram/u.hectogram, # beta tocopherol + "gammat": u.milligram/u.hectogram, # gamma tocopherol + "deltat": u.milligram/u.hectogram, # delta tocopherol + "vitc": u.milligram/u.hectogram, # vitamin C + "thiamin": u.milligram/u.hectogram, + "riboflavin": u.milligram/u.hectogram, + "niacin": u.milligram/u.hectogram, + "pantothenic": u.milligram/u.hectogram, # pantothenic acid + "vitb6": u.milligram/u.hectogram, # vitamin B6 + "folate": u.microgram/u.hectogram, + "vitb12": u.microgram/u.hectogram, # vitamin B12 + "vitk": u.microgram/u.hectogram, # vitamin K +} + MODEL_SPEC = { "model_name": MODEL_METADATA["crop_production_percentile"].model_title, "pyname": MODEL_METADATA["crop_production_percentile"].pyname, @@ -44,54 +125,12 @@ }, "landcover_to_crop_table_path": { "type": "csv", + "index_col": "crop_name", "columns": { "lucode": {"type": "integer"}, "crop_name": { "type": "option_string", - "options": { - # TODO: use human-readable translatable crop names (#614) - crop: {"description": crop} for crop in [ - "abaca", "agave", "alfalfa", "almond", "aniseetc", - "apple", "apricot", "areca", "artichoke", "asparagus", - "avocado", "bambara", "banana", "barley", "bean", - "beetfor", "berrynes", "blueberry", "brazil", - "canaryseed", "carob", "carrot", "carrotfor", "cashew", - "broadbean", "buckwheat", "cabbage", "cabbagefor", - "cashewapple", "cassava", "castor", "cauliflower", - "cerealnes", "cherry", "chestnut", "chickpea", - "chicory", "chilleetc", "cinnamon", "citrusnes", - "clove", "clover", "cocoa", "coconut", "coffee", - "cotton", "cowpea", "cranberry", "cucumberetc", - "currant", "date", "eggplant", "fibrenes", "fig", - "flax", "fonio", "fornes", "fruitnes", "garlic", - "ginger", "gooseberry", "grape", "grapefruitetc", - "grassnes", "greenbean", "greenbroadbean", "greencorn", - "greenonion", "greenpea", "groundnut", "hazelnut", - "hemp", "hempseed", "hop", "jute", "jutelikefiber", - "kapokfiber", "kapokseed", "karite", "kiwi", "kolanut", - "legumenes", "lemonlime", "lentil", "lettuce", - "linseed", "lupin", "maize", "maizefor", "mango", - "mate", "melonetc", "melonseed", "millet", - "mixedgrain", "mixedgrass", "mushroom", "mustard", - "nutmeg", "nutnes", "oats", "oilpalm", "oilseedfor", - "oilseednes", "okra", "olive", "onion", "orange", - "papaya", "pea", "peachetc", "pear", "pepper", - "peppermint", "persimmon", "pigeonpea", "pimento", - "pineapple", "pistachio", "plantain", "plum", "poppy", - "potato", "pulsenes", "pumpkinetc", "pyrethrum", - "quince", "quinoa", "ramie", "rapeseed", "rasberry", - "rice", "rootnes", "rubber", "rye", "ryefor", - "safflower", "sesame", "sisal", "sorghum", - "sorghumfor", "sourcherry, soybean", "spicenes", - "spinach", "stonefruitnes", "strawberry", "stringbean", - "sugarbeet", "sugarcane", "sugarnes", "sunflower", - "swedefor", "sweetpotato", "tangetc", "taro", "tea", - "tobacco", "tomato", "triticale", "tropicalnes", - "tung", "turnipfor", "vanilla", "vegetablenes", - "vegfor", "vetch", "walnut", "watermelon", "wheat", - "yam", "yautia" - ] - } + "options": CROP_OPTIONS } }, "about": gettext( @@ -116,6 +155,7 @@ "contents": { "[CROP]_percentile_yield_table.csv": { "type": "csv", + "index_col": "climate_bin", "columns": { "climate_bin": {"type": "integer"}, "yield_25th": { @@ -163,45 +203,19 @@ }, "crop_nutrient.csv": { "type": "csv", + "index_col": "crop", "columns": { - nutrient: { + 
"crop": { + "type": "option_string", + "options": CROP_OPTIONS + }, + "percentrefuse": { + "type": "percent" + }, + **{nutrient: { "type": "number", "units": units - } for nutrient, units in { - "protein": u.gram/u.hectogram, - "lipid": u.gram/u.hectogram, # total lipid - "energy": u.kilojoule/u.hectogram, - "ca": u.milligram/u.hectogram, # calcium - "fe": u.milligram/u.hectogram, # iron - "mg": u.milligram/u.hectogram, # magnesium - "ph": u.milligram/u.hectogram, # phosphorus - "k": u.milligram/u.hectogram, # potassium - "na": u.milligram/u.hectogram, # sodium - "zn": u.milligram/u.hectogram, # zinc - "cu": u.milligram/u.hectogram, # copper - "fl": u.microgram/u.hectogram, # fluoride - "mn": u.milligram/u.hectogram, # manganese - "se": u.microgram/u.hectogram, # selenium - "vita": u.IU/u.hectogram, # vitamin A - "betac": u.microgram/u.hectogram, # beta carotene - "alphac": u.microgram/u.hectogram, # alpha carotene - "vite": u.milligram/u.hectogram, # vitamin e - "crypto": u.microgram/u.hectogram, # cryptoxanthin - "lycopene": u.microgram/u.hectogram, # lycopene - "lutein": u.microgram/u.hectogram, # lutein + zeaxanthin - "betaT": u.milligram/u.hectogram, # beta tocopherol - "gammaT": u.milligram/u.hectogram, # gamma tocopherol - "deltaT": u.milligram/u.hectogram, # delta tocopherol - "vitc": u.milligram/u.hectogram, # vitamin C - "thiamin": u.milligram/u.hectogram, - "riboflavin": u.milligram/u.hectogram, - "niacin": u.milligram/u.hectogram, - "pantothenic": u.milligram/u.hectogram, # pantothenic acid - "vitb6": u.milligram/u.hectogram, # vitamin B6 - "folate": u.microgram/u.hectogram, - "vitb12": u.microgram/u.hectogram, # vitamin B12 - "vitk": u.microgram/u.hectogram, # vitamin K - }.items() + } for nutrient, units in nutrient_units.items()} } } }, @@ -213,6 +227,7 @@ "aggregate_results.csv": { "created_if": "aggregate_polygon_path", "about": "Model results aggregated to AOI polygons", + "index_col": "FID", "columns": { "FID": { "type": "integer", @@ -251,6 +266,7 @@ }, "result_table.csv": { "about": "Model results aggregated by crop", + "index_col": "crop", "columns": { "crop": { "type": "freestyle_string", @@ -346,10 +362,10 @@ "bands": {1: { "type": "number", "units": u.metric_ton/u.hectare }} - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -405,12 +421,7 @@ _AGGREGATE_TABLE_FILE_PATTERN = os.path.join( '.', 'aggregate_results%s.csv') -_EXPECTED_NUTRIENT_TABLE_HEADERS = [ - 'Protein', 'Lipid', 'Energy', 'Ca', 'Fe', 'Mg', 'Ph', 'K', 'Na', 'Zn', - 'Cu', 'Fl', 'Mn', 'Se', 'VitA', 'betaC', 'alphaC', 'VitE', 'Crypto', - 'Lycopene', 'Lutein', 'betaT', 'gammaT', 'deltaT', 'VitC', 'Thiamin', - 'Riboflavin', 'Niacin', 'Pantothenic', 'VitB6', 'Folate', 'VitB12', - 'VitK'] +_EXPECTED_NUTRIENT_TABLE_HEADERS = list(nutrient_units.keys()) _EXPECTED_LUCODE_TABLE_HEADER = 'lucode' _NODATA_YIELD = -1 @@ -458,10 +469,11 @@ def execute(args): None. 
""" - crop_to_landcover_table = utils.read_csv_to_dataframe( - args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index') + crop_to_landcover_df = utils.read_csv_to_dataframe( + args['landcover_to_crop_table_path'], + MODEL_SPEC['args']['landcover_to_crop_table_path']) bad_crop_name_list = [] - for crop_name in crop_to_landcover_table: + for crop_name in crop_to_landcover_df.index: crop_climate_bin_raster_path = os.path.join( args['model_data_path'], _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name) @@ -498,8 +510,6 @@ def execute(args): edge_samples=11) # Initialize a TaskGraph - work_token_dir = os.path.join( - output_dir, _INTERMEDIATE_OUTPUT_DIR, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -507,14 +517,14 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # Single process mode. - task_graph = taskgraph.TaskGraph(work_token_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(output_dir, 'taskgraph_cache'), n_workers) dependent_task_list = [] crop_lucode = None observed_yield_nodata = None - for crop_name in crop_to_landcover_table: - crop_lucode = crop_to_landcover_table[crop_name][ - _EXPECTED_LUCODE_TABLE_HEADER] + for crop_name, row in crop_to_landcover_df.iterrows(): + crop_lucode = row[_EXPECTED_LUCODE_TABLE_HEADER] LOGGER.info("Processing crop %s", crop_name) crop_climate_bin_raster_path = os.path.join( args['model_data_path'], @@ -540,11 +550,13 @@ def execute(args): climate_percentile_yield_table_path = os.path.join( args['model_data_path'], _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name) - crop_climate_percentile_table = utils.read_csv_to_dataframe( - climate_percentile_yield_table_path, 'climate_bin').to_dict(orient='index') + crop_climate_percentile_df = utils.read_csv_to_dataframe( + climate_percentile_yield_table_path, + MODEL_SPEC['args']['model_data_path']['contents'][ + 'climate_percentile_yield_tables']['contents'][ + '[CROP]_percentile_yield_table.csv']) yield_percentile_headers = [ - x for x in list(crop_climate_percentile_table.values())[0] - if x != 'climate_bin'] + x for x in crop_climate_percentile_df.columns if x != 'climate_bin'] reclassify_error_details = { 'raster_name': f'{crop_name} Climate Bin', @@ -556,10 +568,8 @@ def execute(args): output_dir, _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % ( crop_name, yield_percentile_id, file_suffix)) - bin_to_percentile_yield = dict([ - (bin_id, - crop_climate_percentile_table[bin_id][yield_percentile_id]) - for bin_id in crop_climate_percentile_table]) + bin_to_percentile_yield = ( + crop_climate_percentile_df[yield_percentile_id].to_dict()) # reclassify nodata to a valid value of 0 # we're assuming that the crop doesn't exist where there is no data # this is more likely than assuming the crop does exist, esp. @@ -698,17 +708,17 @@ def execute(args): # both 'crop_nutrient.csv' and 'crop' are known data/header values for # this model data. 
- nutrient_table = utils.read_csv_to_dataframe( + nutrient_df = utils.read_csv_to_dataframe( os.path.join(args['model_data_path'], 'crop_nutrient.csv'), - 'crop', convert_cols_to_lower=False, convert_vals_to_lower=False - ).to_dict(orient='index') + MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv']) result_table_path = os.path.join( output_dir, 'result_table%s.csv' % file_suffix) + crop_names = crop_to_landcover_df.index.to_list() tabulate_results_task = task_graph.add_task( func=tabulate_results, - args=(nutrient_table, yield_percentile_headers, - crop_to_landcover_table, pixel_area_ha, + args=(nutrient_df, yield_percentile_headers, + crop_names, pixel_area_ha, args['landcover_raster_path'], landcover_nodata, output_dir, file_suffix, result_table_path), target_path_list=[result_table_path], @@ -727,7 +737,7 @@ def execute(args): args=(args['aggregate_polygon_path'], target_aggregate_vector_path, landcover_raster_info['projection_wkt'], - crop_to_landcover_table, nutrient_table, + crop_names, nutrient_df, yield_percentile_headers, output_dir, file_suffix, aggregate_results_table_path), target_path_list=[target_aggregate_vector_path, @@ -851,19 +861,18 @@ def _mask_observed_yield_op( def tabulate_results( - nutrient_table, yield_percentile_headers, - crop_to_landcover_table, pixel_area_ha, landcover_raster_path, + nutrient_df, yield_percentile_headers, + crop_names, pixel_area_ha, landcover_raster_path, landcover_nodata, output_dir, file_suffix, target_table_path): """Write table with total yield and nutrient results by crop. This function includes all the operations that write to results_table.csv. Args: - nutrient_table (dict): a lookup of nutrient values by crop in the - form of nutrient_table[][]. + nutrient_df (pandas.DataFrame): a table of nutrient values by crop yield_percentile_headers (list): list of strings indicating percentiles at which yield was calculated. 
- crop_to_landcover_table (dict): landcover codes keyed by crop names + crop_names (list): list of crop names pixel_area_ha (float): area of lulc raster cells (hectares) landcover_raster_path (string): path to landcover raster landcover_nodata (float): landcover raster nodata value @@ -894,7 +903,7 @@ def tabulate_results( 'crop,area (ha),' + 'production_observed,' + ','.join(production_percentile_headers) + ',' + ','.join( nutrient_headers) + '\n') - for crop_name in sorted(crop_to_landcover_table): + for crop_name in sorted(crop_names): result_table.write(crop_name) production_lookup = {} production_pixel_count = 0 @@ -942,19 +951,19 @@ def tabulate_results( # convert 100g to Mg and fraction left over from refuse nutrient_factor = 1e4 * ( - 1 - nutrient_table[crop_name]['Percentrefuse'] / 100) + 1 - nutrient_df['percentrefuse'][crop_name] / 100) for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS: for yield_percentile_id in sorted(yield_percentile_headers): total_nutrient = ( nutrient_factor * production_lookup[yield_percentile_id] * - nutrient_table[crop_name][nutrient_id]) + nutrient_df[nutrient_id][crop_name]) result_table.write(",%f" % (total_nutrient)) result_table.write( ",%f" % ( nutrient_factor * production_lookup['observed'] * - nutrient_table[crop_name][nutrient_id])) + nutrient_df[nutrient_id][crop_name])) result_table.write('\n') total_area = 0 @@ -972,8 +981,8 @@ def tabulate_results( def aggregate_to_polygons( base_aggregate_vector_path, target_aggregate_vector_path, - landcover_raster_projection, crop_to_landcover_table, - nutrient_table, yield_percentile_headers, output_dir, file_suffix, + landcover_raster_projection, crop_names, + nutrient_df, yield_percentile_headers, output_dir, file_suffix, target_aggregate_table_path): """Write table with aggregate results of yield and nutrient values. @@ -986,9 +995,8 @@ def aggregate_to_polygons( target_aggregate_vector_path (string): path to re-projected copy of polygon vector landcover_raster_projection (string): a WKT projection string - crop_to_landcover_table (dict): landcover codes keyed by crop names - nutrient_table (dict): a lookup of nutrient values by crop in the - form of nutrient_table[][]. + crop_names (list): list of crop names + nutrient_df (pandas.DataFrame): a table of nutrient values by crop yield_percentile_headers (list): list of strings indicating percentiles at which yield was calculated. output_dir (string): the file path to the output workspace. 
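Note: the "convert 100g to Mg" comment that appears in both tabulation functions compresses two steps into one factor. A worked example with illustrative numbers (not model data):

    production_mg = 2.5        # crop production in metric tons (Mg)
    percentrefuse = 10         # percent of the crop that is refuse
    protein_per_100g = 13.7    # nutrient content per 100 g of edible crop

    # 1 Mg = 10,000 x 100 g, so 1e4 converts production in Mg into 100 g units;
    # (1 - percentrefuse / 100) keeps only the edible (non-refuse) fraction.
    nutrient_factor = 1e4 * (1 - percentrefuse / 100)
    total_nutrient = nutrient_factor * production_mg * protein_per_100g
    # 9000.0 * 2.5 * 13.7 = 308250.0 (grams of protein in this example)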
@@ -1012,10 +1020,10 @@ def aggregate_to_polygons( total_nutrient_table = collections.defaultdict( lambda: collections.defaultdict(lambda: collections.defaultdict( float))) - for crop_name in crop_to_landcover_table: + for crop_name in crop_names: # convert 100g to Mg and fraction left over from refuse nutrient_factor = 1e4 * ( - 1 - nutrient_table[crop_name]['Percentrefuse'] / 100) + 1 - nutrient_df['percentrefuse'][crop_name] / 100) # loop over percentiles for yield_percentile_id in yield_percentile_headers: percentile_crop_production_raster_path = os.path.join( @@ -1040,24 +1048,24 @@ def aggregate_to_polygons( total_yield_lookup['%s_%s' % ( crop_name, yield_percentile_id)][ id_index]['sum'] * - nutrient_table[crop_name][nutrient_id]) + nutrient_df[nutrient_id][crop_name]) # process observed observed_yield_path = os.path.join( output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % ( crop_name, file_suffix)) - total_yield_lookup['%s_observed' % crop_name] = ( + total_yield_lookup[f'{crop_name}_observed'] = ( pygeoprocessing.zonal_statistics( (observed_yield_path, 1), target_aggregate_vector_path)) for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS: - for id_index in total_yield_lookup['%s_observed' % crop_name]: + for id_index in total_yield_lookup[f'{crop_name}_observed']: total_nutrient_table[ nutrient_id]['observed'][id_index] += ( nutrient_factor * total_yield_lookup[ - '%s_observed' % crop_name][id_index]['sum'] * - nutrient_table[crop_name][nutrient_id]) + f'{crop_name}_observed'][id_index]['sum'] * + nutrient_df[nutrient_id][crop_name]) # report everything to a table with open(target_aggregate_table_path, 'w') as aggregate_table: diff --git a/src/natcap/invest/crop_production_regression.py b/src/natcap/invest/crop_production_regression.py index 602070ec5a..d53e080455 100644 --- a/src/natcap/invest/crop_production_regression.py +++ b/src/natcap/invest/crop_production_regression.py @@ -86,6 +86,7 @@ }, "landcover_to_crop_table_path": { "type": "csv", + "index_col": "crop_name", "columns": { "lucode": {"type": "integer"}, "crop_name": { @@ -101,6 +102,7 @@ }, "fertilization_rate_table_path": { "type": "csv", + "index_col": "crop_name", "columns": { "crop_name": { "type": "option_string", @@ -129,29 +131,38 @@ "contents": { "[CROP]_regression_yield_table.csv": { "type": "csv", + "index_col": "climate_bin", "columns": { - 'climate_bin': {"type": "integer"}, - 'yield_ceiling': { + "climate_bin": {"type": "integer"}, + "yield_ceiling": { "type": "number", "units": u.metric_ton/u.hectare }, - 'b_nut': {"type": "number", "units": u.none}, - 'b_k2o': {"type": "number", "units": u.none}, - 'c_n': {"type": "number", "units": u.none}, - 'c_p2o5': {"type": "number", "units": u.none}, - 'c_k2o': {"type": "number", "units": u.none} + "b_nut": {"type": "number", "units": u.none}, + "b_k2o": {"type": "number", "units": u.none}, + "c_n": {"type": "number", "units": u.none}, + "c_p2o5": {"type": "number", "units": u.none}, + "c_k2o": {"type": "number", "units": u.none} } } } }, "crop_nutrient.csv": { "type": "csv", + "index_col": "crop", "columns": { - nutrient: { + "crop": { + "type": "option_string", + "options": CROPS + }, + "percentrefuse": { + "type": "percent" + }, + **{nutrient: { "about": about, "type": "number", "units": units - } for nutrient, about, units in NUTRIENTS + } for nutrient, about, units in NUTRIENTS} } }, "extended_climate_bin_maps": { @@ -186,6 +197,7 @@ "aggregate_results.csv": { "created_if": "aggregate_polygon_path", "about": "Table of results aggregated by ", + "index_col": 
"FID", "columns": { "FID": { "type": "integer", @@ -213,6 +225,7 @@ }, "result_table.csv": { "about": "Table of results aggregated by crop", + "index_col": "crop", "columns": { "crop": { "type": "freestyle_string", @@ -306,10 +319,10 @@ "bands": {1: { "type": "number", "units": u.metric_ton/u.hectare }} - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -319,7 +332,7 @@ 'climate_regression_yield_tables', '%s_regression_yield_table.csv') _EXPECTED_REGRESSION_TABLE_HEADERS = [ - 'climate_bin', 'yield_ceiling', 'b_nut', 'b_k2o', 'c_n', 'c_p2o5', 'c_k2o'] + 'yield_ceiling', 'b_nut', 'b_k2o', 'c_n', 'c_p2o5', 'c_k2o'] # crop_name, yield_regression_id, file_suffix _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN = os.path.join( @@ -409,11 +422,11 @@ '.', 'aggregate_results%s.csv') _EXPECTED_NUTRIENT_TABLE_HEADERS = [ - 'Protein', 'Lipid', 'Energy', 'Ca', 'Fe', 'Mg', 'Ph', 'K', 'Na', 'Zn', - 'Cu', 'Fl', 'Mn', 'Se', 'VitA', 'betaC', 'alphaC', 'VitE', 'Crypto', - 'Lycopene', 'Lutein', 'betaT', 'gammaT', 'deltaT', 'VitC', 'Thiamin', - 'Riboflavin', 'Niacin', 'Pantothenic', 'VitB6', 'Folate', 'VitB12', - 'VitK'] + 'protein', 'lipid', 'energy', 'ca', 'fe', 'mg', 'ph', 'k', 'na', 'zn', + 'cu', 'fl', 'mn', 'se', 'vita', 'betac', 'alphac', 'vite', 'crypto', + 'lycopene', 'lutein', 'betat', 'gammat', 'deltat', 'vitc', 'thiamin', + 'riboflavin', 'niacin', 'pantothenic', 'vitb6', 'folate', 'vitb12', + 'vitk'] _EXPECTED_LUCODE_TABLE_HEADER = 'lucode' _NODATA_YIELD = -1 @@ -470,8 +483,6 @@ def execute(args): output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)]) # Initialize a TaskGraph - work_token_dir = os.path.join( - output_dir, _INTERMEDIATE_OUTPUT_DIR, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -479,20 +490,21 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # Single process mode. 
- task_graph = taskgraph.TaskGraph(work_token_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(output_dir, 'taskgraph_cache'), n_workers) dependent_task_list = [] LOGGER.info( "Checking if the landcover raster is missing lucodes") - crop_to_landcover_table = utils.read_csv_to_dataframe( - args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index') + crop_to_landcover_df = utils.read_csv_to_dataframe( + args['landcover_to_crop_table_path'], + MODEL_SPEC['args']['landcover_to_crop_table_path']) - crop_to_fertlization_rate_table = utils.read_csv_to_dataframe( - args['fertilization_rate_table_path'], 'crop_name').to_dict(orient='index') + crop_to_fertilization_rate_df = utils.read_csv_to_dataframe( + args['fertilization_rate_table_path'], + MODEL_SPEC['args']['fertilization_rate_table_path']) - crop_lucodes = [ - x[_EXPECTED_LUCODE_TABLE_HEADER] - for x in crop_to_landcover_table.values()] + crop_lucodes = list(crop_to_landcover_df[_EXPECTED_LUCODE_TABLE_HEADER]) unique_lucodes = numpy.array([]) for _, lu_band_data in pygeoprocessing.iterblocks( @@ -509,9 +521,7 @@ def execute(args): "aren't in the landcover raster: %s", missing_lucodes) LOGGER.info("Checking that crops correspond to known types.") - for crop_name in crop_to_landcover_table: - crop_lucode = crop_to_landcover_table[crop_name][ - _EXPECTED_LUCODE_TABLE_HEADER] + for crop_name in crop_to_landcover_df.index: crop_climate_bin_raster_path = os.path.join( args['model_data_path'], _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name) @@ -543,9 +553,8 @@ def execute(args): crop_lucode = None observed_yield_nodata = None - for crop_name in crop_to_landcover_table: - crop_lucode = crop_to_landcover_table[crop_name][ - _EXPECTED_LUCODE_TABLE_HEADER] + for crop_name, row in crop_to_landcover_df.iterrows(): + crop_lucode = row[_EXPECTED_LUCODE_TABLE_HEADER] LOGGER.info("Processing crop %s", crop_name) crop_climate_bin_raster_path = os.path.join( args['model_data_path'], @@ -568,19 +577,19 @@ def execute(args): task_name='crop_climate_bin') dependent_task_list.append(crop_climate_bin_task) - crop_regression_table_path = os.path.join( - args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name) - - crop_regression_table = utils.read_csv_to_dataframe( - crop_regression_table_path, 'climate_bin').to_dict(orient='index') - for bin_id in crop_regression_table: + crop_regression_df = utils.read_csv_to_dataframe( + os.path.join(args['model_data_path'], + _REGRESSION_TABLE_PATTERN % crop_name), + MODEL_SPEC['args']['model_data_path']['contents'][ + 'climate_regression_yield_tables']['contents'][ + '[CROP]_regression_yield_table.csv']) + for _, row in crop_regression_df.iterrows(): for header in _EXPECTED_REGRESSION_TABLE_HEADERS: - if crop_regression_table[bin_id][header.lower()] == '': - crop_regression_table[bin_id][header.lower()] = 0 + if numpy.isnan(row[header]): + row[header] = 0 yield_regression_headers = [ - x for x in list(crop_regression_table.values())[0] - if x != 'climate_bin'] + x for x in crop_regression_df.columns if x != 'climate_bin'] reclassify_error_details = { 'raster_name': f'{crop_name} Climate Bin', @@ -597,10 +606,7 @@ def execute(args): output_dir, _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN % ( crop_name, yield_regression_id, file_suffix))) - bin_to_regression_value = dict([ - (bin_id, - crop_regression_table[bin_id][yield_regression_id]) - for bin_id in crop_regression_table]) + bin_to_regression_value = crop_regression_df[yield_regression_id].to_dict() # reclassify nodata to a valid value 
of 0 # we're assuming that the crop doesn't exist where there is no data # this is more likely than assuming the crop does exist, esp. @@ -653,8 +659,8 @@ def execute(args): (regression_parameter_raster_path_lookup['b_nut'], 1), (regression_parameter_raster_path_lookup['c_n'], 1), (args['landcover_raster_path'], 1), - (crop_to_fertlization_rate_table[crop_name] - ['nitrogen_rate'], 'raw'), + (crop_to_fertilization_rate_df['nitrogen_rate'][crop_name], + 'raw'), (crop_lucode, 'raw'), (pixel_area_ha, 'raw')], _x_yield_op, nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD), @@ -672,8 +678,8 @@ def execute(args): (regression_parameter_raster_path_lookup['b_nut'], 1), (regression_parameter_raster_path_lookup['c_p2o5'], 1), (args['landcover_raster_path'], 1), - (crop_to_fertlization_rate_table[crop_name] - ['phosphorus_rate'], 'raw'), + (crop_to_fertilization_rate_df['phosphorus_rate'][crop_name], + 'raw'), (crop_lucode, 'raw'), (pixel_area_ha, 'raw')], _x_yield_op, phosphorus_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD), @@ -691,8 +697,8 @@ def execute(args): (regression_parameter_raster_path_lookup['b_k2o'], 1), (regression_parameter_raster_path_lookup['c_k2o'], 1), (args['landcover_raster_path'], 1), - (crop_to_fertlization_rate_table[crop_name] - ['potassium_rate'], 'raw'), + (crop_to_fertilization_rate_df['potassium_rate'][crop_name], + 'raw'), (crop_lucode, 'raw'), (pixel_area_ha, 'raw')], _x_yield_op, potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD), @@ -796,18 +802,18 @@ def execute(args): # both 'crop_nutrient.csv' and 'crop' are known data/header values for # this model data. - nutrient_table = utils.read_csv_to_dataframe( + nutrient_df = utils.read_csv_to_dataframe( os.path.join(args['model_data_path'], 'crop_nutrient.csv'), - 'crop', convert_cols_to_lower=False, convert_vals_to_lower=False - ).to_dict(orient='index') + MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv']) LOGGER.info("Generating report table") + crop_names = list(crop_to_landcover_df.index) result_table_path = os.path.join( output_dir, 'result_table%s.csv' % file_suffix) _ = task_graph.add_task( func=tabulate_regression_results, - args=(nutrient_table, - crop_to_landcover_table, pixel_area_ha, + args=(nutrient_df, + crop_names, pixel_area_ha, args['landcover_raster_path'], landcover_nodata, output_dir, file_suffix, result_table_path), target_path_list=[result_table_path], @@ -827,7 +833,7 @@ def execute(args): args=(args['aggregate_polygon_path'], target_aggregate_vector_path, landcover_raster_info['projection_wkt'], - crop_to_landcover_table, nutrient_table, + crop_names, nutrient_df, output_dir, file_suffix, aggregate_results_table_path), target_path_list=[target_aggregate_vector_path, @@ -929,17 +935,16 @@ def _mask_observed_yield_op( def tabulate_regression_results( - nutrient_table, - crop_to_landcover_table, pixel_area_ha, landcover_raster_path, + nutrient_df, + crop_names, pixel_area_ha, landcover_raster_path, landcover_nodata, output_dir, file_suffix, target_table_path): """Write table with total yield and nutrient results by crop. This function includes all the operations that write to results_table.csv. Args: - nutrient_table (dict): a lookup of nutrient values by crop in the - form of nutrient_table[][]. 
- crop_to_landcover_table (dict): landcover codes keyed by crop names + nutrient_df (pandas.DataFrame): a table of nutrient values by crop + crop_names (list): list of crop names pixel_area_ha (float): area of lulc raster cells (hectares) landcover_raster_path (string): path to landcover raster landcover_nodata (float): landcover raster nodata value @@ -960,7 +965,7 @@ def tabulate_regression_results( result_table.write( 'crop,area (ha),' + 'production_observed,production_modeled,' + ','.join(nutrient_headers) + '\n') - for crop_name in sorted(crop_to_landcover_table): + for crop_name in sorted(crop_names): result_table.write(crop_name) production_lookup = {} production_pixel_count = 0 @@ -1006,18 +1011,18 @@ def tabulate_regression_results( # convert 100g to Mg and fraction left over from refuse nutrient_factor = 1e4 * ( - 1 - nutrient_table[crop_name]['Percentrefuse'] / 100) + 1 - nutrient_df['percentrefuse'][crop_name] / 100) for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS: total_nutrient = ( nutrient_factor * production_lookup['modeled'] * - nutrient_table[crop_name][nutrient_id]) + nutrient_df[nutrient_id][crop_name]) result_table.write(",%f" % (total_nutrient)) result_table.write( ",%f" % ( nutrient_factor * production_lookup['observed'] * - nutrient_table[crop_name][nutrient_id])) + nutrient_df[nutrient_id][crop_name])) result_table.write('\n') total_area = 0 @@ -1035,8 +1040,8 @@ def tabulate_regression_results( def aggregate_regression_results_to_polygons( base_aggregate_vector_path, target_aggregate_vector_path, - landcover_raster_projection, crop_to_landcover_table, - nutrient_table, output_dir, file_suffix, + landcover_raster_projection, crop_names, + nutrient_df, output_dir, file_suffix, target_aggregate_table_path): """Write table with aggregate results of yield and nutrient values. @@ -1049,9 +1054,8 @@ def aggregate_regression_results_to_polygons( target_aggregate_vector_path (string): path to re-projected copy of polygon vector landcover_raster_projection (string): a WKT projection string - crop_to_landcover_table (dict): landcover codes keyed by crop names - nutrient_table (dict): a lookup of nutrient values by crop in the - form of nutrient_table[][]. + crop_names (list): list of crop names + nutrient_df (pandas.DataFrame): a table of nutrient values by crop output_dir (string): the file path to the output workspace. file_suffix (string): string to append to any output filenames. 
target_aggregate_table_path (string): path to 'aggregate_results.csv' @@ -1072,10 +1076,10 @@ def aggregate_regression_results_to_polygons( total_nutrient_table = collections.defaultdict( lambda: collections.defaultdict(lambda: collections.defaultdict( float))) - for crop_name in crop_to_landcover_table: + for crop_name in crop_names: # convert 100g to Mg and fraction left over from refuse nutrient_factor = 1e4 * ( - 1 - nutrient_table[crop_name]['Percentrefuse'] / 100) + 1 - nutrient_df['percentrefuse'][crop_name] / 100) LOGGER.info( "Calculating zonal stats for %s", crop_name) crop_production_raster_path = os.path.join( @@ -1093,7 +1097,7 @@ def aggregate_regression_results_to_polygons( nutrient_factor * total_yield_lookup['%s_modeled' % crop_name][ fid_index]['sum'] * - nutrient_table[crop_name][nutrient_id]) + nutrient_df[nutrient_id][crop_name]) # process observed observed_yield_path = os.path.join( @@ -1111,7 +1115,7 @@ def aggregate_regression_results_to_polygons( nutrient_factor * # percent crop used * 1000 [100g per Mg] total_yield_lookup[ '%s_observed' % crop_name][fid_index]['sum'] * - nutrient_table[crop_name][nutrient_id]) # nutrient unit per 100g crop + nutrient_df[nutrient_id][crop_name]) # nutrient unit per 100g crop # report everything to a table aggregate_table_path = os.path.join( diff --git a/src/natcap/invest/datastack.py b/src/natcap/invest/datastack.py index 06b7b9a44b..8c9e16bb85 100644 --- a/src/natcap/invest/datastack.py +++ b/src/natcap/invest/datastack.py @@ -336,7 +336,7 @@ def build_datastack_archive(args, model_name, datastack_path): data_dir, f'{key}_csv_data') dataframe = utils.read_csv_to_dataframe( - source_path, convert_vals_to_lower=False) + source_path, args_spec[key]) csv_source_dir = os.path.abspath(os.path.dirname(source_path)) for spatial_column_name in spatial_columns: # Iterate through the spatial columns, identify the set of diff --git a/src/natcap/invest/delineateit/delineateit.py b/src/natcap/invest/delineateit/delineateit.py index 56b5312179..dae31c3ec4 100644 --- a/src/natcap/invest/delineateit/delineateit.py +++ b/src/natcap/invest/delineateit/delineateit.py @@ -137,7 +137,7 @@ "geometries": spec_utils.POINT, "fields": {} }, - "_work_tokens": spec_utils.TASKGRAPH_DIR + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -221,8 +221,6 @@ def execute(args): file_registry = utils.build_file_registry( [(_OUTPUT_FILES, output_directory)], file_suffix) - work_token_dir = os.path.join(output_directory, '_work_tokens') - # Manually setting n_workers to be -1 so that everything happens in the # same thread. try: @@ -232,7 +230,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. 
n_workers = -1 - graph = taskgraph.TaskGraph(work_token_dir, n_workers=n_workers) + graph = taskgraph.TaskGraph( + os.path.join(output_directory, '_work_tokens'), n_workers=n_workers) fill_pits_task = graph.add_task( pygeoprocessing.routing.fill_pits, diff --git a/src/natcap/invest/forest_carbon_edge_effect.py b/src/natcap/invest/forest_carbon_edge_effect.py index e38ead6eed..2422bc5af4 100644 --- a/src/natcap/invest/forest_carbon_edge_effect.py +++ b/src/natcap/invest/forest_carbon_edge_effect.py @@ -10,6 +10,7 @@ import uuid import numpy +import pandas import pygeoprocessing import scipy.spatial import taskgraph @@ -64,6 +65,7 @@ }, "biophysical_table_path": { "type": "csv", + "index_col": "lucode", "columns": { "lucode": spec_utils.LULC_TABLE_COLUMN, "is_tropical_forest": { @@ -249,10 +251,10 @@ "bands": {1: { "type": "number", "units": u.metric_ton/u.hectare }} - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -375,8 +377,6 @@ def execute(args): file_suffix = utils.make_suffix_string(args, 'results_suffix') # Initialize a TaskGraph - taskgraph_working_dir = os.path.join( - intermediate_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -384,7 +384,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # single process mode. - task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) # used to keep track of files generated by this module output_file_registry = { @@ -418,16 +419,15 @@ def execute(args): # Map non-forest landcover codes to carbon biomasses LOGGER.info('Calculating direct mapped carbon stocks') carbon_maps = [] - biophysical_table = utils.read_csv_to_dataframe( - args['biophysical_table_path'], 'lucode').to_dict(orient='index') - biophysical_keys = [ - x.lower() for x in list(biophysical_table.values())[0].keys()] + biophysical_df = utils.read_csv_to_dataframe( + args['biophysical_table_path'], + MODEL_SPEC['args']['biophysical_table_path']) pool_list = [('c_above', True)] if args['pools_to_calculate'] == 'all': pool_list.extend([ ('c_below', False), ('c_soil', False), ('c_dead', False)]) for carbon_pool_type, ignore_tropical_type in pool_list: - if carbon_pool_type in biophysical_keys: + if carbon_pool_type in biophysical_df.columns: carbon_maps.append( output_file_registry[carbon_pool_type+'_map']) task_graph.add_task( @@ -630,8 +630,8 @@ def _calculate_lulc_carbon_map( """ # classify forest pixels from lulc - biophysical_table = utils.read_csv_to_dataframe( - biophysical_table_path, 'lucode').to_dict(orient='index') + biophysical_df = utils.read_csv_to_dataframe( + biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path']) lucode_to_per_cell_carbon = {} cell_size = pygeoprocessing.get_raster_info( @@ -639,24 +639,22 @@ def _calculate_lulc_carbon_map( cell_area_ha = abs(cell_size[0]) * abs(cell_size[1]) / 10000 # Build a lookup table - for lucode in biophysical_table: + for lucode, row in biophysical_df.iterrows(): if compute_forest_edge_effects: - is_tropical_forest = ( - int(biophysical_table[int(lucode)]['is_tropical_forest'])) + is_tropical_forest = row['is_tropical_forest'] else: - is_tropical_forest = 0 - if ignore_tropical_type and is_tropical_forest == 1: + is_tropical_forest = False + if ignore_tropical_type and is_tropical_forest: # 
if tropical forest above ground, lookup table is nodata - lucode_to_per_cell_carbon[int(lucode)] = NODATA_VALUE + lucode_to_per_cell_carbon[lucode] = NODATA_VALUE else: - try: - lucode_to_per_cell_carbon[int(lucode)] = float( - biophysical_table[lucode][carbon_pool_type]) * cell_area_ha - except ValueError: + if pandas.isna(row[carbon_pool_type]): raise ValueError( "Could not interpret carbon pool value as a number. " f"lucode: {lucode}, pool_type: {carbon_pool_type}, " - f"value: {biophysical_table[lucode][carbon_pool_type]}") + f"value: {row[carbon_pool_type]}") + lucode_to_per_cell_carbon[lucode] = row[carbon_pool_type] * cell_area_ha + # map aboveground carbon from table to lulc that is not forest reclass_error_details = { @@ -696,11 +694,9 @@ def _map_distance_from_tropical_forest_edge( """ # Build a list of forest lucodes - biophysical_table = utils.read_csv_to_dataframe( - biophysical_table_path, 'lucode').to_dict(orient='index') - forest_codes = [ - lucode for (lucode, ludata) in biophysical_table.items() - if int(ludata['is_tropical_forest']) == 1] + biophysical_df = utils.read_csv_to_dataframe( + biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path']) + forest_codes = biophysical_df[biophysical_df['is_tropical_forest']].index.values # Make a raster where 1 is non-forest landcover types and 0 is forest lulc_nodata = pygeoprocessing.get_raster_info( diff --git a/src/natcap/invest/habitat_quality.py b/src/natcap/invest/habitat_quality.py index 5aa4c42172..7c0b15e54b 100644 --- a/src/natcap/invest/habitat_quality.py +++ b/src/natcap/invest/habitat_quality.py @@ -77,6 +77,7 @@ }, "threats_table_path": { "type": "csv", + "index_col": "threat", "columns": { "threat": { "type": "freestyle_string", @@ -170,8 +171,13 @@ }, "sensitivity_table_path": { "type": "csv", + "index_col": "lulc", "columns": { "lulc": spec_utils.LULC_TABLE_COLUMN, + "name": { + "type": "freestyle_string", + "required": False + }, "habitat": { "type": "ratio", "about": gettext( @@ -303,10 +309,10 @@ "bands": {1: {"type": "integer"}} } } - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } # All out rasters besides rarity should be gte to 0. Set nodata accordingly. @@ -371,28 +377,23 @@ def execute(args): args['workspace_dir'], 'intermediate') utils.make_directories([intermediate_output_dir, output_dir]) - taskgraph_working_dir = os.path.join( - intermediate_output_dir, '_taskgraph_working_dir') - n_workers = int(args.get('n_workers', -1)) - task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) LOGGER.info("Checking Threat and Sensitivity tables for compliance") # Get CSVs as dictionaries and ensure the key is a string for threats. 
- threat_dict = { - str(key): value for key, value in utils.read_csv_to_dataframe( - args['threats_table_path'], 'THREAT', - expand_path_cols=['cur_path', 'fut_path', 'base_path'] - ).to_dict(orient='index').items()} - sensitivity_dict = utils.read_csv_to_dataframe( - args['sensitivity_table_path'], 'LULC').to_dict(orient='index') + threat_df = utils.read_csv_to_dataframe( + args['threats_table_path'], MODEL_SPEC['args']['threats_table_path'] + ).fillna('') + sensitivity_df = utils.read_csv_to_dataframe( + args['sensitivity_table_path'], + MODEL_SPEC['args']['sensitivity_table_path']) half_saturation_constant = float(args['half_saturation_constant']) # Dictionary for reclassing habitat values - sensitivity_reclassify_habitat_dict = { - int(key): float(val['habitat']) for key, val in - sensitivity_dict.items()} + sensitivity_reclassify_habitat_dict = sensitivity_df['habitat'].to_dict() # declare dictionaries to store the land cover and the threat rasters # pertaining to the different threats @@ -419,13 +420,12 @@ def execute(args): # for each threat given in the CSV file try opening the associated # raster which should be found relative to the Threat CSV - for threat in threat_dict: + for threat, row in threat_df.iterrows(): LOGGER.debug(f"Validating path for threat: {threat}") threat_table_path_col = _THREAT_SCENARIO_MAP[lulc_key] - threat_path = threat_dict[threat][threat_table_path_col] threat_validate_result = _validate_threat_path( - threat_path, lulc_key) + row[threat_table_path_col], lulc_key) if threat_validate_result == 'error': raise ValueError( 'There was an Error locating a threat raster from ' @@ -516,7 +516,7 @@ def execute(args): intermediate_output_dir, (f'{os.path.splitext(os.path.basename(lulc_path))[0]}' f'_aligned{file_suffix}.tif')) - for threat in threat_dict: + for threat in threat_df.index.values: threat_path = threat_path_dict['threat' + lulc_key][threat] if threat_path in lulc_and_threat_raster_list: aligned_threat_path = os.path.join( @@ -578,10 +578,7 @@ def execute(args): access_task_list.append(rasterize_access_task) # calculate the weight sum which is the sum of all the threats' weights - weight_sum = 0.0 - for threat_data in threat_dict.values(): - # Sum weight of threats - weight_sum = weight_sum + threat_data['weight'] + weight_sum = threat_df['weight'].sum() # for each land cover raster provided compute habitat quality for lulc_key, lulc_path in lulc_path_dict.items(): @@ -618,9 +615,9 @@ def execute(args): exit_landcover = False # adjust each threat/threat raster for distance, weight, and access - for threat, threat_data in threat_dict.items(): + for threat, row in threat_df.iterrows(): LOGGER.debug( - f'Calculating threat: {threat}.\nThreat data: {threat_data}') + f'Calculating threat: {threat}.\nThreat data: {row}') # get the threat raster for the specific threat threat_raster_path = threat_path_dict['threat' + lulc_key][threat] @@ -634,7 +631,7 @@ def execute(args): exit_landcover = True break # Check to make sure max_dist is greater than 0 - if threat_data['max_dist'] <= 0.0: + if row['max_dist'] <= 0: raise ValueError( f"The max distance for threat: '{threat}' is less than" " or equal to 0. 
MAX_DIST should be a positive value.") @@ -650,17 +647,15 @@ def execute(args): dependent_task_list=[align_task], task_name=f'distance edt {lulc_key} {threat}') - decay_type = threat_data['decay'] - filtered_threat_raster_path = os.path.join( intermediate_output_dir, - f'filtered_{decay_type}_{threat}{lulc_key}{file_suffix}.tif') + f'filtered_{row["decay"]}_{threat}{lulc_key}{file_suffix}.tif') dist_decay_task = task_graph.add_task( func=_decay_distance, args=( - distance_raster_path, threat_data['max_dist'], - decay_type, filtered_threat_raster_path), + distance_raster_path, row['max_dist'], + row['decay'], filtered_threat_raster_path), target_path_list=[filtered_threat_raster_path], dependent_task_list=[dist_edt_task], task_name=f'distance decay {lulc_key} {threat}') @@ -672,9 +667,7 @@ def execute(args): f'sens_{threat}{lulc_key}{file_suffix}.tif') # Dictionary for reclassing threat sensitivity values - sensitivity_reclassify_threat_dict = { - int(key): float(val[threat]) for key, val in - sensitivity_dict.items()} + sensitivity_reclassify_threat_dict = sensitivity_df[threat].to_dict() reclass_error_details = { 'raster_name': 'LULC', 'column_name': 'lucode', @@ -686,11 +679,11 @@ def execute(args): reclass_error_details), target_path_list=[sens_raster_path], dependent_task_list=[align_task], - task_name=f'sens_raster_{decay_type}{lulc_key}_{threat}') + task_name=f'sens_raster_{row["decay"]}{lulc_key}_{threat}') sensitivity_task_list.append(sens_threat_task) # get the normalized weight for each threat - weight_avg = threat_data['weight'] / weight_sum + weight_avg = row['weight'] / weight_sum # add the threat raster adjusted by distance and the raster # representing sensitivity to the list to be past to @@ -724,7 +717,7 @@ def execute(args): dependent_task_list=[ *threat_decay_task_list, *sensitivity_task_list, *access_task_list], - task_name=f'tot_degradation_{decay_type}{lulc_key}_{threat}') + task_name=f'tot_degradation_{row["decay"]}{lulc_key}_{threat}') # Compute habitat quality # ksq: a term used below to compute habitat quality @@ -1154,20 +1147,18 @@ def validate(args, limit_to=None): if ("threats_table_path" not in invalid_keys and "sensitivity_table_path" not in invalid_keys and "threat_raster_folder" not in invalid_keys): - # Get CSVs as dictionaries and ensure the key is a string for threats. - threat_dict = { - str(key): value for key, value in utils.read_csv_to_dataframe( - args['threats_table_path'], 'THREAT', - expand_path_cols=['cur_path', 'fut_path', 'base_path'] - ).to_dict(orient='index').items()} - sensitivity_dict = utils.read_csv_to_dataframe( - args['sensitivity_table_path'], 'LULC').to_dict(orient='index') + threat_df = utils.read_csv_to_dataframe( + args['threats_table_path'], + MODEL_SPEC['args']['threats_table_path']).fillna('') + sensitivity_df = utils.read_csv_to_dataframe( + args['sensitivity_table_path'], + MODEL_SPEC['args']['sensitivity_table_path']) # check that the threat names in the threats table match with the # threats columns in the sensitivity table. 
- sens_header_set = set(list(sensitivity_dict.values())[0]) - threat_set = {threat for threat in threat_dict} + sens_header_set = set(sensitivity_df.columns) + threat_set = set(threat_df.index.values) missing_sens_header_set = threat_set.difference(sens_header_set) if missing_sens_header_set: @@ -1191,14 +1182,14 @@ def validate(args, limit_to=None): # for each threat given in the CSV file try opening the # associated raster which should be found in # threat_raster_folder - for threat in threat_dict: + for threat, row in threat_df.iterrows(): threat_table_path_col = _THREAT_SCENARIO_MAP[lulc_key] - if threat_table_path_col not in threat_dict[threat]: + if threat_table_path_col not in row: bad_threat_columns.append(threat_table_path_col) break # Threat path from threat CSV is relative to CSV - threat_path = threat_dict[threat][threat_table_path_col] + threat_path = row[threat_table_path_col] threat_validate_result = _validate_threat_path( threat_path, lulc_key) diff --git a/src/natcap/invest/hra.py b/src/natcap/invest/hra.py index b21d9f18e0..b5087a2483 100644 --- a/src/natcap/invest/hra.py +++ b/src/natcap/invest/hra.py @@ -66,6 +66,7 @@ "name": gettext("habitat stressor table"), "about": gettext("A table describing each habitat and stressor."), "type": "csv", + "index_col": "name", "columns": { "name": { "type": "freestyle_string", @@ -437,7 +438,7 @@ } } }, - ".taskgraph": spec_utils.TASKGRAPH_DIR + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -496,7 +497,6 @@ def execute(args): intermediate_dir = os.path.join(args['workspace_dir'], 'intermediate_outputs') output_dir = os.path.join(args['workspace_dir'], 'outputs') - taskgraph_working_dir = os.path.join(args['workspace_dir'], '.taskgraph') utils.make_directories([intermediate_dir, output_dir]) suffix = utils.make_suffix_string(args, 'results_suffix') @@ -526,7 +526,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # single process mode. - graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers) + graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) # parse the info table and get info dicts for habitats, stressors. habitats_info, stressors_info = _parse_info_table(args['info_table_path']) @@ -1584,7 +1585,7 @@ def _align(raster_path_map, vector_path_map, target_pixel_size, layer = None vector = None - _create_raster_from_bounding_box( + pygeoprocessing.create_raster_from_bounding_box( target_raster_path=target_raster_path, target_bounding_box=target_bounding_box, target_pixel_size=target_pixel_size, @@ -1599,74 +1600,6 @@ def _align(raster_path_map, vector_path_map, target_pixel_size, burn_values=burn_values, option_list=rasterize_option_list) -def _create_raster_from_bounding_box( - target_raster_path, target_bounding_box, target_pixel_size, - target_pixel_type, target_srs_wkt, target_nodata=None, - fill_value=None): - """Create a raster from a given bounding box. - - Args: - target_raster_path (string): The path to where the new raster should be - created on disk. - target_bounding_box (tuple): a 4-element iterable of (minx, miny, - maxx, maxy) in projected units matching the SRS of - ``target_srs_wkt``. - target_pixel_size (tuple): A 2-element tuple of the (x, y) pixel size - of the target raster. Elements are in units of the target SRS. - target_pixel_type (int): The GDAL GDT_* type of the target raster. - target_srs_wkt (string): The SRS of the target raster, in Well-Known - Text format. 
- target_nodata (float): If provided, the nodata value of the target - raster. - fill_value=None (number): If provided, the value that the target raster - should be filled with. - - Returns: - ``None`` - """ - bbox_minx, bbox_miny, bbox_maxx, bbox_maxy = target_bounding_box - - driver = gdal.GetDriverByName('GTiff') - n_bands = 1 - n_cols = int(numpy.ceil( - abs((bbox_maxx - bbox_minx) / target_pixel_size[0]))) - n_rows = int(numpy.ceil( - abs((bbox_maxy - bbox_miny) / target_pixel_size[1]))) - - raster = driver.Create( - target_raster_path, n_cols, n_rows, n_bands, target_pixel_type, - options=['TILED=YES', 'BIGTIFF=YES', 'COMPRESS=DEFLATE', - 'BLOCKXSIZE=256', 'BLOCKYSIZE=256']) - raster.SetProjection(target_srs_wkt) - - # Set the transform based on the upper left corner and given pixel - # dimensions. Bounding box is in format [minx, miny, maxx, maxy] - if target_pixel_size[0] < 0: - x_source = bbox_maxx - else: - x_source = bbox_minx - if target_pixel_size[1] < 0: - y_source = bbox_maxy - else: - y_source = bbox_miny - raster_transform = [ - x_source, target_pixel_size[0], 0.0, - y_source, 0.0, target_pixel_size[1]] - raster.SetGeoTransform(raster_transform) - - # Fill the band if requested. - band = raster.GetRasterBand(1) - if fill_value is not None: - band.Fill(fill_value) - - # Set the nodata value. - if target_nodata is not None: - band.SetNoDataValue(float(target_nodata)) - - band = None - raster = None - - def _simplify(source_vector_path, tolerance, target_vector_path, preserve_columns=None): """Simplify a geometry to a given tolerance. @@ -1841,12 +1774,15 @@ def _open_table_as_dataframe(table_path, **kwargs): excel_df = pandas.read_excel(table_path, **kwargs) excel_df.columns = excel_df.columns.str.lower() excel_df['path'] = excel_df['path'].apply( - lambda p: utils.expand_path(p, table_path)) + lambda p: utils.expand_path(p, table_path)).astype('string') + excel_df['name'] = excel_df['name'].astype('string') + excel_df['type'] = excel_df['type'].astype('string') + excel_df['stressor buffer (meters)'] = excel_df['stressor buffer (meters)'].astype(float) + excel_df = excel_df.set_index('name') return excel_df else: return utils.read_csv_to_dataframe( - table_path, convert_vals_to_lower=False, - expand_path_cols=['path'], **kwargs) + table_path, MODEL_SPEC['args']['info_table_path'], **kwargs) def _parse_info_table(info_table_path): @@ -1871,8 +1807,12 @@ def _parse_info_table(info_table_path): """ info_table_path = os.path.abspath(info_table_path) - table = _open_table_as_dataframe(info_table_path) - table = table.set_index('name') + try: + table = _open_table_as_dataframe(info_table_path) + except ValueError as err: + if 'Index has duplicate keys' in str(err): + raise ValueError("Habitat and stressor names may not overlap.") + table = table.rename(columns={'stressor buffer (meters)': 'buffer'}) # Drop the buffer column from the habitats list; we don't need it. @@ -1883,15 +1823,6 @@ def _parse_info_table(info_table_path): stressors = table.loc[table['type'] == 'stressor'].drop( columns=['type']).to_dict(orient='index') - # habitats and stressors must be nonoverlapping sets. - repeated_habitats_stressors = set( - habitats.keys()).intersection(stressors.keys()) - if repeated_habitats_stressors: - raise ValueError( - "Habitat and stressor names may not overlap. 
These names are " - "both habitats and stressors: " - f"{', '.join(repeated_habitats_stressors)}") - return (habitats, stressors) diff --git a/src/natcap/invest/ndr/ndr.py b/src/natcap/invest/ndr/ndr.py index fd8d39353b..79760f728b 100644 --- a/src/natcap/invest/ndr/ndr.py +++ b/src/natcap/invest/ndr/ndr.py @@ -73,6 +73,7 @@ }, "biophysical_table_path": { "type": "csv", + "index_col": "lucode", "columns": { "lucode": spec_utils.LULC_TABLE_COLUMN, "load_[NUTRIENT]": { # nitrogen or phosphorus nutrient loads @@ -373,53 +374,60 @@ "type": "integer" }} }, - "cache_dir": { - "type": "directory", - "contents": { - "aligned_dem.tif": { - "about": "Copy of the DEM clipped to the extent of the other inputs", - "bands": {1: {"type": "number", "units": u.meter}} - }, - "aligned_lulc.tif": { - "about": ( - "Copy of the LULC clipped to the extent of the other inputs " - "and reprojected to the DEM projection"), - "bands": {1: {"type": "integer"}} - }, - "aligned_runoff_proxy.tif": { - "about": ( - "Copy of the runoff proxy clipped to the extent of the other inputs " - "and reprojected to the DEM projection"), - "bands": {1: {"type": "number", "units": u.none}} - }, - "filled_dem.tif": spec_utils.FILLED_DEM, - "slope.tif": spec_utils.SLOPE, - "subsurface_export_n.pickle": { - "about": "Pickled zonal statistics of nitrogen subsurface export" - }, - "subsurface_load_n.pickle": { - "about": "Pickled zonal statistics of nitrogen subsurface load" - }, - "surface_export_n.pickle": { - "about": "Pickled zonal statistics of nitrogen surface export" - }, - "surface_export_p.pickle": { - "about": "Pickled zonal statistics of phosphorus surface export" - }, - "surface_load_n.pickle": { - "about": "Pickled zonal statistics of nitrogen surface load" - }, - "surface_load_p.pickle": { - "about": "Pickled zonal statistics of phosphorus surface load" - }, - "total_export_n.pickle": { - "about": "Pickled zonal statistics of total nitrogen export" - }, - "taskgraph.db": {} - } + "aligned_dem.tif": { + "about": "Copy of the DEM clipped to the extent of the other inputs", + "bands": {1: {"type": "number", "units": u.meter}} + }, + "aligned_lulc.tif": { + "about": ( + "Copy of the LULC clipped to the extent of the other inputs " + "and reprojected to the DEM projection"), + "bands": {1: {"type": "integer"}} + }, + "aligned_runoff_proxy.tif": { + "about": ( + "Copy of the runoff proxy clipped to the extent of the other inputs " + "and reprojected to the DEM projection"), + "bands": {1: {"type": "number", "units": u.none}} + }, + "masked_dem.tif": { + "about": "DEM input masked to exclude pixels outside the watershed", + "bands": {1: {"type": "number", "units": u.meter}} + }, + "masked_lulc.tif": { + "about": "LULC input masked to exclude pixels outside the watershed", + "bands": {1: {"type": "integer"}} + }, + "masked_runoff_proxy.tif": { + "about": "Runoff proxy input masked to exclude pixels outside the watershed", + "bands": {1: {"type": "number", "units": u.none}} + }, + "filled_dem.tif": spec_utils.FILLED_DEM, + "slope.tif": spec_utils.SLOPE, + "subsurface_export_n.pickle": { + "about": "Pickled zonal statistics of nitrogen subsurface export" + }, + "subsurface_load_n.pickle": { + "about": "Pickled zonal statistics of nitrogen subsurface load" + }, + "surface_export_n.pickle": { + "about": "Pickled zonal statistics of nitrogen surface export" + }, + "surface_export_p.pickle": { + "about": "Pickled zonal statistics of phosphorus surface export" + }, + "surface_load_n.pickle": { + "about": "Pickled zonal statistics of 
nitrogen surface load" + }, + "surface_load_p.pickle": { + "about": "Pickled zonal statistics of phosphorus surface load" + }, + "total_export_n.pickle": { + "about": "Pickled zonal statistics of total nitrogen export" } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -463,14 +471,14 @@ 'thresholded_slope_path': 'thresholded_slope.tif', 'dist_to_channel_path': 'dist_to_channel.tif', 'drainage_mask': 'what_drains_to_stream.tif', -} - -_CACHE_BASE_FILES = { 'filled_dem_path': 'filled_dem.tif', 'aligned_dem_path': 'aligned_dem.tif', + 'masked_dem_path': 'masked_dem.tif', 'slope_path': 'slope.tif', 'aligned_lulc_path': 'aligned_lulc.tif', + 'masked_lulc_path': 'masked_lulc.tif', 'aligned_runoff_proxy_path': 'aligned_runoff_proxy.tif', + 'masked_runoff_proxy_path': 'masked_runoff_proxy.tif', 'surface_load_n_pickle_path': 'surface_load_n.pickle', 'surface_load_p_pickle_path': 'surface_load_p.pickle', 'subsurface_load_n_pickle_path': 'subsurface_load_n.pickle', @@ -537,14 +545,14 @@ def execute(args): None """ - def _validate_inputs(nutrients_to_process, lucode_to_parameters): + def _validate_inputs(nutrients_to_process, biophysical_df): """Validate common errors in inputs. Args: nutrients_to_process (list): list of 'n' and/or 'p' - lucode_to_parameters (dictionary): biophysical input table mapping - lucode to dictionary of table parameters. Used to validate - the correct columns are input + biophysical_df (pandas.DataFrame): dataframe representation of + the input biophysical table. Used to validate the correct + columns are input Returns: None @@ -564,7 +572,7 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): # is missing. row_header_table_list = [] - lu_parameter_row = list(lucode_to_parameters.values())[0] + lu_parameter_row = biophysical_df.columns.to_list() row_header_table_list.append( (lu_parameter_row, ['load_', 'eff_', 'crit_len_'], args['biophysical_table_path'])) @@ -594,8 +602,7 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): output_dir = os.path.join(args['workspace_dir']) intermediate_output_dir = os.path.join( args['workspace_dir'], INTERMEDIATE_DIR_NAME) - cache_dir = os.path.join(intermediate_output_dir, 'cache_dir') - utils.make_directories([output_dir, intermediate_output_dir, cache_dir]) + utils.make_directories([output_dir, intermediate_output_dir]) try: n_workers = int(args['n_workers']) @@ -605,13 +612,13 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): # TypeError when n_workers is None. n_workers = -1 # Synchronous mode. 
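The per-nutrient column check that ``_validate_inputs`` now performs on
``biophysical_df.columns`` boils down to verifying that ``load_``, ``eff_`` and
``crit_len_`` columns exist for every selected nutrient. A minimal sketch of that
kind of check, using a hypothetical helper name rather than the model's own code::

    def check_nutrient_columns(columns, nutrients_to_process):
        # For each selected nutrient ('n' and/or 'p') the biophysical table
        # must provide load_<x>, eff_<x> and crit_len_<x> columns.
        missing = []
        for nutrient in nutrients_to_process:
            for prefix in ('load_', 'eff_', 'crit_len_'):
                if f'{prefix}{nutrient}' not in columns:
                    missing.append(f'{prefix}{nutrient}')
        if missing:
            raise ValueError(
                f'biophysical table is missing expected columns: {missing}')

    # e.g. check_nutrient_columns(biophysical_df.columns, nutrients_to_process)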
task_graph = taskgraph.TaskGraph( - cache_dir, n_workers, reporting_interval=5.0) + os.path.join(args['workspace_dir'], 'taskgraph_cache'), + n_workers, reporting_interval=5.0) file_suffix = utils.make_suffix_string(args, 'results_suffix') f_reg = utils.build_file_registry( [(_OUTPUT_BASE_FILES, output_dir), - (_INTERMEDIATE_BASE_FILES, intermediate_output_dir), - (_CACHE_BASE_FILES, cache_dir)], file_suffix) + (_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix) # Build up a list of nutrients to process based on what's checked on nutrients_to_process = [] @@ -619,10 +626,11 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): if args['calc_' + nutrient_id]: nutrients_to_process.append(nutrient_id) - lucode_to_parameters = utils.read_csv_to_dataframe( - args['biophysical_table_path'], 'lucode').to_dict(orient='index') + biophysical_df = utils.read_csv_to_dataframe( + args['biophysical_table_path'], + MODEL_SPEC['args']['biophysical_table_path']) - _validate_inputs(nutrients_to_process, lucode_to_parameters) + _validate_inputs(nutrients_to_process, biophysical_df) # these are used for aggregation in the last step field_pickle_map = {} @@ -646,18 +654,64 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): base_raster_list, aligned_raster_list, ['near']*len(base_raster_list), dem_info['pixel_size'], 'intersection'), - kwargs={ - 'base_vector_path_list': [args['watersheds_path']], - 'vector_mask_options': { - 'mask_vector_path': args['watersheds_path']}}, + kwargs={'base_vector_path_list': [args['watersheds_path']]}, target_path_list=aligned_raster_list, task_name='align rasters') + # Use the cutline feature of gdal.Warp to mask pixels outside the watershed + # it's possible that the DEM, LULC, or runoff proxy inputs might have an + # undefined nodata value. since we're introducing nodata pixels, set a nodata + # value if one is not already defined. 
+ rp_nodata = pygeoprocessing.get_raster_info( + f_reg['aligned_runoff_proxy_path'])['nodata'][0] + mask_runoff_proxy_task = task_graph.add_task( + func=gdal.Warp, + kwargs={ + 'destNameOrDestDS': f_reg['masked_runoff_proxy_path'], + 'srcDSOrSrcDSTab': f_reg['aligned_runoff_proxy_path'], + 'dstNodata': _TARGET_NODATA if rp_nodata is None else rp_nodata, + 'cutlineDSName': args['watersheds_path']}, + dependent_task_list=[align_raster_task], + target_path_list=[f_reg['masked_runoff_proxy_path']], + task_name='mask runoff proxy raster') + + dem_nodata = pygeoprocessing.get_raster_info( + f_reg['aligned_dem_path'])['nodata'][0] + dem_target_nodata = float( # GDAL expects a python float, not numpy.float32 + numpy.finfo(numpy.float32).min if dem_nodata is None else dem_nodata) + mask_dem_task = task_graph.add_task( + func=gdal.Warp, + kwargs={ + 'destNameOrDestDS': f_reg['masked_dem_path'], + 'srcDSOrSrcDSTab': f_reg['aligned_dem_path'], + 'outputType': gdal.GDT_Float32, + 'dstNodata': dem_target_nodata, + 'cutlineDSName': args['watersheds_path']}, + dependent_task_list=[align_raster_task], + target_path_list=[f_reg['masked_dem_path']], + task_name='mask dem raster') + + lulc_nodata = pygeoprocessing.get_raster_info( + f_reg['aligned_lulc_path'])['nodata'][0] + lulc_target_nodata = ( + numpy.iinfo(numpy.int32).min if lulc_nodata is None else lulc_nodata) + mask_lulc_task = task_graph.add_task( + func=gdal.Warp, + kwargs={ + 'destNameOrDestDS': f_reg['masked_lulc_path'], + 'srcDSOrSrcDSTab': f_reg['aligned_lulc_path'], + 'outputType': gdal.GDT_Int32, + 'dstNodata': lulc_target_nodata, + 'cutlineDSName': args['watersheds_path']}, + dependent_task_list=[align_raster_task], + target_path_list=[f_reg['masked_lulc_path']], + task_name='mask lulc raster') + fill_pits_task = task_graph.add_task( func=pygeoprocessing.routing.fill_pits, args=( - (f_reg['aligned_dem_path'], 1), f_reg['filled_dem_path']), - kwargs={'working_dir': cache_dir}, + (f_reg['masked_dem_path'], 1), f_reg['filled_dem_path']), + kwargs={'working_dir': intermediate_output_dir}, dependent_task_list=[align_raster_task], target_path_list=[f_reg['filled_dem_path']], task_name='fill pits') @@ -666,7 +720,7 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): func=pygeoprocessing.routing.flow_dir_mfd, args=( (f_reg['filled_dem_path'], 1), f_reg['flow_direction_path']), - kwargs={'working_dir': cache_dir}, + kwargs={'working_dir': intermediate_output_dir}, dependent_task_list=[fill_pits_task], target_path_list=[f_reg['flow_direction_path']], task_name='flow dir') @@ -707,7 +761,7 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): runoff_proxy_index_task = task_graph.add_task( func=_normalize_raster, - args=((f_reg['aligned_runoff_proxy_path'], 1), + args=((f_reg['masked_runoff_proxy_path'], 1), f_reg['runoff_proxy_index_path']), target_path_list=[f_reg['runoff_proxy_index_path']], dependent_task_list=[align_raster_task], @@ -790,14 +844,16 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): # Perrine says that 'n' is the only case where we could consider a # prop subsurface component. So there's a special case for that. 
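The three masking tasks above share the recipe spelled out in the comment: warp
the aligned raster against the watershed vector as a cutline, and supply a nodata
value whenever the source raster does not define one. Distilled into a standalone
sketch (the helper name is illustrative, not part of the model)::

    import pygeoprocessing
    from osgeo import gdal

    def mask_raster_to_vector(src_raster_path, target_raster_path,
                              cutline_vector_path, fallback_nodata):
        # Pixels outside the cutline polygons become nodata, so a nodata value
        # must exist; keep the source nodata whenever it is already defined.
        src_nodata = pygeoprocessing.get_raster_info(
            src_raster_path)['nodata'][0]
        gdal.Warp(
            destNameOrDestDS=target_raster_path,
            srcDSOrSrcDSTab=src_raster_path,
            dstNodata=fallback_nodata if src_nodata is None else src_nodata,
            cutlineDSName=cutline_vector_path)

The model's three tasks differ only in their fallback nodata values and, for the
DEM and LULC, in forcing an output data type that can hold that fallback.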
if nutrient == 'n': - subsurface_proportion_type = 'proportion_subsurface_n' + subsurface_proportion_map = ( + biophysical_df['proportion_subsurface_n'].to_dict()) else: - subsurface_proportion_type = None + subsurface_proportion_map = None load_task = task_graph.add_task( func=_calculate_load, args=( - f_reg['aligned_lulc_path'], lucode_to_parameters, - f'load_{nutrient}', load_path), + f_reg['masked_lulc_path'], + biophysical_df[f'load_{nutrient}'], + load_path), dependent_task_list=[align_raster_task], target_path_list=[load_path], task_name=f'{nutrient} load') @@ -813,9 +869,8 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): surface_load_path = f_reg[f'surface_load_{nutrient}_path'] surface_load_task = task_graph.add_task( func=_map_surface_load, - args=(modified_load_path, f_reg['aligned_lulc_path'], - lucode_to_parameters, subsurface_proportion_type, - surface_load_path), + args=(modified_load_path, f_reg['masked_lulc_path'], + subsurface_proportion_map, surface_load_path), target_path_list=[surface_load_path], dependent_task_list=[modified_load_task, align_raster_task], task_name=f'map surface load {nutrient}') @@ -824,8 +879,8 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): eff_task = task_graph.add_task( func=_map_lulc_to_val_mask_stream, args=( - f_reg['aligned_lulc_path'], f_reg['stream_path'], - lucode_to_parameters, f'eff_{nutrient}', eff_path), + f_reg['masked_lulc_path'], f_reg['stream_path'], + biophysical_df[f'eff_{nutrient}'].to_dict(), eff_path), target_path_list=[eff_path], dependent_task_list=[align_raster_task, stream_extraction_task], task_name=f'ret eff {nutrient}') @@ -834,8 +889,9 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): crit_len_task = task_graph.add_task( func=_map_lulc_to_val_mask_stream, args=( - f_reg['aligned_lulc_path'], f_reg['stream_path'], - lucode_to_parameters, f'crit_len_{nutrient}', crit_len_path), + f_reg['masked_lulc_path'], f_reg['stream_path'], + biophysical_df[f'crit_len_{nutrient}'].to_dict(), + crit_len_path), target_path_list=[crit_len_path], dependent_task_list=[align_raster_task, stream_extraction_task], task_name=f'ret eff {nutrient}') @@ -879,12 +935,11 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters): # only calculate subsurface things for nitrogen if nutrient == 'n': - proportion_subsurface_map = { - lucode: params['proportion_subsurface_n'] - for lucode, params in lucode_to_parameters.items()} + proportion_subsurface_map = ( + biophysical_df['proportion_subsurface_n'].to_dict()) subsurface_load_task = task_graph.add_task( func=_map_subsurface_load, - args=(modified_load_path, f_reg['aligned_lulc_path'], + args=(modified_load_path, f_reg['masked_lulc_path'], proportion_subsurface_map, f_reg['sub_load_n_path']), target_path_list=[f_reg['sub_load_n_path']], dependent_task_list=[modified_load_task, align_raster_task], @@ -1174,18 +1229,13 @@ def _normalize_raster_op(array): target_nodata) -def _calculate_load( - lulc_raster_path, lucode_to_parameters, load_type, - target_load_raster): +def _calculate_load(lulc_raster_path, lucode_to_load, target_load_raster): """Calculate load raster by mapping landcover and multiplying by area. Args: lulc_raster_path (string): path to integer landcover raster. - lucode_to_parameters (dict): a mapping of landcover IDs to a - dictionary indexed by the value of `load_{load_type}` that - represents a per-area nutrient load. - load_type (string): represent nutrient to map, either 'load_n' or - 'load_p'. 
+ lucode_to_load (dict): a mapping of landcover IDs to per-area + nutrient load. target_load_raster (string): path to target raster that will have total load per pixel. @@ -1205,8 +1255,7 @@ def _map_load_op(lucode_array): if lucode != nodata_landuse: try: result[lucode_array == lucode] = ( - lucode_to_parameters[lucode][load_type] * - cell_area_ha) + lucode_to_load[lucode] * cell_area_ha) except KeyError: raise KeyError( 'lucode: %d is present in the landuse raster but ' @@ -1290,18 +1339,17 @@ def _sum_op(*array_list): def _map_surface_load( - modified_load_path, lulc_raster_path, lucode_to_parameters, - subsurface_proportion_type, target_surface_load_path): + modified_load_path, lulc_raster_path, lucode_to_subsurface_proportion, + target_surface_load_path): """Calculate surface load from landcover raster. Args: modified_load_path (string): path to modified load raster with units of kg/pixel. lulc_raster_path (string): path to landcover raster. - lucode_to_parameters (dict): maps landcover codes to a dictionary that - can be indexed by `subsurface_proportion_type`. - subsurface_proportion_type (string): if None no subsurface transfer - is mapped. Otherwise indexed from lucode_to_parameters. + lucode_to_subsurface_proportion (dict): maps landcover codes to + subsurface proportion values. Or if None, no subsurface transfer + is mapped. target_surface_load_path (string): path to target raster. Returns: @@ -1311,16 +1359,15 @@ def _map_surface_load( lulc_raster_info = pygeoprocessing.get_raster_info(lulc_raster_path) nodata_landuse = lulc_raster_info['nodata'][0] - keys = sorted(numpy.array(list(lucode_to_parameters))) - if subsurface_proportion_type is not None: + if lucode_to_subsurface_proportion is not None: + keys = sorted(lucode_to_subsurface_proportion.keys()) subsurface_values = numpy.array( - [lucode_to_parameters[x][subsurface_proportion_type] - for x in keys]) + [lucode_to_subsurface_proportion[x] for x in keys]) def _map_surface_load_op(lucode_array, modified_load_array): """Convert unit load to total load & handle nodata.""" # If we don't have subsurface, just return 0.0. - if subsurface_proportion_type is None: + if lucode_to_subsurface_proportion is None: return numpy.where( ~utils.array_equals_nodata(lucode_array, nodata_landuse), modified_load_array, _TARGET_NODATA) @@ -1382,17 +1429,13 @@ def _map_subsurface_load_op(lucode_array, modified_load_array): def _map_lulc_to_val_mask_stream( - lulc_raster_path, stream_path, lucode_to_parameters, map_id, - target_eff_path): + lulc_raster_path, stream_path, lucodes_to_vals, target_eff_path): """Make retention efficiency raster from landcover. Args: lulc_raster_path (string): path to landcover raster. stream_path (string) path to stream layer 0, no stream 1 stream. - lucode_to_parameters (dict) mapping of landcover code to a dictionary - that contains the key in `map_id` - map_id (string): the id in the lookup table with values to map - landcover to efficiency. + lucodes_to_val (dict) mapping of landcover codes to values target_eff_path (string): target raster that contains the mapping of landcover codes to retention efficiency values except where there is a stream in which case the retention efficiency is 0. @@ -1401,9 +1444,8 @@ def _map_lulc_to_val_mask_stream( None. 
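    Example of the ``numpy.digitize`` lookup this function relies on, shown
    with tiny in-memory arrays purely for illustration (not model code)::

        import numpy

        lucodes_to_vals = {1: 0.1, 2: 0.5, 3: 0.9}
        lucodes = sorted(lucodes_to_vals.keys())
        values = numpy.array([lucodes_to_vals[x] for x in lucodes])
        lucode_array = numpy.array([[1, 3], [2, 2]])
        index = numpy.digitize(lucode_array.ravel(), lucodes, right=True)
        values[index].reshape(lucode_array.shape)
        # array([[0.1, 0.9],
        #        [0.5, 0.5]])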
""" - keys = sorted(numpy.array(list(lucode_to_parameters))) - values = numpy.array( - [lucode_to_parameters[x][map_id] for x in keys]) + lucodes = sorted(lucodes_to_vals.keys()) + values = numpy.array([lucodes_to_vals[x] for x in lucodes]) nodata_landuse = pygeoprocessing.get_raster_info( lulc_raster_path)['nodata'][0] @@ -1417,7 +1459,7 @@ def _map_eff_op(lucode_array, stream_array): result = numpy.empty(valid_mask.shape, dtype=numpy.float32) result[:] = _TARGET_NODATA index = numpy.digitize( - lucode_array[valid_mask].ravel(), keys, right=True) + lucode_array[valid_mask].ravel(), lucodes, right=True) result[valid_mask] = ( values[index] * (1 - stream_array[valid_mask])) return result diff --git a/src/natcap/invest/pollination.py b/src/natcap/invest/pollination.py index 77aff70095..adb5d851eb 100644 --- a/src/natcap/invest/pollination.py +++ b/src/natcap/invest/pollination.py @@ -39,6 +39,7 @@ }, "guild_table_path": { "type": "csv", + "index_col": "species", "columns": { "species": { "type": "freestyle_string", @@ -87,6 +88,7 @@ }, "landcover_biophysical_table_path": { "type": "csv", + "index_col": "lucode", "columns": { "lucode": spec_utils.LULC_TABLE_COLUMN, "nesting_[SUBSTRATE]_availability_index": { @@ -309,10 +311,10 @@ "about": "Farm vector reprojected to the LULC projection", "fields": {}, "geometries": spec_utils.POLYGONS - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -322,7 +324,7 @@ _NESTING_SUBSTRATE_PATTERN = 'nesting_([^_]+)_availability_index' _FLORAL_RESOURCES_AVAILABLE_PATTERN = 'floral_resources_([^_]+)_index' _EXPECTED_BIOPHYSICAL_HEADERS = [ - 'lucode', _NESTING_SUBSTRATE_PATTERN, _FLORAL_RESOURCES_AVAILABLE_PATTERN] + _NESTING_SUBSTRATE_PATTERN, _FLORAL_RESOURCES_AVAILABLE_PATTERN] # These are patterns expected in the guilds table _NESTING_SUITABILITY_PATTERN = 'nesting_suitability_([^_]+)_index' @@ -332,7 +334,7 @@ _RELATIVE_SPECIES_ABUNDANCE_FIELD = 'relative_abundance' _ALPHA_HEADER = 'alpha' _EXPECTED_GUILD_HEADERS = [ - 'species', _NESTING_SUITABILITY_PATTERN, _FORAGING_ACTIVITY_RE_PATTERN, + _NESTING_SUITABILITY_PATTERN, _FORAGING_ACTIVITY_RE_PATTERN, _ALPHA_HEADER, _RELATIVE_SPECIES_ABUNDANCE_FIELD] _NESTING_SUBSTRATE_INDEX_FILEPATTERN = 'nesting_substrate_index_%s%s.tif' @@ -502,8 +504,6 @@ def execute(args): # create initial working directories and determine file suffixes intermediate_output_dir = os.path.join( args['workspace_dir'], 'intermediate_outputs') - work_token_dir = os.path.join( - intermediate_output_dir, '_taskgraph_working_dir') output_dir = os.path.join(args['workspace_dir']) utils.make_directories( [output_dir, intermediate_output_dir]) @@ -532,7 +532,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # Synchronous mode. 
- task_graph = taskgraph.TaskGraph(work_token_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) if farm_vector_path is not None: # ensure farm vector is in the same projection as the landcover map @@ -718,6 +719,7 @@ def execute(args): pollinator_abundance_task_map = {} floral_resources_index_path_map = {} floral_resources_index_task_map = {} + alpha_kernel_map = {} for species in scenario_variables['species_list']: # calculate foraging_effectiveness[species] # FE(x, s) = sum_j [RA(l(x), j) * fa(s, j)] @@ -762,11 +764,17 @@ def execute(args): intermediate_output_dir, _KERNEL_FILE_PATTERN % ( alpha, file_suffix)) - alpha_kernel_raster_task = task_graph.add_task( - task_name=f'decay_kernel_raster_{alpha}', - func=utils.exponential_decay_kernel_raster, - args=(alpha, kernel_path), - target_path_list=[kernel_path]) + # to avoid creating duplicate kernel rasters check to see if an + # adequate kernel task has already been submitted + try: + alpha_kernel_raster_task = alpha_kernel_map[kernel_path] + except: + alpha_kernel_raster_task = task_graph.add_task( + task_name=f'decay_kernel_raster_{alpha}', + func=utils.exponential_decay_kernel_raster, + args=(alpha, kernel_path), + target_path_list=[kernel_path]) + alpha_kernel_map[kernel_path] = alpha_kernel_raster_task # convolve FE with alpha_s floral_resources_index_path = os.path.join( @@ -1179,23 +1187,22 @@ def _parse_scenario_variables(args): else: farm_vector_path = None - guild_table = utils.read_csv_to_dataframe( - guild_table_path, 'species').to_dict(orient='index') + guild_df = utils.read_csv_to_dataframe( + guild_table_path, MODEL_SPEC['args']['guild_table_path']) LOGGER.info('Checking to make sure guild table has all expected headers') - guild_headers = list(guild_table.values())[0].keys() for header in _EXPECTED_GUILD_HEADERS: - matches = re.findall(header, " ".join(guild_headers)) + matches = re.findall(header, " ".join(guild_df.columns)) if len(matches) == 0: raise ValueError( "Expected a header in guild table that matched the pattern " f"'{header}' but was unable to find one. 
Here are all the " - f"headers from {guild_table_path}: {', '.join(guild_headers)}") + f"headers from {guild_table_path}: {', '.join(guild_df.columns)}") - landcover_biophysical_table = utils.read_csv_to_dataframe( - landcover_biophysical_table_path, 'lucode').to_dict(orient='index') - biophysical_table_headers = ( - list(landcover_biophysical_table.values())[0].keys()) + landcover_biophysical_df = utils.read_csv_to_dataframe( + landcover_biophysical_table_path, + MODEL_SPEC['args']['landcover_biophysical_table_path']) + biophysical_table_headers = landcover_biophysical_df.columns for header in _EXPECTED_BIOPHYSICAL_HEADERS: matches = re.findall(header, " ".join(biophysical_table_headers)) if len(matches) == 0: @@ -1211,7 +1218,7 @@ def _parse_scenario_variables(args): # this dict to dict will map substrate types to guild/biophysical headers # ex substrate_to_header['cavity']['biophysical'] substrate_to_header = collections.defaultdict(dict) - for header in guild_headers: + for header in guild_df.columns: match = re.match(_FORAGING_ACTIVITY_RE_PATTERN, header) if match: season = match.group(1) @@ -1297,55 +1304,48 @@ def _parse_scenario_variables(args): # * substrate_list (list of string) result['substrate_list'] = sorted(substrate_to_header) # * species_list (list of string) - result['species_list'] = sorted(guild_table) + result['species_list'] = sorted(guild_df.index) result['alpha_value'] = dict() for species in result['species_list']: - result['alpha_value'][species] = float( - guild_table[species][_ALPHA_HEADER]) + result['alpha_value'][species] = guild_df[_ALPHA_HEADER][species] # * species_abundance[species] (string->float) - total_relative_abundance = numpy.sum([ - guild_table[species][_RELATIVE_SPECIES_ABUNDANCE_FIELD] - for species in result['species_list']]) + total_relative_abundance = guild_df[_RELATIVE_SPECIES_ABUNDANCE_FIELD].sum() result['species_abundance'] = {} for species in result['species_list']: result['species_abundance'][species] = ( - guild_table[species][_RELATIVE_SPECIES_ABUNDANCE_FIELD] / - float(total_relative_abundance)) + guild_df[_RELATIVE_SPECIES_ABUNDANCE_FIELD][species] / + total_relative_abundance) # map the relative foraging activity of a species during a certain season # (species, season) result['species_foraging_activity'] = dict() for species in result['species_list']: total_activity = numpy.sum([ - guild_table[species][_FORAGING_ACTIVITY_PATTERN % season] + guild_df[_FORAGING_ACTIVITY_PATTERN % season][species] for season in result['season_list']]) for season in result['season_list']: result['species_foraging_activity'][(species, season)] = ( - guild_table[species][_FORAGING_ACTIVITY_PATTERN % season] / - float(total_activity)) + guild_df[_FORAGING_ACTIVITY_PATTERN % season][species] / + total_activity) # * landcover_substrate_index[substrate][landcover] (float) result['landcover_substrate_index'] = collections.defaultdict(dict) - for raw_landcover_id in landcover_biophysical_table: - landcover_id = int(raw_landcover_id) + for landcover_id, row in landcover_biophysical_df.iterrows(): for substrate in result['substrate_list']: substrate_biophysical_header = ( substrate_to_header[substrate]['biophysical']) result['landcover_substrate_index'][substrate][landcover_id] = ( - landcover_biophysical_table[landcover_id][ - substrate_biophysical_header]) + row[substrate_biophysical_header]) # * landcover_floral_resources[season][landcover] (float) result['landcover_floral_resources'] = collections.defaultdict(dict) - for raw_landcover_id in 
landcover_biophysical_table: - landcover_id = int(raw_landcover_id) + for landcover_id, row in landcover_biophysical_df.iterrows(): for season in result['season_list']: floral_rources_header = season_to_header[season]['biophysical'] result['landcover_floral_resources'][season][landcover_id] = ( - landcover_biophysical_table[landcover_id][ - floral_rources_header]) + row[floral_rources_header]) # * species_substrate_index[(species, substrate)] (tuple->float) result['species_substrate_index'] = collections.defaultdict(dict) @@ -1353,7 +1353,7 @@ def _parse_scenario_variables(args): for substrate in result['substrate_list']: substrate_guild_header = substrate_to_header[substrate]['guild'] result['species_substrate_index'][species][substrate] = ( - guild_table[species][substrate_guild_header]) + guild_df[substrate_guild_header][species]) # * foraging_activity_index[(species, season)] (tuple->float) result['foraging_activity_index'] = {} @@ -1362,7 +1362,7 @@ def _parse_scenario_variables(args): key = (species, season) foraging_biophyiscal_header = season_to_header[season]['guild'] result['foraging_activity_index'][key] = ( - guild_table[species][foraging_biophyiscal_header]) + guild_df[foraging_biophyiscal_header][species]) return result diff --git a/src/natcap/invest/recreation/recmodel_client.py b/src/natcap/invest/recreation/recmodel_client.py index 034b24a3fc..ca675f1200 100644 --- a/src/natcap/invest/recreation/recmodel_client.py +++ b/src/natcap/invest/recreation/recmodel_client.py @@ -77,7 +77,7 @@ "point_nearest_distance": { "description": gettext( "Predictor is a point vector. Metric is the Euclidean " - "distance between the center of each AOI grid cell and " + "distance between the centroid of each AOI grid cell and " "the nearest point in this layer.")}, "line_intersect_length": { "description": gettext( @@ -192,6 +192,7 @@ }, "predictor_table_path": { "type": "csv", + "index_col": "id", "columns": predictor_table_columns, "required": "compute_regression", "about": gettext( @@ -202,6 +203,7 @@ }, "scenario_predictor_table_path": { "type": "csv", + "index_col": "id", "columns": predictor_table_columns, "required": False, "about": gettext( @@ -233,7 +235,12 @@ }, "monthly_table.csv": { "about": gettext("Table of monthly photo-user-days."), + "index_col": "poly_id", "columns": { + "poly_id": { + "type": "integer", + "about": gettext("Polygon ID") + }, "[YEAR]-[MONTH]": { "about": gettext( "Total photo-user-days counted in each cell in the " @@ -324,10 +331,10 @@ }, "server_version.pickle": { "about": gettext("Server version info") - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -410,7 +417,7 @@ def execute(args): * 'point_count': count of the points contained in the response polygon * 'point_nearest_distance': distance to the nearest point - from the response polygon + from the centroid of the response polygon * 'line_intersect_length': length of lines that intersect with the response polygon in projected units of AOI * 'polygon_area': area of the polygon contained within @@ -472,7 +479,6 @@ def execute(args): (_INTERMEDIATE_BASE_FILES, intermediate_dir)], file_suffix) # Initialize a TaskGraph - taskgraph_db_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -480,7 +486,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # single process mode. 
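The ``point_nearest_distance`` metric described above is measured from the grid
cell's centroid rather than from the cell polygon itself. A toy illustration of
the difference, using shapely geometries and made-up coordinates::

    from shapely.geometry import Point, Polygon

    cell = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])  # one AOI grid cell
    points = [Point(25, 5), Point(5, 40)]                 # predictor points

    min(cell.distance(p) for p in points)           # 15.0, polygon edge to nearest point
    min(cell.centroid.distance(p) for p in points)  # 20.0, centroid (5, 5) to (25, 5)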
- task_graph = taskgraph.TaskGraph(taskgraph_db_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(output_dir, 'taskgraph_cache'), n_workers) if args['grid_aoi']: prep_aoi_task = task_graph.add_task( @@ -853,16 +860,14 @@ def _schedule_predictor_data_processing( 'line_intersect_length': _line_intersect_length, } - predictor_table = utils.read_csv_to_dataframe( - predictor_table_path, 'id', expand_path_cols=['path'] - ).to_dict(orient='index') + predictor_df = utils.read_csv_to_dataframe( + predictor_table_path, MODEL_SPEC['args']['predictor_table_path']) predictor_task_list = [] predictor_json_list = [] # tracks predictor files to add to shp - for predictor_id in predictor_table: + for predictor_id, row in predictor_df.iterrows(): LOGGER.info(f"Building predictor {predictor_id}") - - predictor_type = predictor_table[predictor_id]['type'].strip() + predictor_type = row['type'] if predictor_type.startswith('raster'): # type must be one of raster_sum or raster_mean raster_op_mode = predictor_type.split('_')[1] @@ -871,7 +876,7 @@ def _schedule_predictor_data_processing( predictor_json_list.append(predictor_target_path) predictor_task_list.append(task_graph.add_task( func=_raster_sum_mean, - args=(predictor_table[predictor_id]['path'], raster_op_mode, + args=(row['path'], raster_op_mode, response_vector_path, predictor_target_path), target_path_list=[predictor_target_path], task_name=f'predictor {predictor_id}')) @@ -884,8 +889,7 @@ def _schedule_predictor_data_processing( predictor_task_list.append(task_graph.add_task( func=_polygon_area, args=(predictor_type, response_polygons_pickle_path, - predictor_table[predictor_id]['path'], - predictor_target_path), + row['path'], predictor_target_path), target_path_list=[predictor_target_path], dependent_task_list=[prepare_response_polygons_task], task_name=f'predictor {predictor_id}')) @@ -896,8 +900,7 @@ def _schedule_predictor_data_processing( predictor_task_list.append(task_graph.add_task( func=predictor_functions[predictor_type], args=(response_polygons_pickle_path, - predictor_table[predictor_id]['path'], - predictor_target_path), + row['path'], predictor_target_path), target_path_list=[predictor_target_path], dependent_task_list=[prepare_response_polygons_task], task_name=f'predictor {predictor_id}')) @@ -1167,7 +1170,7 @@ def _line_intersect_length( def _point_nearest_distance( response_polygons_pickle_path, point_vector_path, predictor_target_path): - """Calculate distance to nearest point for all polygons. + """Calculate distance to nearest point for the centroid of all polygons. Args: response_polygons_pickle_path (str): path to a pickled dictionary which @@ -1197,7 +1200,7 @@ def _point_nearest_distance( f"{(100*index)/len(response_polygons_lookup):.2f}% complete")) point_distance_lookup[str(feature_id)] = min([ - geometry.distance(point) for point in points]) + geometry.centroid.distance(point) for point in points]) LOGGER.info(f"{os.path.basename(point_vector_path)} point distance: " "100.00% complete") with open(predictor_target_path, 'w') as jsonfile: @@ -1546,10 +1549,10 @@ def _validate_same_id_lengths(table_path): tables. 
""" - predictor_table = utils.read_csv_to_dataframe( - table_path, 'id').to_dict(orient='index') + predictor_df = utils.read_csv_to_dataframe( + table_path, MODEL_SPEC['args']['predictor_table_path']) too_long = set() - for p_id in predictor_table: + for p_id in predictor_df.index: if len(p_id) > 10: too_long.add(p_id) if len(too_long) > 0: @@ -1580,21 +1583,21 @@ def _validate_same_ids_and_types( tables. """ - predictor_table = utils.read_csv_to_dataframe( - predictor_table_path, 'id').to_dict(orient='index') - - scenario_predictor_table = utils.read_csv_to_dataframe( - scenario_predictor_table_path, 'id').to_dict(orient='index') - - predictor_table_pairs = set([ - (p_id, predictor_table[p_id]['type'].strip()) for p_id in predictor_table]) - scenario_predictor_table_pairs = set([ - (p_id, scenario_predictor_table[p_id]['type'].strip()) for p_id in - scenario_predictor_table]) - if predictor_table_pairs != scenario_predictor_table_pairs: + predictor_df = utils.read_csv_to_dataframe( + predictor_table_path, MODEL_SPEC['args']['predictor_table_path']) + + scenario_predictor_df = utils.read_csv_to_dataframe( + scenario_predictor_table_path, + MODEL_SPEC['args']['scenario_predictor_table_path']) + + predictor_pairs = set([ + (p_id, row['type']) for p_id, row in predictor_df.iterrows()]) + scenario_predictor_pairs = set([ + (p_id, row['type']) for p_id, row in scenario_predictor_df.iterrows()]) + if predictor_pairs != scenario_predictor_pairs: raise ValueError('table pairs unequal.\n\t' - f'predictor: {predictor_table_pairs}\n\t' - f'scenario:{scenario_predictor_table_pairs}') + f'predictor: {predictor_pairs}\n\t' + f'scenario:{scenario_predictor_pairs}') LOGGER.info('tables validate correctly') @@ -1617,8 +1620,8 @@ def _validate_same_projection(base_vector_path, table_path): # This will load the table as a list of paths which we can iterate through # without bothering the rest of the table structure data_paths = utils.read_csv_to_dataframe( - table_path, convert_vals_to_lower=False, expand_path_cols=['path'] - ).squeeze('columns')['path'].tolist() + table_path, MODEL_SPEC['args']['predictor_table_path'] + )['path'].tolist() base_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR) base_layer = base_vector.GetLayer() @@ -1674,14 +1677,14 @@ def _validate_predictor_types(table_path): ValueError if any value in the ``type`` column does not match a valid type, ignoring leading/trailing whitespace. """ - df = utils.read_csv_to_dataframe(table_path, convert_vals_to_lower=False) + df = utils.read_csv_to_dataframe( + table_path, MODEL_SPEC['args']['predictor_table_path']) # ignore leading/trailing whitespace because it will be removed # when the type values are used - type_list = set([type.strip() for type in df['type']]) valid_types = set({'raster_mean', 'raster_sum', 'point_count', 'point_nearest_distance', 'line_intersect_length', 'polygon_area_coverage', 'polygon_percent_coverage'}) - difference = type_list.difference(valid_types) + difference = set(df['type']).difference(valid_types) if difference: raise ValueError('The table contains invalid type value(s): ' f'{difference}. 
The allowed types are: {valid_types}') diff --git a/src/natcap/invest/routedem.py b/src/natcap/invest/routedem.py index f9ad493847..5a019d0bde 100644 --- a/src/natcap/invest/routedem.py +++ b/src/natcap/invest/routedem.py @@ -107,7 +107,7 @@ }, }, "outputs": { - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR, "filled.tif": spec_utils.FILLED_DEM, "flow_accumulation.tif": spec_utils.FLOW_ACCUMULATION, "flow_direction.tif": spec_utils.FLOW_DIRECTION, @@ -341,8 +341,7 @@ def execute(args): ``None`` """ file_suffix = utils.make_suffix_string(args, 'results_suffix') - task_cache_dir = os.path.join(args['workspace_dir'], '_taskgraph_working_dir') - utils.make_directories([args['workspace_dir'], task_cache_dir]) + utils.make_directories([args['workspace_dir']]) if ('calculate_flow_direction' in args and bool(args['calculate_flow_direction'])): @@ -373,7 +372,8 @@ def execute(args): # TypeError when n_workers is None. n_workers = -1 # Synchronous mode. - graph = taskgraph.TaskGraph(task_cache_dir, n_workers=n_workers) + graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers=n_workers) # Calculate slope. This is intentionally on the original DEM, not # on the pitfilled DEM. If the user really wants the slop of the filled diff --git a/src/natcap/invest/scenario_gen_proximity.py b/src/natcap/invest/scenario_gen_proximity.py index f272c6dc79..62c4acd8b0 100644 --- a/src/natcap/invest/scenario_gen_proximity.py +++ b/src/natcap/invest/scenario_gen_proximity.py @@ -121,6 +121,7 @@ "nearest_to_edge.csv": { "about": gettext( "Table of land cover classes and the amount of each that was converted for the nearest-to-edge conversion scenario."), + "index_col": "lucode", "columns": { "lucode": { "type": "integer", @@ -140,6 +141,7 @@ "farthest_from_edge.csv": { "about": gettext( "Table of land cover classes and the amount of each that was converted for the nearest-to-edge conversion scenario."), + "index_col": "lucode", "columns": { "lucode": { "type": "integer", @@ -175,10 +177,10 @@ "Map of the distance from each pixel to the nearest " "edge of the focal landcover."), "bands": {1: {"type": "number", "units": u.pixel}} - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -251,8 +253,6 @@ def execute(args): utils.make_directories( [output_dir, intermediate_output_dir, tmp_dir]) - work_token_dir = os.path.join( - intermediate_output_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -260,7 +260,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # Single process mode. - task_graph = taskgraph.TaskGraph(work_token_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) area_to_convert = float(args['area_to_convert']) replacement_lucode = int(args['replacement_lucode']) diff --git a/src/natcap/invest/scenic_quality/scenic_quality.py b/src/natcap/invest/scenic_quality/scenic_quality.py index 3cf41f918e..9ff1679c19 100644 --- a/src/natcap/invest/scenic_quality/scenic_quality.py +++ b/src/natcap/invest/scenic_quality/scenic_quality.py @@ -209,10 +209,10 @@ "visibility_[FEATURE_ID].tif": { "about": gettext("Map of visibility for a given structure's viewpoint. 
This raster has pixel values of 0 (not visible), 1 (visible), or nodata (where the DEM is nodata)."), "bands": {1: {"type": "integer"}} - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -290,7 +290,6 @@ def execute(args): (_INTERMEDIATE_BASE_FILES, intermediate_dir)], file_suffix) - work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -298,7 +297,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # Synchronous execution - graph = taskgraph.TaskGraph(work_token_dir, n_workers) + graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) reprojected_aoi_task = graph.add_task( pygeoprocessing.reproject_vector, @@ -549,10 +549,10 @@ def _determine_valid_viewpoints(dem_path, structures_path): # Coordinates in map units to pass to viewshed algorithm geometry = point.GetGeometryRef() - if geometry.GetGeometryType() != ogr.wkbPoint: + if geometry.GetGeometryName() != 'POINT': raise AssertionError( - f"Feature {point.GetFID()} is not a Point geometry. " - "Features must be a Point.") + f"Feature {point.GetFID()} must be a POINT geometry, " + f"not {geometry.GetGeometryName()}") viewpoint = (geometry.GetX(), geometry.GetY()) diff --git a/src/natcap/invest/sdr/sdr.py b/src/natcap/invest/sdr/sdr.py index a0b01d2223..f3caed19b0 100644 --- a/src/natcap/invest/sdr/sdr.py +++ b/src/natcap/invest/sdr/sdr.py @@ -87,6 +87,7 @@ }, "biophysical_table_path": { "type": "csv", + "index_col": "lucode", "columns": { "lucode": spec_utils.LULC_TABLE_COLUMN, "usle_c": { @@ -351,57 +352,52 @@ "times the thresholded slope (in eq. 
(74))"), "bands": {1: {"type": "ratio"}} }, - "churn_dir_not_for_humans": { - "type": "directory", - "contents": { - "aligned_dem.tif": { - "about": gettext( - "Copy of the input DEM, clipped to the extent " - "of the other raster inputs."), - "bands": {1: { - "type": "number", - "units": u.meter - }} - }, - "aligned_drainage.tif": { - "about": gettext( - "Copy of the input drainage map, clipped to " - "the extent of the other raster inputs and " - "aligned to the DEM."), - "bands": {1: {"type": "integer"}}, - }, - "aligned_erodibility.tif": { - "about": gettext( - "Copy of the input erodibility map, clipped to " - "the extent of the other raster inputs and " - "aligned to the DEM."), - "bands": {1: { - "type": "number", - "units": u.metric_ton*u.hectare*u.hour/(u.hectare*u.megajoule*u.millimeter) - }} - }, - "aligned_erosivity.tif": { - "about": gettext( - "Copy of the input erosivity map, clipped to " - "the extent of the other raster inputs and " - "aligned to the DEM."), - "bands": {1: { - "type": "number", - "units": u.megajoule*u.millimeter/(u.hectare*u.hour*u.year) - }} - }, - "aligned_lulc.tif": { - "about": gettext( - "Copy of the input drainage map, clipped to " - "the extent of the other raster inputs and " - "aligned to the DEM."), - "bands": {1: {"type": "integer"}}, - }, - "taskgraph.db": {} - } + "aligned_dem.tif": { + "about": gettext( + "Copy of the input DEM, clipped to the extent " + "of the other raster inputs."), + "bands": {1: { + "type": "number", + "units": u.meter + }} + }, + "aligned_drainage.tif": { + "about": gettext( + "Copy of the input drainage map, clipped to " + "the extent of the other raster inputs and " + "aligned to the DEM."), + "bands": {1: {"type": "integer"}}, + }, + "aligned_erodibility.tif": { + "about": gettext( + "Copy of the input erodibility map, clipped to " + "the extent of the other raster inputs and " + "aligned to the DEM."), + "bands": {1: { + "type": "number", + "units": u.metric_ton*u.hectare*u.hour/(u.hectare*u.megajoule*u.millimeter) + }} + }, + "aligned_erosivity.tif": { + "about": gettext( + "Copy of the input erosivity map, clipped to " + "the extent of the other raster inputs and " + "aligned to the DEM."), + "bands": {1: { + "type": "number", + "units": u.megajoule*u.millimeter/(u.hectare*u.hour*u.year) + }} + }, + "aligned_lulc.tif": { + "about": gettext( + "Copy of the input drainage map, clipped to " + "the extent of the other raster inputs and " + "aligned to the DEM."), + "bands": {1: {"type": "integer"}}, } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -420,6 +416,11 @@ INTERMEDIATE_DIR_NAME = 'intermediate_outputs' _INTERMEDIATE_BASE_FILES = { + 'aligned_dem_path': 'aligned_dem.tif', + 'aligned_drainage_path': 'aligned_drainage.tif', + 'aligned_erodibility_path': 'aligned_erodibility.tif', + 'aligned_erosivity_path': 'aligned_erosivity.tif', + 'aligned_lulc_path': 'aligned_lulc.tif', 'cp_factor_path': 'cp.tif', 'd_dn_path': 'd_dn.tif', 'd_up_path': 'd_up.tif', @@ -440,17 +441,9 @@ 'w_path': 'w.tif', 'ws_inverse_path': 'ws_inverse.tif', 'e_prime_path': 'e_prime.tif', - 'weighted_avg_aspect_path': 'weighted_avg_aspect.tif', 'drainage_mask': 'what_drains_to_stream.tif', } -_TMP_BASE_FILES = { - 'aligned_dem_path': 'aligned_dem.tif', - 'aligned_drainage_path': 'aligned_drainage.tif', - 'aligned_erodibility_path': 'aligned_erodibility.tif', - 'aligned_erosivity_path': 'aligned_erosivity.tif', - 'aligned_lulc_path': 'aligned_lulc.tif', -} # Target nodata is for general rasters that are positive, and _IC_NODATA 
are # for rasters that are any range @@ -501,42 +494,27 @@ def execute(args): """ file_suffix = utils.make_suffix_string(args, 'results_suffix') - biophysical_table = utils.read_csv_to_dataframe( - args['biophysical_table_path'], 'lucode').to_dict(orient='index') + biophysical_df = utils.read_csv_to_dataframe( + args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path']) # Test to see if c or p values are outside of 0..1 - for table_key in ['usle_c', 'usle_p']: - for (lulc_code, table) in biophysical_table.items(): - try: - float(lulc_code) - except ValueError: - raise ValueError( - f'Value "{lulc_code}" from the "lucode" column of the ' - f'biophysical table is not a number. Please check the ' - f'formatting of {args["biophysical_table_path"]}') - try: - float_value = float(table[table_key]) - if float_value < 0 or float_value > 1: - raise ValueError( - f'{float_value} is not within range 0..1') - except ValueError: + for key in ['usle_c', 'usle_p']: + for lulc_code, row in biophysical_df.iterrows(): + if row[key] < 0 or row[key] > 1: raise ValueError( f'A value in the biophysical table is not a number ' f'within range 0..1. The offending value is in ' - f'column "{table_key}", lucode row "{lulc_code}", ' - f'and has value "{table[table_key]}"') + f'column "{key}", lucode row "{lulc_code}", ' + f'and has value "{row[key]}"') intermediate_output_dir = os.path.join( args['workspace_dir'], INTERMEDIATE_DIR_NAME) output_dir = os.path.join(args['workspace_dir']) - churn_dir = os.path.join( - intermediate_output_dir, 'churn_dir_not_for_humans') - utils.make_directories([output_dir, intermediate_output_dir, churn_dir]) + utils.make_directories([output_dir, intermediate_output_dir]) f_reg = utils.build_file_registry( [(_OUTPUT_BASE_FILES, output_dir), - (_INTERMEDIATE_BASE_FILES, intermediate_output_dir), - (_TMP_BASE_FILES, churn_dir)], file_suffix) + (_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix) try: n_workers = int(args['n_workers']) @@ -546,7 +524,8 @@ def execute(args): # TypeError when n_workers is None. n_workers = -1 # Synchronous mode. 
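The biophysical table checks above now operate directly on the dataframe, and the
C and P columns are later combined into a single lucode-to-C*P lookup. A compact,
self-contained sketch of the same idea (hypothetical table values, pandas
operations only)::

    import pandas

    biophysical_df = pandas.DataFrame(
        {'usle_c': [0.003, 0.25], 'usle_p': [1.0, 0.5]},
        index=pandas.Index([1, 2], name='lucode'))

    # Range check equivalent to the loop above, done column-wise.
    for key in ['usle_c', 'usle_p']:
        bad = biophysical_df[
            (biophysical_df[key] < 0) | (biophysical_df[key] > 1)]
        if not bad.empty:
            raise ValueError(
                f'{key} values outside 0..1 for lucodes {list(bad.index)}')

    lulc_to_c = biophysical_df['usle_c'].to_dict()
    lulc_to_cp = (biophysical_df['usle_c'] * biophysical_df['usle_p']).to_dict()
    # lulc_to_cp == {1: 0.003, 2: 0.125}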
task_graph = taskgraph.TaskGraph( - churn_dir, n_workers, reporting_interval=5.0) + os.path.join(output_dir, 'taskgraph_cache'), + n_workers, reporting_interval=5.0) base_list = [] aligned_list = [] @@ -617,14 +596,6 @@ def execute(args): dependent_task_list=[pit_fill_task], task_name='flow direction calculation') - weighted_avg_aspect_task = task_graph.add_task( - func=sdr_core.calculate_average_aspect, - args=(f_reg['flow_direction_path'], - f_reg['weighted_avg_aspect_path']), - target_path_list=[f_reg['weighted_avg_aspect_path']], - dependent_task_list=[flow_dir_task], - task_name='weighted average of multiple-flow aspects') - flow_accumulation_task = task_graph.add_task( func=pygeoprocessing.routing.flow_accumulation_mfd, args=( @@ -639,13 +610,11 @@ def execute(args): args=( f_reg['flow_accumulation_path'], f_reg['slope_path'], - f_reg['weighted_avg_aspect_path'], float(args['l_max']), f_reg['ls_path']), target_path_list=[f_reg['ls_path']], dependent_task_list=[ - flow_accumulation_task, slope_task, - weighted_avg_aspect_task], + flow_accumulation_task, slope_task], task_name='ls factor calculation') stream_task = task_graph.add_task( @@ -675,19 +644,21 @@ def execute(args): drainage_raster_path_task = ( f_reg['stream_path'], stream_task) + lulc_to_c = biophysical_df['usle_c'].to_dict() threshold_w_task = task_graph.add_task( func=_calculate_w, args=( - biophysical_table, f_reg['aligned_lulc_path'], f_reg['w_path'], + lulc_to_c, f_reg['aligned_lulc_path'], f_reg['w_path'], f_reg['thresholded_w_path']), target_path_list=[f_reg['w_path'], f_reg['thresholded_w_path']], dependent_task_list=[align_task], task_name='calculate W') + lulc_to_cp = (biophysical_df['usle_c'] * biophysical_df['usle_p']).to_dict() cp_task = task_graph.add_task( func=_calculate_cp, args=( - biophysical_table, f_reg['aligned_lulc_path'], + lulc_to_cp, f_reg['aligned_lulc_path'], f_reg['cp_factor_path']), target_path_list=[f_reg['cp_factor_path']], dependent_task_list=[align_task], @@ -1029,26 +1000,61 @@ def _what_drains_to_stream(flow_dir_mfd, dist_to_channel): def _calculate_ls_factor( - flow_accumulation_path, slope_path, avg_aspect_path, l_max, - target_ls_prime_factor_path): + flow_accumulation_path, slope_path, l_max, + target_ls_factor_path): """Calculate LS factor. - Calculates a modified LS factor as Equation 3 from "Extension and + Calculates the LS factor using Equation 3 from "Extension and validation of a geographic information system-based method for calculating the Revised Universal Soil Loss Equation length-slope factor for erosion - risk assessments in large watersheds" where the ``x`` term is the average - aspect ratio weighted by proportional flow to account for multiple flow - direction. + risk assessments in large watersheds". + + The equation for this is:: + + (upstream_area + pixel_area)^(m+1) - upstream_area^(m+1) + LS = S * -------------------------------------------------------- + (pixel_area^(m+2)) * aspect_dir * 22.13^(m) + + Where + + * ``S`` is the slope factor defined in equation 4 from the same paper, + calculated by the following where ``b`` is the slope in radians: + + * ``S = 10.8 * sin(b) + 0.03`` where slope < 9% + * ``S = 16.8 * sin(b) - 0.50`` where slope >= 9% + + * ``upstream_area`` is interpreted as the square root of the + catchment area, to match SAGA-GIS's method for calculating LS + Factor. + * ``pixel_area`` is the area of the pixel in square meters. + * ``m`` is the slope-length exponent of the RUSLE LS-factor, + which, as discussed in Oliveira et al. 
2013 is a function of the + on-pixel slope theta: + + * ``m = 0.2`` when ``theta <= 1%`` + * ``m = 0.3`` when ``1% < theta <= 3.5%`` + * ``m = 0.4`` when ``3.5% < theta <= 5%`` + * ``m = 0.5`` when ``5% < theta <= 9%`` + * ``m = (beta / (1+beta)`` when ``theta > 9%``, where + ``beta = (sin(theta) / 0.0896) / (3*sin(theta)^0.8 + 0.56)`` + + * ``aspect_dir`` is calculated by ``|sin(alpha)| + |cos(alpha)|`` + for the given pixel. + + Oliveira et al can be found at: + + Oliveira, A.H., Silva, M.A. da, Silva, M.L.N., Curi, N., Neto, G.K., + Freitas, D.A.F. de, 2013. Development of Topographic Factor Modeling + for Application in Soil Erosion Models, in: Intechopen (Ed.), Soil + Processes and Current Trends in Quality Assessment. p. 28. Args: flow_accumulation_path (string): path to raster, pixel values are the contributing upslope area at that cell. Pixel size is square. slope_path (string): path to slope raster as a percent - avg_aspect_path (string): The path to to raster of the weighted average - of aspects based on proportional flow. l_max (float): if the calculated value of L exceeds this value it is clamped to this value. - target_ls_prime_factor_path (string): path to output ls_prime_factor + target_ls_factor_path (string): path to output ls_prime_factor raster Returns: @@ -1056,8 +1062,6 @@ def _calculate_ls_factor( """ slope_nodata = pygeoprocessing.get_raster_info(slope_path)['nodata'][0] - avg_aspect_nodata = pygeoprocessing.get_raster_info( - avg_aspect_path)['nodata'][0] flow_accumulation_info = pygeoprocessing.get_raster_info( flow_accumulation_path) @@ -1065,14 +1069,12 @@ def _calculate_ls_factor( cell_size = abs(flow_accumulation_info['pixel_size'][0]) cell_area = cell_size ** 2 - def ls_factor_function( - percent_slope, flow_accumulation, avg_aspect, l_max): - """Calculate the LS' factor. + def ls_factor_function(percent_slope, flow_accumulation, l_max): + """Calculate the LS factor. Args: percent_slope (numpy.ndarray): slope in percent flow_accumulation (numpy.ndarray): upslope pixels - avg_aspect (numpy.ndarray): the weighted average aspect from MFD l_max (float): max L factor, clamp to this value if L exceeds it Returns: @@ -1082,16 +1084,27 @@ def ls_factor_function( # avg aspect intermediate output should always have a defined # nodata value from pygeoprocessing valid_mask = ( - (~utils.array_equals_nodata(avg_aspect, avg_aspect_nodata)) & ~utils.array_equals_nodata(percent_slope, slope_nodata) & ~utils.array_equals_nodata( flow_accumulation, flow_accumulation_nodata)) result = numpy.empty(valid_mask.shape, dtype=numpy.float32) result[:] = _TARGET_NODATA - contributing_area = (flow_accumulation[valid_mask]-1) * cell_area + # Although Desmet & Govers (1996) discusses "upstream contributing + # area", this is not strictly defined. We decided to use the square + # root of the upstream contributing area here as an estimate, which + # matches the SAGA LS Factor option "square root of catchment area". + # See the InVEST ADR-0001 for more information. + # We subtract 1 from the flow accumulation because FA includes itself + # in its count of pixels upstream and our LS factor equation wants only + # those pixels that are strictly upstream. + contributing_area = numpy.sqrt( + (flow_accumulation[valid_mask]-1) * cell_area) slope_in_radians = numpy.arctan(percent_slope[valid_mask] / 100.0) + aspect_length = (numpy.fabs(numpy.sin(slope_in_radians)) + + numpy.fabs(numpy.cos(slope_in_radians))) + # From Equation 4 in "Extension and validation of a geographic # information system ..." 
slope_factor = numpy.where( @@ -1121,7 +1134,7 @@ def ls_factor_function( l_factor = ( ((contributing_area + cell_area)**(m_exp+1) - contributing_area ** (m_exp+1)) / - ((cell_size ** (m_exp + 2)) * (avg_aspect[valid_mask]**m_exp) * + ((cell_size ** (m_exp + 2)) * (aspect_length**m_exp) * (22.13**m_exp))) # threshold L factor to l_max @@ -1130,12 +1143,10 @@ def ls_factor_function( result[valid_mask] = l_factor * slope_factor return result - # call vectorize datasets to calculate the ls_factor pygeoprocessing.raster_calculator( - [(path, 1) for path in [ - slope_path, flow_accumulation_path, avg_aspect_path]] + [ + [(path, 1) for path in [slope_path, flow_accumulation_path]] + [ (l_max, 'raw')], - ls_factor_function, target_ls_prime_factor_path, gdal.GDT_Float32, + ls_factor_function, target_ls_factor_path, gdal.GDT_Float32, _TARGET_NODATA) @@ -1277,15 +1288,14 @@ def add_drainage_op(stream, drainage): def _calculate_w( - biophysical_table, lulc_path, w_factor_path, + lulc_to_c, lulc_path, w_factor_path, out_thresholded_w_factor_path): """W factor: map C values from LULC and lower threshold to 0.001. W is a factor in calculating d_up accumulation for SDR. Args: - biophysical_table (dict): map of LULC codes to dictionaries that - contain at least a 'usle_c' field + lulc_to_c (dict): mapping of LULC codes to C values lulc_path (string): path to LULC raster w_factor_path (string): path to outputed raw W factor out_thresholded_w_factor_path (string): W factor from `w_factor_path` @@ -1295,9 +1305,6 @@ def _calculate_w( None """ - lulc_to_c = dict( - [(lulc_code, float(table['usle_c'])) for - (lulc_code, table) in biophysical_table.items()]) if pygeoprocessing.get_raster_info(lulc_path)['nodata'][0] is None: # will get a case where the raster might be masked but nothing to # replace so 0 is used by default. Ensure this exists in lookup. @@ -1326,13 +1333,11 @@ def threshold_w(w_val): gdal.GDT_Float32, _TARGET_NODATA) -def _calculate_cp(biophysical_table, lulc_path, cp_factor_path): +def _calculate_cp(lulc_to_cp, lulc_path, cp_factor_path): """Map LULC to C*P value. Args: - biophysical_table (dict): map of lulc codes to dictionaries that - contain at least the entry 'usle_c" and 'usle_p' corresponding to - those USLE components. + lulc_to_cp (dict): mapping of lulc codes to CP values lulc_path (string): path to LULC raster cp_factor_path (string): path to output raster of LULC mapped to C*P values @@ -1341,9 +1346,6 @@ def _calculate_cp(biophysical_table, lulc_path, cp_factor_path): None """ - lulc_to_cp = dict( - [(lulc_code, float(table['usle_c']) * float(table['usle_p'])) for - (lulc_code, table) in biophysical_table.items()]) if pygeoprocessing.get_raster_info(lulc_path)['nodata'][0] is None: # will get a case where the raster might be masked but nothing to # replace so 0 is used by default. Ensure this exists in lookup. diff --git a/src/natcap/invest/sdr/sdr_core.pyx b/src/natcap/invest/sdr/sdr_core.pyx index 2c8440097c..ada579f54a 100644 --- a/src/natcap/invest/sdr/sdr_core.pyx +++ b/src/natcap/invest/sdr/sdr_core.pyx @@ -675,127 +675,3 @@ def calculate_sediment_deposition( LOGGER.info('Sediment deposition 100% complete') sediment_deposition_raster.close() - - -def calculate_average_aspect( - mfd_flow_direction_path, target_average_aspect_path): - """Calculate the Weighted Average Aspect Ratio from MFD. - - Calculates the average aspect ratio weighted by proportional flow - direction. - - Args: - mfd_flow_direction_path (string): The path to an MFD flow direction - raster. 
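Putting the LS factor pieces above together for a single pixel, a scalar
restatement of the same equations (illustration only; the model applies this
per-block over numpy arrays, and ``n_upstream_pixels`` here stands for the flow
accumulation minus one)::

    import numpy

    def ls_factor_single_pixel(percent_slope, n_upstream_pixels, cell_size, l_max):
        cell_area = cell_size ** 2
        # SAGA-style estimate of the specific catchment area.
        contributing_area = numpy.sqrt(n_upstream_pixels * cell_area)
        slope_in_radians = numpy.arctan(percent_slope / 100)
        # aspect_dir term: |sin(alpha)| + |cos(alpha)|
        aspect_length = (abs(numpy.sin(slope_in_radians))
                         + abs(numpy.cos(slope_in_radians)))
        # Slope factor S (equation 4), split at a 9% slope.
        if percent_slope < 9:
            slope_factor = 10.8 * numpy.sin(slope_in_radians) + 0.03
        else:
            slope_factor = 16.8 * numpy.sin(slope_in_radians) - 0.50
        # Slope-length exponent m, following the Oliveira et al. thresholds.
        if percent_slope <= 1:
            m_exp = 0.2
        elif percent_slope <= 3.5:
            m_exp = 0.3
        elif percent_slope <= 5:
            m_exp = 0.4
        elif percent_slope <= 9:
            m_exp = 0.5
        else:
            beta = (numpy.sin(slope_in_radians) / 0.0896) / (
                3 * numpy.sin(slope_in_radians) ** 0.8 + 0.56)
            m_exp = beta / (1 + beta)
        l_factor = (
            ((contributing_area + cell_area) ** (m_exp + 1)
             - contributing_area ** (m_exp + 1))
            / (cell_size ** (m_exp + 2)
               * aspect_length ** m_exp
               * 22.13 ** m_exp))
        return min(l_factor, l_max) * slope_factor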
- target_average_aspect_path (string): The path to where the calculated - weighted average aspect raster should be written. - - Returns: - ``None``. - - """ - LOGGER.info('Calculating average aspect') - - cdef float average_aspect_nodata = -1 - pygeoprocessing.new_raster_from_base( - mfd_flow_direction_path, target_average_aspect_path, - gdal.GDT_Float32, [average_aspect_nodata], [average_aspect_nodata]) - - flow_direction_info = pygeoprocessing.get_raster_info( - mfd_flow_direction_path) - cdef int mfd_flow_direction_nodata = flow_direction_info['nodata'][0] - cdef int n_cols, n_rows - n_cols, n_rows = flow_direction_info['raster_size'] - - cdef _ManagedRaster mfd_flow_direction_raster = _ManagedRaster( - mfd_flow_direction_path, 1, False) - - cdef _ManagedRaster average_aspect_raster = _ManagedRaster( - target_average_aspect_path, 1, True) - - cdef int seed_row = 0 - cdef int seed_col = 0 - cdef int n_pixels_visited = 0 - cdef int win_xsize, win_ysize, xoff, yoff - cdef int row_index, col_index, neighbor_index - cdef int flow_weight_in_direction - cdef int weight_sum - cdef int seed_flow_value - cdef float aspect_weighted_average, aspect_weighted_sum - - # the flow_lengths array is the functional equivalent - # of calculating |sin(alpha)| + |cos(alpha)|. - cdef float* flow_lengths = [ - 1, SQRT2, - 1, SQRT2, - 1, SQRT2, - 1, SQRT2 - ] - - # Loop over iterblocks to maintain cache locality - # Find each non-nodata pixel and calculate proportional flow - # Multiply proportional flow times the flow length x_d - # write the final value to the raster. - for offset_dict in pygeoprocessing.iterblocks( - (mfd_flow_direction_path, 1), offset_only=True, largest_block=0): - win_xsize = offset_dict['win_xsize'] - win_ysize = offset_dict['win_ysize'] - xoff = offset_dict['xoff'] - yoff = offset_dict['yoff'] - - LOGGER.info('Average aspect %.2f%% complete', 100 * ( - n_pixels_visited / float(n_cols * n_rows))) - - for row_index in range(win_ysize): - seed_row = yoff + row_index - for col_index in range(win_xsize): - seed_col = xoff + col_index - seed_flow_value = mfd_flow_direction_raster.get( - seed_col, seed_row) - - # Skip this seed if it's nodata (Currently expected to be 0). - # No need to set the nodata value here since we have already - # filled the raster with nodata values at creation time. - if seed_flow_value == mfd_flow_direction_nodata: - continue - - weight_sum = 0 - aspect_weighted_sum = 0 - for neighbor_index in range(8): - neighbor_row = seed_row + ROW_OFFSETS[neighbor_index] - if neighbor_row == -1 or neighbor_row == n_rows: - continue - - neighbor_col = seed_col + COL_OFFSETS[neighbor_index] - if neighbor_col == -1 or neighbor_col == n_cols: - continue - - flow_weight_in_direction = (seed_flow_value >> ( - neighbor_index * 4) & 0xF) - weight_sum += flow_weight_in_direction - - aspect_weighted_sum += ( - flow_lengths[neighbor_index] * - flow_weight_in_direction) - - # Weight sum should never be less than 0. - # Since it's an int, we can compare it directly against the - # value of 0. - if weight_sum == 0: - aspect_weighted_average = average_aspect_nodata - else: - # We already know that weight_sum will be > 0 because we - # check for it in the condition above. 
- with cython.cdivision(True): - aspect_weighted_average = ( - aspect_weighted_sum / weight_sum) - - average_aspect_raster.set( - seed_col, seed_row, aspect_weighted_average) - - n_pixels_visited += win_xsize * win_ysize - - LOGGER.info('Average aspect 100.00% complete') - - mfd_flow_direction_raster.close() - average_aspect_raster.close() diff --git a/src/natcap/invest/seasonal_water_yield/seasonal_water_yield.py b/src/natcap/invest/seasonal_water_yield/seasonal_water_yield.py index d8c2bbf028..e4b9a0342c 100644 --- a/src/natcap/invest/seasonal_water_yield/seasonal_water_yield.py +++ b/src/natcap/invest/seasonal_water_yield/seasonal_water_yield.py @@ -107,6 +107,7 @@ }, "biophysical_table_path": { "type": "csv", + "index_col": "lucode", "columns": { "lucode": spec_utils.LULC_TABLE_COLUMN, "cn_[SOIL_GROUP]": { @@ -137,6 +138,7 @@ }, "rain_events_table_path": { "type": "csv", + "index_col": "month", "columns": { "month": { "type": "number", @@ -212,6 +214,7 @@ }, "climate_zone_table_path": { "type": "csv", + "index_col": "cz_id", "columns": { "cz_id": { "type": "integer", @@ -253,6 +256,7 @@ }, "monthly_alpha_path": { "type": "csv", + "index_col": "month", "columns": { "month": { "type": "number", @@ -409,10 +413,62 @@ "bands": {1: { "type": "integer" }} + }, + 'Si.tif': { + "about": gettext("Map of the S_i factor derived from CN"), + "bands": {1: {"type": "number", "units": u.inch}} + }, + 'lulc_aligned.tif': { + "about": gettext("Copy of LULC input, aligned and clipped " + "to match the other spatial inputs"), + "bands": {1: {"type": "integer"}} + }, + 'dem_aligned.tif': { + "about": gettext("Copy of DEM input, aligned and clipped " + "to match the other spatial inputs"), + "bands": {1: {"type": "number", "units": u.meter}} + }, + 'pit_filled_dem.tif': { + "about": gettext("Pit filled DEM"), + "bands": {1: {"type": "number", "units": u.meter}} + }, + 'soil_group_aligned.tif': { + "about": gettext("Copy of soil groups input, aligned and " + "clipped to match the other spatial inputs"), + "bands": {1: {"type": "integer"}} + }, + 'flow_accum.tif': spec_utils.FLOW_ACCUMULATION, + 'prcp_a[MONTH].tif': { + "bands": {1: {"type": "number", "units": u.millimeter/u.year}}, + "about": gettext("Monthly precipitation rasters, aligned and " + "clipped to match the other spatial inputs") + }, + 'n_events[MONTH].tif': { + "about": gettext("Map of monthly rain events"), + "bands": {1: {"type": "integer"}} + }, + 'et0_a[MONTH].tif': { + "bands": {1: {"type": "number", "units": u.millimeter}}, + "about": gettext("Monthly ET0 rasters, aligned and " + "clipped to match the other spatial inputs") + }, + 'kc_[MONTH].tif': { + "about": gettext("Map of monthly KC values"), + "bands": {1: {"type": "number", "units": u.none}} + }, + 'l_aligned.tif': { + "about": gettext("Copy of user-defined local recharge input, " + "aligned and clipped to match the other spatial inputs"), + "bands": {1: {"type": "number", "units": u.millimeter}} + }, + 'cz_aligned.tif': { + "about": gettext("Copy of user-defined climate zones raster, " + "aligned and clipped to match the other spatial inputs"), + "bands": {1: {"type": "integer"}} } } }, - "cache_dir": spec_utils.TASKGRAPH_DIR + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -437,18 +493,10 @@ 'flow_dir_mfd_path': 'flow_dir_mfd.tif', 'qfm_path_list': ['qf_%d.tif' % (x+1) for x in range(N_MONTHS)], 'stream_path': 'stream.tif', -} - -_TMP_BASE_FILES = { - 'outflow_direction_path': 'outflow_direction.tif', - 'outflow_weights_path': 'outflow_weights.tif', - 'kc_path': 'kc.tif', 
'si_path': 'Si.tif', 'lulc_aligned_path': 'lulc_aligned.tif', 'dem_aligned_path': 'dem_aligned.tif', 'dem_pit_filled_path': 'pit_filled_dem.tif', - 'loss_path': 'loss.tif', - 'zero_absorption_source_path': 'zero_absorption.tif', 'soil_group_aligned_path': 'soil_group_aligned.tif', 'flow_accum_path': 'flow_accum.tif', 'precip_path_aligned_list': ['prcp_a%d.tif' % x for x in range(N_MONTHS)], @@ -457,7 +505,6 @@ 'kc_path_list': ['kc_%d.tif' % x for x in range(N_MONTHS)], 'l_aligned_path': 'l_aligned.tif', 'cz_aligned_raster_path': 'cz_aligned.tif', - 'l_sum_pre_clamp': 'l_sum_pre_clamp.tif' } @@ -561,41 +608,20 @@ def _execute(args): # fail early on a missing required rain events table if (not args['user_defined_local_recharge'] and not args['user_defined_climate_zones']): - rain_events_lookup = ( - utils.read_csv_to_dataframe( - args['rain_events_table_path'], 'month' - ).to_dict(orient='index')) - - biophysical_table = utils.read_csv_to_dataframe( - args['biophysical_table_path'], 'lucode').to_dict(orient='index') - - bad_value_list = [] - for lucode, value in biophysical_table.items(): - for biophysical_id in ['cn_a', 'cn_b', 'cn_c', 'cn_d'] + [ - 'kc_%d' % (month_index+1) for month_index in range(N_MONTHS)]: - try: - _ = float(value[biophysical_id]) - except ValueError: - bad_value_list.append( - (biophysical_id, lucode, value[biophysical_id])) + rain_events_df = utils.read_csv_to_dataframe( + args['rain_events_table_path'], + MODEL_SPEC['args']['rain_events_table_path']) - if bad_value_list: - raise ValueError( - 'biophysical_table at %s seems to have the following incorrect ' - 'values (expecting all floating point numbers): %s' % ( - args['biophysical_table_path'], ','.join( - ['%s(lucode %d): "%s"' % ( - lucode, biophysical_id, bad_value) - for lucode, biophysical_id, bad_value in - bad_value_list]))) + biophysical_df = utils.read_csv_to_dataframe( + args['biophysical_table_path'], + MODEL_SPEC['args']['biophysical_table_path']) if args['monthly_alpha']: # parse out the alpha lookup table of the form (month_id: alpha_val) - alpha_month_map = dict( - (key, val['alpha']) for key, val in - utils.read_csv_to_dataframe( - args['monthly_alpha_path'], 'month' - ).to_dict(orient='index').items()) + alpha_month_map = utils.read_csv_to_dataframe( + args['monthly_alpha_path'], + MODEL_SPEC['args']['monthly_alpha_path'] + )['alpha'].to_dict() else: # make all 12 entries equal to args['alpha_m'] alpha_m = float(fractions.Fraction(args['alpha_m'])) @@ -610,9 +636,8 @@ def _execute(args): file_suffix = utils.make_suffix_string(args, 'results_suffix') intermediate_output_dir = os.path.join( args['workspace_dir'], 'intermediate_outputs') - cache_dir = os.path.join(args['workspace_dir'], 'cache_dir') output_dir = args['workspace_dir'] - utils.make_directories([intermediate_output_dir, cache_dir, output_dir]) + utils.make_directories([intermediate_output_dir, output_dir]) try: n_workers = int(args['n_workers']) @@ -622,13 +647,13 @@ def _execute(args): # TypeError when n_workers is None. n_workers = -1 # Synchronous mode. 
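# A minimal pandas-only sketch of the table-reading refactor in the hunks
# above: the old pattern built a nested dict with to_dict(orient='index') and
# then pulled one field out of it, while the new pattern reads a DataFrame
# indexed by the spec's index_col and takes the column directly. The CSV
# contents below are made-up illustrative values, not model sample data.
import io
import pandas

csv_text = io.StringIO("month,alpha\n1,0.08\n2,0.08\n3,0.10\n")
df = pandas.read_csv(csv_text, index_col='month')

# old pattern: nested dict keyed by month, then pull out 'alpha'
old_map = {month: row['alpha']
           for month, row in df.to_dict(orient='index').items()}

# new pattern: take the column directly -> {1: 0.08, 2: 0.08, 3: 0.1}
new_map = df['alpha'].to_dict()

assert old_map == new_map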
task_graph = taskgraph.TaskGraph( - cache_dir, n_workers, reporting_interval=5.0) + os.path.join(args['workspace_dir'], 'taskgraph_cache'), + n_workers, reporting_interval=5) LOGGER.info('Building file registry') file_registry = utils.build_file_registry( [(_OUTPUT_BASE_FILES, output_dir), - (_INTERMEDIATE_BASE_FILES, intermediate_output_dir), - (_TMP_BASE_FILES, cache_dir)], file_suffix) + (_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix) LOGGER.info('Checking that the AOI is not the output aggregate vector') if (os.path.normpath(args['aoi_path']) == @@ -706,7 +731,7 @@ def _execute(args): args=( (file_registry['dem_aligned_path'], 1), file_registry['dem_pit_filled_path']), - kwargs={'working_dir': cache_dir}, + kwargs={'working_dir': intermediate_output_dir}, target_path_list=[file_registry['dem_pit_filled_path']], dependent_task_list=[align_task], task_name='fill dem pits') @@ -716,7 +741,7 @@ def _execute(args): args=( (file_registry['dem_pit_filled_path'], 1), file_registry['flow_dir_mfd_path']), - kwargs={'working_dir': cache_dir}, + kwargs={'working_dir': intermediate_output_dir}, target_path_list=[file_registry['flow_dir_mfd_path']], dependent_task_list=[fill_pit_task], task_name='flow dir mfd') @@ -762,22 +787,18 @@ def _execute(args): 'table_name': 'Climate Zone'} for month_id in range(N_MONTHS): if args['user_defined_climate_zones']: - cz_rain_events_lookup = ( - utils.read_csv_to_dataframe( - args['climate_zone_table_path'], 'cz_id' - ).to_dict(orient='index')) - month_label = MONTH_ID_TO_LABEL[month_id] - climate_zone_rain_events_month = dict([ - (cz_id, cz_rain_events_lookup[cz_id][month_label]) for - cz_id in cz_rain_events_lookup]) - n_events_nodata = -1 + cz_rain_events_df = utils.read_csv_to_dataframe( + args['climate_zone_table_path'], + MODEL_SPEC['args']['climate_zone_table_path']) + climate_zone_rain_events_month = ( + cz_rain_events_df[MONTH_ID_TO_LABEL[month_id]].to_dict()) n_events_task = task_graph.add_task( func=utils.reclassify_raster, args=( (file_registry['cz_aligned_raster_path'], 1), climate_zone_rain_events_month, file_registry['n_events_path_list'][month_id], - gdal.GDT_Float32, n_events_nodata, + gdal.GDT_Float32, TARGET_NODATA, reclass_error_details), target_path_list=[ file_registry['n_events_path_list'][month_id]], @@ -785,15 +806,14 @@ def _execute(args): task_name='n_events for month %d' % month_id) reclassify_n_events_task_list.append(n_events_task) else: - # rain_events_lookup defined near entry point of execute - n_events = rain_events_lookup[month_id+1]['events'] n_events_task = task_graph.add_task( func=pygeoprocessing.new_raster_from_base, args=( file_registry['dem_aligned_path'], file_registry['n_events_path_list'][month_id], gdal.GDT_Float32, [TARGET_NODATA]), - kwargs={'fill_value_list': (n_events,)}, + kwargs={'fill_value_list': ( + rain_events_df['events'][month_id+1],)}, target_path_list=[ file_registry['n_events_path_list'][month_id]], dependent_task_list=[align_task], @@ -806,7 +826,8 @@ def _execute(args): args=( file_registry['lulc_aligned_path'], file_registry['soil_group_aligned_path'], - biophysical_table, file_registry['cn_path']), + biophysical_df, + file_registry['cn_path']), target_path_list=[file_registry['cn_path']], dependent_task_list=[align_task], task_name='calculate curve number') @@ -827,8 +848,6 @@ def _execute(args): func=_calculate_monthly_quick_flow, args=( file_registry['precip_path_aligned_list'][month_index], - file_registry['lulc_aligned_path'], - file_registry['cn_path'], 
file_registry['n_events_path_list'][month_index], file_registry['stream_path'], file_registry['si_path'], @@ -855,16 +874,13 @@ def _execute(args): 'raster_name': 'LULC', 'column_name': 'lucode', 'table_name': 'Biophysical'} for month_index in range(N_MONTHS): - kc_lookup = dict([ - (lucode, biophysical_table[lucode]['kc_%d' % (month_index+1)]) - for lucode in biophysical_table]) - kc_nodata = -1 # a reasonable nodata value + kc_lookup = biophysical_df['kc_%d' % (month_index+1)].to_dict() kc_task = task_graph.add_task( func=utils.reclassify_raster, args=( (file_registry['lulc_aligned_path'], 1), kc_lookup, file_registry['kc_path_list'][month_index], - gdal.GDT_Float32, kc_nodata, reclass_error_details), + gdal.GDT_Float32, TARGET_NODATA, reclass_error_details), target_path_list=[file_registry['kc_path_list'][month_index]], dependent_task_list=[align_task], task_name='classify kc month %d' % month_index) @@ -978,7 +994,7 @@ def _calculate_vri(l_path, target_vri_path): None. """ - qb_sum = 0.0 + qb_sum = 0 qb_valid_count = 0 l_nodata = pygeoprocessing.get_raster_info(l_path)['nodata'][0] @@ -1039,122 +1055,167 @@ def qfi_sum_op(*qf_values): qfi_sum_op, target_qf_path, gdal.GDT_Float32, qf_nodata) -def _calculate_monthly_quick_flow( - precip_path, lulc_raster_path, cn_path, n_events_raster_path, - stream_path, si_path, qf_monthly_path): +def _calculate_monthly_quick_flow(precip_path, n_events_path, stream_path, + si_path, qf_monthly_path): """Calculate quick flow for a month. Args: - precip_path (string): path to file that correspond to monthly - precipitation - lulc_raster_path (string): path to landcover raster - cn_path (string): path to curve number raster - n_events_raster_path (string): a path to a raster where each pixel + precip_path (string): path to monthly precipitation raster + n_events_path (string): a path to a raster where each pixel indicates the number of rain events. stream_path (string): path to stream mask raster where 1 indicates a stream pixel, 0 is a non-stream but otherwise valid area from the original DEM, and nodata indicates areas outside the valid DEM. si_path (string): path to raster that has potential maximum retention - qf_monthly_path_list (list of string): list of paths to output monthly - rasters. + qf_monthly_path (string): path to output monthly QF raster. Returns: None """ - si_nodata = pygeoprocessing.get_raster_info(si_path)['nodata'][0] - - qf_nodata = -1 p_nodata = pygeoprocessing.get_raster_info(precip_path)['nodata'][0] - n_events_nodata = pygeoprocessing.get_raster_info( - n_events_raster_path)['nodata'][0] + n_nodata = pygeoprocessing.get_raster_info(n_events_path)['nodata'][0] stream_nodata = pygeoprocessing.get_raster_info(stream_path)['nodata'][0] + si_nodata = pygeoprocessing.get_raster_info(si_path)['nodata'][0] - def qf_op(p_im, s_i, n_events, stream_array): + def qf_op(p_im, s_i, n_m, stream): """Calculate quick flow as in Eq [1] in user's guide. Args: p_im (numpy.array): precipitation at pixel i on month m s_i (numpy.array): factor that is 1000/CN_i - 10 - (Equation 1b from user's guide) - n_events (numpy.array): number of rain events on the pixel - stream_mask (numpy.array): 1 if stream, otherwise not a stream - pixel. + n_m (numpy.array): number of rain events on pixel i in month m + stream (numpy.array): 1 if stream, otherwise not a stream pixel. 
Returns: quick flow (numpy.array) - """ - # s_i is an intermediate output which will always have a defined - # nodata value - valid_mask = ((p_im != 0.0) & - (stream_array != 1) & - (n_events > 0) & - ~utils.array_equals_nodata(s_i, si_nodata)) - if p_nodata is not None: - valid_mask &= ~utils.array_equals_nodata(p_im, p_nodata) - if n_events_nodata is not None: - valid_mask &= ~utils.array_equals_nodata(n_events, n_events_nodata) - # stream_nodata is the only input that carry over nodata values from + valid_p_mask = ~utils.array_equals_nodata(p_im, p_nodata) + valid_n_mask = ~utils.array_equals_nodata(n_m, n_nodata) + # precip mask: both p_im and n_m are defined and greater than 0 + precip_mask = valid_p_mask & valid_n_mask & (p_im > 0) & (n_m > 0) + stream_mask = stream == 1 + # stream_nodata is the only input that carries over nodata values from # the aligned DEM. - if stream_nodata is not None: - valid_mask &= ~utils.array_equals_nodata( - stream_array, stream_nodata) - - valid_n_events = n_events[valid_mask] - valid_si = s_i[valid_mask] - + valid_mask = ( + valid_p_mask & + valid_n_mask & + ~utils.array_equals_nodata(stream, stream_nodata) & + ~utils.array_equals_nodata(s_i, si_nodata)) + + # QF is defined in terms of three cases: + # + # 1. Where there is no precipitation, QF = 0 + # (even if stream or s_i is undefined) + # + # 2. Where there is precipitation and we're on a stream, QF = P + # (even if s_i is undefined) + # + # 3. Where there is precipitation and we're not on a stream, use the + # quickflow equation (only if all four inputs are defined): + # QF_im = 25.4 * n_m * ( + # (a_im - s_i) * exp(-0.2 * s_i / a_im) + + # s_i^2 / a_im * exp(0.8 * s_i / a_im) * E1(s_i / a_im) + # ) + # + # When evaluating the QF equation, there are a few edge cases: + # + # 3a. Where s_i = 0, you get NaN and a warning from numpy because + # E1(0 / a_im) = infinity. In this case, per conversation with + # Rafa, the final term of the equation should evaluate to 0, and + # the equation can be simplified to QF_im = P_im + # (which makes sense because if s_i = 0, no water is retained). + # + # Solution: Preemptively set QF_im equal to P_im where s_i = 0 in + # order to avoid calculations with infinity. + # + # 3b. When the ratio s_i / a_im becomes large, QF approaches 0. + # [NOTE: I don't know how to prove this mathematically, but it + # holds true when I tested with reasonable values of s_i and a_im]. + # The exp() term becomes very large, while the E1() term becomes + # very small. + # + # Per conversation with Rafa and Lisa, large s_i / a_im ratios + # shouldn't happen often with real world data. But if they did, it + # would be a situation where there is very little precipitation + # spread out over relatively many rain events and the soil is very + # absorbent, so logically, QF should be effectively zero. + # + # To avoid overflow, we set a threshold of 100 for the s_i / a_im + # ratio. Where s_i / a_im > 100, we set QF to 0. 100 was chosen + # because it's a nice whole number that gets us close to the + # float32 max without surpassing it (exp(0.8*100) = 5e34). When + # s_i / a_im = 100, the actual result of the QF equation is on the + # order of 1e-6, so it should be rounded down to 0 anyway. + # + # 3c. Otherwise, evaluate the QF equation as usual. + # + # 3d. With certain inputs [for example: n_m = 10, CN = 50, p_im = 30], + # it's possible that the QF equation evaluates to a very small + # negative value. 
Per conversation with Lisa and Rafa, this is an + # edge case that the equation was not designed for. Negative QF + # doesn't make sense, so we set any negative QF values to 0. + + # qf_im is the quickflow at pixel i on month m + qf_im = numpy.full(p_im.shape, TARGET_NODATA, dtype=numpy.float32) + + # case 1: where there is no precipitation + qf_im[~precip_mask] = 0 + + # case 2: where there is precipitation and we're on a stream + qf_im[precip_mask & stream_mask] = p_im[precip_mask & stream_mask] + + # case 3: where there is precipitation and we're not on a stream + case_3_mask = valid_mask & precip_mask & ~stream_mask + + # for consistent indexing, make a_im the same shape as the other + # arrays even though we only use it in case 3 + a_im = numpy.full(p_im.shape, numpy.nan, dtype=numpy.float32) # a_im is the mean rain depth on a rainy day at pixel i on month m - # the 25.4 converts inches to mm since Si is in inches - a_im = numpy.empty(valid_n_events.shape) - a_im = p_im[valid_mask] / (valid_n_events * 25.4) - qf_im = numpy.empty(p_im.shape) - qf_im[:] = qf_nodata - - # Precompute the last two terms in quickflow so we can handle a - # numerical instability when s_i is large and/or a_im is small - # on large valid_si/a_im this number will be zero and the latter - # exponent will also be zero because of a divide by zero. rather than - # raise that numerical warning, just handle it manually - E1 = scipy.special.expn(1, valid_si / a_im) - E1[valid_si == 0] = 0 - nonzero_e1_mask = E1 != 0 - exp_result = numpy.zeros(valid_si.shape) - exp_result[nonzero_e1_mask] = numpy.exp( - (0.8 * valid_si[nonzero_e1_mask]) / a_im[nonzero_e1_mask] + - numpy.log(E1[nonzero_e1_mask])) - - # qf_im is the quickflow at pixel i on month m Eq. [1] - qf_im[valid_mask] = (25.4 * valid_n_events * ( - (a_im - valid_si) * numpy.exp(-0.2 * valid_si / a_im) + - valid_si ** 2 / a_im * exp_result)) - - # if precip is 0, then QF should be zero - qf_im[(p_im == 0) | (n_events == 0)] = 0.0 - # if we're on a stream, set quickflow to the precipitation - valid_stream_precip_mask = stream_array == 1 - if p_nodata is not None: - valid_stream_precip_mask &= ~utils.array_equals_nodata( - p_im, p_nodata) - qf_im[valid_stream_precip_mask] = p_im[valid_stream_precip_mask] + # the 25.4 converts inches to mm since s_i is in inches + a_im[case_3_mask] = p_im[case_3_mask] / (n_m[case_3_mask] * 25.4) + + # case 3a: when s_i = 0, qf = p + case_3a_mask = case_3_mask & (s_i == 0) + qf_im[case_3a_mask] = p_im[case_3a_mask] + + # case 3b: set quickflow to 0 when the s_i/a_im ratio is too large + case_3b_mask = case_3_mask & (s_i / a_im > 100) + qf_im[case_3b_mask] = 0 + + # case 3c: evaluate the equation as usual + case_3c_mask = case_3_mask & ~(case_3a_mask | case_3b_mask) + qf_im[case_3c_mask] = ( + 25.4 * n_m[case_3c_mask] * ( + ((a_im[case_3c_mask] - s_i[case_3c_mask]) * + numpy.exp(-0.2 * s_i[case_3c_mask] / a_im[case_3c_mask])) + + (s_i[case_3c_mask] ** 2 / a_im[case_3c_mask] * + numpy.exp(0.8 * s_i[case_3c_mask] / a_im[case_3c_mask]) * + scipy.special.exp1(s_i[case_3c_mask] / a_im[case_3c_mask])) + ) + ) + + # case 3d: set any negative values to 0 + qf_im[valid_mask & (qf_im < 0)] = 0 + return qf_im pygeoprocessing.raster_calculator( [(path, 1) for path in [ - precip_path, si_path, n_events_raster_path, stream_path]], qf_op, - qf_monthly_path, gdal.GDT_Float32, qf_nodata) + precip_path, si_path, n_events_path, stream_path]], + qf_op, qf_monthly_path, gdal.GDT_Float32, TARGET_NODATA) def _calculate_curve_number_raster( - lulc_raster_path, 
soil_group_path, biophysical_table, cn_path): + lulc_raster_path, soil_group_path, biophysical_df, cn_path): """Calculate the CN raster from the landcover and soil group rasters. Args: lulc_raster_path (string): path to landcover raster soil_group_path (string): path to raster indicating soil group where pixel values are in [1,2,3,4] - biophysical_table (dict): maps landcover IDs to dictionaries that - contain at least the keys 'cn_a', 'cn_b', 'cn_c', 'cn_d', that - map to the curve numbers for that landcover and soil type. + biophysical_df (pandas.DataFrame): table mapping landcover IDs to the + columns 'cn_a', 'cn_b', 'cn_c', 'cn_d', that contain + the curve number values for that landcover and soil type. cn_path (string): path to output curve number raster to be output which will be the dimensions of the intersection of `lulc_raster_path` and `soil_group_path` the cell size of @@ -1172,12 +1233,11 @@ def _calculate_curve_number_raster( 4: 'cn_d', } # curve numbers are always positive so -1 a good nodata choice - cn_nodata = -1 lulc_to_soil = {} lulc_nodata = pygeoprocessing.get_raster_info( lulc_raster_path)['nodata'][0] - lucodes = list(biophysical_table) + lucodes = biophysical_df.index.to_list() if lulc_nodata is not None: lucodes.append(lulc_nodata) @@ -1190,12 +1250,12 @@ def _calculate_curve_number_raster( for lucode in sorted(lucodes): if lucode != lulc_nodata: lulc_to_soil[soil_id]['cn_values'].append( - biophysical_table[lucode][soil_column]) + biophysical_df[soil_column][lucode]) lulc_to_soil[soil_id]['lulc_values'].append(lucode) else: # handle the lulc nodata with cn nodata lulc_to_soil[soil_id]['lulc_values'].append(lulc_nodata) - lulc_to_soil[soil_id]['cn_values'].append(cn_nodata) + lulc_to_soil[soil_id]['cn_values'].append(TARGET_NODATA) # Making the landcover array a float32 in case the user provides a # float landcover map like Kate did. 
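# A scalar sketch of the piecewise quickflow logic laid out in the case
# 1/2/3a-3d comments above. The numbers in the example call are illustrative
# assumptions, not model data; as in the code above, s_i = 1000/CN - 10 is in
# inches and the factor 25.4 converts between inches and mm.
import numpy
import scipy.special

def quickflow_sketch(p_im, n_m, cn, on_stream=False):
    """Monthly quickflow (mm) for one pixel, following cases 1-3d above."""
    if p_im <= 0 or n_m <= 0:       # case 1: no precipitation -> QF = 0
        return 0.0
    if on_stream:                   # case 2: stream pixel -> QF = P
        return p_im
    s_i = 1000.0 / cn - 10.0        # potential retention (inches)
    a_im = p_im / (n_m * 25.4)      # mean rain depth per rain event (inches)
    if s_i == 0:                    # case 3a: nothing retained -> QF = P
        return p_im
    if s_i / a_im > 100:            # case 3b: avoid overflow in exp()
        return 0.0
    qf_im = 25.4 * n_m * (          # case 3c: evaluate Eq. [1]
        (a_im - s_i) * numpy.exp(-0.2 * s_i / a_im)
        + s_i ** 2 / a_im
        * numpy.exp(0.8 * s_i / a_im)
        * scipy.special.exp1(s_i / a_im))
    return max(qf_im, 0.0)          # case 3d: clamp tiny negatives to 0

# e.g. quickflow_sketch(p_im=120.0, n_m=8, cn=75) returns the monthly
# quickflow in mm for those stand-in values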
@@ -1213,7 +1273,7 @@ def _calculate_curve_number_raster( def cn_op(lulc_array, soil_group_array): """Map lulc code and soil to a curve number.""" cn_result = numpy.empty(lulc_array.shape) - cn_result[:] = cn_nodata + cn_result[:] = TARGET_NODATA # if lulc_array value not in lulc_to_soil[soil_group_id]['lulc_values'] # then numpy.digitize will not bin properly and cause an IndexError @@ -1252,10 +1312,9 @@ def cn_op(lulc_array, soil_group_array): cn_result[current_soil_mask] = cn_values[current_soil_mask] return cn_result - cn_nodata = -1 pygeoprocessing.raster_calculator( [(lulc_raster_path, 1), (soil_group_path, 1)], cn_op, cn_path, - gdal.GDT_Float32, cn_nodata) + gdal.GDT_Float32, TARGET_NODATA) def _calculate_si_raster(cn_path, stream_path, si_path): @@ -1269,7 +1328,6 @@ def _calculate_si_raster(cn_path, stream_path, si_path): Returns: None """ - si_nodata = -1 cn_nodata = pygeoprocessing.get_raster_info(cn_path)['nodata'][0] def si_op(ci_factor, stream_mask): @@ -1278,17 +1336,17 @@ def si_op(ci_factor, stream_mask): ~utils.array_equals_nodata(ci_factor, cn_nodata) & (ci_factor > 0)) si_array = numpy.empty(ci_factor.shape) - si_array[:] = si_nodata + si_array[:] = TARGET_NODATA # multiply by the stream mask != 1 so we get 0s on the stream and # unaffected results everywhere else si_array[valid_mask] = ( - (1000.0 / ci_factor[valid_mask] - 10) * ( + (1000 / ci_factor[valid_mask] - 10) * ( stream_mask[valid_mask] != 1)) return si_array pygeoprocessing.raster_calculator( [(cn_path, 1), (stream_path, 1)], si_op, si_path, gdal.GDT_Float32, - si_nodata) + TARGET_NODATA) def _aggregate_recharge( @@ -1350,7 +1408,7 @@ def _aggregate_recharge( "no coverage for polygon %s", ', '.join( [str(poly_feat.GetField(_)) for _ in range( poly_feat.GetFieldCount())])) - value = 0.0 + value = 0 elif op_type == 'sum': value = aggregate_stats[poly_index]['sum'] poly_feat.SetField(aggregate_field_id, float(value)) diff --git a/src/natcap/invest/stormwater.py b/src/natcap/invest/stormwater.py index afae65498c..a765b327dd 100644 --- a/src/natcap/invest/stormwater.py +++ b/src/natcap/invest/stormwater.py @@ -46,6 +46,7 @@ "precipitation_path": spec_utils.PRECIP, "biophysical_table": { "type": "csv", + "index_col": "lucode", "columns": { "lucode": spec_utils.LULC_TABLE_COLUMN, "emc_[POLLUTANT]": { @@ -363,10 +364,10 @@ "calculated by convolving the search kernel with the " "retention ratio raster."), "bands": {1: {"type": "ratio"}} - }, - "cache_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -438,14 +439,14 @@ def execute(args): suffix = utils.make_suffix_string(args, 'results_suffix') output_dir = args['workspace_dir'] intermediate_dir = os.path.join(output_dir, 'intermediate') - cache_dir = os.path.join(intermediate_dir, 'cache_dir') - utils.make_directories( - [args['workspace_dir'], intermediate_dir, cache_dir]) + utils.make_directories([args['workspace_dir'], intermediate_dir]) files = utils.build_file_registry( [(INTERMEDIATE_OUTPUTS, intermediate_dir), (FINAL_OUTPUTS, output_dir)], suffix) - task_graph = taskgraph.TaskGraph(cache_dir, int(args.get('n_workers', -1))) + task_graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), + int(args.get('n_workers', -1))) # get the necessary base raster info source_lulc_raster_info = pygeoprocessing.get_raster_info( @@ -482,11 +483,12 @@ def execute(args): task_name='align input rasters') # Build a lookup dictionary mapping each LULC code to its row - biophysical_dict = 
utils.read_csv_to_dataframe( - args['biophysical_table'], 'lucode').to_dict(orient='index') - # sort the LULC codes upfront because we use the sorted list in multiple + # sort by the LULC codes upfront because we use the sorted list in multiple # places. it's more efficient to do this once. - sorted_lucodes = sorted(biophysical_dict) + biophysical_df = utils.read_csv_to_dataframe( + args['biophysical_table'], MODEL_SPEC['args']['biophysical_table'] + ).sort_index() + sorted_lucodes = biophysical_df.index.to_list() # convert the nested dictionary in to a 2D array where rows are LULC codes # in sorted order and columns correspond to soil groups in order @@ -498,10 +500,8 @@ def execute(args): # up with their indices in the array. this is more efficient than # decrementing the whole soil group array by 1. retention_ratio_array = numpy.array([ - [1 - biophysical_dict[lucode][f'rc_{soil_group}'] - for soil_group in ['a', 'b', 'c', 'd'] - ] for lucode in sorted_lucodes - ], dtype=numpy.float32) + 1 - biophysical_df[f'rc_{soil_group}'].to_numpy() + for soil_group in ['a', 'b', 'c', 'd']], dtype=numpy.float32).T # Calculate stormwater retention ratio and volume from # LULC, soil groups, biophysical data, and precipitation @@ -522,10 +522,6 @@ def execute(args): if args['adjust_retention_ratios']: # in raster coord system units radius = float(args['retention_radius']) - # boolean mapping for each LULC code whether it's connected - is_connected_map = { - lucode: 1 if biophysical_dict[lucode]['is_connected'] else 0 - for lucode in biophysical_dict} reproject_roads_task = task_graph.add_task( func=pygeoprocessing.reproject_vector, @@ -591,7 +587,7 @@ def execute(args): func=pygeoprocessing.reclassify_raster, args=( (files['lulc_aligned_path'], 1), - is_connected_map, + biophysical_df['is_connected'].astype(int).to_dict(), files['connected_lulc_path'], gdal.GDT_Byte, UINT8_NODATA), @@ -706,14 +702,12 @@ def execute(args): # (Optional) Calculate stormwater percolation ratio and volume from # LULC, soil groups, biophysical table, and precipitation - if 'pe_a' in next(iter(biophysical_dict.values())): + if 'pe_a' in biophysical_df.columns: LOGGER.info('percolation data detected in biophysical table. 
' 'Will calculate percolation ratio and volume rasters.') percolation_ratio_array = numpy.array([ - [biophysical_dict[lucode][f'pe_{soil_group}'] - for soil_group in ['a', 'b', 'c', 'd'] - ] for lucode in sorted_lucodes - ], dtype=numpy.float32) + biophysical_df[f'pe_{soil_group}'].to_numpy() + for soil_group in ['a', 'b', 'c', 'd']], dtype=numpy.float32).T percolation_ratio_task = task_graph.add_task( func=lookup_ratios, args=( @@ -749,8 +743,8 @@ def execute(args): # get all EMC columns from an arbitrary row in the dictionary # strip the first four characters off 'EMC_pollutant' to get pollutant name - pollutants = [key[4:] for key in next(iter(biophysical_dict.values())) - if key.startswith('emc_')] + pollutants = [ + col[4:] for col in biophysical_df.columns if col.startswith('emc_')] LOGGER.debug(f'Pollutants found in biophysical table: {pollutants}') # Calculate avoided pollutant load for each pollutant from retention volume @@ -766,9 +760,7 @@ def execute(args): output_dir, f'actual_pollutant_load_{pollutant}{suffix}.tif') actual_load_paths.append(actual_pollutant_load_path) # make an array mapping each LULC code to the pollutant EMC value - emc_array = numpy.array( - [biophysical_dict[lucode][f'emc_{pollutant}'] - for lucode in sorted_lucodes], dtype=numpy.float32) + emc_array = biophysical_df[f'emc_{pollutant}'].to_numpy(dtype=numpy.float32) # calculate avoided load from retention volume avoided_load_task = task_graph.add_task( diff --git a/src/natcap/invest/urban_cooling_model.py b/src/natcap/invest/urban_cooling_model.py index 39c4decda5..6ef6e39eed 100644 --- a/src/natcap/invest/urban_cooling_model.py +++ b/src/natcap/invest/urban_cooling_model.py @@ -55,6 +55,7 @@ "biophysical_table_path": { "name": gettext("biophysical table"), "type": "csv", + "index_col": "lucode", "columns": { "lucode": spec_utils.LULC_TABLE_COLUMN, "kc": { @@ -170,6 +171,7 @@ "energy_consumption_table_path": { "name": gettext("energy consumption table"), "type": "csv", + "index_col": "type", "columns": { "type": { "type": "integer", @@ -340,10 +342,10 @@ "reference of the LULC."), "geometries": spec_utils.POLYGONS, "fields": {} - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -410,8 +412,8 @@ def execute(args): intermediate_dir = os.path.join( args['workspace_dir'], 'intermediate') utils.make_directories([args['workspace_dir'], intermediate_dir]) - biophysical_lucode_map = utils.read_csv_to_dataframe( - args['biophysical_table_path'], 'lucode').to_dict(orient='index') + biophysical_df = utils.read_csv_to_dataframe( + args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path']) # cast to float and calculate relative weights # Use default weights for shade, albedo, eti if the user didn't provide @@ -454,7 +456,7 @@ def execute(args): n_workers = -1 # Synchronous mode. task_graph = taskgraph.TaskGraph( - os.path.join(intermediate_dir, '_taskgraph_working_dir'), n_workers) + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) # align all the input rasters. 
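# A small self-contained sketch of the (lucode x soil group) ratio array built
# in the stormwater hunks above from the spec-indexed biophysical DataFrame.
# The table contents here are made-up illustrative values, not model data; the
# lookup at the end mirrors how a pixel's sorted LULC-code index and soil
# group (1-4, offset by one) index into the array.
import numpy
import pandas

biophysical_df = pandas.DataFrame(
    {'rc_a': [0.1, 0.4], 'rc_b': [0.2, 0.5],
     'rc_c': [0.3, 0.6], 'rc_d': [0.4, 0.7]},
    index=pandas.Index([11, 5], name='lucode')).sort_index()

sorted_lucodes = biophysical_df.index.to_list()        # [5, 11]

# rows follow sorted_lucodes, columns follow soil groups A-D
retention_ratio_array = numpy.array([
    1 - biophysical_df[f'rc_{soil_group}'].to_numpy()
    for soil_group in ['a', 'b', 'c', 'd']], dtype=numpy.float32).T

row = sorted_lucodes.index(11)     # lucode 11 on soil group C (column 2)
print(retention_ratio_array[row, 2])                   # 1 - 0.3 = 0.7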
aligned_lulc_raster_path = os.path.join( @@ -496,16 +498,13 @@ def execute(args): 'raster_name': 'LULC', 'column_name': 'lucode', 'table_name': 'Biophysical'} for prop in reclassification_props: - prop_map = dict( - (lucode, x[prop]) - for lucode, x in biophysical_lucode_map.items()) - prop_raster_path = os.path.join( intermediate_dir, f'{prop}{file_suffix}.tif') prop_task = task_graph.add_task( func=utils.reclassify_raster, args=( - (aligned_lulc_raster_path, 1), prop_map, prop_raster_path, + (aligned_lulc_raster_path, 1), + biophysical_df[prop].to_dict(), prop_raster_path, gdal.GDT_Float32, TARGET_NODATA, reclass_error_details), target_path_list=[prop_raster_path], dependent_task_list=[align_task], @@ -1079,8 +1078,9 @@ def calculate_energy_savings( for field in target_building_layer.schema] type_field_index = fieldnames.index('type') - energy_consumption_table = utils.read_csv_to_dataframe( - energy_consumption_table_path, 'type').to_dict(orient='index') + energy_consumption_df = utils.read_csv_to_dataframe( + energy_consumption_table_path, + MODEL_SPEC['args']['energy_consumption_table_path']) target_building_layer.StartTransaction() last_time = time.time() @@ -1104,7 +1104,7 @@ def calculate_energy_savings( # Building type should be an integer and has to match the building # types in the energy consumption table. target_type = target_feature.GetField(int(type_field_index)) - if target_type not in energy_consumption_table: + if target_type not in energy_consumption_df.index: target_building_layer.CommitTransaction() target_building_layer = None target_building_vector = None @@ -1114,16 +1114,14 @@ def calculate_energy_savings( "that has no corresponding entry in the energy consumption " f"table at {energy_consumption_table_path}") - consumption_increase = float( - energy_consumption_table[target_type]['consumption']) + consumption_increase = energy_consumption_df['consumption'][target_type] # Load building cost if we can, but don't adjust the value if the cost # column is not there. # NOTE: if the user has an empty column value but the 'cost' column # exists, this will raise an error. try: - building_cost = float( - energy_consumption_table[target_type]['cost']) + building_cost = energy_consumption_df['cost'][target_type] except KeyError: # KeyError when cost column not present. 
building_cost = 1 diff --git a/src/natcap/invest/urban_flood_risk_mitigation.py b/src/natcap/invest/urban_flood_risk_mitigation.py index f1294eaf76..69ada14c77 100644 --- a/src/natcap/invest/urban_flood_risk_mitigation.py +++ b/src/natcap/invest/urban_flood_risk_mitigation.py @@ -57,6 +57,7 @@ }, "curve_number_table_path": { "type": "csv", + "index_col": "lucode", "columns": { "lucode": { "type": "integer", @@ -91,6 +92,7 @@ }, "infrastructure_damage_loss_table_path": { "type": "csv", + "index_col": "type", "columns": { "type": { "type": "integer", @@ -184,12 +186,7 @@ "the same spatial reference as the LULC."), "geometries": spec_utils.POLYGONS, "fields": {} - } - } - }, - "temp_working_dir_not_for_humans": { - "type": "directory", - "contents": { + }, "aligned_lulc.tif": { "about": "Aligned and clipped copy of the LULC.", "bands": {1: {"type": "integer"}} @@ -205,10 +202,10 @@ "s_max.tif": { "about": "Map of potential retention.", "bands": {1: {"type": "number", "units": u.millimeter}} - }, - "taskgraph_data.db": {} + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -259,12 +256,10 @@ def execute(args): file_suffix = utils.make_suffix_string(args, 'results_suffix') - temporary_working_dir = os.path.join( - args['workspace_dir'], 'temp_working_dir_not_for_humans') intermediate_dir = os.path.join( args['workspace_dir'], 'intermediate_files') utils.make_directories([ - args['workspace_dir'], intermediate_dir, temporary_working_dir]) + args['workspace_dir'], intermediate_dir]) try: n_workers = int(args['n_workers']) @@ -273,13 +268,14 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # Synchronous mode. - task_graph = taskgraph.TaskGraph(temporary_working_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) # Align LULC with soils aligned_lulc_path = os.path.join( - temporary_working_dir, f'aligned_lulc{file_suffix}.tif') + intermediate_dir, f'aligned_lulc{file_suffix}.tif') aligned_soils_path = os.path.join( - temporary_working_dir, + intermediate_dir, f'aligned_soils_hydrological_group{file_suffix}.tif') lulc_raster_info = pygeoprocessing.get_raster_info( @@ -306,20 +302,20 @@ def execute(args): task_name='align raster stack') # Load CN table - cn_table = utils.read_csv_to_dataframe( - args['curve_number_table_path'], 'lucode').to_dict(orient='index') + cn_df = utils.read_csv_to_dataframe( + args['curve_number_table_path'], + MODEL_SPEC['args']['curve_number_table_path']) # make cn_table into a 2d array where first dim is lucode, second is # 0..3 to correspond to CN_A..CN_D data = [] row_ind = [] col_ind = [] - for lucode in cn_table: - data.extend([ - cn_table[lucode][f'cn_{soil_id}'] - for soil_id in ['a', 'b', 'c', 'd']]) - row_ind.extend([int(lucode)] * 4) + for lucode, row in cn_df.iterrows(): + data.extend([row[f'cn_{soil_id}'] for soil_id in ['a', 'b', 'c', 'd']]) + row_ind.extend([lucode] * 4) col_ind = [0, 1, 2, 3] * (len(row_ind) // 4) + lucode_to_cn_table = scipy.sparse.csr_matrix((data, (row_ind, col_ind))) cn_nodata = -1 @@ -327,7 +323,7 @@ def execute(args): soil_type_nodata = soil_raster_info['nodata'][0] cn_raster_path = os.path.join( - temporary_working_dir, f'cn_raster{file_suffix}.tif') + intermediate_dir, f'cn_raster{file_suffix}.tif') align_raster_stack_task.join() cn_raster_task = task_graph.add_task( @@ -344,7 +340,7 @@ def execute(args): # Generate S_max s_max_nodata = -9999 s_max_raster_path = os.path.join( - 
temporary_working_dir, f's_max{file_suffix}.tif') + intermediate_dir, f's_max{file_suffix}.tif') s_max_task = task_graph.add_task( func=pygeoprocessing.raster_calculator, args=( @@ -649,7 +645,9 @@ def _calculate_damage_to_infrastructure_in_aoi( infrastructure_layer = infrastructure_vector.GetLayer() damage_type_map = utils.read_csv_to_dataframe( - structures_damage_table, 'type').to_dict(orient='index') + structures_damage_table, + MODEL_SPEC['args']['infrastructure_damage_loss_table_path'] + )['damage'].to_dict() infrastructure_layer_defn = infrastructure_layer.GetLayerDefn() type_index = -1 @@ -703,8 +701,8 @@ def _calculate_damage_to_infrastructure_in_aoi( intersection_geometry = aoi_geometry_shapely.intersection( infrastructure_geometry) damage_type = int(infrastructure_feature.GetField(type_index)) - damage = damage_type_map[damage_type]['damage'] - total_damage += intersection_geometry.area * damage + total_damage += ( + intersection_geometry.area * damage_type_map[damage_type]) aoi_damage[aoi_feature.GetFID()] = total_damage @@ -939,5 +937,25 @@ def validate(args, limit_to=None): be an empty list if validation succeeds. """ - return validation.validate(args, MODEL_SPEC['args'], - MODEL_SPEC['args_with_spatial_overlap']) + validation_warnings = validation.validate( + args, MODEL_SPEC['args'], MODEL_SPEC['args_with_spatial_overlap']) + + sufficient_keys = validation.get_sufficient_keys(args) + invalid_keys = validation.get_invalid_keys(validation_warnings) + + if ("curve_number_table_path" not in invalid_keys and + "curve_number_table_path" in sufficient_keys): + # Load CN table. Resulting DF has index and CN_X columns only. + cn_df = utils.read_csv_to_dataframe( + args['curve_number_table_path'], + MODEL_SPEC['args']['curve_number_table_path']) + # Check for NaN values. + nan_mask = cn_df.isna() + if nan_mask.any(axis=None): + nan_lucodes = nan_mask[nan_mask.any(axis=1)].index + lucode_list = list(nan_lucodes.values) + validation_warnings.append(( + ['curve_number_table_path'], + f'Missing curve numbers for lucode(s) {lucode_list}')) + + return validation_warnings diff --git a/src/natcap/invest/urban_nature_access.py b/src/natcap/invest/urban_nature_access.py index 113a1e5b3d..99a9d26cf5 100644 --- a/src/natcap/invest/urban_nature_access.py +++ b/src/natcap/invest/urban_nature_access.py @@ -1,5 +1,4 @@ import collections -import functools import logging import math import os @@ -76,14 +75,17 @@ "corresponding values in this table. Each row is a land use " "land cover class." ), + 'index_col': 'lucode', 'columns': { 'lucode': spec_utils.LULC_TABLE_COLUMN, 'urban_nature': { - 'type': 'number', - 'units': u.none, + 'type': 'ratio', 'about': ( - "Binary code indicating whether the LULC type is " - "(1) or is not (0) an urban nature type." + "The proportion (0-1) indicating the naturalness of " + "the land types. 0 indicates the naturalness level of " + "this LULC type is lowest (0% nature), while 1 " + "indicates that of this LULC type is the highest " + "(100% nature)" ), }, 'search_radius_m': { @@ -256,9 +258,10 @@ 'name': 'population group radii table', 'type': 'csv', 'required': f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"', + 'index_col': 'pop_group', 'columns': { "pop_group": { - "type": "ratio", + "type": "freestyle_string", "required": False, "about": gettext( "The name of the population group. 
Names must match " @@ -302,8 +305,9 @@ 'output': { "type": "directory", "contents": { - "urban_nature_supply.tif": { - "about": "The calculated supply of urban nature.", + "urban_nature_supply_percapita.tif": { + "about": ( + "The calculated supply per capita of urban nature."), "bands": {1: { "type": "number", "units": u.m**2, @@ -412,13 +416,44 @@ "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", - } + }, + + # when RADIUS_OPT_UNIFORM + "accessible_urban_nature.tif": { + "about": gettext( + "The area of greenspace available within the defined " + "radius, weighted by the selected decay function."), + "bands": {1: {"type": "number", "units": u.m**2}}, + "created_if": + f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", + }, + + # When RADIUS_OPT_URBAN_NATURE + "accessible_urban_nature_lucode_[LUCODE].tif": { + "about": gettext( + "The area of greenspace available within the radius " + "associated with urban nature class LUCODE, weighted " + "by the selected decay function."), + "bands": {1: {"type": "number", "units": u.m**2}}, + "created_if": + f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", + }, + + # When RADIUS_OPT_POP_GROUP + "accessible_urban_nature_to_[POP_GROUP].tif": { + "about": gettext( + "The area of greenspace available within the radius " + "associated with group POP_GROUP, weighted by the " + "selected decay function."), + "bands": {1: {"type": "number", "units": u.m**2}}, + "created_if": + f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", + }, }, }, 'intermediate': { 'type': 'directory', 'contents': { - '_taskgraph_working_dir': spec_utils.TASKGRAPH_DIR, "aligned_lulc.tif": { "about": gettext( "A copy of the user's land use land cover raster. " @@ -445,6 +480,7 @@ ), "bands": {1: {'type': 'number', 'units': u.count}}, }, + # when RADIUS_OPT_UNIFORM "distance_weighted_population_within_[SEARCH_RADIUS].tif": { "about": gettext( @@ -484,13 +520,13 @@ "created_if": f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", }, - "urban_nature_supply_lucode_[LUCODE].tif": { + "urban_nature_supply_percapita_lucode_[LUCODE].tif": { "about": gettext( "The urban nature supplied to populations due to the " "land use land cover code LUCODE"), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": - f"search_radius_mode == '{RADIUS_OPT_UNIFORM}'", + f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", }, "urban_nature_population_ratio_lucode_[LUCODE].tif": { "about": gettext( @@ -501,14 +537,6 @@ "created_if": f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", }, - "urban_nature_supply_lucode_[LUCODE].tif": { - "about": gettext( - "The urban nature supplied to populations due to " - "the land use land cover class LUCODE."), - "bands": {1: {"type": "number", "units": u.m**2/u.person}}, - "created_if": - f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'", - }, # When RADIUS_OPT_POP_GROUP "population_in_[POP_GROUP].tif": { @@ -547,10 +575,10 @@ "created_if": f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", }, - "urban_nature_supply_to_[POP_GROUP].tif": { + "urban_nature_supply_percapita_to_[POP_GROUP].tif": { "about": gettext( - "The urban nature supply to population group " - "POP_GROUP."), + "The urban nature supply per capita to population " + "group POP_GROUP."), "bands": {1: {"type": "number", "units": u.m**2/u.person}}, "created_if": f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", @@ -570,16 +598,16 @@ "bands": {1: {"type": "number", "units": u.people}}, "created_if": 
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'", - }, - }, - - } + } + } + }, + 'taskgraph_cache': spec_utils.TASKGRAPH_DIR, } } _OUTPUT_BASE_FILES = { - 'urban_nature_supply': 'urban_nature_supply.tif', + 'urban_nature_supply_percapita': 'urban_nature_supply_percapita.tif', 'admin_boundaries': 'admin_boundaries.gpkg', 'urban_nature_balance_percapita': 'urban_nature_balance_percapita.tif', 'urban_nature_balance_totalpop': 'urban_nature_balance_totalpop.tif', @@ -620,9 +648,10 @@ def execute(args): CSV with the following columns: * ``lucode``: (required) the integer landcover code represented. - * ``urban_nature``: (required) ``0`` or ``1`` indicating whether - this landcover code is (``1``) or is not (``0``) an urban nature - pixel. + * ``urban_nature``: (required) a proportion (0-1) representing + how much of this landcover type is urban nature. ``0`` + indicates none of this type's area is urban nature, ``1`` + indicates all of this type's area is urban nature. * ``search_radius_m``: (conditionally required) the search radius for this urban nature LULC class in meters. Required for all urban nature LULC codes if ``args['search_radius_mode'] == @@ -677,7 +706,6 @@ def execute(args): (_INTERMEDIATE_BASE_FILES, intermediate_dir)], suffix) - work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -685,7 +713,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # Synchronous execution - graph = taskgraph.TaskGraph(work_token_dir, n_workers) + graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) kernel_creation_functions = { KERNEL_LABEL_DICHOTOMY: _kernel_dichotomy, @@ -902,14 +931,16 @@ def execute(args): aoi_reprojection_task, lulc_mask_task] ) - attr_table = utils.read_csv_to_dataframe(args['lulc_attribute_table']) + attr_table = utils.read_csv_to_dataframe( + args['lulc_attribute_table'], + MODEL_SPEC['args']['lulc_attribute_table']) kernel_paths = {} # search_radius, kernel path kernel_tasks = {} # search_radius, kernel task if args['search_radius_mode'] == RADIUS_OPT_UNIFORM: search_radii = set([float(args['search_radius'])]) elif args['search_radius_mode'] == RADIUS_OPT_URBAN_NATURE: - urban_nature_attrs = attr_table[attr_table['urban_nature'] == 1] + urban_nature_attrs = attr_table[attr_table['urban_nature'] > 0] try: search_radii = set(urban_nature_attrs['search_radius_m'].unique()) except KeyError as missing_key: @@ -918,16 +949,14 @@ def execute(args): f"attribute table {args['lulc_attribute_table']}") # Build an iterable of plain tuples: (lucode, search_radius_m) lucode_to_search_radii = list( - urban_nature_attrs[['lucode', 'search_radius_m']].itertuples( - index=False, name=None)) + urban_nature_attrs[['search_radius_m']].itertuples(name=None)) elif args['search_radius_mode'] == RADIUS_OPT_POP_GROUP: pop_group_table = utils.read_csv_to_dataframe( - args['population_group_radii_table']) + args['population_group_radii_table'], + MODEL_SPEC['args']['population_group_radii_table']) search_radii = set(pop_group_table['search_radius_m'].unique()) # Build a dict of {pop_group: search_radius_m} - search_radii_by_pop_group = dict( - pop_group_table[['pop_group', 'search_radius_m']].itertuples( - index=False, name=None)) + search_radii_by_pop_group = pop_group_table['search_radius_m'].to_dict() else: valid_options = ', '.join( 
MODEL_SPEC['args']['search_radius_mode']['options'].keys()) @@ -989,6 +1018,21 @@ def execute(args): dependent_task_list=[lulc_mask_task] ) + accessible_urban_nature_path = os.path.join( + output_dir, f'accessible_urban_nature{suffix}.tif') + _ = graph.add_task( + _convolve_and_set_lower_bound, + kwargs={ + "signal_path_band": (urban_nature_pixels_path, 1), + "kernel_path_band": (kernel_paths[search_radius_m], 1), + "target_path": accessible_urban_nature_path, + "working_dir": intermediate_dir, + }, + task_name='Accessible urban nature', + target_path_list=[accessible_urban_nature_path], + dependent_task_list=[urban_nature_reclassification_task] + ) + urban_nature_population_ratio_path = os.path.join( intermediate_dir, f'urban_nature_population_ratio{suffix}.tif') @@ -1005,17 +1049,17 @@ def execute(args): urban_nature_reclassification_task, decayed_population_task, ]) - urban_nature_supply_task = graph.add_task( + urban_nature_supply_percapita_task = graph.add_task( _convolve_and_set_lower_bound, kwargs={ 'signal_path_band': ( urban_nature_population_ratio_path, 1), 'kernel_path_band': (kernel_path, 1), - 'target_path': file_registry['urban_nature_supply'], + 'target_path': file_registry['urban_nature_supply_percapita'], 'working_dir': intermediate_dir, }, task_name='2SFCA - urban nature supply', - target_path_list=[file_registry['urban_nature_supply']], + target_path_list=[file_registry['urban_nature_supply_percapita']], dependent_task_list=[ kernel_tasks[search_radius_m], urban_nature_population_ratio_task]) @@ -1044,8 +1088,8 @@ def execute(args): dependent_task_list=[ kernel_tasks[search_radius_m], population_mask_task]) - partial_urban_nature_supply_paths = [] - partial_urban_nature_supply_tasks = [] + partial_urban_nature_supply_percapita_paths = [] + partial_urban_nature_supply_percapita_tasks = [] for lucode, search_radius_m in lucode_to_search_radii: urban_nature_pixels_path = os.path.join( intermediate_dir, @@ -1063,6 +1107,22 @@ def execute(args): dependent_task_list=[lulc_mask_task] ) + accessible_urban_nature_path = os.path.join( + output_dir, + f'accessible_urban_nature_lucode_{lucode}{suffix}.tif') + _ = graph.add_task( + _convolve_and_set_lower_bound, + kwargs={ + "signal_path_band": (urban_nature_pixels_path, 1), + "kernel_path_band": (kernel_paths[search_radius_m], 1), + "target_path": accessible_urban_nature_path, + "working_dir": intermediate_dir, + }, + task_name='Accessible urban nature', + target_path_list=[accessible_urban_nature_path], + dependent_task_list=[urban_nature_reclassification_task] + ) + urban_nature_population_ratio_path = os.path.join( intermediate_dir, f'urban_nature_population_ratio_lucode_{lucode}{suffix}.tif') @@ -1080,35 +1140,37 @@ def execute(args): decayed_population_tasks[search_radius_m], ]) - urban_nature_supply_path = os.path.join( + urban_nature_supply_percapita_path = os.path.join( intermediate_dir, - f'urban_nature_supply_lucode_{lucode}{suffix}.tif') - partial_urban_nature_supply_paths.append(urban_nature_supply_path) - partial_urban_nature_supply_tasks.append(graph.add_task( + f'urban_nature_supply_percapita_lucode_{lucode}{suffix}.tif') + partial_urban_nature_supply_percapita_paths.append( + urban_nature_supply_percapita_path) + partial_urban_nature_supply_percapita_tasks.append(graph.add_task( pygeoprocessing.convolve_2d, kwargs={ 'signal_path_band': ( urban_nature_population_ratio_path, 1), 'kernel_path_band': (kernel_paths[search_radius_m], 1), - 'target_path': urban_nature_supply_path, + 'target_path': 
urban_nature_supply_percapita_path, 'working_dir': intermediate_dir, }, task_name=f'2SFCA - urban_nature supply for lucode {lucode}', - target_path_list=[urban_nature_supply_path], + target_path_list=[urban_nature_supply_percapita_path], dependent_task_list=[ kernel_tasks[search_radius_m], urban_nature_population_ratio_task])) - urban_nature_supply_task = graph.add_task( + urban_nature_supply_percapita_task = graph.add_task( ndr._sum_rasters, kwargs={ - 'raster_path_list': partial_urban_nature_supply_paths, + 'raster_path_list': partial_urban_nature_supply_percapita_paths, 'target_nodata': FLOAT32_NODATA, - 'target_result_path': file_registry['urban_nature_supply'], + 'target_result_path': + file_registry['urban_nature_supply_percapita'], }, task_name='2SFCA - urban nature supply total', - target_path_list=[file_registry['urban_nature_supply']], - dependent_task_list=partial_urban_nature_supply_tasks + target_path_list=[file_registry['urban_nature_supply_percapita']], + dependent_task_list=partial_urban_nature_supply_percapita_tasks ) # Search radius mode 3: search radii are defined per population group. @@ -1133,6 +1195,23 @@ def execute(args): decayed_population_in_group_tasks = [] for pop_group in split_population_fields: search_radius_m = search_radii_by_pop_group[pop_group] + + accessible_urban_nature_path = os.path.join( + output_dir, + f'accessible_urban_nature_to_{pop_group}{suffix}.tif') + _ = graph.add_task( + _convolve_and_set_lower_bound, + kwargs={ + "signal_path_band": (urban_nature_pixels_path, 1), + "kernel_path_band": (kernel_paths[search_radius_m], 1), + "target_path": accessible_urban_nature_path, + "working_dir": intermediate_dir, + }, + task_name='Accessible urban nature', + target_path_list=[accessible_urban_nature_path], + dependent_task_list=[urban_nature_reclassification_task] + ) + decayed_population_in_group_path = os.path.join( intermediate_dir, f'distance_weighted_population_in_{pop_group}{suffix}.tif') @@ -1185,42 +1264,36 @@ def execute(args): sum_of_decayed_population_task, ]) - # Create a dict of {pop_group: search_radius_m} - group_radii_table = utils.read_csv_to_dataframe( - args['population_group_radii_table']) - search_radii = dict( - group_radii_table[['pop_group', 'search_radius_m']].itertuples( - index=False, name=None)) - urban_nature_supply_by_group_paths = {} - urban_nature_supply_by_group_tasks = [] + urban_nature_supply_percapita_by_group_paths = {} + urban_nature_supply_percapita_by_group_tasks = [] urban_nature_balance_totalpop_by_group_paths = {} urban_nature_balance_totalpop_by_group_tasks = [] supply_population_paths = {'over': {}, 'under': {}} supply_population_tasks = {'over': {}, 'under': {}} for pop_group, proportional_pop_path in ( proportional_population_paths.items()): - search_radius_m = search_radii[pop_group] - urban_nature_supply_to_group_path = os.path.join( + search_radius_m = search_radii_by_pop_group[pop_group] + urban_nature_supply_percapita_to_group_path = os.path.join( intermediate_dir, - f'urban_nature_supply_to_{pop_group}{suffix}.tif') - urban_nature_supply_by_group_paths[ - pop_group] = urban_nature_supply_to_group_path - urban_nature_supply_by_group_task = graph.add_task( + f'urban_nature_supply_percapita_to_{pop_group}{suffix}.tif') + urban_nature_supply_percapita_by_group_paths[ + pop_group] = urban_nature_supply_percapita_to_group_path + urban_nature_supply_percapita_by_group_task = graph.add_task( _convolve_and_set_lower_bound, kwargs={ 'signal_path_band': ( file_registry['urban_nature_population_ratio'], 1), 
'kernel_path_band': (kernel_paths[search_radius_m], 1), - 'target_path': urban_nature_supply_to_group_path, + 'target_path': urban_nature_supply_percapita_to_group_path, 'working_dir': intermediate_dir, }, task_name=f'2SFCA - urban nature supply for {pop_group}', - target_path_list=[urban_nature_supply_to_group_path], + target_path_list=[urban_nature_supply_percapita_to_group_path], dependent_task_list=[ kernel_tasks[search_radius_m], urban_nature_population_ratio_task]) - urban_nature_supply_by_group_tasks.append( - urban_nature_supply_by_group_task) + urban_nature_supply_percapita_by_group_tasks.append( + urban_nature_supply_percapita_by_group_task) # Calculate SUP_DEMi_cap for each population group. per_cap_urban_nature_balance_pop_group_path = os.path.join( @@ -1230,7 +1303,7 @@ def execute(args): pygeoprocessing.raster_calculator, kwargs={ 'base_raster_path_band_const_list': [ - (urban_nature_supply_to_group_path, 1), + (urban_nature_supply_percapita_to_group_path, 1), (float(args['urban_nature_demand']), 'raw') ], 'local_op': _urban_nature_balance_percapita_op, @@ -1244,7 +1317,7 @@ def execute(args): target_path_list=[ per_cap_urban_nature_balance_pop_group_path], dependent_task_list=[ - urban_nature_supply_by_group_task, + urban_nature_supply_percapita_by_group_task, ]) urban_nature_balance_totalpop_by_group_path = os.path.join( @@ -1303,21 +1376,21 @@ def execute(args): proportional_population_tasks[pop_group], ]) - urban_nature_supply_task = graph.add_task( + urban_nature_supply_percapita_task = graph.add_task( _weighted_sum, kwargs={ 'raster_path_list': - [urban_nature_supply_by_group_paths[group] for group in + [urban_nature_supply_percapita_by_group_paths[group] for group in sorted(split_population_fields)], 'weight_raster_list': [pop_group_proportion_paths[group] for group in sorted(split_population_fields)], - 'target_path': file_registry['urban_nature_supply'], + 'target_path': file_registry['urban_nature_supply_percapita'], }, task_name='2SFCA - urban nature supply total', - target_path_list=[file_registry['urban_nature_supply']], + target_path_list=[file_registry['urban_nature_supply_percapita']], dependent_task_list=[ - *urban_nature_supply_by_group_tasks, + *urban_nature_supply_percapita_by_group_tasks, *pop_group_proportion_tasks.values(), ]) @@ -1325,7 +1398,7 @@ def execute(args): pygeoprocessing.raster_calculator, kwargs={ 'base_raster_path_band_const_list': [ - (file_registry['urban_nature_supply'], 1), + (file_registry['urban_nature_supply_percapita'], 1), (float(args['urban_nature_demand']), 'raw') ], 'local_op': _urban_nature_balance_percapita_op, @@ -1337,7 +1410,7 @@ def execute(args): task_name='Calculate per-capita urban nature balance', target_path_list=[file_registry['urban_nature_balance_percapita']], dependent_task_list=[ - urban_nature_supply_task, + urban_nature_supply_percapita_task, ]) urban_nature_balance_totalpop_task = graph.add_task( @@ -1388,7 +1461,7 @@ def execute(args): pygeoprocessing.raster_calculator, kwargs={ 'base_raster_path_band_const_list': [ - (file_registry['urban_nature_supply'], 1), + (file_registry['urban_nature_supply_percapita'], 1), (float(args['urban_nature_demand']), 'raw') ], 'local_op': _urban_nature_balance_percapita_op, @@ -1400,7 +1473,7 @@ def execute(args): task_name='Calculate per-capita urban nature balance', target_path_list=[file_registry['urban_nature_balance_percapita']], dependent_task_list=[ - urban_nature_supply_task, + urban_nature_supply_percapita_task, ]) # This is "SUP_DEMi" from the user's guide @@ 
-1730,13 +1803,16 @@ def _reclassify_urban_nature_area( """Reclassify LULC pixels into the urban nature area they represent. After execution, urban nature pixels will have values representing the - pixel's area, while pixels that are not urban nature will have a pixel - value of 0. Nodata values will propagate to the output raster. + pixel's area of urban nature (pixel area * proportion of urban nature), + while pixels that are not urban nature will have a pixel value of 0. + Nodata values will propagate to the output raster. Args: lulc_raster_path (string): The path to a land-use/land-cover raster. lulc_attribute_table (string): The path to a CSV table representing LULC attributes. Must have "lucode" and "urban_nature" columns. + The "urban_nature" column represents a proportion 0-1 of how much + of the pixel's area represents urban nature. target_raster_path (string): Where the reclassified urban nature raster should be written. only_these_urban_nature_codes=None (iterable or None): If ``None``, all @@ -1748,8 +1824,8 @@ def _reclassify_urban_nature_area( Returns: ``None`` """ - attribute_table_dict = utils.read_csv_to_dataframe( - lulc_attribute_table, 'lucode').to_dict(orient='index') + lulc_attribute_df = utils.read_csv_to_dataframe( + lulc_attribute_table, MODEL_SPEC['args']['lulc_attribute_table']) squared_pixel_area = abs( numpy.multiply(*_square_off_pixels(lulc_raster_path))) @@ -1758,14 +1834,15 @@ def _reclassify_urban_nature_area( valid_urban_nature_codes = set(only_these_urban_nature_codes) else: valid_urban_nature_codes = set( - lucode for lucode, attributes in attribute_table_dict.items() - if (attributes['urban_nature']) == 1) + lulc_attribute_df[lulc_attribute_df['urban_nature'] > 0].index) urban_nature_area_map = {} - for lucode, attributes in attribute_table_dict.items(): + for row in lulc_attribute_df[['urban_nature']].itertuples(): + lucode = row.Index + urban_nature_proportion = row.urban_nature urban_nature_area = 0 if lucode in valid_urban_nature_codes: - urban_nature_area = squared_pixel_area + urban_nature_area = squared_pixel_area * urban_nature_proportion urban_nature_area_map[lucode] = urban_nature_area lulc_raster_info = pygeoprocessing.get_raster_info(lulc_raster_path) diff --git a/src/natcap/invest/utils.py b/src/natcap/invest/utils.py index 1f603eecb8..e70893395e 100644 --- a/src/natcap/invest/utils.py +++ b/src/natcap/invest/utils.py @@ -597,85 +597,119 @@ def expand_path(path, base_path): return os.path.abspath(os.path.join(os.path.dirname(base_path), path)) -def read_csv_to_dataframe( - path, index_col=False, usecols=None, convert_cols_to_lower=True, - convert_vals_to_lower=True, expand_path_cols=None, sep=None, engine='python', - encoding='utf-8-sig', **kwargs): +def read_csv_to_dataframe(path, spec, **kwargs): """Return a dataframe representation of the CSV. - Wrapper around ``pandas.read_csv`` that standardizes the column names by - stripping leading/trailing whitespace and optionally making all lowercase. - This helps avoid common errors caused by user-supplied CSV files with - column names that don't exactly match the specification. Strips - leading/trailing whitespace from data entries as well. + Wrapper around ``pandas.read_csv`` that performs some common data cleaning + based on information in the arg spec. - Also sets custom defaults for some kwargs passed to ``pandas.read_csv``. + Columns are filtered to just those that match a pattern in the spec. + Column names are lowercased and whitespace is stripped off. Empty rows are + dropped. 
Values in each column are processed and cast to an appropriate + dtype according to the type in the spec: + + - Values in raster, vector, csv, file, and directory columns are cast to + str, whitespace stripped, and expanded as paths relative to the input path + - Values in freestyle_string and option_string columns are cast to str, + whitespace stripped, and converted to lowercase + - Values in number, ratio, and percent columns are cast to float + - Values in integer columns are cast to int + - Values in boolean columns are cast to bool + + Empty or NA cells are returned as ``numpy.nan`` (for floats) or + ``pandas.NA`` (for all other types). + + Also sets custom defaults for some kwargs passed to ``pandas.read_csv``, + which you can override with kwargs: + + - sep=None: lets the Python engine infer the separator + - engine='python': The 'python' engine supports the sep=None option. + - encoding='utf-8-sig': 'utf-8-sig' handles UTF-8 with or without BOM. Args: path (str): path to a CSV file - index_col (str): name of column to use as the dataframe index. If - ``convert_cols_to_lower``, this column name and the dataframe column names - will be lowercased before they are compared. If ``usecols`` - is defined, this must be included in ``usecols``. - usecols (list(str)): list of column names to subset from the dataframe. - If ``convert_cols_to_lower``, these names and the dataframe column names - will be lowercased before they are compared. - convert_cols_to_lower (bool): if True, convert all column names to lowercase - convert_vals_to_lower (bool): if True, convert all table values to lowercase - expand_path_cols (list[string])): if provided, a list of the names of - columns that contain paths to expand. Any relative paths in these - columns will be expanded to absolute paths. It is assumed that - relative paths are relative to the CSV's path. - sep: kwarg of ``pandas.read_csv``. Defaults to None, which - lets the Python engine infer the separator - engine (str): kwarg of ``pandas.read_csv``. The 'python' engine - supports the sep=None option. - encoding (str): kwarg of ``pandas.read_csv``. Using the 'utf-8-sig' - encoding handles UTF-8 with or without BOM. 
+ spec (dict): dictionary specifying the structure of the CSV table **kwargs: additional kwargs will be passed to ``pandas.read_csv`` Returns: pandas.DataFrame with the contents of the given CSV - """ + # build up a list of regex patterns to match columns against columns from + # the table that match a pattern in this list (after stripping whitespace + # and lowercasing) will be included in the dataframe + patterns = [] + for column in spec['columns']: + column = column.lower() + match = re.match(r'(.*)\[(.+)\](.*)', column) + if match: + # for column name patterns, convert it to a regex pattern + groups = match.groups() + patterns.append(f'{groups[0]}(.+){groups[2]}') + else: + # for regular column names, use the exact name as the pattern + patterns.append(column.replace('(', '\(').replace(')', '\)')) + try: # set index_col=False to force pandas not to index by any column # this is useful in case of trailing separators # we'll explicitly set the index column later on - dataframe = pandas.read_csv( - path, index_col=False, sep=sep, engine=engine, encoding=encoding, **kwargs) + df = pandas.read_csv( + path, + index_col=False, + usecols=lambda col: any( + re.fullmatch(pattern, col.strip().lower()) for pattern in patterns + ), + **{ + 'sep': None, + 'engine': 'python', + 'encoding': 'utf-8-sig', + **kwargs + }) except UnicodeDecodeError as error: LOGGER.error( f'The file {path} must be encoded as UTF-8 or ASCII') raise error - # strip whitespace from column names + # strip whitespace from column names and convert to lowercase # this won't work on integer types, which happens if you set header=None # however, there's little reason to use this function if there's no header - dataframe.columns = dataframe.columns.str.strip() - - # convert column names to lowercase - if convert_cols_to_lower: - dataframe.columns = dataframe.columns.str.lower() - # if 'to_lower`, case handling is done before trying to access the data. - # the columns are stripped of leading/trailing whitespace in - # ``read_csv_to_dataframe``, and also lowercased if ``to_lower`` so we only - # need to convert the rest of the table. 
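For illustration only, here is a small standalone sketch of the column-pattern matching added above. The spec column names are hypothetical and not part of this patch; the point is just that bracketed names such as 'precip_[month]' become wildcard regexes, while literal names are matched exactly.

    import re

    def build_patterns(spec_columns):
        # bracketed column names become wildcard regexes; literal names are
        # matched exactly, with any parentheses escaped
        patterns = []
        for column in spec_columns:
            column = column.lower()
            match = re.match(r'(.*)\[(.+)\](.*)', column)
            if match:
                prefix, _, suffix = match.groups()
                patterns.append(f'{prefix}(.+){suffix}')
            else:
                patterns.append(column.replace('(', '\\(').replace(')', '\\)'))
        return patterns

    print(build_patterns(['lucode', 'precip_[month]']))
    # ['lucode', 'precip_(.+)'], so columns precip_1 through precip_12 all match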
- if index_col and isinstance(index_col, str): - index_col = index_col.lower() - # lowercase column names - if usecols: - usecols = [col.lower() for col in usecols] - - # Subset dataframe by columns if desired - if usecols: - dataframe = dataframe[usecols] - - # Set 'index_col' as the index of the dataframe - if index_col: + df.columns = df.columns.str.strip().str.lower() + + # drop any empty rows + df = df.dropna(how="all") + + available_cols = set(df.columns) + + for col_spec, pattern in zip(spec['columns'].values(), patterns): + matching_cols = [c for c in available_cols if re.match(pattern, c)] + available_cols -= set(matching_cols) + for col in matching_cols: + try: + if col_spec['type'] in ['csv', 'directory', 'file', 'raster', 'vector', {'vector', 'raster'}]: + df[col] = df[col].apply( + lambda p: p if pandas.isna(p) else expand_path(str(p).strip(), path)) + df[col] = df[col].astype(pandas.StringDtype()) + elif col_spec['type'] in {'freestyle_string', 'option_string'}: + df[col] = df[col].apply( + lambda s: s if pandas.isna(s) else str(s).strip().lower()) + df[col] = df[col].astype(pandas.StringDtype()) + elif col_spec['type'] in {'number', 'percent', 'ratio'}: + df[col] = df[col].astype(float) + elif col_spec['type'] == 'integer': + df[col] = df[col].astype(pandas.Int64Dtype()) + elif col_spec['type'] == 'boolean': + df[col] = df[col].astype('boolean') + except ValueError as err: + raise ValueError( + f'Value(s) in the "{col}" column of the table {path} ' + f'could not be interpreted as {col_spec["type"]}s. ' + f'Original error: {err}') + + # set the index column, if specified + if 'index_col' in spec and spec['index_col'] is not None: + index_col = spec['index_col'].lower() try: - dataframe = dataframe.set_index( - index_col, drop=False, verify_integrity=True) + df = df.set_index(index_col, verify_integrity=True) except KeyError: # If 'index_col' is not a column then KeyError is raised for using # it as the index column @@ -683,33 +717,7 @@ def read_csv_to_dataframe( f"in the table {path}") raise - # convert table values to lowercase - if convert_vals_to_lower: - dataframe = dataframe.applymap( - lambda x: x.lower() if isinstance(x, str) else x) - - # expand paths - if expand_path_cols: - for col in expand_path_cols: - # allow for the case where a column is optional - if col in dataframe: - dataframe[col] = dataframe[col].apply( - # if the whole column is empty, cells will be parsed as NaN - # catch that before trying to expand them as paths - lambda p: '' if pandas.isna(p) else expand_path(p, path)) - - # drop any empty rows - dataframe = dataframe.dropna(how="all") - - # fill the rest of empty or NaN values with empty string - dataframe = dataframe.fillna(value="") - - # strip whitespace from table values - # Remove values with leading ('^ +') and trailing (' +$') whitespace. - # Regular expressions using 'replace' only substitute on strings. 
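As a usage sketch of the new calling convention (the CSV name, column names, and types below are made up for illustration; real callers pass the relevant entry of their MODEL_SPEC, as the wave_energy and wind_energy hunks further down do):

    from natcap.invest import utils

    # hypothetical spec for a table with columns: lucode, usle_c, load_path
    spec = {
        'index_col': 'lucode',
        'columns': {
            'lucode': {'type': 'integer'},
            'usle_c': {'type': 'ratio'},
            'load_path': {'type': 'raster'},
        },
    }
    # assumes 'biophysical.csv' exists alongside the calling script
    df = utils.read_csv_to_dataframe('biophysical.csv', spec)
    # df is indexed by integer lucode, 'usle_c' is cast to float, and
    # 'load_path' values are expanded to absolute paths relative to the CSV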
- dataframe = dataframe.replace(r"^ +| +$", r"", regex=True) - - return dataframe + return df def make_directories(directory_list): diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 0b0da62c79..6391ca682b 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -56,7 +56,8 @@ 'NOT_AN_INTEGER': gettext('Value "{value}" does not represent an integer'), 'NOT_BOOLEAN': gettext("Value must be either True or False, not {value}"), 'NO_PROJECTION': gettext('Spatial file {filepath} has no projection'), - 'BBOX_NOT_INTERSECT': gettext("Bounding boxes do not intersect: {bboxes}"), + 'BBOX_NOT_INTERSECT': gettext('Not all of the spatial layers overlap each ' + 'other. All bounding boxes must intersect: {bboxes}'), 'NEED_PERMISSION': gettext('You must have {permission} access to this file'), } diff --git a/src/natcap/invest/wave_energy.py b/src/natcap/invest/wave_energy.py index a03c7e76f0..75e9ea4e5f 100644 --- a/src/natcap/invest/wave_energy.py +++ b/src/natcap/invest/wave_energy.py @@ -423,6 +423,7 @@ "Table of value ranges for each captured wave energy " "quantile group as well as the number of pixels for " "each group."), + "index_col": "Percentile Group", "columns": { **PERCENTILE_TABLE_FIELDS, "Value Range (megawatt hours per year, MWh/yr)": { @@ -459,6 +460,7 @@ "Table of value ranges for each net present value " "quantile group as well as the number of pixels for " "each group."), + "index_col": "Percentile Group", "columns": { **PERCENTILE_TABLE_FIELDS, "Value Range (thousands of currency units, currency)": { @@ -497,6 +499,7 @@ "about": gettext( "Table of value ranges for each wave power quantile " "group as well as the number of pixels for each group."), + "index_col": "Percentile Group", "columns": { **PERCENTILE_TABLE_FIELDS, "Value Range (wave power per unit width of wave crest length, kW/m)": { @@ -597,10 +600,10 @@ "LandPts.txt": { "created_if": "valuation_container", "about": "This text file logs records of the landing point coordinates." - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -722,8 +725,6 @@ def execute(args): utils.make_directories([intermediate_dir, output_dir]) # Initialize a TaskGraph - taskgraph_working_dir = os.path.join( - intermediate_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -731,7 +732,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # single process mode. - task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) # Append a _ to the suffix if it's not empty and doesn't already have one file_suffix = utils.make_suffix_string(args, 'results_suffix') @@ -743,7 +745,7 @@ def execute(args): # arrays. 
Also store the amount of energy the machine produces # in a certain wave period/height state as a 2D array machine_perf_dict = {} - machine_perf_data = utils.read_csv_to_dataframe(args['machine_perf_path']) + machine_perf_data = pandas.read_csv(args['machine_perf_path']) # Get the wave period fields, starting from the second column of the table machine_perf_dict['periods'] = machine_perf_data.columns.values[1:] # Build up the height field by taking the first column of the table @@ -775,12 +777,15 @@ def execute(args): # Check if required column fields are entered in the land grid csv file if 'land_gridPts_path' in args: - # Create a grid_land_data dataframe for later use in valuation - grid_land_data = utils.read_csv_to_dataframe( - args['land_gridPts_path'], convert_vals_to_lower=False) - required_col_names = ['id', 'type', 'lat', 'long', 'location'] - grid_land_data, missing_grid_land_fields = _get_validated_dataframe( - args['land_gridPts_path'], required_col_names) + # Create a grid_land_df dataframe for later use in valuation + grid_land_df = utils.read_csv_to_dataframe( + args['land_gridPts_path'], + MODEL_SPEC['args']['land_gridPts_path']) + missing_grid_land_fields = [] + for field in ['id', 'type', 'lat', 'long', 'location']: + if field not in grid_land_df.columns: + missing_grid_land_fields.append(field) + if missing_grid_land_fields: raise ValueError( 'The following column fields are missing from the Grid ' @@ -1080,20 +1085,12 @@ def execute(args): grid_vector_path = os.path.join( output_dir, 'GridPts_prj%s.shp' % file_suffix) - grid_data = grid_land_data.loc[ - grid_land_data['type'].str.upper() == 'GRID'] - land_data = grid_land_data.loc[ - grid_land_data['type'].str.upper() == 'LAND'] - - grid_dict = grid_data.to_dict('index') - land_dict = land_data.to_dict('index') - # Make a point shapefile for grid points LOGGER.info('Creating Grid Points Vector.') create_grid_points_vector_task = task_graph.add_task( func=_dict_to_point_vector, - args=(grid_dict, grid_vector_path, 'grid_points', analysis_area_sr_wkt, - aoi_sr_wkt), + args=(grid_land_df[grid_land_df['type'] == 'grid'].to_dict('index'), + grid_vector_path, 'grid_points', analysis_area_sr_wkt, aoi_sr_wkt), target_path_list=[grid_vector_path], task_name='create_grid_points_vector') @@ -1101,8 +1098,8 @@ def execute(args): LOGGER.info('Creating Landing Points Vector.') create_land_points_vector_task = task_graph.add_task( func=_dict_to_point_vector, - args=(land_dict, land_vector_path, 'land_points', analysis_area_sr_wkt, - aoi_sr_wkt), + args=(grid_land_df[grid_land_df['type'] == 'land'].to_dict('index'), + land_vector_path, 'land_points', analysis_area_sr_wkt, aoi_sr_wkt), target_path_list=[land_vector_path], task_name='create_land_points_vector') @@ -1413,26 +1410,6 @@ def _add_target_fields_to_wave_vector( target_wave_vector = None -def _get_validated_dataframe(csv_path, field_list): - """Return a dataframe with upper cased fields, and a list of missing fields. - - Args: - csv_path (str): path to the csv to be converted to a dataframe. - field_list (list): a list of fields in string format. - - Returns: - dataframe (pandas.DataFrame): from csv with upper-cased fields. - missing_fields (list): missing fields as string format in dataframe. 
- - """ - dataframe = utils.read_csv_to_dataframe(csv_path, convert_vals_to_lower=False) - missing_fields = [] - for field in field_list: - if field not in dataframe.columns: - missing_fields.append(field) - return dataframe, missing_fields - - def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name, base_sr_wkt, target_sr_wkt): """Given a dictionary of data create a point shapefile that represents it. @@ -1493,6 +1470,7 @@ def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name, for point_dict in base_dict_data.values(): latitude = float(point_dict['lat']) longitude = float(point_dict['long']) + point_dict['id'] = int(point_dict['id']) # When projecting to WGS84, extents -180 to 180 are used for longitude. # In case input longitude is from -360 to 0 convert if longitude < -180: @@ -1505,7 +1483,7 @@ def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name, target_layer.CreateFeature(output_feature) for field_name in point_dict: - output_feature.SetField(field_name, point_dict[field_name]) + output_feature.SetField(field_name.upper(), point_dict[field_name]) output_feature.SetGeometryDirectly(geom) target_layer.SetFeature(output_feature) output_feature = None @@ -1670,9 +1648,13 @@ def _machine_csv_to_dict(machine_csv_path): machine_dict = {} # make columns and indexes lowercased and strip whitespace machine_data = utils.read_csv_to_dataframe( - machine_csv_path, 'name', convert_vals_to_lower=False) - machine_data.index = machine_data.index.str.strip() - machine_data.index = machine_data.index.str.lower() + machine_csv_path, + { + 'index_col': 'name', + 'columns': { + 'name': {'type': 'freestyle_string'}, + 'value': {'type': 'number'} + }}) # drop NaN indexed rows in dataframe machine_data = machine_data[machine_data.index.notnull()] diff --git a/src/natcap/invest/wind_energy.py b/src/natcap/invest/wind_energy.py index 52fda89b85..da9e78bdca 100644 --- a/src/natcap/invest/wind_energy.py +++ b/src/natcap/invest/wind_energy.py @@ -328,6 +328,7 @@ }, "grid_points_path": { "type": "csv", + "index_col": "id", "columns": { "id": { "type": "integer", @@ -382,6 +383,7 @@ }, "wind_schedule": { "type": "csv", + "index_col": "year", "columns": { "year": { "type": "number", @@ -524,10 +526,10 @@ "about": "Wind data", "geometries": spec_utils.POINT, "fields": OUTPUT_WIND_DATA_FIELDS - }, - "_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR + } } - } + }, + "taskgraph_cache": spec_utils.TASKGRAPH_DIR } } @@ -654,7 +656,6 @@ def execute(args): suffix = utils.make_suffix_string(args, 'results_suffix') # Initialize a TaskGraph - taskgraph_working_dir = os.path.join(inter_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): @@ -662,7 +663,8 @@ def execute(args): # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # single process mode. 
- task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers) + task_graph = taskgraph.TaskGraph( + os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers) # Resample the bathymetry raster if it does not have square pixel size try: @@ -754,9 +756,11 @@ def execute(args): # If Price Table provided use that for price of energy, validate inputs time = int(val_parameters_dict['time_period']) if args['price_table']: - wind_price_df = utils.read_csv_to_dataframe(args['wind_schedule']) + wind_price_df = utils.read_csv_to_dataframe( + args['wind_schedule'], MODEL_SPEC['args']['wind_schedule'] + ).sort_index() # sort by year - year_count = len(wind_price_df['year']) + year_count = len(wind_price_df) if year_count != time + 1: raise ValueError( "The 'time' argument in the Global Wind Energy Parameters " @@ -765,7 +769,6 @@ def execute(args): # Save the price values into a list where the indices of the list # indicate the time steps for the lifespan of the wind farm - wind_price_df.sort_values('year', inplace=True) price_list = wind_price_df['price'].tolist() else: change_rate = float(args["rate_change"]) @@ -1134,19 +1137,11 @@ def execute(args): # Read the grid points csv, and convert it to land and grid dictionary grid_land_df = utils.read_csv_to_dataframe( - args['grid_points_path'], convert_vals_to_lower=False) - - # Make separate dataframes based on 'TYPE' - grid_df = grid_land_df.loc[( - grid_land_df['type'].str.upper() == 'GRID')] - land_df = grid_land_df.loc[( - grid_land_df['type'].str.upper() == 'LAND')] + args['grid_points_path'], MODEL_SPEC['args']['grid_points_path']) # Convert the dataframes to dictionaries, using 'ID' (the index) as key - grid_df.set_index('id', inplace=True) - grid_dict = grid_df.to_dict('index') - land_df.set_index('id', inplace=True) - land_dict = land_df.to_dict('index') + grid_dict = grid_land_df[grid_land_df['type'] == 'grid'].to_dict('index') + land_dict = grid_land_df[grid_land_df['type'] == 'land'].to_dict('index') grid_vector_path = os.path.join( inter_dir, 'val_grid_points%s.shp' % suffix) @@ -1974,7 +1969,8 @@ def _read_csv_wind_data(wind_data_path, hub_height): """ wind_point_df = utils.read_csv_to_dataframe( - wind_data_path, convert_cols_to_lower=False, convert_vals_to_lower=False) + wind_data_path, MODEL_SPEC['args']['wind_data_path']) + wind_point_df.columns = wind_point_df.columns.str.upper() # Calculate scale value at new hub height given reference values. # See equation 3 in users guide @@ -2631,10 +2627,12 @@ def _clip_vector_by_vector( shutil.rmtree(temp_dir, ignore_errors=True) if empty_clip: + # The "clip_vector_path" is always the AOI. raise ValueError( f"Clipping {base_vector_path} by {clip_vector_path} returned 0" - " features. If an AOI was provided this could mean the AOI and" - " Wind Data do not intersect spatially.") + f" features. This means the AOI and {base_vector_path} do not" + " intersect spatially. 
Please check that the AOI has spatial" + " overlap with all input data.") LOGGER.info('Finished _clip_vector_by_vector') diff --git a/tests/test_coastal_blue_carbon.py b/tests/test_coastal_blue_carbon.py index 9b42771ac2..3bec143036 100644 --- a/tests/test_coastal_blue_carbon.py +++ b/tests/test_coastal_blue_carbon.py @@ -10,6 +10,7 @@ import unittest import numpy +import pandas import pygeoprocessing from natcap.invest import utils from osgeo import gdal @@ -151,10 +152,9 @@ def test_sample_data(self): pprint.pformat(non_suffixed_files))) expected_landcover_codes = set(range(0, 24)) - found_landcover_codes = set(utils.read_csv_to_dataframe( - os.path.join(outputs_dir, - 'carbon_biophysical_table_template_150225.csv'), - 'code').to_dict(orient='index').keys()) + found_landcover_codes = set(pandas.read_csv( + os.path.join(outputs_dir, 'carbon_biophysical_table_template_150225.csv') + )['code'].values) self.assertEqual(expected_landcover_codes, found_landcover_codes) def test_transition_table(self): @@ -188,25 +188,27 @@ def test_transition_table(self): lulc_csv.write('0,mangrove,True\n') lulc_csv.write('1,parking lot,False\n') - landcover_table = utils.read_csv_to_dataframe( - landcover_table_path, 'code').to_dict(orient='index') + landcover_df = utils.read_csv_to_dataframe( + landcover_table_path, + preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path']) target_table_path = os.path.join(self.workspace_dir, 'transition_table.csv') # Remove landcover code 1 from the table; expect error. - del landcover_table[1] + landcover_df = landcover_df.drop(1) with self.assertRaises(ValueError) as context: preprocessor._create_transition_table( - landcover_table, [filename_a, filename_b], target_table_path) + landcover_df, [filename_a, filename_b], target_table_path) self.assertIn('missing a row with the landuse code 1', str(context.exception)) # Re-load the landcover table - landcover_table = utils.read_csv_to_dataframe( - landcover_table_path, 'code').to_dict(orient='index') + landcover_df = utils.read_csv_to_dataframe( + landcover_table_path, + preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path']) preprocessor._create_transition_table( - landcover_table, [filename_a, filename_b], target_table_path) + landcover_df, [filename_a, filename_b], target_table_path) with open(target_table_path) as transition_table: self.assertEqual( @@ -235,46 +237,13 @@ def tearDown(self): """Remove workspace after each test function.""" shutil.rmtree(self.workspace_dir) - def test_extract_snapshots(self): - """CBC: Extract snapshots from a snapshot CSV.""" - from natcap.invest.coastal_blue_carbon import coastal_blue_carbon - csv_path = os.path.join(self.workspace_dir, 'snapshots.csv') - - transition_years = (2000, 2010, 2020) - transition_rasters = [] - with open(csv_path, 'w') as transitions_csv: - # Check that we can interpret varying case. - transitions_csv.write('snapshot_YEAR,raster_PATH\n') - for transition_year in transition_years: - # Write absolute paths. - transition_file_path = os.path.join( - self.workspace_dir, f'{transition_year}.tif)') - transition_rasters.append(transition_file_path) - transitions_csv.write( - f'{transition_year},{transition_file_path}\n') - - # Make one path relative to the workspace, where the transitions - # CSV also lives. - # The expected raster path is absolute. 
- transitions_csv.write('2030,some_path.tif\n') - transition_years += (2030,) - transition_rasters.append(os.path.join(self.workspace_dir, - 'some_path.tif')) - - extracted_transitions = ( - coastal_blue_carbon._extract_snapshots_from_table(csv_path)) - - self.assertEqual( - extracted_transitions, - dict(zip(transition_years, transition_rasters))) - def test_read_invalid_transition_matrix(self): """CBC: Test exceptions in invalid transition structure.""" # The full biophysical table will have much, much more information. To # keep the test simple, I'm only tracking the columns I know I'll need # in this function. from natcap.invest.coastal_blue_carbon import coastal_blue_carbon - biophysical_table = { + biophysical_table = pandas.DataFrame({ 1: {'lulc-class': 'a', 'soil-yearly-accumulation': 2, 'biomass-yearly-accumulation': 3, @@ -290,7 +259,7 @@ def test_read_invalid_transition_matrix(self): 'biomass-yearly-accumulation': 11, 'soil-high-impact-disturb': 12, 'biomass-high-impact-disturb': 13} - } + }).T transition_csv_path = os.path.join(self.workspace_dir, 'transitions.csv') @@ -332,7 +301,7 @@ def test_read_transition_matrix(self): # keep the test simple, I'm only tracking the columns I know I'll need # in this function. from natcap.invest.coastal_blue_carbon import coastal_blue_carbon - biophysical_table = { + biophysical_table = pandas.DataFrame({ 1: {'lulc-class': 'a', 'soil-yearly-accumulation': 2, 'biomass-yearly-accumulation': 3, @@ -348,7 +317,7 @@ def test_read_transition_matrix(self): 'biomass-yearly-accumulation': 11, 'soil-high-impact-disturb': 12, 'biomass-high-impact-disturb': 13} - } + }).T transition_csv_path = os.path.join(self.workspace_dir, 'transitions.csv') @@ -366,14 +335,14 @@ def test_read_transition_matrix(self): expected_biomass_disturbance = numpy.zeros((4, 4), dtype=numpy.float32) expected_biomass_disturbance[1, 3] = ( - biophysical_table[1]['biomass-high-impact-disturb']) + biophysical_table['biomass-high-impact-disturb'][1]) numpy.testing.assert_allclose( expected_biomass_disturbance, disturbance_matrices['biomass'].toarray()) expected_soil_disturbance = numpy.zeros((4, 4), dtype=numpy.float32) expected_soil_disturbance[1, 3] = ( - biophysical_table[1]['soil-high-impact-disturb']) + biophysical_table['soil-high-impact-disturb'][1]) numpy.testing.assert_allclose( expected_soil_disturbance, disturbance_matrices['soil'].toarray()) @@ -381,22 +350,22 @@ def test_read_transition_matrix(self): expected_biomass_accumulation = numpy.zeros( (4, 4), dtype=numpy.float32) expected_biomass_accumulation[3, 1] = ( - biophysical_table[1]['biomass-yearly-accumulation']) + biophysical_table['biomass-yearly-accumulation'][1]) expected_biomass_accumulation[1, 2] = ( - biophysical_table[2]['biomass-yearly-accumulation']) + biophysical_table['biomass-yearly-accumulation'][2]) expected_biomass_accumulation[2, 3] = ( - biophysical_table[3]['biomass-yearly-accumulation']) + biophysical_table['biomass-yearly-accumulation'][3]) numpy.testing.assert_allclose( expected_biomass_accumulation, accumulation_matrices['biomass'].toarray()) expected_soil_accumulation = numpy.zeros((4, 4), dtype=numpy.float32) expected_soil_accumulation[3, 1] = ( - biophysical_table[1]['soil-yearly-accumulation']) + biophysical_table['soil-yearly-accumulation'][1]) expected_soil_accumulation[1, 2] = ( - biophysical_table[2]['soil-yearly-accumulation']) + biophysical_table['soil-yearly-accumulation'][2]) expected_soil_accumulation[2, 3] = ( - biophysical_table[3]['soil-yearly-accumulation']) + 
biophysical_table['soil-yearly-accumulation'][3]) numpy.testing.assert_allclose( expected_soil_accumulation, accumulation_matrices['soil'].toarray()) @@ -649,8 +618,10 @@ def test_model_one_transition_no_analysis_year(self): args = TestCBC2._create_model_args(self.workspace_dir) args['workspace_dir'] = os.path.join(self.workspace_dir, 'workspace') - prior_snapshots = coastal_blue_carbon._extract_snapshots_from_table( - args['landcover_snapshot_csv']) + prior_snapshots = utils.read_csv_to_dataframe( + args['landcover_snapshot_csv'], + coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv'] + )['raster_path'].to_dict() baseline_year = min(prior_snapshots.keys()) baseline_raster = prior_snapshots[baseline_year] with open(args['landcover_snapshot_csv'], 'w') as snapshot_csv: @@ -825,8 +796,10 @@ def test_model_no_transitions(self): args = TestCBC2._create_model_args(self.workspace_dir) args['workspace_dir'] = os.path.join(self.workspace_dir, 'workspace') - prior_snapshots = coastal_blue_carbon._extract_snapshots_from_table( - args['landcover_snapshot_csv']) + prior_snapshots = utils.read_csv_to_dataframe( + args['landcover_snapshot_csv'], + coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv'] + )['raster_path'].to_dict() baseline_year = min(prior_snapshots.keys()) baseline_raster = prior_snapshots[baseline_year] with open(args['landcover_snapshot_csv'], 'w') as snapshot_csv: @@ -889,8 +862,10 @@ def test_validation(self): raster.write('not a raster') # Write over the landcover snapshot CSV - prior_snapshots = coastal_blue_carbon._extract_snapshots_from_table( - args['landcover_snapshot_csv']) + prior_snapshots = utils.read_csv_to_dataframe( + args['landcover_snapshot_csv'], + coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv'] + )['raster_path'].to_dict() baseline_year = min(prior_snapshots) with open(args['landcover_snapshot_csv'], 'w') as snapshot_table: snapshot_table.write('snapshot_year,raster_path\n') @@ -902,8 +877,18 @@ def test_validation(self): # analysis year must be >= the last transition year. 
args['analysis_year'] = baseline_year + # Write invalid entries to landcover transition table + with open(args['landcover_transitions_table'], 'w') as transition_table: + transition_table.write('lulc-class,Developed,Forest,Water\n') + transition_table.write('Developed,NCC,,invalid\n') + transition_table.write('Forest,accum,disturb,low-impact-disturb\n') + transition_table.write('Water,disturb,med-impact-disturb,high-impact-disturb\n') + transition_options = [ + 'accum', 'high-impact-disturb', 'med-impact-disturb', + 'low-impact-disturb', 'ncc'] + validation_warnings = coastal_blue_carbon.validate(args) - self.assertEqual(len(validation_warnings), 2) + self.assertEqual(len(validation_warnings), 3) self.assertIn( coastal_blue_carbon.INVALID_SNAPSHOT_RASTER_MSG.format( snapshot_year=baseline_year + 10), @@ -912,6 +897,11 @@ def test_validation(self): coastal_blue_carbon.INVALID_ANALYSIS_YEAR_MSG.format( analysis_year=2000, latest_year=2010), validation_warnings[1][1]) + self.assertIn( + coastal_blue_carbon.INVALID_TRANSITION_VALUES_MSG.format( + model_transitions=transition_options, + transition_values=['disturb', 'invalid']), + validation_warnings[2][1]) def test_track_first_disturbance(self): """CBC: Track disturbances over time.""" diff --git a/tests/test_crop_production.py b/tests/test_crop_production.py index 0817de02ed..e754dd1b0f 100644 --- a/tests/test_crop_production.py +++ b/tests/test_crop_production.py @@ -63,14 +63,11 @@ def test_crop_production_percentile(self): pandas.testing.assert_frame_equal( expected_agg_result_table, agg_result_table, check_dtype=False) - result_table_path = os.path.join( - args['workspace_dir'], 'result_table.csv') - expected_result_table_path = os.path.join( - TEST_DATA_PATH, 'expected_result_table.csv') expected_result_table = pandas.read_csv( - expected_result_table_path) + os.path.join(TEST_DATA_PATH, 'expected_result_table.csv') + ) result_table = pandas.read_csv( - result_table_path) + os.path.join(args['workspace_dir'], 'result_table.csv')) pandas.testing.assert_frame_equal( expected_result_table, result_table, check_dtype=False) @@ -314,14 +311,10 @@ def test_crop_production_regression(self): crop_production_regression.execute(args) - agg_result_table_path = os.path.join( - args['workspace_dir'], 'aggregate_results.csv') - expected_agg_result_table_path = os.path.join( - TEST_DATA_PATH, 'expected_regression_aggregate_results.csv') expected_agg_result_table = pandas.read_csv( - expected_agg_result_table_path) + os.path.join(TEST_DATA_PATH, 'expected_regression_aggregate_results.csv')) agg_result_table = pandas.read_csv( - agg_result_table_path) + os.path.join(args['workspace_dir'], 'aggregate_results.csv')) pandas.testing.assert_frame_equal( expected_agg_result_table, agg_result_table, check_dtype=False) @@ -387,14 +380,10 @@ def test_crop_production_regression_no_nodata(self): crop_production_regression.execute(args) - result_table_path = os.path.join( - args['workspace_dir'], 'result_table.csv') - expected_result_table_path = os.path.join( - TEST_DATA_PATH, 'expected_regression_result_table_no_nodata.csv') - expected_result_table = pandas.read_csv( - expected_result_table_path) + expected_result_table = pandas.read_csv(os.path.join( + TEST_DATA_PATH, 'expected_regression_result_table_no_nodata.csv')) result_table = pandas.read_csv( - result_table_path) + os.path.join(args['workspace_dir'], 'result_table.csv')) pandas.testing.assert_frame_equal( expected_result_table, result_table, check_dtype=False) diff --git a/tests/test_datastack.py 
b/tests/test_datastack.py index dbf30ec566..21ce2f6841 100644 --- a/tests/test_datastack.py +++ b/tests/test_datastack.py @@ -378,8 +378,14 @@ def test_archive_extraction(self): filecmp.cmp(archive_params[key], params[key], shallow=False)) spatial_csv_dict = utils.read_csv_to_dataframe( - archive_params['spatial_table'], 'ID', - convert_cols_to_lower=True, convert_vals_to_lower=True).to_dict(orient='index') + archive_params['spatial_table'], + { + 'index_col': 'id', + 'columns': { + 'id': {'type': 'integer'}, + 'path': {'type': 'file'} + } + }).to_dict(orient='index') spatial_csv_dir = os.path.dirname(archive_params['spatial_table']) numpy.testing.assert_allclose( pygeoprocessing.raster_to_numpy_array( diff --git a/tests/test_habitat_quality.py b/tests/test_habitat_quality.py index 2c4d9a4d6d..fc9ceab06d 100644 --- a/tests/test_habitat_quality.py +++ b/tests/test_habitat_quality.py @@ -1934,7 +1934,7 @@ def test_habitat_quality_argspec_spatial_overlap(self): self.assertTrue( validate_result, "expected failed validations instead didn't get any.") - self.assertIn("Bounding boxes do not intersect", validate_result[0][1]) + self.assertIn("bounding boxes must intersect", validate_result[0][1]) def test_habitat_quality_argspec_missing_projection(self): """Habitat Quality: raise error on missing projection.""" diff --git a/tests/test_hra.py b/tests/test_hra.py index 07b6b15c6c..0999810ea4 100644 --- a/tests/test_hra.py +++ b/tests/test_hra.py @@ -514,40 +514,6 @@ def test_polygonize_mask(self): (source_array != nodata).astype(numpy.uint8) ) - def test_create_raster_from_bounding_box(self): - """HRA: test creation of a raster from a bbox.""" - from natcap.invest import hra - - # [minx, miny, maxx, maxy] - bounding_box = [ - ORIGIN[0], - ORIGIN[1] - 100, # force rounding up of pixel dimensions - ORIGIN[0] + 90, # no rounding up needed - ORIGIN[1], - ] - pixel_size = (30, -30) - target_raster_path = os.path.join(self.workspace_dir, 'raster.tif') - hra._create_raster_from_bounding_box( - target_raster_path, bounding_box, pixel_size, gdal.GDT_Byte, - SRS_WKT, target_nodata=2, fill_value=2) - - try: - raster = gdal.OpenEx(target_raster_path) - band = raster.GetRasterBand(1) - self.assertEqual( - raster.GetGeoTransform(), - (ORIGIN[0], pixel_size[0], 0.0, ORIGIN[1], 0.0, pixel_size[1]) - ) - self.assertEqual(raster.RasterXSize, 3) - self.assertEqual(raster.RasterYSize, 4) - self.assertEqual(band.GetNoDataValue(), 2) - numpy.testing.assert_array_equal( - band.ReadAsArray(), - numpy.full((4, 3), 2, dtype=numpy.uint8)) - finally: - band = None - raster = None - def test_align(self): """HRA: test alignment function.""" from natcap.invest import hra @@ -749,21 +715,22 @@ def test_table_format_loading(self): # No matter the supported file format, make sure we have consistent # table headings. - source_df = pandas.read_csv(io.StringIO(textwrap.dedent("""\ - FOO,bar,BaZ,path - 1, 2, 3,foo.tif"""))) + source_df = pandas.DataFrame({ + 'name': pandas.Series(['1'], dtype='string'), + 'type': pandas.Series(['2'], dtype='string'), + 'stressor buffer (meters)': pandas.Series([3], dtype=float), + 'path': pandas.Series(['foo.tif'], dtype='string') + }) - expected_df = source_df.copy() # defaults to a deepcopy. - expected_df.columns = expected_df.columns.str.lower() - expected_df['path'] = [os.path.join(self.workspace_dir, 'foo.tif')] + expected_df = source_df.copy().set_index('name') # defaults to a deepcopy. 
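A brief aside on the assignment that immediately follows: chained indexing of the form df['col']['row'] = value can trigger pandas' SettingWithCopyWarning and will stop working once copy-on-write becomes the pandas default. A hedged alternative spelling, shown on a toy frame rather than as a proposed change to this test:

    import pandas

    df = pandas.DataFrame({'path': ['foo.tif']}, index=['1'])
    # .loc writes in place unambiguously, unlike chained df['path']['1'] = ...
    df.loc['1', 'path'] = '/tmp/foo.tif'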
+ expected_df['path']['1'] = os.path.join(self.workspace_dir, 'foo.tif') for filename, func in [('target.csv', source_df.to_csv), ('target.xlsx', source_df.to_excel)]: full_filepath = os.path.join(self.workspace_dir, filename) func(full_filepath, index=False) - opened_df = hra._open_table_as_dataframe(full_filepath) - pandas.testing.assert_frame_equal(expected_df, opened_df) + pandas.testing.assert_frame_equal(expected_df, opened_df, check_index_type=False) def test_pairwise_risk(self): """HRA: check pairwise risk calculations.""" diff --git a/tests/test_model_specs.py b/tests/test_model_specs.py index 443e8a82fc..59682629a3 100644 --- a/tests/test_model_specs.py +++ b/tests/test_model_specs.py @@ -173,7 +173,11 @@ def validate_output(self, spec, key, parent_type=None): spec['columns'][column], f'{key}.columns.{column}', parent_type=t) + if 'index_col' in spec: + self.assertIn(spec['index_col'], spec['columns']) + attrs.discard('columns') + attrs.discard('index_col') elif t == 'directory': # directory type should have a contents property that maps each @@ -249,6 +253,7 @@ def validate_args(self, arg, name, parent_type=None): types = arg['type'] if isinstance( arg['type'], set) else [arg['type']] attrs = set(arg.keys()) + for t in types: self.assertIn(t, valid_nested_types[parent_type]) @@ -395,8 +400,12 @@ def validate_args(self, arg, name, parent_type=None): f'{name}.{direction}.{header}', parent_type=t) - attrs.discard('rows') - attrs.discard('columns') + if 'index_col' in arg: + self.assertIn(arg['index_col'], arg['columns']) + attrs.discard('index_col') + + attrs.discard('rows') + attrs.discard('columns') # csv type may optionally have an 'excel_ok' attribute if 'excel_ok' in arg: diff --git a/tests/test_ndr.py b/tests/test_ndr.py index a30498eeaa..6bc4792913 100644 --- a/tests/test_ndr.py +++ b/tests/test_ndr.py @@ -107,7 +107,6 @@ def test_missing_headers(self): # use predefined directory so test can clean up files during teardown args = NDRTests.generate_base_args(self.workspace_dir) - # make args explicit that this is a base run of SWY args['biophysical_table_path'] = os.path.join( REGRESSION_DATA, 'input', 'biophysical_table_missing_headers.csv') with self.assertRaises(ValueError): @@ -171,7 +170,6 @@ def test_missing_lucode(self): # use predefined directory so test can clean up files during teardown args = NDRTests.generate_base_args(self.workspace_dir) - # make args explicit that this is a base run of SWY args['biophysical_table_path'] = os.path.join( REGRESSION_DATA, 'input', 'biophysical_table_missing_lucode.csv') with self.assertRaises(KeyError) as cm: @@ -187,7 +185,6 @@ def test_no_nutrient_selected(self): # use predefined directory so test can clean up files during teardown args = NDRTests.generate_base_args(self.workspace_dir) - # make args explicit that this is a base run of SWY args['calc_n'] = False args['calc_p'] = False with self.assertRaises(ValueError): @@ -210,8 +207,6 @@ def test_base_regression(self): os.path.join(self.workspace_dir, 'watershed_results_ndr.gpkg'), 'wb') as f: f.write(b'') - - # make args explicit that this is a base run of SWY ndr.execute(args) result_vector = ogr.Open(os.path.join( @@ -248,6 +243,53 @@ def test_base_regression(self): args['workspace_dir'], 'intermediate_outputs', 'what_drains_to_stream.tif'))) + def test_regression_undefined_nodata(self): + """NDR test when DEM, LULC and runoff proxy have undefined nodata.""" + from natcap.invest.ndr import ndr + + # use predefined directory so test can clean up files during teardown + args = 
NDRTests.generate_base_args(self.workspace_dir) + + # unset nodata values for DEM, LULC, and runoff proxy + # this is ok because the test data is 100% valid + # regression test for https://github.com/natcap/invest/issues/1005 + for key in ['runoff_proxy_path', 'dem_path', 'lulc_path']: + target_path = os.path.join(self.workspace_dir, f'{key}_no_nodata.tif') + source = gdal.OpenEx(args[key], gdal.OF_RASTER) + driver = gdal.GetDriverByName('GTIFF') + target = driver.CreateCopy(target_path, source) + target.GetRasterBand(1).DeleteNoDataValue() + source, target = None, None + args[key] = target_path + + ndr.execute(args) + + result_vector = ogr.Open(os.path.join( + args['workspace_dir'], 'watershed_results_ndr.gpkg')) + result_layer = result_vector.GetLayer() + result_feature = result_layer.GetFeature(1) + result_layer = None + result_vector = None + mismatch_list = [] + # these values were generated by manual inspection of regression + # results + for field, expected_value in [ + ('p_surface_load', 41.921860), + ('p_surface_export', 5.899117), + ('n_surface_load', 2978.519775), + ('n_surface_export', 289.0498), + ('n_subsurface_load', 28.614094), + ('n_subsurface_export', 15.61077), + ('n_total_export', 304.660614)]: + val = result_feature.GetField(field) + if not numpy.isclose(val, expected_value): + mismatch_list.append( + (field, 'expected: %f' % expected_value, + 'actual: %f' % val)) + result_feature = None + if mismatch_list: + raise RuntimeError("results not expected: %s" % mismatch_list) + def test_validation(self): """NDR test argument validation.""" from natcap.invest import validation diff --git a/tests/test_recreation.py b/tests/test_recreation.py index 1d6d62e486..2878587f4f 100644 --- a/tests/test_recreation.py +++ b/tests/test_recreation.py @@ -581,14 +581,14 @@ def test_all_metrics_local_server(self): expected_grid_vector_path = os.path.join( REGRESSION_DATA, 'predictor_data_all_metrics.shp') utils._assert_vectors_equal( - out_grid_vector_path, expected_grid_vector_path, 1e-3) + expected_grid_vector_path, out_grid_vector_path, 1e-3) out_scenario_path = os.path.join( args['workspace_dir'], 'scenario_results.shp') expected_scenario_path = os.path.join( REGRESSION_DATA, 'scenario_results_all_metrics.shp') utils._assert_vectors_equal( - out_scenario_path, expected_scenario_path, 1e-3) + expected_scenario_path, out_scenario_path, 1e-3) def test_results_suffix_on_serverside_files(self): """Recreation test suffix gets added to files created on server.""" @@ -924,7 +924,7 @@ def test_square_grid(self): REGRESSION_DATA, 'square_grid_vector_path.shp') utils._assert_vectors_equal( - out_grid_vector_path, expected_grid_vector_path) + expected_grid_vector_path, out_grid_vector_path) def test_hex_grid(self): """Recreation hex grid regression test.""" @@ -941,7 +941,7 @@ def test_hex_grid(self): REGRESSION_DATA, 'hex_grid_vector_path.shp') utils._assert_vectors_equal( - out_grid_vector_path, expected_grid_vector_path) + expected_grid_vector_path, out_grid_vector_path) @unittest.skip("skipping to avoid remote server call (issue #3753)") def test_no_grid_execute(self): @@ -1007,7 +1007,7 @@ def test_existing_output_shapefiles(self): REGRESSION_DATA, 'hex_grid_vector_path.shp') utils._assert_vectors_equal( - out_grid_vector_path, expected_grid_vector_path) + expected_grid_vector_path, out_grid_vector_path) def test_existing_regression_coef(self): """Recreation test regression coefficients handle existing output.""" @@ -1030,8 +1030,9 @@ def test_existing_regression_coef(self): # make outputs to 
be overwritten predictor_dict = utils.read_csv_to_dataframe( - predictor_table_path, 'id', - convert_cols_to_lower=True, convert_vals_to_lower=True).to_dict(orient='index') + predictor_table_path, + recmodel_client.MODEL_SPEC['args']['predictor_table_path'] + ).to_dict(orient='index') predictor_list = predictor_dict.keys() tmp_working_dir = tempfile.mkdtemp(dir=self.workspace_dir) empty_json_list = [ @@ -1057,7 +1058,7 @@ def test_existing_regression_coef(self): REGRESSION_DATA, 'test_regression_coefficients.shp') utils._assert_vectors_equal( - out_coefficient_vector_path, expected_coeff_vector_path, 1e-6) + expected_coeff_vector_path, out_coefficient_vector_path, 1e-6) def test_predictor_table_absolute_paths(self): """Recreation test validation from full path.""" diff --git a/tests/test_scenic_quality.py b/tests/test_scenic_quality.py index 22cc270adf..dd2b912e0f 100644 --- a/tests/test_scenic_quality.py +++ b/tests/test_scenic_quality.py @@ -126,7 +126,8 @@ def test_exception_when_invalid_geometry_type(self): with self.assertRaises(AssertionError) as cm: scenic_quality._determine_valid_viewpoints( dem_path, viewpoints_path) - self.assertIn('Feature 1 is not a Point geometry', str(cm.exception)) + self.assertIn('Feature 1 must be a POINT geometry, not LINESTRING', + str(cm.exception)) def test_exception_when_no_structures_aoi_overlap(self): """SQ: model raises exception when AOI does not overlap structures.""" diff --git a/tests/test_sdr.py b/tests/test_sdr.py index 1c710c3225..bbbec57290 100644 --- a/tests/test_sdr.py +++ b/tests/test_sdr.py @@ -141,11 +141,11 @@ def test_base_regression(self): sdr.execute(args) expected_results = { - 'usle_tot': 13.90210914612, - 'sed_export': 0.55185163021, - 'sed_dep': 8.80130577087, - 'avoid_exp': 57971.87890625, - 'avoid_eros': 1458232.5, + 'usle_tot': 2.62457418442, + 'sed_export': 0.09748090804, + 'sed_dep': 1.71672844887, + 'avoid_exp': 10199.7490234375, + 'avoid_eros': 274510.75, } vector_path = os.path.join( @@ -213,10 +213,10 @@ def test_regression_with_undefined_nodata(self): sdr.execute(args) expected_results = { - 'sed_export': 0.55185163021, - 'usle_tot': 13.90210914612, - 'avoid_exp': 57971.87890625, - 'avoid_eros': 1458232.5, + 'sed_export': 0.09748090804, + 'usle_tot': 2.62457418442, + 'avoid_exp': 10199.7490234375, + 'avoid_eros': 274510.75, } vector_path = os.path.join( @@ -238,10 +238,10 @@ def test_non_square_dem(self): sdr.execute(args) expected_results = { - 'sed_export': 0.67064666748, - 'usle_tot': 12.6965303421, - 'avoid_exp': 69130.8203125, - 'avoid_eros': 1317588.375, + 'sed_export': 0.08896198869, + 'usle_tot': 1.86480903625, + 'avoid_exp': 9204.283203125, + 'avoid_eros': 194613.28125, } vector_path = os.path.join( @@ -264,10 +264,10 @@ def test_drainage_regression(self): sdr.execute(args) expected_results = { - 'sed_export': 0.97192692757, - 'usle_tot': 12.68887424469, - 'avoid_exp': 100960.9609375, - 'avoid_eros': 1329122.0, + 'sed_export': 0.17336219549, + 'usle_tot': 2.56186032295, + 'avoid_exp': 17980.52734375, + 'avoid_eros': 267931.71875, } vector_path = os.path.join( @@ -303,8 +303,7 @@ def test_base_usle_p_nan(self): with self.assertRaises(ValueError) as context: sdr.execute(args) self.assertIn( - f'A value in the biophysical table is not a number ' - f'within range 0..1.', str(context.exception)) + f'could not be interpreted as ratios', str(context.exception)) def test_lucode_not_a_number(self): """SDR test expected exception for invalid data in lucode column.""" @@ -325,8 +324,7 @@ def 
test_lucode_not_a_number(self): with self.assertRaises(ValueError) as context: sdr.execute(args) self.assertIn( - f'Value "{invalid_value}" from the "lucode" column of the ' - f'biophysical table is not a number.', str(context.exception)) + 'could not be interpreted as integers', str(context.exception)) def test_missing_lulc_value(self): """SDR test for ValueError when LULC value not found in table.""" @@ -393,3 +391,43 @@ def test_what_drains_to_stream(self): what_drains = pygeoprocessing.raster_to_numpy_array( target_what_drains_path) numpy.testing.assert_allclose(what_drains, expected_drainage) + + def test_ls_factor(self): + """SDR test for our LS Factor function.""" + from natcap.invest.sdr import sdr + + nodata = -1 + + # These varying percent slope values should cover all of the slope + # factor and slope table cases. + pct_slope_array = numpy.array( + [[1.5, 4, 8, 10, 15, nodata]], dtype=numpy.float32) + flow_accum_array = numpy.array( + [[100, 100, 100, 100, 10000000, nodata]], dtype=numpy.float32) + l_max = 25 # affects the last item in the array only + + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) # NAD83 / UTM zone 11N + srs_wkt = srs.ExportToWkt() + origin = (463250, 4929700) + pixel_size = (30, -30) + + pct_slope_path = os.path.join(self.workspace_dir, 'pct_slope.tif') + pygeoprocessing.numpy_array_to_raster( + pct_slope_array, nodata, pixel_size, origin, srs_wkt, + pct_slope_path) + + flow_accum_path = os.path.join(self.workspace_dir, 'flow_accum.tif') + pygeoprocessing.numpy_array_to_raster( + flow_accum_array, nodata, pixel_size, origin, srs_wkt, + flow_accum_path) + + target_ls_factor_path = os.path.join(self.workspace_dir, 'ls.tif') + sdr._calculate_ls_factor(flow_accum_path, pct_slope_path, l_max, + target_ls_factor_path) + + ls = pygeoprocessing.raster_to_numpy_array(target_ls_factor_path) + expected_ls = numpy.array( + [[0.253996, 0.657229, 1.345856, 1.776729, 49.802994, -1]], + dtype=numpy.float32) + numpy.testing.assert_allclose(ls, expected_ls, rtol=1e-6) diff --git a/tests/test_seasonal_water_yield_regression.py b/tests/test_seasonal_water_yield_regression.py index a967463f9b..e7cb6ae4cd 100644 --- a/tests/test_seasonal_water_yield_regression.py +++ b/tests/test_seasonal_water_yield_regression.py @@ -737,8 +737,8 @@ def test_bad_biophysical_table(self): with self.assertRaises(ValueError) as context: seasonal_water_yield.execute(args) - self.assertTrue( - 'expecting all floating point numbers' in str(context.exception)) + self.assertIn( + 'could not be interpreted as numbers', str(context.exception)) def test_monthly_alpha_regression(self): """SWY monthly alpha values regression test on sample data. 
@@ -974,12 +974,6 @@ def test_monthly_quickflow_undefined_nodata(self): precip_array = numpy.array([ [10, 10], [10, 10]], dtype=numpy.float32) - lulc_array = numpy.array([ - [1, 1], - [2, 2]], dtype=numpy.float32) - cn_array = numpy.array([ - [40, 40], - [80, 80]], dtype=numpy.float32) si_array = numpy.array([ [15, 15], [2.5, 2.5]], dtype=numpy.float32) @@ -990,13 +984,12 @@ def test_monthly_quickflow_undefined_nodata(self): [0, 0], [0, 0]], dtype=numpy.float32) + # results calculated by wolfram alpha expected_quickflow_array = numpy.array([ - [-4.82284552e-36, -4.82284552e-36], - [ 6.19275831e-01, 6.19275831e-01]]) + [0, 0], + [0.61928378, 0.61928378]]) precip_path = os.path.join(self.workspace_dir, 'precip.tif') - lulc_path = os.path.join(self.workspace_dir, 'lulc.tif') - cn_path = os.path.join(self.workspace_dir, 'cn.tif') si_path = os.path.join(self.workspace_dir, 'si.tif') n_events_path = os.path.join(self.workspace_dir, 'n_events.tif') stream_path = os.path.join(self.workspace_dir, 'stream.tif') @@ -1008,13 +1001,11 @@ def test_monthly_quickflow_undefined_nodata(self): # write all the test arrays to raster files for array, path in [(precip_array, precip_path), - (lulc_array, lulc_path), (n_events_array, n_events_path)]: # make the nodata value undefined for user inputs pygeoprocessing.numpy_array_to_raster( array, None, (1, -1), (1180000, 690000), project_wkt, path) - for array, path in [(cn_array, cn_path), - (si_array, si_path), + for array, path in [(si_array, si_path), (stream_mask, stream_path)]: # define a nodata value for intermediate outputs pygeoprocessing.numpy_array_to_raster( @@ -1022,13 +1013,119 @@ def test_monthly_quickflow_undefined_nodata(self): # save the quickflow results raster to quickflow.tif seasonal_water_yield._calculate_monthly_quick_flow( - precip_path, lulc_path, cn_path, n_events_path, stream_path, - si_path, output_path) + precip_path, n_events_path, stream_path, si_path, output_path) # read the raster output back in to a numpy array quickflow_array = pygeoprocessing.raster_to_numpy_array(output_path) # assert each element is close to the expected value - self.assertTrue(numpy.isclose( - quickflow_array, expected_quickflow_array).all()) + numpy.testing.assert_allclose( + quickflow_array, expected_quickflow_array, atol=1e-5) + + def test_monthly_quickflow_si_zero(self): + """Test `_calculate_monthly_quick_flow` when s_i is zero""" + from natcap.invest.seasonal_water_yield import seasonal_water_yield + + # QF should be equal to P when s_i is 0 + precip_array = numpy.array([[10.5]], dtype=numpy.float32) + si_array = numpy.array([[0]], dtype=numpy.float32) + n_events_array = numpy.array([[10]], dtype=numpy.float32) + stream_mask = numpy.array([[0]], dtype=numpy.float32) + expected_quickflow_array = numpy.array([[10.5]]) + + precip_path = os.path.join(self.workspace_dir, 'precip.tif') + si_path = os.path.join(self.workspace_dir, 'si.tif') + n_events_path = os.path.join(self.workspace_dir, 'n_events.tif') + stream_path = os.path.join(self.workspace_dir, 'stream.tif') + + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) # UTM Zone 10N + project_wkt = srs.ExportToWkt() + output_path = os.path.join(self.workspace_dir, 'quickflow.tif') + + # write all the test arrays to raster files + for array, path in [(precip_array, precip_path), + (n_events_array, n_events_path), + (si_array, si_path), + (stream_mask, stream_path)]: + # define a nodata value for intermediate outputs + pygeoprocessing.numpy_array_to_raster( + array, -1, (1, -1), (1180000, 690000), 
project_wkt, path) + seasonal_water_yield._calculate_monthly_quick_flow( + precip_path, n_events_path, stream_path, si_path, output_path) + numpy.testing.assert_allclose( + pygeoprocessing.raster_to_numpy_array(output_path), + expected_quickflow_array, atol=1e-5) + + def test_monthly_quickflow_large_si_aim_ratio(self): + """Test `_calculate_monthly_quick_flow` with large s_i/a_im ratio""" + from natcap.invest.seasonal_water_yield import seasonal_water_yield + + # with these values, the QF equation would overflow float32 if + # we didn't catch it early + precip_array = numpy.array([[6]], dtype=numpy.float32) + si_array = numpy.array([[23.33]], dtype=numpy.float32) + n_events_array = numpy.array([[10]], dtype=numpy.float32) + stream_mask = numpy.array([[0]], dtype=numpy.float32) + expected_quickflow_array = numpy.array([[0]]) + + precip_path = os.path.join(self.workspace_dir, 'precip.tif') + si_path = os.path.join(self.workspace_dir, 'si.tif') + n_events_path = os.path.join(self.workspace_dir, 'n_events.tif') + stream_path = os.path.join(self.workspace_dir, 'stream.tif') + + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) # UTM Zone 10N + project_wkt = srs.ExportToWkt() + output_path = os.path.join(self.workspace_dir, 'quickflow.tif') + + # write all the test arrays to raster files + for array, path in [(precip_array, precip_path), + (n_events_array, n_events_path), + (si_array, si_path), + (stream_mask, stream_path)]: + # define a nodata value for intermediate outputs + pygeoprocessing.numpy_array_to_raster( + array, -1, (1, -1), (1180000, 690000), project_wkt, path) + seasonal_water_yield._calculate_monthly_quick_flow( + precip_path, n_events_path, stream_path, si_path, output_path) + numpy.testing.assert_allclose( + pygeoprocessing.raster_to_numpy_array(output_path), + expected_quickflow_array, atol=1e-5) + + def test_monthly_quickflow_negative_values_set_to_zero(self): + """Test `_calculate_monthly_quick_flow` with negative QF result""" + from natcap.invest.seasonal_water_yield import seasonal_water_yield + + # with these values, the QF equation evaluates to a small negative + # number. 
assert that it is set to zero + precip_array = numpy.array([[30]], dtype=numpy.float32) + si_array = numpy.array([[10]], dtype=numpy.float32) + n_events_array = numpy.array([[10]], dtype=numpy.float32) + stream_mask = numpy.array([[0]], dtype=numpy.float32) + expected_quickflow_array = numpy.array([[0]]) + + precip_path = os.path.join(self.workspace_dir, 'precip.tif') + si_path = os.path.join(self.workspace_dir, 'si.tif') + n_events_path = os.path.join(self.workspace_dir, 'n_events.tif') + stream_path = os.path.join(self.workspace_dir, 'stream.tif') + + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) # UTM Zone 10N + project_wkt = srs.ExportToWkt() + output_path = os.path.join(self.workspace_dir, 'quickflow.tif') + + # write all the test arrays to raster files + for array, path in [(precip_array, precip_path), + (n_events_array, n_events_path), + (si_array, si_path), + (stream_mask, stream_path)]: + # define a nodata value for intermediate outputs + pygeoprocessing.numpy_array_to_raster( + array, -1, (1, -1), (1180000, 690000), project_wkt, path) + seasonal_water_yield._calculate_monthly_quick_flow( + precip_path, n_events_path, stream_path, si_path, output_path) + numpy.testing.assert_allclose( + pygeoprocessing.raster_to_numpy_array(output_path), + expected_quickflow_array, atol=1e-5) def test_calculate_annual_qfi_different_nodata_areas(self): """Test with qf rasters with different areas of nodata.""" @@ -1079,8 +1176,8 @@ def test_local_recharge_undefined_nodata(self): [100, 100], [200, 200]], dtype=numpy.float32) quickflow_array = numpy.array([ - [-4.8e-36, -4.822e-36], - [ 6.1e-01, 6.1e-01]], dtype=numpy.float32) + [0, 0], + [0.61, 0.61]], dtype=numpy.float32) flow_dir_array = numpy.array([ [15, 25], [50, 50]], dtype=numpy.float32) diff --git a/tests/test_ufrm.py b/tests/test_ufrm.py index 243eee3e6e..9369d2843a 100644 --- a/tests/test_ufrm.py +++ b/tests/test_ufrm.py @@ -360,3 +360,17 @@ def test_validate(self): [(['curve_number_table_path'], validation.MESSAGES['MATCHED_NO_HEADERS'].format( header='column', header_name='cn_a'))]) + + # test missing CN_X values raise warnings + args = self._make_args() + cn_table = pandas.read_csv(args['curve_number_table_path']) + cn_table.at[0, 'CN_A'] = numpy.nan + new_cn_path = os.path.join( + self.workspace_dir, 'cn_missing_value_table.csv') + cn_table.to_csv(new_cn_path, index=False) + args['curve_number_table_path'] = new_cn_path + result = urban_flood_risk_mitigation.validate(args) + self.assertEqual( + result, + [(['curve_number_table_path'], + 'Missing curve numbers for lucode(s) [0]')]) diff --git a/tests/test_urban_nature_access.py b/tests/test_urban_nature_access.py index f09fcb5c4b..1a857f6c06 100644 --- a/tests/test_urban_nature_access.py +++ b/tests/test_urban_nature_access.py @@ -85,7 +85,8 @@ def _build_model_args(workspace): 6,0,100 7,1,100 8,0,100 - 9,1,100""")) + 9,1,100 + """)) admin_geom = [ shapely.geometry.box( @@ -342,7 +343,7 @@ def test_urban_nature_balance(self): from natcap.invest import urban_nature_access nodata = urban_nature_access.FLOAT32_NODATA - urban_nature_supply = numpy.array([ + urban_nature_supply_percapita = numpy.array([ [nodata, 100.5], [75, 100]], dtype=numpy.float32) urban_nature_demand = 50 @@ -353,7 +354,7 @@ def test_urban_nature_balance(self): urban_nature_budget = ( urban_nature_access._urban_nature_balance_percapita_op( - urban_nature_supply, urban_nature_demand)) + urban_nature_supply_percapita, urban_nature_demand)) expected_urban_nature_budget = numpy.array([ [nodata, 50.5], [25, 50]], 
dtype=numpy.float32) @@ -480,6 +481,16 @@ def test_core_model(self): admin_vector = None admin_layer = None + accessible_urban_nature_array = pygeoprocessing.raster_to_numpy_array( + os.path.join(args['workspace_dir'], 'output', + 'accessible_urban_nature_suffix.tif')) + valid_mask = ~utils.array_equals_nodata( + accessible_urban_nature_array, urban_nature_access.FLOAT32_NODATA) + valid_pixels = accessible_urban_nature_array[valid_mask] + self.assertAlmostEqual(numpy.sum(valid_pixels), 6221004.41259766) + self.assertAlmostEqual(numpy.min(valid_pixels), 1171.7352294921875) + self.assertAlmostEqual(numpy.max(valid_pixels), 11898.0712890625) + def test_split_urban_nature(self): from natcap.invest import urban_nature_access @@ -532,6 +543,23 @@ def test_split_urban_nature(self): admin_vector = None admin_layer = None + output_dir = os.path.join(args['workspace_dir'], 'output') + self._assert_urban_nature(os.path.join( + output_dir, 'accessible_urban_nature_lucode_1_suffix.tif'), + 72000.0, 0.0, 900.0) + self._assert_urban_nature(os.path.join( + output_dir, 'accessible_urban_nature_lucode_3_suffix.tif'), + 1034934.9864730835, 0.0, 4431.1650390625) + self._assert_urban_nature(os.path.join( + output_dir, 'accessible_urban_nature_lucode_5_suffix.tif'), + 2837622.9519348145, 0.0, 8136.6884765625) + self._assert_urban_nature(os.path.join( + output_dir, 'accessible_urban_nature_lucode_7_suffix.tif'), + 8112734.805541992, 2019.2935791015625, 17729.431640625) + self._assert_urban_nature(os.path.join( + output_dir, 'accessible_urban_nature_lucode_9_suffix.tif'), + 7744116.974121094, 1567.57958984375, 12863.4619140625) + def test_split_population(self): """UNA: test split population optional module. @@ -602,6 +630,36 @@ def _read_and_sum_raster(path): rtol=1e-6 ) + def _assert_urban_nature(self, path, sum_value, min_value, max_value): + """Compare a raster's sum, min and max to given values. + + The raster is assumed to be an accessible urban nature raster. + + Args: + path (str): The path to an urban nature raster. + sum_value (float): The expected sum of the raster. + min_value (float): The expected min of the raster. + max_value (float): The expected max of the raster. + + Returns: + ``None`` + + Raises: + AssertionError: When the raster's sum, min or max values are not + numerically close to the expected values. 
+ """ + from natcap.invest import urban_nature_access + + accessible_urban_nature_array = ( + pygeoprocessing.raster_to_numpy_array(path)) + valid_mask = ~utils.array_equals_nodata( + accessible_urban_nature_array, + urban_nature_access.FLOAT32_NODATA) + valid_pixels = accessible_urban_nature_array[valid_mask] + self.assertAlmostEqual(numpy.sum(valid_pixels), sum_value) + self.assertAlmostEqual(numpy.min(valid_pixels), min_value) + self.assertAlmostEqual(numpy.max(valid_pixels), max_value) + def test_radii_by_pop_group(self): """UNA: Test defining radii by population group.""" from natcap.invest import urban_nature_access @@ -666,11 +724,19 @@ def test_radii_by_pop_group(self): self.assertAlmostEqual( expected_value, summary_feature.GetField(fieldname)) + output_dir = os.path.join(args['workspace_dir'], 'output') + self._assert_urban_nature(os.path.join( + output_dir, 'accessible_urban_nature_to_pop_male.tif'), + 6221004.412597656, 1171.7352294921875, 11898.0712890625) + self._assert_urban_nature(os.path.join( + output_dir, 'accessible_urban_nature_to_pop_female.tif'), + 6221004.412597656, 1171.7352294921875, 11898.0712890625) + def test_modes_same_radii_same_results(self): """UNA: all modes have same results when consistent radii. Although the different modes have different ways of defining their - search radii, the urban_nature_supply raster should be numerically + search radii, the urban_nature_supply_percapita raster should be numerically equivalent if they all use the same search radii. This is a good gut-check of basic model behavior across modes. @@ -772,16 +838,19 @@ def test_modes_same_radii_same_results(self): uniform_radius_supply = pygeoprocessing.raster_to_numpy_array( os.path.join(uniform_args['workspace_dir'], 'output', - 'urban_nature_supply_uniform.tif')) - split_urban_nature_supply = pygeoprocessing.raster_to_numpy_array( - os.path.join(split_urban_nature_args['workspace_dir'], 'output', - 'urban_nature_supply_urban_nature.tif')) + 'urban_nature_supply_percapita_uniform.tif')) + split_urban_nature_supply_percapita = ( + pygeoprocessing.raster_to_numpy_array( + os.path.join( + split_urban_nature_args['workspace_dir'], 'output', + 'urban_nature_supply_percapita_urban_nature.tif'))) split_pop_groups_supply = pygeoprocessing.raster_to_numpy_array( os.path.join(pop_group_args['workspace_dir'], 'output', - 'urban_nature_supply_popgroup.tif')) + 'urban_nature_supply_percapita_popgroup.tif')) numpy.testing.assert_allclose( - uniform_radius_supply, split_urban_nature_supply, rtol=1e-6) + uniform_radius_supply, split_urban_nature_supply_percapita, + rtol=1e-6) numpy.testing.assert_allclose( uniform_radius_supply, split_pop_groups_supply, rtol=1e-6) @@ -893,9 +962,76 @@ def test_write_vector(self): # TODO pass + def test_urban_nature_proportion(self): + """UNA: Run the model with urban nature proportion.""" + from natcap.invest import urban_nature_access + + args = _build_model_args(self.workspace_dir) + args['search_radius_mode'] = urban_nature_access.RADIUS_OPT_UNIFORM + args['search_radius'] = 1000 + with open(args['lulc_attribute_table'], 'a') as attr_table: + attr_table.write("10,0.5,100\n") + + # make sure our inputs validate + validation_results = urban_nature_access.validate(args) + self.assertEqual(validation_results, []) + + urban_nature_access.execute(args) + + def test_reclassify_urban_nature(self): + """UNA: Test for urban nature area reclassification.""" + from natcap.invest import urban_nature_access + args = _build_model_args(self.workspace_dir) + + # Rewrite the 
lulc attribute table to use proportions of urban nature. + with open(args['lulc_attribute_table'], 'w') as attr_table: + attr_table.write(textwrap.dedent( + """\ + lucode,urban_nature,search_radius_m + 0,0,100 + 1,0.1,100 + 2,0,100 + 3,0.3,100 + 4,0,100 + 5,0.5,100 + 6,0,100 + 7,0.7,100 + 8,0,100 + 9,0.9,100 + """)) + + urban_nature_area_path = os.path.join( + self.workspace_dir, 'urban_nature_area.tif') + + for limit_to_lucodes in (None, set([1, 3])): + urban_nature_access._reclassify_urban_nature_area( + args['lulc_raster_path'], args['lulc_attribute_table'], + urban_nature_area_path, + only_these_urban_nature_codes=limit_to_lucodes) + + # The source lulc is randomized, so need to programmatically build + # up the expected array. + source_lulc_array = pygeoprocessing.raster_to_numpy_array( + args['lulc_raster_path']) + pixel_area = abs(_DEFAULT_PIXEL_SIZE[0] * _DEFAULT_PIXEL_SIZE[1]) + expected_array = numpy.zeros(source_lulc_array.shape, + dtype=numpy.float32) + for i in range(1, 10, 2): + if limit_to_lucodes is not None: + if i not in limit_to_lucodes: + continue + factor = float(f"0.{i}") + expected_array[source_lulc_array == i] = factor * pixel_area + + reclassified_array = pygeoprocessing.raster_to_numpy_array( + urban_nature_area_path) + numpy.testing.assert_array_almost_equal( + reclassified_array, expected_array) + def test_validate(self): """UNA: Basic test for validation.""" from natcap.invest import urban_nature_access args = _build_model_args(self.workspace_dir) - args['search_radius_mode'] = urban_nature_access.RADIUS_OPT_URBAN_NATURE + args['search_radius_mode'] = ( + urban_nature_access.RADIUS_OPT_URBAN_NATURE) self.assertEqual(urban_nature_access.validate(args), []) diff --git a/tests/test_utils.py b/tests/test_utils.py index eb75c186e0..0e33941538 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -15,6 +15,7 @@ import numpy import numpy.testing +import pandas as pd import pygeoprocessing from osgeo import gdal from osgeo import ogr @@ -254,7 +255,7 @@ def gkern(): # The sigma*3 is the maximum radius from the center # Anything greater than that distance should be set to 0 by the # gaussian kernel creation function. 
- kernel[dist_from_center > (sigma * 3)] = 0.0 + kernel[dist_from_center > (sigma * 3)] = 0 return kernel / numpy.sum(kernel) expected_matrix = gkern() @@ -619,12 +620,14 @@ def test_read_csv_to_dataframe(self): with open(csv_file, 'w') as file_obj: file_obj.write(textwrap.dedent( """\ - HEADER, - A, + header, + a, b """ )) - df = utils.read_csv_to_dataframe(csv_file) + df = utils.read_csv_to_dataframe( + csv_file, + {'columns': {'header': {'type': 'freestyle_string'}}}) # header and table values should be lowercased self.assertEqual(df.columns[0], 'header') self.assertEqual(df['header'][0], 'a') @@ -642,15 +645,19 @@ def test_unique_key_not_first_column(self): with open(table_path, 'w') as table_file: table_file.write(csv_text) - result = utils.read_csv_to_dataframe( - table_path, 'lucode').to_dict(orient='index') - expected_result = { - 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1}, - 2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2}, - 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3}, - 4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}} - - self.assertDictEqual(result, expected_result) + df = utils.read_csv_to_dataframe( + table_path, + { + 'index_col': 'lucode', + 'columns': { + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }}) + self.assertEqual(df.index.name, 'lucode') + self.assertEqual(list(df.index.values), [1, 2, 3, 4]) + self.assertEqual(df['desc'][2], 'bread') def test_non_unique_keys(self): """utils: test error is raised if keys are not unique.""" @@ -665,7 +672,16 @@ def test_non_unique_keys(self): table_file.write(csv_text) with self.assertRaises(ValueError): - utils.read_csv_to_dataframe(table_path, 'lucode') + utils.read_csv_to_dataframe( + table_path, + { + 'index_col': 'lucode', + 'columns': { + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }}) def test_missing_key_field(self): """utils: test error is raised when missing key field.""" @@ -680,29 +696,16 @@ def test_missing_key_field(self): table_file.write(csv_text) with self.assertRaises(KeyError): - utils.read_csv_to_dataframe(table_path, 'lucode') - - def test_nan_holes(self): - """utils: test empty strings returned when missing data is present.""" - from natcap.invest import utils - csv_text = ("lucode,desc,val1,val2\n" - "1,corn,0.5,2\n" - "2,,1,4\n" - "3,beans,0.5,4\n" - "4,butter,,1") - table_path = os.path.join(self.workspace_dir, 'table.csv') - with open(table_path, 'w') as table_file: - table_file.write(csv_text) - - result = utils.read_csv_to_dataframe( - table_path, 'lucode').to_dict(orient='index') - expected_result = { - 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1}, - 2: {'desc': '', 'val1': 1, 'val2': 4, 'lucode': 2}, - 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3}, - 4: {'desc': 'butter', 'val1': '', 'val2': 1, 'lucode': 4}} - - self.assertDictEqual(result, expected_result) + utils.read_csv_to_dataframe( + table_path, + { + 'index_col': 'lucode', + 'columns': { + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }}) def test_nan_row(self): """utils: test NaN row is dropped.""" @@ -717,60 +720,88 @@ def test_nan_row(self): table_file.write(csv_text) result = utils.read_csv_to_dataframe( - table_path, 'lucode').to_dict(orient='index') + table_path, + { + 'index_col': 'lucode', + 'columns': { + 'desc': 
{'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }}).to_dict(orient='index') expected_result = { - 1.0: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1.0}, - 3.0: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3.0}, - 4.0: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4.0}} + 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2}, + 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4}, + 4: {'desc': 'butter', 'val1': 9, 'val2': 1}} self.assertDictEqual(result, expected_result) def test_column_subset(self): """utils: test column subset is properly returned.""" from natcap.invest import utils - csv_text = ("lucode,desc,val1,val2\n" - "1,corn,0.5,2\n" - "2,bread,1,4\n" - "3,beans,0.5,4\n" - "4,butter,9,1") table_path = os.path.join(self.workspace_dir, 'table.csv') with open(table_path, 'w') as table_file: - table_file.write(csv_text) - - result = utils.read_csv_to_dataframe( - table_path, 'lucode', - usecols=['lucode', 'val1', 'val2']).to_dict(orient='index') - - expected_result = { - 1: {'val1': 0.5, 'val2': 2, 'lucode': 1}, - 2: {'val1': 1, 'val2': 4, 'lucode': 2}, - 3: {'val1': 0.5, 'val2': 4, 'lucode': 3}, - 4: {'val1': 9, 'val2': 1, 'lucode': 4}} - - self.assertDictEqual(result, expected_result) + table_file.write( + "lucode,desc,val1,val2\n" + "1,corn,0.5,2\n" + "2,bread,1,4\n" + "3,beans,0.5,4\n" + "4,butter,9,1") + df = utils.read_csv_to_dataframe( + table_path, + { + 'columns': { + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + } + }) + self.assertEqual(list(df.columns), ['lucode', 'val1', 'val2']) + + def test_column_pattern_matching(self): + """utils: test column subset is properly returned.""" + from natcap.invest import utils + table_path = os.path.join(self.workspace_dir, 'table.csv') + with open(table_path, 'w') as table_file: + table_file.write( + "lucode,grassland_value,forest_value,wetland_valueee\n" + "1,0.5,2\n" + "2,1,4\n" + "3,0.5,4\n" + "4,9,1") + df = utils.read_csv_to_dataframe( + table_path, { + 'columns': { + 'lucode': {'type': 'integer'}, + '[HABITAT]_value': {'type': 'number'} + } + }) + self.assertEqual( + list(df.columns), ['lucode', 'grassland_value', 'forest_value']) def test_trailing_comma(self): """utils: test a trailing comma on first line is handled properly.""" from natcap.invest import utils - csv_text = ("lucode,desc,val1,val2\n" - "1,corn,0.5,2,\n" - "2,bread,1,4\n" - "3,beans,0.5,4\n" - "4,butter,9,1") table_path = os.path.join(self.workspace_dir, 'table.csv') with open(table_path, 'w') as table_file: - table_file.write(csv_text) - + table_file.write( + "lucode,desc,val1,val2\n" + "1,corn,0.5,2,\n" + "2,bread,1,4\n" + "3,beans,0.5,4\n" + "4,butter,9,1") result = utils.read_csv_to_dataframe( - table_path, 'lucode').to_dict(orient='index') + table_path, + { + 'columns': { + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }}) + self.assertEqual(result['val2'][0], 2) + self.assertEqual(result['lucode'][1], 2) - expected_result = { - 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1}, - 2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2}, - 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3}, - 4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}} - - self.assertDictEqual(result, expected_result) def test_trailing_comma_second_line(self): """utils: test a trailing comma on second line is handled properly.""" @@ -785,226 +816,244 @@ def 
test_trailing_comma_second_line(self): table_file.write(csv_text) result = utils.read_csv_to_dataframe( - table_path, 'lucode').to_dict(orient='index') + table_path, + { + 'index_col': 'lucode', + 'columns': { + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }}).to_dict(orient='index') expected_result = { - 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1}, - 2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2}, - 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3}, - 4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}} + 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2}, + 2: {'desc': 'bread', 'val1': 1, 'val2': 4}, + 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4}, + 4: {'desc': 'butter', 'val1': 9, 'val2': 1}} self.assertDictEqual(result, expected_result) - def test_results_lowercase_non_numeric(self): - """utils: text handling of converting to lowercase.""" + def test_csv_dialect_detection_semicolon_delimited(self): + """utils: test that we can parse semicolon-delimited CSVs.""" from natcap.invest import utils csv_file = os.path.join(self.workspace_dir, 'csv.csv') with open(csv_file, 'w') as file_obj: file_obj.write(textwrap.dedent( """\ - header1,HEADER2,header3 - 1,2,bar - 4,5,FOO + header1;HEADER2;header3; + 1;2;3; + 4;FOO;bar; """ )) - lookup_dict = utils.read_csv_to_dataframe( - csv_file, 'header1').to_dict(orient='index') - - self.assertEqual(lookup_dict[4]['header3'], 'foo') - self.assertEqual(lookup_dict[1]['header2'], 2) + df = utils.read_csv_to_dataframe( + csv_file, + {'columns': { + 'header1': {'type': 'integer'}, + 'header2': {'type': 'freestyle_string'}, + 'header3': {'type': 'freestyle_string'} + } + }) + self.assertEqual(df['header2'][1], 'foo') + self.assertEqual(df['header3'][1], 'bar') + self.assertEqual(df['header1'][0], 1) - def test_results_uppercase_numeric_cast(self): - """utils: test handling of uppercase, num. 
casting, blank values.""" + def test_convert_cols_to_lower(self): + """utils: test that column names are converted to lowercase""" from natcap.invest import utils csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with open(csv_file, 'w') as file_obj: file_obj.write(textwrap.dedent( """\ - header1,HEADER2,header3,missing_column, - 1,2,3, - 4,FOO,bar, + header, + A, + b """ )) + df = utils.read_csv_to_dataframe( + csv_file, {'columns': { + 'header': {'type': 'freestyle_string'} + }}) + self.assertEqual(df['header'][0], 'a') - lookup_dict = utils.read_csv_to_dataframe( - csv_file, 'header1', - convert_cols_to_lower=False, convert_vals_to_lower=False).to_dict(orient='index') - - self.assertEqual(lookup_dict[4]['HEADER2'], 'FOO') - self.assertEqual(lookup_dict[4]['header3'], 'bar') - self.assertEqual(lookup_dict[1]['header1'], 1) - - def test_csv_dialect_detection_semicolon_delimited(self): - """utils: test that we can parse semicolon-delimited CSVs.""" + def test_convert_vals_to_lower(self): + """utils: test that values are converted to lowercase""" from natcap.invest import utils csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with open(csv_file, 'w') as file_obj: file_obj.write(textwrap.dedent( """\ - header1;HEADER2;header3; - 1;2;3; - 4;FOO;bar; + HEADER, + a, + b """ )) + df = utils.read_csv_to_dataframe( + csv_file, {'columns': { + 'header': {'type': 'freestyle_string'} + }}) + self.assertEqual(df.columns[0], 'header') - lookup_dict = utils.read_csv_to_dataframe( - csv_file, 'header1', - convert_cols_to_lower=False, convert_vals_to_lower=False).to_dict(orient='index') - - self.assertEqual(lookup_dict[4]['HEADER2'], 'FOO') - self.assertEqual(lookup_dict[4]['header3'], 'bar') - self.assertEqual(lookup_dict[1]['header1'], 1) - - def test_csv_utf8_encoding(self): - """utils: test that CSV read correctly with UTF-8 encoding.""" + def test_integer_type_columns(self): + """utils: integer column values are returned as integers.""" from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w', encoding='utf-8') as file_obj: + with open(csv_file, 'w') as file_obj: file_obj.write(textwrap.dedent( """\ - header1,HEADER2,header3 - 1,2,bar - 4,5,FOO + id,header, + 1,5.0, + 2,-1, + 3, """ )) - lookup_dict = utils.read_csv_to_dataframe( - csv_file, 'header1').to_dict(orient='index') - self.assertEqual(lookup_dict[4]['header2'], 5) - self.assertEqual(lookup_dict[4]['header3'], 'foo') - self.assertEqual(lookup_dict[1]['header1'], 1) - - def test_csv_utf8_bom_encoding(self): - """utils: test that CSV read correctly with UTF-8 BOM encoding.""" + df = utils.read_csv_to_dataframe( + csv_file, {'columns': { + 'id': {'type': 'integer'}, + 'header': {'type': 'integer', 'na_allowed': True}}}) + self.assertIsInstance(df['header'][0], numpy.int64) + self.assertIsInstance(df['header'][1], numpy.int64) + # empty values are returned as pandas.NA + self.assertTrue(pd.isna(df['header'][2])) + + def test_float_type_columns(self): + """utils: float column values are returned as floats.""" from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - # writing with utf-8-sig will prepend the BOM - with open(csv_file, 'w', encoding='utf-8-sig') as file_obj: + with open(csv_file, 'w') as file_obj: file_obj.write(textwrap.dedent( """\ - header1,HEADER2,header3 - 1,2,bar - 4,5,FOO + h1,h2,h3 + 5,0.5,.4 + -1,-.3, """ )) - # confirm that the file has the BOM prefix - with open(csv_file, 'rb') as file_obj: - 
self.assertTrue(file_obj.read().startswith(codecs.BOM_UTF8)) - - lookup_dict = utils.read_csv_to_dataframe( - csv_file, 'header1').to_dict(orient='index') - # assert the BOM prefix was correctly parsed and skipped - self.assertEqual(lookup_dict[4]['header2'], 5) - self.assertEqual(lookup_dict[4]['header3'], 'foo') - self.assertEqual(lookup_dict[1]['header1'], 1) - - def test_csv_latin_1_encoding(self): - """utils: test that CSV read correctly with Latin-1 encoding.""" + df = utils.read_csv_to_dataframe( + csv_file, {'columns': { + 'h1': {'type': 'number'}, + 'h2': {'type': 'ratio'}, + 'h3': {'type': 'percent', 'na_allowed': True}, + }}) + self.assertEqual(df['h1'].dtype, float) + self.assertEqual(df['h2'].dtype, float) + self.assertEqual(df['h3'].dtype, float) + # empty values are returned as numpy.nan + self.assertTrue(numpy.isnan(df['h3'][1])) + + def test_string_type_columns(self): + """utils: string column values are returned as strings.""" from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with codecs.open(csv_file, 'w', encoding='iso-8859-1') as file_obj: + with open(csv_file, 'w') as file_obj: file_obj.write(textwrap.dedent( """\ - header 1,HEADER 2,header 3 - 1,2,bar1 - 4,5,FOO + h1,h2,h3 + 1,a,foo + 2,b, """ )) - - lookup_dict = utils.read_csv_to_dataframe( - csv_file, 'header 1').to_dict(orient='index') - - self.assertEqual(lookup_dict[4]['header 2'], 5) - self.assertEqual(lookup_dict[4]['header 3'], 'foo') - self.assertEqual(lookup_dict[1]['header 1'], 1) - - def test_csv_error_non_utf8_character(self): - """utils: test that error is raised on non-UTF8 character.""" + df = utils.read_csv_to_dataframe( + csv_file, {'columns': { + 'h1': {'type': 'freestyle_string'}, + 'h2': {'type': 'option_string'}, + 'h3': {'type': 'freestyle_string'}, + }}) + self.assertEqual(df['h1'][0], '1') + self.assertEqual(df['h2'][1], 'b') + # empty values are returned as NA + self.assertTrue(pd.isna(df['h3'][1])) + + def test_boolean_type_columns(self): + """utils: boolean column values are returned as booleans.""" from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with codecs.open(csv_file, 'w', encoding='iso-8859-1') as file_obj: + with open(csv_file, 'w') as file_obj: file_obj.write(textwrap.dedent( """\ - header 1,HEADER 2,header 3 - 1,2,bar1 - 4,5,FÖÖ + index,h1 + a,1 + b,0 + c, """ )) - with self.assertRaises(UnicodeDecodeError): - utils.read_csv_to_dataframe(csv_file, 'header 1') + df = utils.read_csv_to_dataframe( + csv_file, {'columns': { + 'index': {'type': 'freestyle_string'}, + 'h1': {'type': 'bool', 'na_allowed': True}}}) + self.assertEqual(df['h1'][0], True) + self.assertEqual(df['h1'][1], False) + # empty values are returned as pandas.NA + self.assertTrue(pd.isna(df['h1'][2])) - def test_expand_path(self): - """utils: test path expansion function.""" + def test_expand_path_columns(self): + """utils: test values in path columns are expanded.""" from natcap.invest import utils - base_path = os.path.join(self.workspace_dir, 'csv.csv') + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + f"""\ + bar,path + 1,foo.txt + 2,foo/bar.txt + 3,foo\\bar.txt + 4,{self.workspace_dir}/foo.txt + 5, + """ + )) + df = utils.read_csv_to_dataframe( + csv_file, {'columns': { + 'bar': {'type': 'integer'}, + 'path': {'type': 'file'} + }}) self.assertEqual( f'{self.workspace_dir}{os.sep}foo.txt', - utils.expand_path('foo.txt', base_path)) + df['path'][0]) 
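+        # relative paths in a 'file' type column are expanded relative to the
+        # directory containing the CSV (self.workspace_dir here)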
self.assertEqual( f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt', - utils.expand_path('foo/bar.txt', base_path)) + df['path'][1]) self.assertEqual( f'{self.workspace_dir}{os.sep}foo\\bar.txt', - utils.expand_path('foo\\bar.txt', base_path)) + df['path'][2]) self.assertEqual( f'{self.workspace_dir}{os.sep}foo.txt', - utils.expand_path(f'{self.workspace_dir}{os.sep}foo.txt', base_path)) - - def test_convert_cols_to_lower(self): - """utils: test that to_lower=True makes headers lowercase""" - from natcap.invest import utils - - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - HEADER, - A, - b - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, convert_cols_to_lower=True, convert_vals_to_lower=False) - # header should be lowercase - self.assertEqual(df.columns[0], 'header') - # case of table values shouldn't change - self.assertEqual(df['header'][0], 'A') - self.assertEqual(df['header'][1], 'b') + df['path'][3]) + # empty values are returned as empty strings + self.assertTrue(pd.isna(df['path'][4])) - def test_convert_vals_to_lower(self): - """utils: test that to_lower=True makes headers lowercase""" + def test_csv_utf8_encoding(self): + """utils: test that CSV read correctly with UTF-8 encoding.""" from natcap.invest import utils csv_file = os.path.join(self.workspace_dir, 'csv.csv') - - with open(csv_file, 'w') as file_obj: + with open(csv_file, 'w', encoding='utf-8') as file_obj: file_obj.write(textwrap.dedent( """\ - HEADER, - A, - b + header1,HEADER2,header3 + 1,2,bar + 4,5,FOO """ )) - df = utils.read_csv_to_dataframe( - csv_file, convert_cols_to_lower=False, convert_vals_to_lower=True) - # header should still be uppercase - self.assertEqual(df.columns[0], 'HEADER') - # case of table values should change - self.assertEqual(df['HEADER'][0], 'a') - self.assertEqual(df['HEADER'][1], 'b') + lookup_dict = utils.read_csv_to_dataframe( + csv_file, + { + 'index_col': 'header1', + 'columns': { + 'header1': {'type': 'integer'}, + 'header2': {'type': 'integer'}, + 'header3': {'type': 'freestyle_string'} + }}).to_dict(orient='index') + self.assertEqual(lookup_dict[4]['header2'], 5) + self.assertEqual(lookup_dict[4]['header3'], 'foo') def test_utf8_bom_encoding(self): """utils: test that CSV read correctly with UTF-8 BOM encoding.""" from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') # writing with utf-8-sig will prepend the BOM with open(csv_file, 'w', encoding='utf-8-sig') as file_obj: @@ -1018,12 +1067,64 @@ def test_utf8_bom_encoding(self): # confirm that the file has the BOM prefix with open(csv_file, 'rb') as file_obj: self.assertTrue(file_obj.read().startswith(codecs.BOM_UTF8)) - - df = utils.read_csv_to_dataframe(csv_file) + df = utils.read_csv_to_dataframe(csv_file, + { + 'columns': { + 'header1': {'type': 'integer'}, + 'header2': {'type': 'integer'}, + 'header3': {'type': 'freestyle_string'} + }}) # assert the BOM prefix was correctly parsed and skipped self.assertEqual(df.columns[0], 'header1') self.assertEqual(df['header2'][1], 5) + def test_csv_latin_1_encoding(self): + """utils: can read Latin-1 encoded CSV if it uses only ASCII chars.""" + from natcap.invest import utils + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with codecs.open(csv_file, 'w', encoding='iso-8859-1') as file_obj: + file_obj.write(textwrap.dedent( + """\ + header 1,HEADER 2,header 3 + 1,2,bar1 + 4,5,FOO + """ + )) + df = utils.read_csv_to_dataframe( + csv_file, + 
{'columns': { + 'header 1': {'type': 'integer'}, + 'header 2': {'type': 'integer'}, + 'header 3': {'type': 'freestyle_string'} + }}) + self.assertEqual(df['header 2'][1], 5) + self.assertEqual(df['header 3'][1], 'foo') + self.assertEqual(df['header 1'][0], 1) + + def test_csv_error_non_utf8_character(self): + """utils: test that error is raised on non-UTF8 character.""" + from natcap.invest import utils + + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with codecs.open(csv_file, 'w', encoding='iso-8859-1') as file_obj: + file_obj.write(textwrap.dedent( + """\ + header 1,HEADER 2,header 3 + 1,2,bar1 + 4,5,FÖÖ + """ + )) + with self.assertRaises(UnicodeDecodeError): + utils.read_csv_to_dataframe( + csv_file, + { + 'index_col': 'header1', + 'columns': { + 'header1': {'type': 'integer'}, + 'header2': {'type': 'integer'}, + 'header3': {'type': 'freestyle_string'} + }}) + def test_override_default_encoding(self): """utils: test that you can override the default encoding kwarg""" from natcap.invest import utils @@ -1039,7 +1140,10 @@ def test_override_default_encoding(self): bar """ )) - df = utils.read_csv_to_dataframe(csv_file, encoding='iso8859_5') + df = utils.read_csv_to_dataframe( + csv_file, { + 'columns': {'header': {'type': 'freestyle_string'} + }}, encoding='iso8859_5') # with the encoding specified, special characters should work # and be lowercased self.assertEqual(df['header'][0], 'fюю') @@ -1061,10 +1165,16 @@ def test_other_kwarg(self): )) # using sep=None with the default engine='python', # it should infer what the separator is - df = utils.read_csv_to_dataframe(csv_file, sep=None) + df = utils.read_csv_to_dataframe( + csv_file, { + 'columns': { + 'h1': {'type': 'freestyle_string'}, + 'h2': {'type': 'freestyle_string'}, + 'h3': {'type': 'freestyle_string'} + }}, converters={'h2': lambda val: f'foo_{val}'}) self.assertEqual(df.columns[0], 'h1') - self.assertEqual(df['h2'][1], 'e') + self.assertEqual(df['h2'][1], 'foo_e') def test_csv_with_integer_headers(self): """ @@ -1085,7 +1195,13 @@ def test_csv_with_integer_headers(self): d,e,f """ )) - df = utils.read_csv_to_dataframe(csv_file) + df = utils.read_csv_to_dataframe( + csv_file, + {'columns': { + '1': {'type': 'freestyle_string'}, + '2': {'type': 'freestyle_string'}, + '3': {'type': 'freestyle_string'} + }}) # expect headers to be strings self.assertEqual(df.columns[0], '1') self.assertEqual(df['1'][0], 'a') @@ -1100,48 +1216,23 @@ def test_removal_whitespace(self): file_obj.write(" Col1, Col2 ,Col3 \n") file_obj.write(" val1, val2 ,val3 \n") file_obj.write(" , 2 1 , ") - df = utils.read_csv_to_dataframe(csv_file, convert_cols_to_lower=False) + df = utils.read_csv_to_dataframe( + csv_file, { + 'columns': { + 'col1': {'type': 'freestyle_string'}, + 'col2': {'type': 'freestyle_string'}, + 'col3': {'type': 'freestyle_string'} + }}) # header should have no leading / trailing whitespace - self.assertEqual(df.columns[0], 'Col1') - self.assertEqual(df.columns[1], 'Col2') - self.assertEqual(df.columns[2], 'Col3') - # values should have no leading / trailing whitespace - self.assertEqual(df['Col1'][0], 'val1') - self.assertEqual(df['Col2'][0], 'val2') - self.assertEqual(df['Col3'][0], 'val3') - self.assertEqual(df['Col1'][1], '') - self.assertEqual(df['Col2'][1], '2 1') - self.assertEqual(df['Col3'][1], '') - - def test_expand_path_columns(self): - """utils: test path expansion feature of read_csv_to_dataframe.""" - from natcap.invest import utils + self.assertEqual(list(df.columns), ['col1', 'col2', 'col3']) - csv_file = 
os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - f"""\ - bar,path - 1,foo.txt - 2,foo/bar.txt - 3,foo\\bar.txt - 4,{self.workspace_dir}/foo.txt - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, expand_path_cols=['path'], convert_vals_to_lower=False) - self.assertEqual( - f'{self.workspace_dir}{os.sep}foo.txt', - df['path'][0]) - self.assertEqual( - f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt', - df['path'][1]) - self.assertEqual( - f'{self.workspace_dir}{os.sep}foo\\bar.txt', - df['path'][2]) - self.assertEqual( - f'{self.workspace_dir}{os.sep}foo.txt', - df['path'][3]) + # values should have no leading / trailing whitespace + self.assertEqual(df['col1'][0], 'val1') + self.assertEqual(df['col2'][0], 'val2') + self.assertEqual(df['col3'][0], 'val3') + self.assertEqual(df['col1'][1], '') + self.assertEqual(df['col2'][1], '2 1') + self.assertEqual(df['col3'][1], '') class CreateCoordinateTransformationTests(unittest.TestCase): @@ -1471,7 +1562,7 @@ def test_different_feature_count(self): attrs = [{'id': 1, 'foo': 2.3456}, {'id': 2, 'foo': 5.6789}] attrs_copy = [ {'id': 1, 'foo': 2.3456}, {'id': 2, 'foo': 5.6789}, - {'id': 3, 'foo': 5.0}] + {'id': 3, 'foo': 5}] srs = osr.SpatialReference() srs.ImportFromEPSG(3157) diff --git a/tests/test_wind_energy.py b/tests/test_wind_energy.py index d9b788c47f..f70e3938f8 100644 --- a/tests/test_wind_energy.py +++ b/tests/test_wind_energy.py @@ -846,7 +846,7 @@ def test_clip_vector_value_error(self): wind_energy.execute(args) self.assertTrue( - "returned 0 features. If an AOI was" in str(cm.exception)) + "returned 0 features. This means the AOI " in str(cm.exception)) class WindEnergyValidationTests(unittest.TestCase): diff --git a/workbench/__mocks__/electron-store.js b/workbench/__mocks__/electron-store.js new file mode 100644 index 0000000000..d4ea6cd5c2 --- /dev/null +++ b/workbench/__mocks__/electron-store.js @@ -0,0 +1,22 @@ +export default class Store { + constructor(options) { + this.defaults = options.defaults || {}; + this.store = this.defaults; + } + + get(key) { + return this.store[key]; + } + + set(key, val) { + this.store[key] = val; + } + + delete(key) { + delete this.store[key]; + } + + reset() { + this.store = this.defaults; + } +} diff --git a/workbench/electron-builder-config.js b/workbench/electron-builder-config.js index 188d7540bf..3f857f1a77 100644 --- a/workbench/electron-builder-config.js +++ b/workbench/electron-builder-config.js @@ -43,11 +43,11 @@ const config = { from: 'resources/storage_token.txt', to: 'storage_token.txt', }, + { + from: '../LICENSE.txt', + to: 'LICENSE.InVEST.txt', + }, ], - extraFiles: [{ - from: '../LICENSE.txt', - to: 'LICENSE.InVEST.txt', - }], appId: APP_ID, productName: PRODUCT_NAME, artifactName: ARTIFACT_NAME, diff --git a/workbench/package.json b/workbench/package.json index 4b243c1455..9ab1e6ebca 100644 --- a/workbench/package.json +++ b/workbench/package.json @@ -53,6 +53,7 @@ "dependencies": { "@babel/runtime": "^7.13.10", "electron-log": "^4.3.5", + "electron-store": "^8.1.0", "i18next": "^22.4.9", "localforage": "^1.9.0", "node-fetch": "^2.6.7", @@ -71,6 +72,7 @@ "@testing-library/react": "^14.0.0", "@testing-library/user-event": "^14.4.3", "@vitejs/plugin-react": "^4.0.0", + "ajv": "^8.12.0", "babel-eslint": "^10.1.0", "bootstrap": "4.3.1", "concurrently": "^8.2.0", diff --git a/workbench/src/main/createPythonFlaskProcess.js b/workbench/src/main/createPythonFlaskProcess.js index 4ccd8a87bc..7e5cb863f4 100644 --- 
a/workbench/src/main/createPythonFlaskProcess.js +++ b/workbench/src/main/createPythonFlaskProcess.js @@ -1,4 +1,4 @@ -import { spawn, exec } from 'child_process'; +import { spawn, execSync } from 'child_process'; import fetch from 'node-fetch'; @@ -88,26 +88,12 @@ export async function shutdownPythonProcess(subprocess) { subprocess.kill(); } else { const { pid } = subprocess; - exec(`taskkill /pid ${pid} /t /f`); + execSync(`taskkill /pid ${pid} /t /f`); } } catch (error) { // if the process was already killed by some other means logger.debug(error); + } finally { + Promise.resolve(); } - - // If we return too quickly, it seems the electron app is allowed - // to quit before the subprocess is killed, and the subprocess remains - // open. Here we poll a flask endpoint and resolve only when it - // gives ECONNREFUSED. - return fetch(`${HOSTNAME}:${process.env.PORT}/ready`, { - method: 'get', - }) - .then(async () => { - await new Promise((resolve) => setTimeout(resolve, 300)); - return shutdownPythonProcess(subprocess); - }) - .catch(() => { - logger.debug('flask server is closed'); - return Promise.resolve(); - }); } diff --git a/workbench/src/main/ipcMainChannels.js b/workbench/src/main/ipcMainChannels.js index 79993ec586..de7ca13c19 100644 --- a/workbench/src/main/ipcMainChannels.js +++ b/workbench/src/main/ipcMainChannels.js @@ -1,9 +1,12 @@ export const ipcMainChannels = { + CHANGE_LANGUAGE: 'change-language', CHECK_FILE_PERMISSIONS: 'check-file-permissions', CHECK_STORAGE_TOKEN: 'check-storage-token', DOWNLOAD_URL: 'download-url', - GET_N_CPUS: 'get-n-cpus', GET_ELECTRON_PATHS: 'get-electron-paths', + GET_N_CPUS: 'get-n-cpus', + GET_SETTING: 'get-setting', + GET_LANGUAGE: 'get-language', INVEST_KILL: 'invest-kill', INVEST_READ_LOG: 'invest-read-log', INVEST_RUN: 'invest-run', @@ -12,8 +15,8 @@ export const ipcMainChannels = { LOGGER: 'logger', OPEN_EXTERNAL_URL: 'open-external-url', OPEN_LOCAL_HTML: 'open-local-html', + SET_SETTING: 'set-setting', SHOW_ITEM_IN_FOLDER: 'show-item-in-folder', SHOW_OPEN_DIALOG: 'show-open-dialog', SHOW_SAVE_DIALOG: 'show-save-dialog', - CHANGE_LANGUAGE: 'change-language', }; diff --git a/workbench/src/main/main.js b/workbench/src/main/main.js index 775fc623ec..12e60b9cfd 100644 --- a/workbench/src/main/main.js +++ b/workbench/src/main/main.js @@ -4,7 +4,6 @@ import path from 'path'; import { app, BrowserWindow, - screen, nativeTheme, Menu, ipcMain @@ -29,7 +28,7 @@ import { import setupGetNCPUs from './setupGetNCPUs'; import setupOpenExternalUrl from './setupOpenExternalUrl'; import setupOpenLocalHtml from './setupOpenLocalHtml'; -import setupChangeLanguage from './setupChangeLanguage'; +import { settingsStore, setupSettingsHandlers } from './settingsStore'; import setupGetElectronPaths from './setupGetElectronPaths'; import setupRendererLogger from './setupRendererLogger'; import { ipcMainChannels } from './ipcMainChannels'; @@ -37,8 +36,8 @@ import menuTemplate from './menubar'; import ELECTRON_DEV_MODE from './isDevMode'; import BASE_URL from './baseUrl'; import { getLogger } from './logger'; -import pkg from '../../package.json'; import i18n from './i18n/i18n'; +import pkg from '../../package.json'; const logger = getLogger(__filename.split('/').slice(-1)[0]); @@ -61,6 +60,7 @@ if (!process.env.PORT) { let mainWindow; let splashScreen; let flaskSubprocess; +let forceQuit = false; export function destroyWindow() { mainWindow = null; @@ -71,6 +71,8 @@ export const createWindow = async () => { logger.info(`Running invest-workbench version 
${pkg.version}`); nativeTheme.themeSource = 'light'; // override OS/browser setting + i18n.changeLanguage(settingsStore.get('language')); + splashScreen = new BrowserWindow({ width: 574, // dims set to match the image in splash.html height: 500, @@ -86,7 +88,7 @@ export const createWindow = async () => { setupCheckFilePermissions(); setupCheckFirstRun(); setupCheckStorageToken(); - setupChangeLanguage(); + setupSettingsHandlers(); setupGetElectronPaths(); setupGetNCPUs(); setupInvestLogReaderHandler(); @@ -110,14 +112,6 @@ export const createWindow = async () => { menuTemplate(mainWindow, ELECTRON_DEV_MODE, i18n) ) ); - // when language changes, rebuild the menu bar in new language - i18n.on('languageChanged', (lng) => { - Menu.setApplicationMenu( - Menu.buildFromTemplate( - menuTemplate(mainWindow, ELECTRON_DEV_MODE, i18n) - ) - ); - }); mainWindow.loadURL(path.join(BASE_URL, 'index.html')); mainWindow.once('ready-to-show', () => { @@ -135,6 +129,16 @@ export const createWindow = async () => { logger.error(details); }); + mainWindow.on('close', (event) => { + // 'close' is triggered by the red traffic light button on mac + // override this behavior and just minimize, + // unless we're actually quitting the app + if (process.platform === 'darwin' & !forceQuit) { + event.preventDefault(); + mainWindow.minimize() + } + }); + mainWindow.on('closed', () => { mainWindow = null; }); @@ -186,17 +190,12 @@ export function main() { createWindow(); } }); - app.on('window-all-closed', async () => { - // On OS X it is common for applications and their menu bar - // to stay active until the user quits explicitly with Cmd + Q - if (process.platform !== 'darwin') { - app.quit(); - } - }); + let shuttingDown = false; app.on('before-quit', async (event) => { // prevent quitting until after we're done with cleanup, // then programatically quit + forceQuit = true; if (shuttingDown) { return; } event.preventDefault(); shuttingDown = true; diff --git a/workbench/src/main/settingsStore.js b/workbench/src/main/settingsStore.js new file mode 100644 index 0000000000..9d9ad38960 --- /dev/null +++ b/workbench/src/main/settingsStore.js @@ -0,0 +1,101 @@ +import { app, ipcMain } from 'electron'; +import Store from 'electron-store'; +import Ajv from 'ajv'; + +import { ipcMainChannels } from './ipcMainChannels'; +import { getLogger } from './logger'; + +const logger = getLogger(__filename.split('/').slice(-1)[0]); + +export const defaults = { + nWorkers: -1, + taskgraphLoggingLevel: 'INFO', + loggingLevel: 'INFO', + language: 'en', +}; + +export const schema = { + type: 'object', + properties: { + nWorkers: { + type: 'number', + }, + taskgraphLoggingLevel: { + enum: ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], + }, + loggingLevel: { + enum: ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], + }, + language: { + enum: ['en', 'es', 'zh'], + }, + }, + required: ['nWorkers', 'taskgraphLoggingLevel', 'loggingLevel', 'language'] +}; + +/** + * Open a store and validate against a schema. + * + * Required properties missing from the store are initialized with defaults. + * Invalid properties are reset to defaults. + * + * @param {object} data key-values with which to initialize a store. 
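+ *   Defaults to the module-level `defaults` object.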
+ * @returns {Store} an instance of an electron-store Store + */ +export function initStore(data = defaults) { + const ajv = new Ajv({ allErrors: true }); + const validate = ajv.compile(schema); + const store = new Store({ defaults: data }); + const valid = validate(store.store); + if (!valid) { + validate.errors.forEach((e) => { + logger.debug(e); + let property; + if (e.instancePath) { + property = e.instancePath.split('/').pop(); + } else if (e.keyword === 'required') { + property = e.params.missingProperty; + } else { + // something is invalid that we're not prepared to fix + // so just reset the whole store to defaults. + logger.debug(e); + store.reset(); + } + logger.debug(`resetting value for setting ${property}`); + store.set(property, defaults[property]); + }); + } + return store; +} + +export const settingsStore = initStore(); + +export function setupSettingsHandlers() { + ipcMain.handle( + ipcMainChannels.GET_SETTING, + (event, key) => settingsStore.get(key) + ); + + ipcMain.on( + ipcMainChannels.SET_SETTING, + (event, key, value) => settingsStore.set(key, value) + ); + + // language is stored in the same store, but has special + // needs for getting & setting because we need to get + // the value synchronously during preload, and trigger + // an app restart on change. + ipcMain.on(ipcMainChannels.GET_LANGUAGE, (event) => { + event.returnValue = settingsStore.get('language'); + }); + + ipcMain.handle( + ipcMainChannels.CHANGE_LANGUAGE, + (e, languageCode) => { + logger.debug('changing language to', languageCode); + settingsStore.set('language', languageCode); + app.relaunch(); + app.quit(); + } + ); +} diff --git a/workbench/src/main/setupChangeLanguage.js b/workbench/src/main/setupChangeLanguage.js index a5f08940a8..f4a2672a0c 100644 --- a/workbench/src/main/setupChangeLanguage.js +++ b/workbench/src/main/setupChangeLanguage.js @@ -1,16 +1,25 @@ -import i18n from 'i18next'; -import { ipcMain } from 'electron'; +import Store from 'electron-store'; +import { app, ipcMain } from 'electron'; import { getLogger } from './logger'; import { ipcMainChannels } from './ipcMainChannels'; const logger = getLogger(__filename.split('/').slice(-1)[0]); +const store = new Store(); + export default function setupChangeLanguage() { + ipcMain.on(ipcMainChannels.GET_LANGUAGE, (event) => { + // default to en if no language setting exists + event.returnValue = store.get('language', 'en'); + }); + ipcMain.handle( ipcMainChannels.CHANGE_LANGUAGE, (e, languageCode) => { logger.debug('changing language to', languageCode); - i18n.changeLanguage(languageCode); + store.set('language', languageCode); + app.relaunch(); + app.quit(); } ); } diff --git a/workbench/src/main/setupInvestHandlers.js b/workbench/src/main/setupInvestHandlers.js index 37783fd882..093f759518 100644 --- a/workbench/src/main/setupInvestHandlers.js +++ b/workbench/src/main/setupInvestHandlers.js @@ -11,6 +11,7 @@ import ELECTRON_DEV_MODE from './isDevMode'; import investUsageLogger from './investUsageLogger'; import markupMessage from './investLogMarkup'; import writeInvestParameters from './writeInvestParameters'; +import { settingsStore } from './settingsStore'; const logger = getLogger(__filename.split('/').slice(-1)[0]); @@ -45,12 +46,16 @@ export function setupInvestRunHandlers(investExe) { }); ipcMain.on(ipcMainChannels.INVEST_RUN, async ( - event, modelRunName, pyModuleName, args, loggingLevel, taskgraphLoggingLevel, language, tabID + event, modelRunName, pyModuleName, args, tabID ) => { let investRun; let investStarted = false; 
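+    // run settings (logging levels, language, n_workers) are now read from the
+    // main-process settings store rather than passed in from the renderer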
let investStdErr = ''; const usageLogger = investUsageLogger(); + const loggingLevel = settingsStore.get('loggingLevel'); + const taskgraphLoggingLevel = settingsStore.get('taskgraphLoggingLevel'); + const language = settingsStore.get('language'); + const nWorkers = settingsStore.get('nWorkers'); // Write a temporary datastack json for passing to invest CLI try { @@ -64,7 +69,10 @@ export function setupInvestRunHandlers(investExe) { filepath: datastackPath, moduleName: pyModuleName, relativePaths: false, - args: JSON.stringify(args), + args: JSON.stringify({ + ...args, + n_workers: nWorkers, + }), }; await writeInvestParameters(payload); diff --git a/workbench/src/main/setupOpenLocalHtml.js b/workbench/src/main/setupOpenLocalHtml.js index 4eddf0ea81..fd83412d67 100644 --- a/workbench/src/main/setupOpenLocalHtml.js +++ b/workbench/src/main/setupOpenLocalHtml.js @@ -11,6 +11,7 @@ export default function setupOpenLocalHtml(parentWindow, isDevMode) { ipcMainChannels.OPEN_LOCAL_HTML, (event, url) => { const [width, height] = parentWindow.getSize(); const child = new BrowserWindow({ + parent: parentWindow, width: width > 1000 ? 1000 : width, // UG content is never wider height: height, frame: true, diff --git a/workbench/src/preload/api.js b/workbench/src/preload/api.js index 98bc2c6666..de3b08b0c6 100644 --- a/workbench/src/preload/api.js +++ b/workbench/src/preload/api.js @@ -35,6 +35,7 @@ export default { PORT: PORT, // where the flask app is running ELECTRON_LOG_PATH: electronLogPath, USERGUIDE_PATH: userguidePath, + LANGUAGE: ipcRenderer.sendSync(ipcMainChannels.GET_LANGUAGE), logger: { debug: (message) => ipcRenderer.send(ipcMainChannels.LOGGER, 'debug', message), info: (message) => ipcRenderer.send(ipcMainChannels.LOGGER, 'info', message), diff --git a/workbench/src/renderer/InvestJob.js b/workbench/src/renderer/InvestJob.js index 5f5ab66e9b..59a1fc13ff 100644 --- a/workbench/src/renderer/InvestJob.js +++ b/workbench/src/renderer/InvestJob.js @@ -58,8 +58,8 @@ export default class InvestJob { const lastKey = sortedJobHashes.pop(); investJobStore.removeItem(lastKey); } - await investJobStore.setItem(HASH_ARRAY_KEY, sortedJobHashes); await investJobStore.setItem(job.hash, job); + await investJobStore.setItem(HASH_ARRAY_KEY, sortedJobHashes); return InvestJob.getJobStore(); } diff --git a/workbench/src/renderer/app.jsx b/workbench/src/renderer/app.jsx index a43b249c86..bc05cb38ed 100644 --- a/workbench/src/renderer/app.jsx +++ b/workbench/src/renderer/app.jsx @@ -1,5 +1,6 @@ import React from 'react'; import PropTypes from 'prop-types'; +import i18n from 'i18next'; import TabPane from 'react-bootstrap/TabPane'; import TabContent from 'react-bootstrap/TabContent'; @@ -19,14 +20,9 @@ import InvestTab from './components/InvestTab'; import SettingsModal from './components/SettingsModal'; import DataDownloadModal from './components/DataDownloadModal'; import DownloadProgressBar from './components/DownloadProgressBar'; -import { - saveSettingsStore, getAllSettings, -} from './components/SettingsModal/SettingsStorage'; import { getInvestModelNames } from './server_requests'; import InvestJob from './InvestJob'; import { dragOverHandlerNone } from './utils'; -import { ipcMainChannels } from '../main/ipcMainChannels'; -import i18n from 'i18next'; const { ipcRenderer } = window.Workbench.electron; @@ -43,18 +39,15 @@ export default class App extends React.Component { openJobs: {}, investList: null, recentJobs: [], - investSettings: null, showDownloadModal: false, downloadedNofN: null, }; - 
this.saveSettings = this.saveSettings.bind(this); this.switchTabs = this.switchTabs.bind(this); this.openInvestModel = this.openInvestModel.bind(this); this.closeInvestModel = this.closeInvestModel.bind(this); this.updateJobProperties = this.updateJobProperties.bind(this); this.saveJob = this.saveJob.bind(this); this.clearRecentJobs = this.clearRecentJobs.bind(this); - this.storeDownloadDir = this.storeDownloadDir.bind(this); this.showDownloadModal = this.showDownloadModal.bind(this); } @@ -62,17 +55,17 @@ export default class App extends React.Component { async componentDidMount() { const investList = await getInvestModelNames(); const recentJobs = await InvestJob.getJobStore(); - const investSettings = await getAllSettings(); this.setState({ investList: investList, - recentJobs: recentJobs, - investSettings: investSettings, + // filter out models that do not exist in current version of invest + recentJobs: recentJobs.filter((job) => ( + Object.values(investList) + .map((m) => m.model_name) + .includes(job.modelRunName) + )), showDownloadModal: this.props.isFirstRun, }); - await i18n.changeLanguage(investSettings.language); - await ipcRenderer.invoke( - ipcMainChannels.CHANGE_LANGUAGE, investSettings.language - ); + await i18n.changeLanguage(window.Workbench.LANGUAGE); ipcRenderer.on('download-status', (downloadedNofN) => { this.setState({ downloadedNofN: downloadedNofN, @@ -94,33 +87,6 @@ export default class App extends React.Component { ); } - async saveSettings(settings) { - const { investSettings } = this.state; - await saveSettingsStore(settings); - this.setState({ investSettings: settings }); - // if language has changed, refresh the app - if (settings.language !== investSettings.language) { - // change language in the renderer process - await i18n.changeLanguage(settings.language); - // change language in the main process - await ipcRenderer.invoke( - ipcMainChannels.CHANGE_LANGUAGE, settings.language - ); - // rerender for changes to take effect - window.location.reload(); - } - } - - /** Store a sampledata filepath in localforage. - * - * @param {string} dir - the path to the user-selected dir - */ - storeDownloadDir(dir) { - const { investSettings } = this.state; - investSettings.sampleDataDir = dir; - this.saveSettings(investSettings); - } - showDownloadModal(shouldShow) { this.setState({ showDownloadModal: shouldShow, @@ -211,7 +177,6 @@ export default class App extends React.Component { render() { const { investList, - investSettings, recentJobs, openJobs, openTabIDs, @@ -260,7 +225,17 @@ export default class App extends React.Component { key={id} className={id === activeTab ? 'active' : ''} > - + { + event.stopPropagation(); + event.preventDefault(); + if (event.button === 1) { + // middle mouse button clicked, close tab + this.closeInvestModel(id); + } + }} + > {statusSymbol} {` ${job.modelHumanName}`} @@ -288,7 +263,6 @@ export default class App extends React.Component { @@ -301,7 +275,6 @@ export default class App extends React.Component { this.showDownloadModal(false)} - storeDownloadDir={this.storeDownloadDir} /> } - { - // don't render until after we fetched the data - (investSettings) - ? ( - this.showDownloadModal(true)} - nCPU={this.props.nCPU} - /> - ) - :
- } + this.showDownloadModal(true)} + nCPU={this.props.nCPU} + /> diff --git a/workbench/src/renderer/components/DataDownloadModal/index.jsx b/workbench/src/renderer/components/DataDownloadModal/index.jsx index 2fc064c8fa..e55a0192ce 100644 --- a/workbench/src/renderer/components/DataDownloadModal/index.jsx +++ b/workbench/src/renderer/components/DataDownloadModal/index.jsx @@ -109,7 +109,6 @@ class DataDownloadModal extends React.Component { this.state.selectedLinksArray, data.filePaths[0] ); - this.props.storeDownloadDir(data.filePaths[0]); this.closeDialog(); } } @@ -283,7 +282,6 @@ class DataDownloadModal extends React.Component { DataDownloadModal.propTypes = { show: PropTypes.bool.isRequired, closeModal: PropTypes.func.isRequired, - storeDownloadDir: PropTypes.func.isRequired, }; -export default withTranslation()(DataDownloadModal) +export default withTranslation()(DataDownloadModal); diff --git a/workbench/src/renderer/components/HomeTab/index.jsx b/workbench/src/renderer/components/HomeTab/index.jsx index 32b6d5f745..ec12bfc60f 100644 --- a/workbench/src/renderer/components/HomeTab/index.jsx +++ b/workbench/src/renderer/components/HomeTab/index.jsx @@ -11,6 +11,8 @@ import { useTranslation } from 'react-i18next'; import OpenButton from '../OpenButton'; import InvestJob from '../../InvestJob'; +const { logger } = window.Workbench; + /** * Renders a table of buttons for each invest model and * a list of cards for each cached invest job. @@ -110,46 +112,51 @@ HomeTab.propTypes = { */ function RecentInvestJobs(props) { const { recentJobs, openInvestModel } = props; - const handleClick = (jobMetadata) => { - openInvestModel(new InvestJob(jobMetadata)); - } const { t, i18n } = useTranslation(); - // Buttons to load each recently saved state + const handleClick = (jobMetadata) => { + try { + openInvestModel(new InvestJob(jobMetadata)); + } catch (error) { + logger.debug(error); + } + }; + const recentButtons = []; recentJobs.forEach((job) => { - if (!job.argsValues) { return; } - recentButtons.push( - handleClick(job)} - > - - - {job.modelHumanName} - - - {'Workspace: '} - {job.argsValues.workspace_dir} - - - {'Suffix: '} - {job.argsValues.results_suffix} - - - {job.humanTime} - - {(job.status === 'success' - ? {t('Model Complete')} - : {job.status} - )} - - - - - ); + if (job && job.argsValues && job.modelHumanName) { + recentButtons.push( + handleClick(job)} + > + + + {job.modelHumanName} + + + {'Workspace: '} + {job.argsValues.workspace_dir} + + + {'Suffix: '} + {job.argsValues.results_suffix} + + + {job.humanTime} + + {(job.status === 'success' + ? 
                {t('Model Complete')}
+              : {job.status}
+            )}
+
+
+
+
+      );
+    }
  });

  return (
diff --git a/workbench/src/renderer/components/InvestTab/index.jsx b/workbench/src/renderer/components/InvestTab/index.jsx
index e8036387d8..0c250a4e3a 100644
--- a/workbench/src/renderer/components/InvestTab/index.jsx
+++ b/workbench/src/renderer/components/InvestTab/index.jsx
@@ -147,7 +147,6 @@ class InvestTab extends React.Component {
     const {
       job,
       tabID,
-      investSettings,
       updateJobProperties,
     } = this.props;
     const args = { ...argsValues };
@@ -162,9 +161,6 @@ class InvestTab extends React.Component {
       job.modelRunName,
       this.state.modelSpec.pyname,
       args,
-      investSettings.loggingLevel,
-      investSettings.taskgraphLoggingLevel,
-      investSettings.language,
       tabID
     );
     this.switchTabs('log');
@@ -205,7 +201,7 @@ class InvestTab extends React.Component {
       logfile,
     } = this.props.job;

-    const { tabID, investSettings, t } = this.props;
+    const { tabID, t } = this.props;

     // Don't render the model setup & log until data has been fetched.
     if (!modelSpec) {
@@ -279,7 +275,6 @@ class InvestTab extends React.Component {
              uiSpec={uiSpec}
              argsInitValues={argsValues}
              investExecute={this.investExecute}
-             nWorkers={investSettings.nWorkers}
              sidebarSetupElementId={sidebarSetupElementId}
              sidebarFooterElementId={sidebarFooterElementId}
              executeClicked={executeClicked}
@@ -313,12 +308,6 @@ InvestTab.propTypes = {
     status: PropTypes.string,
   }).isRequired,
   tabID: PropTypes.string.isRequired,
-  investSettings: PropTypes.shape({
-    nWorkers: PropTypes.string,
-    taskgraphLoggingLevel: PropTypes.string,
-    loggingLevel: PropTypes.string,
-    language: PropTypes.string,
-  }).isRequired,
  saveJob: PropTypes.func.isRequired,
  updateJobProperties: PropTypes.func.isRequired,
};
diff --git a/workbench/src/renderer/components/OpenButton/index.jsx b/workbench/src/renderer/components/OpenButton/index.jsx
index 154736a2ce..ed5945d522 100644
--- a/workbench/src/renderer/components/OpenButton/index.jsx
+++ b/workbench/src/renderer/components/OpenButton/index.jsx
@@ -11,6 +11,7 @@ import { fetchDatastackFromFile } from '../../server_requests';
 import { ipcMainChannels } from '../../../main/ipcMainChannels';

 const { ipcRenderer } = window.Workbench.electron;
+const { logger } = window.Workbench;

 /**
  * Render a button that loads args from a datastack, parameterset, or logfile.
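With investSettings removed from InvestTab, the investExecute() call above no longer forwards logging levels or language from React state; only the model identity, its args, and the tab ID are sent, and the main process supplies the rest from its own settings store. A sketch of the reduced call inside investExecute(), using a hypothetical investRun() wrapper for the request that the hunk truncates:

    // Before: (..., args, loggingLevel, taskgraphLoggingLevel, language, tabID)
    // After: logging and language settings are resolved outside the renderer.
    await investRun(
      job.modelRunName,
      this.state.modelSpec.pyname,
      args,
      tabID
    );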
@@ -23,9 +24,22 @@ class OpenButton extends React.Component {
   }

   async browseFile() {
+    const { t } = this.props;
     const data = await ipcRenderer.invoke(ipcMainChannels.SHOW_OPEN_DIALOG);
     if (!data.canceled) {
-      const datastack = await fetchDatastackFromFile(data.filePaths[0]);
+      let datastack;
+      try {
+        datastack = await fetchDatastackFromFile(data.filePaths[0]);
+      } catch (error) {
+        logger.error(error);
+        alert(
+          t(
+            'No InVEST model data can be parsed from the file:\n {{filepath}}',
+            { filepath: data.filePaths[0] }
+          )
+        );
+        return;
+      }
       const job = new InvestJob({
         modelRunName: datastack.model_run_name,
         modelHumanName: datastack.model_human_name,
diff --git a/workbench/src/renderer/components/ResourcesLinks/index.jsx b/workbench/src/renderer/components/ResourcesLinks/index.jsx
index d527229b85..332ceba943 100644
--- a/workbench/src/renderer/components/ResourcesLinks/index.jsx
+++ b/workbench/src/renderer/components/ResourcesLinks/index.jsx
@@ -75,7 +75,7 @@ export default function ResourcesTab(props) {
   }

   const { t, i18n } = useTranslation();
-  const userGuideURL = `${window.Workbench.USERGUIDE_PATH}/${i18n.language}/${docs}`;
+  const userGuideURL = `${window.Workbench.USERGUIDE_PATH}/${window.Workbench.LANGUAGE}/${docs}`;

   return (
diff --git a/workbench/src/renderer/components/SettingsModal/SettingsStorage.js b/workbench/src/renderer/components/SettingsModal/SettingsStorage.js
deleted file mode 100644
index 866e182720..0000000000
--- a/workbench/src/renderer/components/SettingsModal/SettingsStorage.js
+++ /dev/null
@@ -1,83 +0,0 @@
-import localforage from 'localforage';
-
-const { logger } = window.Workbench;
-
-const investSettingsStore = localforage.createInstance({
-  name: 'InvestSettings',
-});
-
-/** Getter function for global default settings.
- *
- * @returns {object} to destructure into:
- *     {String} nWorkers - TaskGraph number of workers
- *     {String} taskgraphLoggingLevel - InVEST taskgraph logging level
- *     {String} loggingLevel - InVEST model logging level
- *     {String} sampleDataDir - default location for sample datastack downloads
- */
-export function getDefaultSettings() {
-  const defaultSettings = {
-    nWorkers: '-1',
-    taskgraphLoggingLevel: 'INFO',
-    loggingLevel: 'INFO',
-    sampleDataDir: '',
-    language: 'en'
-  };
-  return defaultSettings;
-}
-
-/** Getter function for settings store value.
- *
- * @param {object} obj.argsValues - an invest "args dict" with initial values
- * @param {string} key - setting key to get value
- *
- * @returns {string} - value of the setting key.
- */
-export async function getSettingsValue(key) {
-  const value = await investSettingsStore.getItem(key);
-  if (!value) {
-    return getDefaultSettings()[key];
-  }
-  return value;
-}
-
-/** Getter function for entire contents of store.
- *
- * @returns {Object} - key: value pairs of settings
- */
-export async function getAllSettings() {
-  try {
-    const promises = [];
-    const keys = Object.keys(getDefaultSettings());
-    keys.forEach((key) => {
-      promises.push(getSettingsValue(key));
-    });
-    const values = await Promise.all(promises);
-    const settings = Object.fromEntries(keys.map(
-      (_, i) => [keys[i], values[i]]
-    ));
-    return settings;
-  } catch (err) {
-    logger.error(err.message);
-    return getDefaultSettings();
-  }
-}
-
-/** Clear the settings store. */
-export async function clearSettingsStore() {
-  await investSettingsStore.clear();
-}
-
-/** Setter function for saving store values.
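Both ResourcesLinks above and the AboutModal later in this patch now build user's guide URLs from window.Workbench.LANGUAGE, the constant exposed by the preload script, instead of i18next's runtime language, so documentation links always follow the app-level language setting. A small usage sketch; the page name is a placeholder:

    const docs = 'some_model.html'; // illustrative page name, not from this patch
    const userGuideURL = `${window.Workbench.USERGUIDE_PATH}/${window.Workbench.LANGUAGE}/${docs}`;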
- *
- * @param {object} settingsObj - object with one or more key:value pairs
- *
- */
-export async function saveSettingsStore(settingsObj) {
-  try {
-    for (const [setting, value] of Object.entries(settingsObj)) {
-      await investSettingsStore.setItem(setting, value);
-    }
-  } catch (err) {
-    logger.error(`Error saving settings: ${err}`);
-  }
-}
diff --git a/workbench/src/renderer/components/SettingsModal/index.jsx b/workbench/src/renderer/components/SettingsModal/index.jsx
index b3972c43fc..ff5676f619 100644
--- a/workbench/src/renderer/components/SettingsModal/index.jsx
+++ b/workbench/src/renderer/components/SettingsModal/index.jsx
@@ -16,7 +16,6 @@ import {
 import { BsChevronExpand } from 'react-icons/bs';
 import { withTranslation } from 'react-i18next';

-import { getDefaultSettings } from './SettingsStorage';
 import { ipcMainChannels } from '../../../main/ipcMainChannels';
 import { getSupportedLanguages } from '../../server_requests';
@@ -29,11 +28,18 @@ class SettingsModal extends React.Component {
     this.state = {
       show: false,
       languageOptions: null,
+      loggingLevel: null,
+      taskgraphLoggingLevel: null,
+      nWorkers: null,
+      language: window.Workbench.LANGUAGE,
+      showConfirmLanguageChange: false,
     };
     this.handleShow = this.handleShow.bind(this);
     this.handleClose = this.handleClose.bind(this);
     this.handleChange = this.handleChange.bind(this);
-    this.handleReset = this.handleReset.bind(this);
+    this.handleChangeNumber = this.handleChangeNumber.bind(this);
+    this.loadSettings = this.loadSettings.bind(this);
+    this.handleChangeLanguage = this.handleChangeLanguage.bind(this);
     this.switchToDownloadModal = this.switchToDownloadModal.bind(this);
   }
@@ -42,6 +48,7 @@
     this.setState({
       languageOptions: languageOptions,
     });
+    this.loadSettings();
   }

   handleClose() {
@@ -54,17 +61,40 @@
     this.setState({ show: true });
   }

-  handleReset(event) {
-    event.preventDefault();
-    const resetSettings = getDefaultSettings();
-    this.props.saveSettings(resetSettings);
+  handleChange(event) {
+    const { name, value } = event.currentTarget;
+    this.setState({ [name]: value });
+    ipcRenderer.send(ipcMainChannels.SET_SETTING, name, value);
   }

-  handleChange(event) {
-    const newSettings = { ...this.props.investSettings };
+  handleChangeNumber(event) {
     const { name, value } = event.currentTarget;
-    newSettings[name] = value;
-    this.props.saveSettings(newSettings);
+    const numeral = Number(value);
+    this.setState({ [name]: numeral });
+    ipcRenderer.send(ipcMainChannels.SET_SETTING, name, numeral);
+  }
+
+  async loadSettings() {
+    const loggingLevel = await ipcRenderer
+      .invoke(ipcMainChannels.GET_SETTING, 'loggingLevel');
+    const taskgraphLoggingLevel = await ipcRenderer
+      .invoke(ipcMainChannels.GET_SETTING, 'taskgraphLoggingLevel');
+    const nWorkers = await ipcRenderer
+      .invoke(ipcMainChannels.GET_SETTING, 'nWorkers');
+    this.setState({
+      loggingLevel: loggingLevel,
+      taskgraphLoggingLevel: taskgraphLoggingLevel,
+      nWorkers: nWorkers
+    });
+  }
+
+  handleChangeLanguage() {
+    // if language has changed, refresh the app
+    if (this.state.language !== window.Workbench.LANGUAGE) {
+      // tell the main process to update the language setting in storage
+      // and then relaunch the app
+      ipcRenderer.invoke(ipcMainChannels.CHANGE_LANGUAGE, this.state.language);
+    }
   }

   switchToDownloadModal() {
@@ -73,21 +103,29 @@
   render() {
-    const { show, languageOptions } = this.state;
-    const { investSettings,
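The SettingsModal above now loads and persists settings over IPC rather than through the deleted localforage-backed SettingsStorage module; the main-process store that answers GET_SETTING and SET_SETTING is outside this patch. A minimal sketch of the round trip using only the channels referenced above:

    // Read a value (answered by the main-process settings store).
    async function readLoggingLevel() {
      return ipcRenderer.invoke(ipcMainChannels.GET_SETTING, 'loggingLevel');
    }

    // Persist a change immediately; there is no renderer-side cache to keep in sync.
    function writeLoggingLevel(value) {
      ipcRenderer.send(ipcMainChannels.SET_SETTING, 'loggingLevel', value);
    }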
clearJobsStorage, nCPU, t } = this.props;
+    const {
+      show,
+      languageOptions,
+      language,
+      loggingLevel,
+      taskgraphLoggingLevel,
+      nWorkers,
+      showConfirmLanguageChange,
+    } = this.state;
+    const { clearJobsStorage, nCPU, t } = this.props;
     const nWorkersOptions = [
       [-1, `${t('Synchronous')} (-1)`],
-      [0, `${t('Threaded task management')} (0)`]
+      [0, `${t('Threaded task management')} (0)`],
     ];
     for (let i = 1; i <= nCPU; i += 1) {
       nWorkersOptions.push([i, `${i} ${t('CPUs')}`]);
     }
-    const logLevelOptions = { // map value to display name
-      'DEBUG': t('DEBUG'),
-      'INFO': t('INFO'),
-      'WARNING': t('WARNING'),
-      'ERROR': t('ERROR')
+    const logLevelOptions = { // map value to display name
+      DEBUG: t('DEBUG'),
+      INFO: t('INFO'),
+      WARNING: t('WARNING'),
+      ERROR: t('ERROR'),
     };
     return (
@@ -124,18 +162,18 @@ class SettingsModal extends React.Component {
                  {t('Language')}
-
-
-                 {t('Changing this setting will refresh the app and close all tabs')}
-
                    this.setState({
+                     showConfirmLanguageChange: true,
+                     language: event.target.value
+                   })}
                  >
                    {Object.entries(languageOptions).map((entry) => {
                      const [value, displayName] = entry;
@@ -155,7 +193,7 @@ class SettingsModal extends React.Component {
                  id="logging-select"
                  as="select"
                  name="loggingLevel"
-                 value={investSettings.loggingLevel}
+                 value={loggingLevel}
                  onChange={this.handleChange}
                >
                  {Object.entries(logLevelOptions).map(
@@ -173,7 +211,7 @@ class SettingsModal extends React.Component {
                  id="taskgraph-logging-select"
                  as="select"
                  name="taskgraphLoggingLevel"
-                 value={investSettings.taskgraphLoggingLevel}
+                 value={taskgraphLoggingLevel}
                  onChange={this.handleChange}
                >
                  {Object.entries(logLevelOptions).map(
@@ -197,8 +235,8 @@ class SettingsModal extends React.Component {
                  as="select"
                  name="nWorkers"
                  type="text"
-                 value={investSettings.nWorkers}
-                 onChange={this.handleChange}
+                 value={nWorkers}
+                 onChange={this.handleChangeNumber}
                >
                  {nWorkersOptions.map(
                    (opt) =>
@@ -233,18 +271,6 @@ class SettingsModal extends React.Component {
                ) :
} - - - - -
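The n_workers dropdown rendered above is assembled from the CPU count handed down from the main process. For example, with nCPU = 4 the option list built by the loop earlier in render() works out to the following (labels shown untranslated):

    const nCPU = 4; // illustrative; the real value arrives via props
    const nWorkersOptions = [
      [-1, 'Synchronous (-1)'],
      [0, 'Threaded task management (0)'],
    ];
    for (let i = 1; i <= nCPU; i += 1) {
      nWorkersOptions.push([i, `${i} CPUs`]);
    }
    // -> adds [1, '1 CPUs'] through [4, '4 CPUs'] after the two fixed options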
+
+
+
+
+              ) :
+            }

        );
  }
}

SettingsModal.propTypes = {
-  saveSettings: PropTypes.func.isRequired,
  clearJobsStorage: PropTypes.func.isRequired,
-  investSettings: PropTypes.shape({
-    nWorkers: PropTypes.string,
-    taskgraphLoggingLevel: PropTypes.string,
-    loggingLevel: PropTypes.string,
-    sampleDataDir: PropTypes.string,
-    language: PropTypes.string,
-  }).isRequired,
  showDownloadModal: PropTypes.func.isRequired,
  nCPU: PropTypes.number.isRequired,
};
diff --git a/workbench/src/renderer/components/SetupTab/ArgInput/index.jsx b/workbench/src/renderer/components/SetupTab/ArgInput/index.jsx
index bae30995cc..e190419096 100644
--- a/workbench/src/renderer/components/SetupTab/ArgInput/index.jsx
+++ b/workbench/src/renderer/components/SetupTab/ArgInput/index.jsx
@@ -30,7 +30,8 @@ const { ipcRenderer } = window.Workbench.electron;
  * @returns {string} - the filtered and formatted part of the message
  */
 function filterSpatialOverlapFeedback(message, filepath) {
-  const newPrefix = i18n.t('Bounding box does not intersect at least one other:');
+  const newPrefix = i18n.t(
+    'Not all of the spatial layers overlap each other. Bounding box:');
   const bbox = message.split(`${filepath}:`).pop().split('|')[0];
   const bboxFormatted = bbox.split(' ').map(
     (str) => str.padEnd(22, ' ')
@@ -167,7 +168,7 @@ export default function ArgInput(props) {
       // Messages with this pattern include validation feedback about
       // multiple inputs, but the whole message is repeated for each input.
       // It's more readable if filtered on the individual input.
-      const pattern = 'Bounding boxes do not intersect';
+      const pattern = 'Not all of the spatial layers overlap each other';
       if (validationMessage.startsWith(pattern)) {
         validationMessage = filterSpatialOverlapFeedback(
           validationMessage, value
@@ -362,7 +363,7 @@ function AboutModal(props) {
   // create link to users guide entry for this arg
   // anchor name is the arg name, with underscores replaced with hyphens
   const userguideURL = `
-    ${window.Workbench.USERGUIDE_PATH}/${i18n.language}/${userguide}#${argkey.replace(/_/g, '-')}`;
+    ${window.Workbench.USERGUIDE_PATH}/${window.Workbench.LANGUAGE}/${userguide}#${argkey.replace(/_/g, '-')}`;
   return (