diff --git a/contributors/tobin-ford.md b/contributors/tobin-ford.md
new file mode 100644
index 00000000..6a71060c
--- /dev/null
+++ b/contributors/tobin-ford.md
@@ -0,0 +1,12 @@
+2024-08-01
+
+I hereby agree to the terms of the Contributors License Agreement,
+version 1.0, with MD5 checksum 46ea45f996295bdb0652333d516a6d0a.
+
+I furthermore declare that I am authorized and able to make this
+agreement and sign this declaration.
+
+Signed,
+
+Tobin Ford https://github.com/tobin-ford
+
diff --git a/pvdeg/geospatial.py b/pvdeg/geospatial.py
index a7c67722..c13880a3 100644
--- a/pvdeg/geospatial.py
+++ b/pvdeg/geospatial.py
@@ -285,6 +285,8 @@ def output_template(

 # we should be able to get rid of this with the new autotemplating function and decorator
+# this is helpful for users, so we should move it to a section in the documentation;
+# discuss with the group
 def template_parameters(func):
     """
     Output parameters for xarray template.
diff --git a/pvdeg/utilities.py b/pvdeg/utilities.py
index 96073709..e5f448b2 100644
--- a/pvdeg/utilities.py
+++ b/pvdeg/utilities.py
@@ -12,6 +12,7 @@ import xarray as xr
 from subprocess import run

+
 def gid_downsampling(meta, n):
     """
     Downsample the NSRDB GID grid by a factor of n
@@ -218,215 +219,216 @@ def _get_state(id):
     state_name = state_dict[id]
     return state_name

+
 def get_state_bbox(
     abbr: str = None,
-)-> np.ndarray:
+) -> np.ndarray:
     """
     Retrieve the corner coordinate pairs (northeast corner, then southwest corner) of a state's bounding box.
     """
     bbox_dict = {
-        'Alabama': [
+        "Alabama": [
             [-84.8882446289062, 35.0080299377441],
             [-88.4731369018555, 30.1375217437744],
-        ],
-        'Alaska': [
+        ],
+        "Alaska": [
             [-129.9795, 71.4410],
             [-179.1505, 51.2097],
-        ],
-        'Arizona': [
+        ],
+        "Arizona": [
             [-109.045196533203, 37.0042610168457],
             [-114.818359375, 31.3321762084961],
-        ],
-        'Arkansas': [
+        ],
+        "Arkansas": [
             [-89.6422424316406, 36.4996032714844],
             [-94.6178131103516, 33.0041046142578],
-        ],
-        'California': [
+        ],
+        "California": [
             [-114.13077545166, 42.0095024108887],
             [-124.482009887695, 32.5295219421387],
-        ],
-        'Colorado': [
+        ],
+        "Colorado": [
             [-102.041580200195, 41.0023612976074],
             [-109.060256958008, 36.9924240112305],
-        ],
-        'Connecticut': [
+        ],
+        "Connecticut": [
             [-71.7869873046875, 42.0505905151367],
             [-73.7277755737305, 40.9667053222656],
-        ],
-        'Delaware': [
+        ],
+        "Delaware": [
             [-74.9846343994141, 39.8394355773926],
             [-75.7890472412109, 38.4511260986328],
-        ],
-        'District Of Columbia': [
+        ],
+        "District Of Columbia": [
             [-76.8369, 39.1072],
             [-77.2369, 38.7072],
-        ],
-        'Florida': [
+        ],
+        "Florida": [
             [-79.9743041992188, 31.0009689331055],
             [-87.6349029541016, 24.3963069915771],
-        ],
-        'Georgia': [
+        ],
+        "Georgia": [
             [-80.7514266967773, 35.0008316040039],
             [-85.6051712036133, 30.3557567596436],
-        ],
-        'Hawaii': [
+        ],
+        "Hawaii": [
             [-154.8066, 22.2356],
             [-160.2471, 18.9117],
-        ],
-        'Idaho': [
+        ],
+        "Idaho": [
             [-111.043563842773, 49.000846862793],
             [-117.243034362793, 41.9880561828613],
-        ],
-        'Illinois': [
+        ],
+        "Illinois": [
             [-87.0199203491211, 42.5083045959473],
             [-91.513053894043, 36.9701309204102],
-        ],
-        'Indiana': [
+        ],
+        "Indiana": [
             [-84.7845764160156, 41.7613716125488],
             [-88.0997085571289, 37.7717399597168],
-        ],
-        'Iowa': [
+        ],
+        "Iowa": [
             [-90.1400604248047, 43.5011367797852],
             [-96.6397171020508, 40.3755989074707],
-        ],
-        'Kansas': [
+        ],
+        "Kansas": [
             [-94.5882034301758, 40.0030975341797],
             [-102.0517578125, 36.9930801391602],
-        ],
-        'Kentucky': [
+        ],
+        "Kentucky": [
             [-81.9645385742188, 39.1474609375],
             [-89.5715103149414, 36.4967155456543],
-        ],
-        'Louisiana': [
+        ],
"Louisiana": [ [-88.817008972168, 33.019458770752], [-94.0431518554688, 28.9210300445557], - ], - 'Maine': [ + ], + "Maine": [ [-66.9250717163086, 47.4598426818848], [-71.0841751098633, 42.9561233520508], - ], - 'Maryland': [ + ], + "Maryland": [ [-75.0395584106445, 39.7229347229004], [-79.4871978759766, 37.8856391906738], - ], - 'Massachusetts': [ + ], + "Massachusetts": [ [-69.8615341186523, 42.8867149353027], [-73.5081481933594, 41.1863288879395], - ], - 'Michigan': [ + ], + "Michigan": [ [-82.122802734375, 48.3060646057129], [-90.4186248779297, 41.6960868835449], - ], - 'Minnesota': [ + ], + "Minnesota": [ [-89.4833831787109, 49.3844909667969], [-97.2392654418945, 43.4994277954102], - ], - 'Mississippi': [ + ], + "Mississippi": [ [-88.0980072021484, 34.9960556030273], [-91.6550140380859, 30.1477890014648], - ], - 'Missouri': [ + ], + "Missouri": [ [-89.0988388061523, 40.6136360168457], [-95.7741470336914, 35.9956817626953], - ], - 'Montana': [ + ], + "Montana": [ [-104.039558410645, 49.0011100769043], [-116.050003051758, 44.3582191467285], - ], - 'Nebraska': [ + ], + "Nebraska": [ [-95.3080520629883, 43.0017013549805], [-104.053520202637, 39.9999961853027], - ], - 'Nevada': [ + ], + "Nevada": [ [-114.039642333984, 42.0022087097168], [-120.005729675293, 35.0018730163574], - ], - 'New Hampshire': [ + ], + "New Hampshire": [ [-70.534065246582, 45.3057823181152], [-72.55712890625, 42.6970405578613], - ], - 'New Jersey': [ + ], + "New Jersey": [ [-73.8850555419922, 41.3574256896973], [-75.5633926391602, 38.7887535095215], - ], - 'New Mexico': [ + ], + "New Mexico": [ [-103.000862121582, 37.0001411437988], [-109.050178527832, 31.3323001861572], - ], - 'New York': [ + ], + "New York": [ [-71.8527069091797, 45.0158615112305], [-79.7625122070312, 40.4773979187012], - ], - 'North Carolina': [ + ], + "North Carolina": [ [-75.4001159667969, 36.5880393981934], [-84.3218765258789, 33.7528762817383], - ], - 'North Dakota': [ + ], + "North Dakota": [ [-96.5543899536133, 49.0004920959473], [-104.049270629883, 45.9350357055664], - ], - 'Ohio': [ + ], + "Ohio": [ [-80.5189895629883, 42.3232383728027], [-84.8203430175781, 38.4031982421875], - ], - 'Oklahoma': [ + ], + "Oklahoma": [ [-94.4312133789062, 37.0021362304688], [-103.002571105957, 33.6191940307617], - ], - 'Oregon': [ + ], + "Oregon": [ [-116.463500976562, 46.2991027832031], [-124.703544616699, 41.9917907714844], - ], - 'Pennsylvania': [ + ], + "Pennsylvania": [ [-74.6894989013672, 42.5146903991699], [-80.5210876464844, 39.7197647094727], - ], - 'Rhode Island': [ + ], + "Rhode Island": [ [-71.1204681396484, 42.018856048584], [-71.9070053100586, 41.055534362793], - ], - 'South Carolina': [ + ], + "South Carolina": [ [-78.4992980957031, 35.2155418395996], [-83.35400390625, 32.0333099365234], - ], - 'South Dakota': [ + ], + "South Dakota": [ [-96.4363327026367, 45.9454536437988], [-104.05770111084, 42.4798889160156], - ], - 'Tennessee': [ + ], + "Tennessee": [ [-81.6468963623047, 36.6781196594238], [-90.310302734375, 34.9829788208008], - ], - 'Texas': [ + ], + "Texas": [ [-93.5078201293945, 36.5007057189941], [-106.645652770996, 25.8370609283447], - ], - 'Utah': [ + ], + "Utah": [ [-109.041069030762, 42.0013885498047], [-114.053932189941, 36.9979667663574], - ], - 'Vermont': [ + ], + "Vermont": [ [-71.4653549194336, 45.0166664123535], [-73.437744140625, 42.7269325256348], - ], - 'Virginia': [ + ], + "Virginia": [ [-75.2312240600586, 39.4660148620605], [-83.6754150390625, 36.5407867431641], - ], - 'Washington': [ + ], + "Washington": [ 
             [-116.917427062988, 49.00244140625],
             [-124.836097717285, 45.5437202453613],
-        ],
-        'West Virginia': [
+        ],
+        "West Virginia": [
             [-77.7190246582031, 40.638801574707],
             [-82.6447448730469, 37.2014808654785],
-        ],
-        'Wisconsin': [
+        ],
+        # NOTE: the coordinates previously listed under Wisconsin were Wyoming's;
+        # approximate Wisconsin values substituted, Wyoming entry added below
+        "Wisconsin": [
+            [-86.8054, 47.0806],
+            [-92.8881, 42.4920],
+        ],
+        "Wyoming": [
             [-104.052154541016, 45.0034217834473],
             [-111.05689239502, 40.9948768615723],
-        ]
-    }
+        ],
+    }

     name = _get_state(abbr)
     return np.array(bbox_dict[name])
@@ -692,16 +694,15 @@ def tilt_azimuth_scan(
             print("\r", end="")
     return tilt_azimuth_series

-def _meta_df_from_csv(
-    file_paths : list[str]
-    ):
+
+def _meta_df_from_csv(file_paths: list[str]):
     """
    Helper function: create a metadata dataframe from a list of local weather csv file paths. (Directory input is not functional yet.)

    Parameters
    ----------
    file_paths : list[str]
-        List of local weather csv files to strip metadata from. For example: download a collection of weather files from the NSRDB web viewer. 
+        List of local weather csv files to strip metadata from. For example: download a collection of weather files from the NSRDB web viewer.

    Returns
    -------
    """
@@ -712,10 +713,9 @@ def _meta_df_from_csv(
     # list[path] instead of just string
     # or a directory, just use csv from provided directory

-
     def read_meta(path):
         df = pd.read_csv(path, nrows=1)
-        listed = df.to_dict(orient='list')
+        listed = df.to_dict(orient="list")
         stripped = {key: value[0] for key, value in listed.items()}
         return stripped

@@ -729,22 +729,23 @@ def read_meta(path):

     # correct level of precision??
     conversions = {
-        'Location ID' : np.int32,
-        'Latitude' : np.double,
-        'Longitude' : np.double,
-        'Time Zone' : np.int8,
-        'Elevation' : np.int16,
-        'Local Time Zone' : np.int8
+        "Location ID": np.int32,
+        "Latitude": np.double,
+        "Longitude": np.double,
+        "Time Zone": np.int8,
+        "Elevation": np.int16,
+        "Local Time Zone": np.int8,
     }

     metadata_df = metadata_df.astype(conversions)

     return metadata_df

+
 def _weather_ds_from_csv(
-    file_paths : list[str],
-    year : int,
-    # select year, should be able to provide single year, or list of years
-    ):
+    file_paths: list[str],
+    year: int,
+    # select year, should be able to provide single year, or list of years
+):
     """
     Helper Function: Create a geospatial xarray dataset from local csv files.
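Reviewer note on the two helpers above: `get_state_bbox` resolves a state abbreviation through `_get_state` and returns a 2x2 numpy array of corner coordinates, and `_meta_df_from_csv` builds the typed metadata frame that `geospatial_from_csv` later filters by gid. A minimal usage sketch follows — the file paths are hypothetical, and it assumes a two-letter abbreviation like "CO" resolves via `_get_state`:

```python
import pvdeg

# corner coordinates for Colorado: [[NE long, NE lat], [SW long, SW lat]]
bbox = pvdeg.utilities.get_state_bbox("CO")
print(bbox.shape)  # (2, 2)

# build a metadata DataFrame from locally downloaded NSRDB csv files
# (hypothetical paths; any csvs saved from the NSRDB data viewer should work)
meta_df = pvdeg.utilities._meta_df_from_csv(
    file_paths=["./nsrdb/449211.csv", "./nsrdb/452064.csv"]
)
print(meta_df.dtypes)  # Location ID int32, Latitude/Longitude float64, ...
```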
@@ -754,20 +755,20 @@ def _weather_ds_from_csv(

    Returns
    -------
    """
-# ds = xr.open_dataset(
-#     fp,
-#     engine="h5netcdf",
-#     phony_dims="sort",
-#     chunks={"phony_dim_0": chunks[0], "phony_dim_1": chunks[1]},
-#     drop_variables=drop_variables,
-#     mask_and_scale=False,
-#     decode_cf=True,
-# )
+    # ds = xr.open_dataset(
+    #     fp,
+    #     engine="h5netcdf",
+    #     phony_dims="sort",
+    #     chunks={"phony_dim_0": chunks[0], "phony_dim_1": chunks[1]},
+    #     drop_variables=drop_variables,
+    #     mask_and_scale=False,
+    #     decode_cf=True,
+    # )

    # PROBLEM: not all csv files contain all years, but these all appear to have 2004
    # when years are missing, xarray sees mismatched coordinates and populates the missing values with nan
    # this is wrong; we are using tmy data, so we ignore the year since it represents a typical meteorological year
-    
+
    # Prepare a list to hold the DataFrames
    dataframes = []

@@ -775,31 +776,42 @@
    for file_path in file_paths:
        # Extract GID from the filename
        header = pd.read_csv(file_path, nrows=1)
-        gid = header['Location ID'][0]
-        
+        gid = header["Location ID"][0]
+
        # Read the CSV, skipping rows to get to the relevant data
        df = pd.read_csv(file_path, skiprows=2)
-        
+
        # Add GID and Time columns
-        df['gid'] = gid
-        df['time'] = pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour', 'Minute']])
+        df["gid"] = gid
+        df["time"] = pd.to_datetime(df[["Year", "Month", "Day", "Hour", "Minute"]])

        # TODO: allow this to take a list of years
-        df = df[df['time'].dt.year == year]
+        df = df[df["time"].dt.year == year]

-        # add generic approach, dont manually do this, could change based on user selections 
+        # TODO: select columns generically instead of hardcoding them; they could change based on user selections
        # Select relevant columns and append to the list
        # df = df[['gid', 'time', 'GHI', 'Temperature', 'DHI', 'DNI', 'Surface Albedo', 'Wind Direction', 'Wind Speed']]
-        df = df[['gid', 'time', 'GHI', 'Temperature', 'DHI', 'DNI', 'Surface Albedo', 'Wind Speed']]
+        df = df[
+            [
+                "gid",
+                "time",
+                "GHI",
+                "Temperature",
+                "DHI",
+                "DNI",
+                "Surface Albedo",
+                "Wind Speed",
+            ]
+        ]

        dataframes.append(df)

    # Combine all DataFrames into one
    combined_df = pd.concat(dataframes)

    # Convert the combined DataFrame to an xarray Dataset
-    weather_ds = combined_df.set_index(['time', 'gid']).to_xarray()
+    weather_ds = combined_df.set_index(["time", "gid"]).to_xarray()

    # combined_df = combined_df.set_index(['time', 'gid']).sort_index()
    # weather_ds = combined_df.set_index(['time', 'gid']).to_xarray()
@@ -809,27 +821,29 @@
    #     DHI                (gid, time) int64 12kB 0 0 0 0 0 0 0 ... 56 113 94 129 54 20
    #     DNI                (gid, time) int64 12kB 0 0 0 0 0 0 ... 1004 718 728 337 0 0
    #     Surface Albedo     (gid, time) float64 12kB 0.8 0.8 0.8 0.8 ... 0.8 0.8 0.8 0.8
-    #     Wind Speed 
-
-    weather_ds = weather_ds.rename_vars({
-        'GHI' : 'ghi',
-        'Temperature' : 'temp_air',
-        'DHI' : 'dhi',
-        'DNI' : 'dni',
-        'Wind Speed' : 'wind_speed',
-        })
+    #     Wind Speed
+
+    weather_ds = weather_ds.rename_vars(
+        {
+            "GHI": "ghi",
+            "Temperature": "temp_air",
+            "DHI": "dhi",
+            "DNI": "dni",
+            "Wind Speed": "wind_speed",
+        }
+    )

    return weather_ds

+
 # not functional
 def geospatial_from_csv(
-    file_path : list[str],
-    year : int # should be able to take a range of years
-    ):
+    file_path: list[str], year: int  # should be able to take a range of years
+):
    """
-    Create an xarray dataset contaning aeospatial weather data and a pandas dataframe
-    containing geospatial metadata from a list of local csv files.
-    
+    Create an xarray dataset containing geospatial weather data and a pandas dataframe
+    containing geospatial metadata from a list of local csv files.
+
    Useful for importing data from NSRDB api viewer
    https://nsrdb.nrel.gov/data-viewer
    when downloaded locally as csv

@@ -838,43 +852,46 @@
    file_path : list[str]
        List of absolute paths to csv files in string form.
    year : int
-        Single year of data to use from local csv files. 
+        Single year of data to use from local csv files.
    """
-    weather_ds, meta_df = _weather_ds_from_csv(file_path, year), _meta_df_from_csv(file_path)
+    weather_ds, meta_df = _weather_ds_from_csv(file_path, year), _meta_df_from_csv(
+        file_path
+    )

    # only want to keep meta from given file using GIDs from DS
    # gather included files' gids from xarray
-    included_gids = weather_ds.coords['gid'].values
+    included_gids = weather_ds.coords["gid"].values

    # filter the metadata to only include gid values found above
-    filtered_meta = meta_df[meta_df['Location ID'].isin(included_gids)]
+    filtered_meta = meta_df[meta_df["Location ID"].isin(included_gids)]

    # reset the indices of the updated dataframe (might not be necessary)
    filtered_meta = filtered_meta.reset_index(drop=True)

    # rename Location ID column to gid
-    filtered_meta = filtered_meta.rename({'Location ID' : 'gid'}, axis="columns")
+    filtered_meta = filtered_meta.rename({"Location ID": "gid"}, axis="columns")

    return weather_ds, filtered_meta

+
 def strip_normalize_tmy(df, start_time, end_time):
    """
-    Normalize the DataFrame to start at 00:00 and extract the data between the 
+    Normalize the DataFrame to start at 00:00 and extract the data between the
    specified start and end times. Then shift back to the original indexes.
-    
+
    Parameters:
    -----------
    df : pd.Dataframe
        dataframe with a datetime index and tmy data
-    start_time : datetime.datetime 
+    start_time : datetime.datetime
        start time
-    end_time : datetime.datetime 
+    end_time : datetime.datetime
        end time
-    
+
    Returns:
    --------
-    sub_results : pd.DataFrame 
+    sub_results : pd.DataFrame
        extracted subset of tmy data
    """

@@ -883,16 +900,25 @@ def strip_normalize_tmy(df, start_time, end_time):
    end_time = end_time.replace(tzinfo=tz)

    initial_time = df.index[0]
-    shifted_index = df.index - pd.DateOffset(hours=initial_time.hour, minutes=initial_time.minute, seconds=initial_time.second)
+    shifted_index = df.index - pd.DateOffset(
+        hours=initial_time.hour,
+        minutes=initial_time.minute,
+        seconds=initial_time.second,
+    )
    df.index = shifted_index

    mask = (df.index >= start_time) & (df.index <= end_time)
    sub_results = df.loc[mask]
-    
-    sub_results.index = sub_results.index + pd.DateOffset(hours=initial_time.hour, minutes=initial_time.minute, seconds=initial_time.second)
-    
+
+    sub_results.index = sub_results.index + pd.DateOffset(
+        hours=initial_time.hour,
+        minutes=initial_time.minute,
+        seconds=initial_time.second,
+    )
+
    return sub_results

+
 def new_id(collection):
    """
    Generate a 5-letter uppercase string unique among all keys in a dictionary.
@@ -906,20 +932,20 @@ def new_id(collection):
    -------------
    Unique 5 letter string of uppercase characters.
""" - if not isinstance(collection, (dict,OrderedDict)): + if not isinstance(collection, (dict, OrderedDict)): raise TypeError(f"{collection.__name__} type {type(collection)} expected dict") - gen = lambda : ''.join(choices(ascii_uppercase, k = 5)) + gen = lambda: "".join(choices(ascii_uppercase, k=5)) id = gen() - while id in collection.keys(): + while id in collection.keys(): id = gen() - - return id - + + return id + + def restore_gids( - original_meta_df : pd.DataFrame, - analysis_result_ds : xr.Dataset - )->xr.Dataset: + original_meta_df: pd.DataFrame, analysis_result_ds: xr.Dataset +) -> xr.Dataset: """ Restore gids to results dataset. For desired behavior output data must have identical ordering to input data, otherwise will fail silently by @@ -938,26 +964,27 @@ def restore_gids( dataset like ``analysis_result_ds`` with new datavariable, ``gid`` holding the original gids of each result from the input metadata. Warning: if meta order is different than result ordering gids will - be assigned incorrectly. + be assigned incorrectly. """ - flattened = analysis_result_ds.stack(points=('latitude', 'longitude')) + flattened = analysis_result_ds.stack(points=("latitude", "longitude")) gids = original_meta_df.index.values # Create a DataArray with the gids and assign it to the Dataset - gids_da = xr.DataArray(gids, coords=[flattened['points']], name='gid') + gids_da = xr.DataArray(gids, coords=[flattened["points"]], name="gid") # Unstack the DataArray to match the original dimensions of the Dataset - gids_da = gids_da.unstack('points') + gids_da = gids_da.unstack("points") restored_gids_ds = analysis_result_ds.assign(gid=gids_da) return restored_gids_ds + def _find_bbox_corners(coord_1=None, coord_2=None, coords=None): """ - find the min and max latitude and longitude values from 2 lists + find the min and max latitude and longitude values from 2 lists or a tall numpy array of the shape [[lat, long], ...] Parameters: @@ -966,13 +993,13 @@ def _find_bbox_corners(coord_1=None, coord_2=None, coords=None): Top left corner of bounding box as lat-long coordinate pair as list or tuple. coord_2 : list, tuple - Bottom right corner of bounding box as lat-long coordinate pair in list + Bottom right corner of bounding box as lat-long coordinate pair in list or tuple. coords : np.array 2d tall numpy array of [lat, long] pairs. Bounding box around the most - extreme entries of the array. Alternative to providing top left and + extreme entries of the array. Alternative to providing top left and bottom right box corners. Could be used to select amongst a subset of - data points. ex) Given all points for the planet, downselect based on + data points. ex) Given all points for the planet, downselect based on the most extreme coordinates for the United States coastline information. Returns: -------- @@ -984,8 +1011,8 @@ def _find_bbox_corners(coord_1=None, coord_2=None, coords=None): lats = [coord_1[0], coord_2[0]] longs = [coord_1[1], coord_2[1]] elif coords.any(): - lats = coords[:,0] - longs = coords[:,1] + lats = coords[:, 0] + longs = coords[:, 1] min_lat, max_lat = np.min(lats), np.max(lats) min_long, max_long = np.min(longs), np.max(longs) @@ -1010,36 +1037,39 @@ def _plot_bbox_corners(ax, coord_1=None, coord_2=None, coords=None): ax.set_ylim([lats[0], lats[1]]) return + def _add_cartopy_features(ax): """ Add cartopy features to an existing matplotlib.pyplot axis. 
""" import cartopy.feature as cfeature + features = [ cfeature.BORDERS, cfeature.COASTLINE, cfeature.LAND, cfeature.OCEAN, cfeature.LAKES, - cfeature.RIVERS - ] + cfeature.RIVERS, + ] for i in features: if i == cfeature.BORDERS: - ax.add_feature(i, linestyle=':') + ax.add_feature(i, linestyle=":") else: ax.add_feature(i) + def _calc_elevation_weights( - elevations : np.array, - coords : np.array, - k_neighbors : int, - method : str, - normalization : str, - kdtree, - )->np.array: + elevations: np.array, + coords: np.array, + k_neighbors: int, + method: str, + normalization: str, + kdtree, +) -> np.array: """ - utility function. caluclate a weight for each point in a dataset + utility function. caluclate a weight for each point in a dataset to use for probabalistic downselection. Parameters: @@ -1051,7 +1081,7 @@ def _calc_elevation_weights( k_neighbors : int number of neighbors to use in local elevation calculation at each point method : str, (default = 'mean') - method to calculate elevation weights for each point. + method to calculate elevation weights for each point. Options : `'mean'`, `'sum'`, `'median'` normalization : str, (default = 'linear') function to apply when normalizing weights. Logarithmic uses log_e/ln @@ -1070,32 +1100,34 @@ def _calc_elevation_weights( weights = np.empty_like(elevations) for i, coord in enumerate(coords): - indicies = kdtree.query(coord.reshape(1,-1), k=k_neighbors+1)[1][0] # +1 to include current point - delta_elevation = np.abs(elevations[indicies[1:]] - elevations[i]) - - if method == 'mean': + indicies = kdtree.query(coord.reshape(1, -1), k=k_neighbors + 1)[1][ + 0 + ] # +1 to include current point + delta_elevation = np.abs(elevations[indicies[1:]] - elevations[i]) + + if method == "mean": delta = np.mean(delta_elevation) - elif method == 'sum': + elif method == "sum": delta = np.sum(delta_elevation) - elif method == 'median': + elif method == "median": delta = np.median(delta_elevation) weights[i] = delta - - if normalization == 'linear': - pass # do nothing - elif normalization == 'exponential': + if normalization == "linear": + pass # do nothing + elif normalization == "exponential": weights = np.exp(weights) - elif normalization == 'logarithmic': + elif normalization == "logarithmic": weights = np.log(weights) - normalized_weights = np.divide( - np.subtract(weights, np.min(weights)), - np.subtract(np.max(weights), np.min(weights)) - ) - + normalized_weights = np.divide( + np.subtract(weights, np.min(weights)), + np.subtract(np.max(weights), np.min(weights)), + ) + return normalized_weights + def fix_metadata(meta): """ meta gid was appearing with ('lat' : {gid: lat}, 'long' : {gid: long}), ... @@ -1106,20 +1138,21 @@ def fix_metadata(meta): meta : dict dictionary of metadata with key : dict pairs - Returns + Returns fixed_meta : dict dictionary of metadata with key : value pairs """ - fixed_metadata = {key : list(subdict.values())[0] for key, subdict in meta.items()} + fixed_metadata = {key: list(subdict.values())[0] for key, subdict in meta.items()} return fixed_metadata -# we want this to only exist for things that can be run on kestrel + +# we want this to only exist for things that can be run on kestrel # moving away from hpc tools so this may not be useful in the future def nrel_kestrel_check(): """ Check if the user is on Kestrel HPC environment. Passes silently or raises a ConnectionError if not running on Kestrel. 
-    This will fail on AWS 
+    This will fail on AWS

    Returns:
    --------
    None

@@ -1131,28 +1164,21 @@
    Kestrel Documentation :
    https://nrel.github.io/HPC/Documentation/
    """
-    kestrel_hostname = 'kestrel.hpc.nrel.gov'
+    kestrel_hostname = "kestrel.hpc.nrel.gov"

-    host = run(
-        args=['hostname', '-f'],
-        shell=False,
-        capture_output=True,
-        text=True
-    )
-    device_domain = '.'.join(host.stdout.split('.')[-4:])[:-1]
+    host = run(args=["hostname", "-f"], shell=False, capture_output=True, text=True)
+    device_domain = ".".join(host.stdout.split(".")[-4:])[:-1]

    if kestrel_hostname != device_domain:
        raise ConnectionError(
            f"""
-            connected to {device_domain} 
+            connected to {device_domain}
            not a node of {kestrel_hostname}
            """
-        )
+        )
+

-def remove_scenario_filetrees(
-    fp,
-    pattern='pvd_job_*'
-    ):
+def remove_scenario_filetrees(fp, pattern="pvd_job_*"):
    """
    Move `cwd` to fp and remove all scenario file trees from fp directory.
    Permanently deletes all scenario file trees. USE WITH CAUTION.

@@ -1162,7 +1188,7 @@
    fp : string
        file path to directory where all scenario files should be removed
    pattern : str
-        pattern to search for using glob. Default value of `pvd_job_` is 
+        pattern to search for using glob. Default value of `pvd_job_` is
        equivalent to `pvd_job_*` in bash.

    Returns:
@@ -1187,11 +1213,47 @@ def _update_pipeline_task(task):
    """
    from importlib import import_module

-    module_name, func_name = task['qualified_function'].rsplit('.', 1)
-    params = task['params'] # need to do this to maintain ordering
+    module_name, func_name = task["qualified_function"].rsplit(".", 1)
+    params = task["params"]  # need to do this to maintain ordering

    module = import_module(module_name)
    func = getattr(module, func_name)

-    task['job'] = func
-    del task['qualified_function']
-    del task['params'] # maintain ordering,
-    task['params'] = params
\ No newline at end of file
+    task["job"] = func
+    del task["qualified_function"]
+    del task["params"]  # maintain ordering,
+    task["params"] = params
+
+
+def compare_templates(
+    ds1: xr.Dataset, ds2: xr.Dataset, atol=1e-10, consider_nan_equal=True
+) -> bool:
+    """Compare loaded datasets with "empty-like" values"""
+
+    if ds1.dims != ds2.dims:
+        return False
+
+    if set(ds1.coords.keys()) != set(ds2.coords.keys()):
+        return False
+
+    for coord in ds1.coords:
+        if ds1.coords[coord].dtype.kind in {"i", "f"}:
+            if not np.allclose(
+                ds1.coords[coord], ds2.coords[coord], atol=atol
+            ):  # Use np.allclose for numeric coordinates
+                return False
+        elif ds1.coords[coord].dtype.kind == "M":  # datetime64
+            if not np.array_equal(
+                ds1.coords[coord], ds2.coords[coord]
+            ):  # Use array equality for datetime coordinates
+                return False
+        else:
+            if not np.array_equal(ds1.coords[coord], ds2.coords[coord]):
+                return False
+
+    if set(ds1.data_vars.keys()) != set(ds2.data_vars.keys()):
+        return False
+
+    for dim in ds1.dims:
+        if not ds1.indexes[dim].equals(ds2.indexes[dim]):
+            return False
+
+    return True
diff --git a/tests/data/humidity_template.nc b/tests/data/humidity_template.nc
new file mode 100644
index 00000000..63bbc255
Binary files /dev/null and b/tests/data/humidity_template.nc differ
diff --git a/tests/sandbox.ipynb b/tests/sandbox.ipynb
index 8d4f5eea..d828964b 100644
--- a/tests/sandbox.ipynb
+++ b/tests/sandbox.ipynb
@@ -81,834 +81,155 @@
     "import pickle\n",
     "import pandas as pd\n",
     "import numpy as np\n",
+    "import xarray as xr\n",
     "import os\n",
     "from pvdeg import TEST_DATA_DIR\n",
     "\n",
-    "with 
open(r\"C:\\Users\\tford\\Downloads\\summit-weather.pkl\", 'rb') as f:\n", - " GEO_WEATHER = pickle.load(f)\n", - "\n", - "GEO_META = pd.read_csv(r\"C:\\Users\\tford\\Downloads\\summit-meta.csv\", index_col=0)" + "GEO_META = pd.read_csv(os.path.join(TEST_DATA_DIR, \"summit-meta.csv\"), index_col=0)\n", + "with open(os.path.join(TEST_DATA_DIR, \"summit-weather.pkl\"), 'rb') as f:\n", + " GEO_WEATHER = pickle.load(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "autotemplate" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 9MB\n",
-       "Dimensions:            (time: 17520, gid: 11)\n",
-       "Coordinates:\n",
-       "  * gid                (gid) int64 88B 449211 452064 453020 ... 460613 462498\n",
-       "  * time               (time) datetime64[ns] 140kB 2022-01-01 ... 2022-12-31T...\n",
-       "Data variables:\n",
-       "    temp_air           (time, gid) float64 2MB -12.0 -8.1 -14.2 ... -4.3 -6.2\n",
-       "    wind_speed         (time, gid) float64 2MB 0.6 0.6 0.3 0.6 ... 0.9 1.0 1.1\n",
-       "    dhi                (time, gid) float64 2MB 0.0 0.0 0.0 ... 13.0 18.0 19.0\n",
-       "    ghi                (time, gid) float64 2MB 0.0 0.0 0.0 ... 13.0 24.0 19.0\n",
-       "    dni                (time, gid) float64 2MB 0.0 0.0 0.0 0.0 ... 0.0 126.0 1.0\n",
-       "    relative_humidity  (time, gid) float64 2MB 99.93 79.41 100.0 ... 95.93 100.0\n",
-       "Attributes:\n",
-       "    full_version_record:  {"rex": "0.2.80", "pandas": "2.0.0", "numpy": "1.23...\n",
-       "    package:              rex\n",
-       "    version:              4.0.0
" - ], - "text/plain": [ - " Size: 9MB\n", - "Dimensions: (time: 17520, gid: 11)\n", - "Coordinates:\n", - " * gid (gid) int64 88B 449211 452064 453020 ... 460613 462498\n", - " * time (time) datetime64[ns] 140kB 2022-01-01 ... 2022-12-31T...\n", - "Data variables:\n", - " temp_air (time, gid) float64 2MB -12.0 -8.1 -14.2 ... -4.3 -6.2\n", - " wind_speed (time, gid) float64 2MB 0.6 0.6 0.3 0.6 ... 0.9 1.0 1.1\n", - " dhi (time, gid) float64 2MB 0.0 0.0 0.0 ... 13.0 18.0 19.0\n", - " ghi (time, gid) float64 2MB 0.0 0.0 0.0 ... 13.0 24.0 19.0\n", - " dni (time, gid) float64 2MB 0.0 0.0 0.0 0.0 ... 0.0 126.0 1.0\n", - " relative_humidity (time, gid) float64 2MB 99.93 79.41 100.0 ... 95.93 100.0\n", - "Attributes:\n", - " full_version_record: {\"rex\": \"0.2.80\", \"pandas\": \"2.0.0\", \"numpy\": \"1.23...\n", - " package: rex\n", - " version: 4.0.0" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "GEO_WEATHER" + "autotemplate_result = pvdeg.geospatial.auto_template(\n", + " func=pvdeg.humidity.module, \n", + " ds_gids=GEO_WEATHER\n", + ").compute()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
latitudelongitudealtitudetzcountrystatecountywind_height
44921139.89-106.422954-7United StatesColoradoSummit2
45206439.89-106.302486-7United StatesColoradoSummit2
45302039.69-106.263485-7United StatesColoradoSummit2
45491639.81-106.182767-7United StatesColoradoSummit2
45586739.81-106.142883-7United StatesColoradoSummit2
45587739.41-106.143859-7United StatesColoradoSummit2
45682739.45-106.103634-7United StatesColoradoSummit2
45777639.41-106.063363-7United StatesColoradoSummit2
45967039.65-105.983261-7United StatesColoradoSummit2
46061339.53-105.943190-7United StatesColoradoSummit2
46249839.57-105.863523-7United StatesColoradoSummit2
\n", - "
" - ], - "text/plain": [ - " latitude longitude altitude tz country state county \\\n", - "449211 39.89 -106.42 2954 -7 United States Colorado Summit \n", - "452064 39.89 -106.30 2486 -7 United States Colorado Summit \n", - "453020 39.69 -106.26 3485 -7 United States Colorado Summit \n", - "454916 39.81 -106.18 2767 -7 United States Colorado Summit \n", - "455867 39.81 -106.14 2883 -7 United States Colorado Summit \n", - "455877 39.41 -106.14 3859 -7 United States Colorado Summit \n", - "456827 39.45 -106.10 3634 -7 United States Colorado Summit \n", - "457776 39.41 -106.06 3363 -7 United States Colorado Summit \n", - "459670 39.65 -105.98 3261 -7 United States Colorado Summit \n", - "460613 39.53 -105.94 3190 -7 United States Colorado Summit \n", - "462498 39.57 -105.86 3523 -7 United States Colorado Summit \n", - "\n", - " wind_height \n", - "449211 2 \n", - "452064 2 \n", - "453020 2 \n", - "454916 2 \n", - "455867 2 \n", - "455877 2 \n", - "456827 2 \n", - "457776 2 \n", - "459670 2 \n", - "460613 2 \n", - "462498 2 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "GEO_META" + "humidity_template = xr.open_dataset(os.path.join(TEST_DATA_DIR, 'humidity_template.nc')).compute()" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def compare_datasets(ds1: xr.Dataset, ds2: xr.Dataset, atol=1e-10) -> bool:\n", + " \"\"\"Compare loaded datasets with \"empty-like\" values\"\"\"\n", + "\n", + " if ds1.dims != ds2.dims:\n", + " return False\n", + "\n", + " if set(ds1.coords.keys()) != set(ds2.coords.keys()):\n", + " return False\n", + "\n", + " for coord in ds1.coords:\n", + " if ds1.coords[coord].dtype.kind in {'i', 'f'}:\n", + " # Use np.allclose for numeric coordinates\n", + " if not np.allclose(ds1.coords[coord], ds2.coords[coord], atol=atol):\n", + " return False\n", + " elif ds1.coords[coord].dtype.kind == 'M': # datetime64 type\n", + " # Use array equality for datetime coordinates\n", + " if not np.array_equal(ds1.coords[coord], ds2.coords[coord]):\n", + " return False\n", + " else:\n", + " if not np.array_equal(ds1.coords[coord], ds2.coords[coord]):\n", + " return False\n", + "\n", + " if set(ds1.data_vars.keys()) != set(ds2.data_vars.keys()):\n", + " return False\n", + "\n", + " for var in ds1.data_vars:\n", + " if not np.allclose(ds1[var], ds2[var], atol=atol):\n", + " return False\n", + "\n", + " for dim in ds1.dims:\n", + " if not ds1.indexes[dim].equals(ds2.indexes[dim]):\n", + " return False\n", + "\n", + " return True" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "The array tilt angle was not provided, therefore the latitude tilt of 39.9 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the latitude tilt of 39.9 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the latitude tilt of 39.7 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the latitude tilt of 39.8 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the 
latitude tilt of 39.8 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the latitude tilt of 39.4 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the latitude tilt of 39.5 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the latitude tilt of 39.4 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the latitude tilt of 39.6 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the latitude tilt of 39.5 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n", - "The array tilt angle was not provided, therefore the latitude tilt of 39.6 was used.\n", - "The array azimuth was not provided, therefore an azimuth of 180.0 was used.\n" + "C:\\Users\\tford\\dev\\PVDegradationTools\\pvdeg\\utilities.py:479: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.\n", + " if ds1.dims != ds2.dims:\n" ] } ], "source": [ - "res = pvdeg.geospatial.analysis(\n", - " weather_ds=GEO_WEATHER,\n", - " meta_df=GEO_META,\n", - " func=pvdeg.standards.standoff,\n", - ")" + "assert pvdeg.utilities.compare_datasets(autotemplate_result,humidity_template)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "output template" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "shapes = {\n", + " \"RH_surface_outside\": (\"gid\", \"time\"),\n", + " \"RH_front_encap\": (\"gid\", \"time\"),\n", + " \"RH_back_encap\": (\"gid\", \"time\"),\n", + " \"RH_backsheet\": (\"gid\", \"time\"),\n", + "}\n", + "\n", + "manual_template = pvdeg.geospatial.output_template(\n", + " shapes=shapes,\n", + " ds_gids=GEO_WEATHER\n", + ").compute()" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "[[ 3.94100000e+01 -1.06420000e+02 nan]\n", - " [ 3.94100000e+01 -1.06300000e+02 nan]\n", - " [ 3.94100000e+01 -1.06260000e+02 nan]\n", - " [ 3.94100000e+01 -1.06180000e+02 nan]\n", - " [ 3.94100000e+01 -1.06140000e+02 5.05368226e-01]\n", - " [ 3.94100000e+01 -1.06100000e+02 nan]\n", - " [ 3.94100000e+01 -1.06060000e+02 4.23099392e-01]\n", - " [ 3.94100000e+01 -1.05980000e+02 nan]\n", - " [ 3.94100000e+01 -1.05940000e+02 nan]\n", - " [ 3.94100000e+01 -1.05860000e+02 nan]\n", - " [ 3.94500000e+01 -1.06420000e+02 nan]\n", - " [ 3.94500000e+01 -1.06300000e+02 nan]\n", - " [ 3.94500000e+01 -1.06260000e+02 nan]\n", - " [ 3.94500000e+01 -1.06180000e+02 nan]\n", - " [ 3.94500000e+01 -1.06140000e+02 nan]\n", - " [ 3.94500000e+01 -1.06100000e+02 4.74997232e-01]\n", - " [ 3.94500000e+01 -1.06060000e+02 nan]\n", - " [ 3.94500000e+01 -1.05980000e+02 nan]\n", - " [ 3.94500000e+01 -1.05940000e+02 nan]\n", - " [ 3.94500000e+01 -1.05860000e+02 nan]\n", - " [ 3.95300000e+01 -1.06420000e+02 nan]\n", - " [ 3.95300000e+01 -1.06300000e+02 nan]\n", - 
" [ 3.95300000e+01 -1.06260000e+02 nan]\n", - " [ 3.95300000e+01 -1.06180000e+02 nan]\n", - " [ 3.95300000e+01 -1.06140000e+02 nan]\n", - " [ 3.95300000e+01 -1.06100000e+02 nan]\n", - " [ 3.95300000e+01 -1.06060000e+02 nan]\n", - " [ 3.95300000e+01 -1.05980000e+02 nan]\n", - " [ 3.95300000e+01 -1.05940000e+02 7.43781748e-01]\n", - " [ 3.95300000e+01 -1.05860000e+02 nan]\n", - " [ 3.95700000e+01 -1.06420000e+02 nan]\n", - " [ 3.95700000e+01 -1.06300000e+02 nan]\n", - " [ 3.95700000e+01 -1.06260000e+02 nan]\n", - " [ 3.95700000e+01 -1.06180000e+02 nan]\n", - " [ 3.95700000e+01 -1.06140000e+02 nan]\n", - " [ 3.95700000e+01 -1.06100000e+02 nan]\n", - " [ 3.95700000e+01 -1.06060000e+02 nan]\n", - " [ 3.95700000e+01 -1.05980000e+02 nan]\n", - " [ 3.95700000e+01 -1.05940000e+02 nan]\n", - " [ 3.95700000e+01 -1.05860000e+02 1.02006123e-01]\n", - " [ 3.96500000e+01 -1.06420000e+02 nan]\n", - " [ 3.96500000e+01 -1.06300000e+02 nan]\n", - " [ 3.96500000e+01 -1.06260000e+02 nan]\n", - " [ 3.96500000e+01 -1.06180000e+02 nan]\n", - " [ 3.96500000e+01 -1.06140000e+02 nan]\n", - " [ 3.96500000e+01 -1.06100000e+02 nan]\n", - " [ 3.96500000e+01 -1.06060000e+02 nan]\n", - " [ 3.96500000e+01 -1.05980000e+02 9.86327972e-01]\n", - " [ 3.96500000e+01 -1.05940000e+02 nan]\n", - " [ 3.96500000e+01 -1.05860000e+02 nan]\n", - " [ 3.96900000e+01 -1.06420000e+02 nan]\n", - " [ 3.96900000e+01 -1.06300000e+02 nan]\n", - " [ 3.96900000e+01 -1.06260000e+02 1.11679921e+00]\n", - " [ 3.96900000e+01 -1.06180000e+02 nan]\n", - " [ 3.96900000e+01 -1.06140000e+02 nan]\n", - " [ 3.96900000e+01 -1.06100000e+02 nan]\n", - " [ 3.96900000e+01 -1.06060000e+02 nan]\n", - " [ 3.96900000e+01 -1.05980000e+02 nan]\n", - " [ 3.96900000e+01 -1.05940000e+02 nan]\n", - " [ 3.96900000e+01 -1.05860000e+02 nan]\n", - " [ 3.98100000e+01 -1.06420000e+02 nan]\n", - " [ 3.98100000e+01 -1.06300000e+02 nan]\n", - " [ 3.98100000e+01 -1.06260000e+02 nan]\n", - " [ 3.98100000e+01 -1.06180000e+02 3.31105174e+00]\n", - " [ 3.98100000e+01 -1.06140000e+02 2.42166730e+00]\n", - " [ 3.98100000e+01 -1.06100000e+02 nan]\n", - " [ 3.98100000e+01 -1.06060000e+02 nan]\n", - " [ 3.98100000e+01 -1.05980000e+02 nan]\n", - " [ 3.98100000e+01 -1.05940000e+02 nan]\n", - " [ 3.98100000e+01 -1.05860000e+02 nan]\n", - " [ 3.98900000e+01 -1.06420000e+02 2.15902146e+00]\n", - " [ 3.98900000e+01 -1.06300000e+02 3.70965360e+00]\n", - " [ 3.98900000e+01 -1.06260000e+02 nan]\n", - " [ 3.98900000e+01 -1.06180000e+02 nan]\n", - " [ 3.98900000e+01 -1.06140000e+02 nan]\n", - " [ 3.98900000e+01 -1.06100000e+02 nan]\n", - " [ 3.98900000e+01 -1.06060000e+02 nan]\n", - " [ 3.98900000e+01 -1.05980000e+02 nan]\n", - " [ 3.98900000e+01 -1.05940000e+02 nan]\n", - " [ 3.98900000e+01 -1.05860000e+02 nan]]\n" + "C:\\Users\\tford\\dev\\PVDegradationTools\\pvdeg\\utilities.py:479: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. 
To access a mapping from dimension names to lengths, please use `Dataset.sizes`.\n", + " if ds1.dims != ds2.dims:\n" ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "data_var = res[\"x\"]\n", - "\n", - "# Stack the latitude and longitude coordinates into a single dimension\n", - "stacked = data_var.stack(z=(\"latitude\", \"longitude\"))\n", - "\n", - "# Extract the coordinates as numpy arrays\n", - "latitudes = stacked['latitude'].values\n", - "longitudes = stacked['longitude'].values\n", - "\n", - "# Extract the data values\n", - "data_values = stacked.values\n", - "\n", - "# Combine coordinates and data values into a single numpy array\n", - "combined_array = np.column_stack((latitudes, longitudes, data_values))\n", - "\n", - "print(combined_array)" + "pvdeg.utilities.compare_datasets(manual_template, humidity_template)" ] }, { @@ -916,27 +237,36 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# test template\n", + "\n", + "shapes = {\n", + " 'testA' : (\"gid\", ),\n", + " 'testB' : (\"gid\", \"time\")\n", + "}\n", + "\n", + "template = pvdeg.geospatial.output_template(\n", + " shapes=shapes,\n", + "\n", + " ds_gids=GEO_WEATHER,\n", + ")\n" + ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "res = pd.DataFrame(combined_array).dropna()\n", - "ans = pd.read_csv(os.path.join(TEST_DATA_DIR, 'summit-standoff-res.csv'), index_col=0)\n", - "res.columns = ans.columns" + "template.to_netcdf(os.path.join(TEST_DATA_DIR, \"mismatch-template.nc\"))" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "pd.testing.assert_frame_equal(res, ans, check_dtype=False, check_names=False)" - ] + "source": [] } ], "metadata": { diff --git a/tests/test_geospatial.py b/tests/test_geospatial.py index a03b1090..71129372 100644 --- a/tests/test_geospatial.py +++ b/tests/test_geospatial.py @@ -3,12 +3,19 @@ import pickle import pandas as pd import numpy as np +import xarray as xr import os -with open(os.path.join(TEST_DATA_DIR, 'summit-weather.pkl'), 'rb') as f: + +GEO_META = pd.read_csv(os.path.join(TEST_DATA_DIR, "summit-meta.csv"), index_col=0) + +with open(os.path.join(TEST_DATA_DIR, "summit-weather.pkl"), "rb") as f: GEO_WEATHER = pickle.load(f) -GEO_META = pd.read_csv(os.path.join(TEST_DATA_DIR, 'summit-meta.csv'), index_col=0) +HUMIDITY_TEMPLATE = xr.open_dataset( + os.path.join(TEST_DATA_DIR, "humidity_template.nc"), engine='h5netcdf' +).compute() + def test_analysis_standoff(): res_ds = pvdeg.geospatial.analysis( @@ -22,13 +29,40 @@ def test_analysis_standoff(): # Stack the latitude and longitude coordinates into a single dimension # convert to dataframe, this can be done with xr.dataset.to_dataframe as well stacked = data_var.stack(z=("latitude", "longitude")) - latitudes = stacked['latitude'].values - longitudes = stacked['longitude'].values + latitudes = stacked["latitude"].values + longitudes = stacked["longitude"].values data_values = stacked.values combined_array = np.column_stack((latitudes, longitudes, data_values)) res = pd.DataFrame(combined_array).dropna() - ans = pd.read_csv(os.path.join(TEST_DATA_DIR, 'summit-standoff-res.csv'), index_col=0) + ans = pd.read_csv( + os.path.join(TEST_DATA_DIR, "summit-standoff-res.csv"), index_col=0 + ) res.columns = ans.columns pd.testing.assert_frame_equal(res, ans, 
check_dtype=False, check_names=False)
+
+
+def test_autotemplate():
+    autotemplate_result = pvdeg.geospatial.auto_template(
+        func=pvdeg.humidity.module, ds_gids=GEO_WEATHER
+    ).compute()
+
+    assert pvdeg.utilities.compare_templates(
+        autotemplate_result, HUMIDITY_TEMPLATE
+    )  # custom comparison; we can't use equals or identical because of the empty-like values
+
+
+def test_template():
+    shapes = {
+        "RH_surface_outside": ("gid", "time"),
+        "RH_front_encap": ("gid", "time"),
+        "RH_back_encap": ("gid", "time"),
+        "RH_backsheet": ("gid", "time"),
+    }
+
+    manual_template = pvdeg.geospatial.output_template(
+        shapes=shapes, ds_gids=GEO_WEATHER
+    ).compute()
+
+    assert pvdeg.utilities.compare_templates(manual_template, HUMIDITY_TEMPLATE)
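Reviewer note: outside pytest, the new `compare_templates` helper can be exercised with a quick script like the one below. This is a sketch under the same assumptions as the tests — `summit-weather.pkl` and `humidity_template.nc` live in `TEST_DATA_DIR`, and `h5netcdf` is installed:

```python
import os
import pickle

import xarray as xr

import pvdeg
from pvdeg import TEST_DATA_DIR

# load the geospatial weather dataset used by the tests
with open(os.path.join(TEST_DATA_DIR, "summit-weather.pkl"), "rb") as f:
    geo_weather = pickle.load(f)

# build an "empty-like" output template for pvdeg.humidity.module ...
template = pvdeg.geospatial.auto_template(
    func=pvdeg.humidity.module, ds_gids=geo_weather
).compute()

# ... and compare it against the reference template shipped with the tests
reference = xr.open_dataset(
    os.path.join(TEST_DATA_DIR, "humidity_template.nc"), engine="h5netcdf"
).compute()

print(pvdeg.utilities.compare_templates(template, reference))  # expect: True
```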