Skip to content

Commit

Permalink
Update vector grid creation functions
Browse files Browse the repository at this point in the history
  • Loading branch information
f-PLT committed May 30, 2024
1 parent 22348db commit 9438b42
Showing 1 changed file with 63 additions and 23 deletions.
86 changes: 63 additions & 23 deletions geospatial_tools/vector.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import logging
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count
from typing import Union

import geopandas as gpd
Expand All @@ -10,42 +13,72 @@


def create_vector_grid(
bounding_box: Union[list, tuple], grid_size: float, logger=LOGGER, crs: str = None
bounding_box: Union[list, tuple], grid_size: float, logger: logging.Logger = LOGGER, crs: str = None
) -> gpd.GeoDataFrame:
"""
This function creates a grid of square polygons based on the input bounds and grid size.
Create a grid of polygons within the specified bounds and cell size. The CRS is set only when `crs` is provided.
This function uses NumPy for optimized performance.
Parameters:
bounding_box (tuple): The bounding box of the grid as (min_lon, min_lat, max_lon, max_lat).
grid_size (float): The size of each grid cell in degrees.
:param bounding_box: Bounding box coordinates as (min_x, min_y, max_x, max_y)
:param grid_size: Size of each grid cell. Unit is according to the projection of bounding box.
:param crs: CRS of the grid cell
:return:
Returns:
GeoDataFrame: A GeoDataFrame containing the grid polygons; its CRS is set only when `crs` is provided.
"""
min_lon, min_lat, max_lon, max_lat = bounding_box
lon_coords = np.arange(min_lon, max_lon, grid_size)
lat_coords = np.arange(min_lat, max_lat, grid_size)

min_x, min_y, max_x, max_y = bounding_box
x_coords = np.arange(min_x, max_x, grid_size)
y_coords = np.arange(min_y, max_y, grid_size)
polygons = []
for x in x_coords:
for y in y_coords:
polygons.append(Polygon([(x, y), (x + grid_size, y), (x + grid_size, y + grid_size), (x, y + grid_size)]))
# Generate grid coordinates
lon_grid, lat_grid = np.meshgrid(lon_coords, lat_coords)
lon_grid = lon_grid.flatten()
lat_grid = lat_grid.flatten()

# Preallocate polygon array
num_cells = len(lon_grid)
polygons = np.empty(num_cells, dtype=object)

for i in range(num_cells):
x, y = lon_grid[i], lat_grid[i]
polygons[i] = Polygon([(x, y), (x + grid_size, y), (x + grid_size, y + grid_size), (x, y + grid_size)])

properties = {"data": {"geometry": polygons}}
if crs:
properties["crs"] = crs
grid = gpd.GeoDataFrame(**properties)
grid.sindex
return grid


def create_grid_optimized(
bounding_box: Union[list, tuple], grid_size: float, logger=LOGGER, crs: str = None
def create_grid_chunk(chunk):
    """
    Build the square cell polygons for one chunk of grid coordinates.

    Parameters:
        chunk (tuple): A 3-item sequence ``(lon_coords, lat_coords, grid_size)``.
            The two coordinate sequences are paired element-wise; each pair is the
            first corner ``(lon, lat)`` of one cell, and ``grid_size`` is the length
            of the cell's sides.

    Returns:
        list: One ``Polygon`` per coordinate pair, in input order. An empty chunk
            yields an empty list.
    """
    lon_coords, lat_coords, grid_size = chunk
    # Each cell is described by its four corners, starting at (lon, lat) and
    # stepping by grid_size along the first axis, then the second.
    return [
        Polygon(
            [
                (lon, lat),
                (lon + grid_size, lat),
                (lon + grid_size, lat + grid_size),
                (lon, lat + grid_size),
            ]
        )
        for lon, lat in zip(lon_coords, lat_coords)
    ]


def create_vector_grid_parallel(
bounding_box: Union[list, tuple],
grid_size: float,
crs: str = None,
num_processes: int = None,
logger: logging.Logger = LOGGER,
) -> gpd.GeoDataFrame:
"""
Create a grid of polygons within the specified bounds and cell size. The CRS is set only when `crs` is provided.
This function uses NumPy for optimized performance.
This function uses NumPy for optimized performance and ProcessPoolExecutor for parallel execution.
Parameters:
bounding_box (tuple): The bounding box of the grid as (min_lon, min_lat, max_lon, max_lat).
grid_size (float): The size of each grid cell in degrees.
crs (str): Coordinate reference system for the resulting GeoDataFrame.
num_processes (int): The number of processes to use for parallel execution.
Defaults to the smaller of the number of CPU cores and the number of cells in the grid.
logger: Optional logger for logging.
Returns:
GeoDataFrame: A GeoDataFrame containing the grid polygons; its CRS is set only when `crs` is provided.
Expand All @@ -59,18 +92,25 @@ def create_grid_optimized(
lon_grid = lon_grid.flatten()
lat_grid = lat_grid.flatten()

# Preallocate polygons array
# Prepare chunked grid
num_cells = len(lon_grid)
polygons = np.empty(num_cells, dtype=object)
workers = min(cpu_count(), num_cells)
if num_processes:
workers = num_processes
chunk_size = (num_cells + workers - 1) // workers
chunks = [
(lon_grid[i : i + chunk_size], lat_grid[i : i + chunk_size], grid_size) for i in range(0, num_cells, chunk_size)
]

# Create one polygon per grid cell
for i in range(num_cells):
x, y = lon_grid[i], lat_grid[i]
polygons[i] = Polygon([(x, y), (x + grid_size, y), (x + grid_size, y + grid_size), (x, y + grid_size)])
polygons = []
with ProcessPoolExecutor(max_workers=workers) as executor:
results = executor.map(create_grid_chunk, chunks)
for result in results:
polygons.extend(result)

# Create GeoDataFrame
properties = {"data": {"geometry": polygons}}
if crs:
properties["crs"] = crs
grid = gpd.GeoDataFrame(**properties)
grid.sindex
return grid

0 comments on commit 9438b42

Please sign in to comment.