diff --git a/src/basic_cleaning/run.py b/src/basic_cleaning/run.py
index ec84f80b..ea11618a 100644
--- a/src/basic_cleaning/run.py
+++ b/src/basic_cleaning/run.py
@@ -26,6 +26,11 @@ def go(args):
     idx = df['price'].between(args.min_price, args.max_price)
     df = df[idx].copy()
 
+    logger.info("Drop the outliers in geolocation")
+    # Keep only rows whose coordinates fall inside the bounding box (bounds inclusive).
+    idx = df['longitude'].between(-74.25, -73.50) & df['latitude'].between(40.5, 41.2)
+    df = df[idx].copy()
+
     logger.info("Save results")
     filename = "clean_sample.csv"
     df.to_csv(filename, index = False)