Fix Python Issues Flagged by pre-commit #82

Merged (13 commits) on Mar 19, 2024
Changes from 10 commits
7 changes: 4 additions & 3 deletions .bandit.yml
@@ -6,8 +6,9 @@
# If `tests` is empty, all tests are considered included.

tests:
# - B101
# - B102

skips:
# - B101 # skip "assert used" check since assertions are required in pytests
- B101 # skip "assert used" check since assertions are required in pytests

exclude:
- '**/test_*.py'
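Bandit's B101 rule flags every use of `assert`, but pytest is built around bare assert statements, so the check has to be skipped for this codebase. A minimal illustration of the conflict (a hypothetical test, not one from this PR):

# test_title_parse.py
from urllib.parse import unquote

def test_title_parse():
    # pytest rewrites bare asserts into rich failure messages;
    # Bandit's B101 would flag this line if the check were not skipped
    assert unquote("San%20Antonio") == "San Antonio"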
4 changes: 4 additions & 0 deletions .flake8
@@ -23,3 +23,7 @@ select = C,D,E,F,W,B,B950
# https://github.com/ambv/black/issues/21. Guido agrees here:
# https://github.com/python/peps/commit/c59c4376ad233a62ca4b3a6060c81368bd21e85b.
ignore = E501,W503
# Ignore D100 and D103, which check for docstrings in modules and functions, in all test files
per-file-ignores =
# Ignore D100 and D103 in all test files
*/test_*.py: D100, D103
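The per-file-ignores entry lets test files omit the docstrings that the docstring checks would otherwise demand. For example, a file like the following (hypothetical) would previously fail D100 (missing module docstring) and D103 (missing function docstring), and now passes:

# backend/scripts/test_example.py -- no module or function docstrings needed
def test_addition():
    assert 1 + 1 == 2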
2 changes: 2 additions & 0 deletions .github/workflows/backend.yml
@@ -111,6 +111,8 @@ jobs:
uses: actions/[email protected]
with:
python-version: '3.10'
- name: Copy .env file
run: cp ../dev.env.example .env
- uses: actions/cache@v3
with:
path: ~/.cache/pip
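The new workflow step copies the example environment file into place before the tests run, presumably because the backend expects a `.env` file to exist. A sketch of how such a file is typically consumed, assuming the backend loads it with python-dotenv (the variable name below is hypothetical):

# Standard Python Libraries
import os

# Third-Party Libraries
from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from .env into the process environment
database_url = os.getenv("DATABASE_URL")  # hypothetical key from dev.env.example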
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -103,6 +103,8 @@ repos:
rev: v1.5.1
hooks:
- id: mypy
additional_dependencies:
- types-requests
- repo: https://github.com/asottile/pyupgrade
rev: v3.10.1
hooks:
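mypy needs the `types-requests` stub package to type-check code that calls into `requests`, since requests itself ships without type annotations; without the stubs, mypy reports the import as missing library stubs. A small example of the kind of code the stubs make checkable (a sketch, not code from this PR):

import requests

def fetch_text(url: str) -> str:
    # With types-requests installed, mypy knows this returns requests.Response
    response = requests.get(url, timeout=5)
    return response.text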
1 change: 1 addition & 0 deletions .python-version
@@ -0,0 +1 @@
XFD
48 changes: 40 additions & 8 deletions backend/scripts/populateCountiesCities/cities.py
@@ -1,18 +1,44 @@
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import re
"""
This module contains the script for populating cities data.

It includes functions for parsing titles, pulling cities data from Wikipedia,
and writing the data to a CSV file.
"""

# Standard Python Libraries
import json
import re
import time
from urllib.parse import unquote

# Third-Party Libraries
from bs4 import BeautifulSoup
import pandas as pd
import requests


def title_parse(title):
"""
Parse the title by unquoting it.

Args:
title (str): The title to be parsed.

Returns:
str: The parsed title.
"""
title = unquote(title)
return title


def pull_cities():
"""
Process and pull cities data from Wikipedia.

This function reads the Wikipedia US cities data from a JSON file, processes each entry,
fetches the corresponding Wikipedia page, parses the page to extract city, county, and URL information,
and writes the data to a CSV file.
"""
print("Processing Cities...")
with open("wikipedia_US_cities.json") as f:
wikipedia_us_city_data = json.load(f)
@@ -23,7 +49,10 @@ def pull_cities():
print(entry["name"])
# get the response in the form of html
wikiurl = "https://en.wikipedia.org/wiki/" + entry["url"]
response = requests.get(wikiurl)
try:
response = requests.get(wikiurl, timeout=5)
except requests.exceptions.Timeout:
print("The request timed out")

# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(response.text, "html.parser")
@@ -52,7 +81,9 @@
if "," in link.get("title"):
county_pieces = link.get("title").split(",")
# OPEN WIKIPEDIA PAGE UP
x = requests.get("https://en.wikipedia.org/" + link.get("href"))
x = requests.get(
"https://en.wikipedia.org/" + link.get("href"), timeout=5
)

# PULL COUNTY OR PARISH FROM WIKIPEDIA PAGE
county_parish_matches = re.findall(
@@ -85,7 +116,8 @@
}
)
time.sleep(1)
except:
except Exception as e:
print(f"Error: {e}")
pass

df = pd.DataFrame(holding_pen, columns=["State", "County", "City", "URL"])
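The pattern added above catches `requests.exceptions.Timeout` and logs it; one defensive variant, sketched here with a hypothetical `fetch_page` helper rather than the merged code, returns None on any request failure so callers can skip that entry:

def fetch_page(url: str) -> str | None:
    """Return the page HTML, or None if the request fails or times out."""
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()  # treat HTTP error statuses as failures too
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None
    return response.text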
32 changes: 26 additions & 6 deletions backend/scripts/populateCountiesCities/counties.py
@@ -1,16 +1,35 @@
"""
This module contains the script for populating counties data.

It includes functions for pulling counties data from Wikipedia,
and writing the data to a CSV file.
"""

# Standard Python Libraries
import re
import time

# Third-Party Libraries
from bs4 import BeautifulSoup
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import re


def pull_counties():
"""
Process and pull counties data from Wikipedia.

This function fetches the Wikipedia page for the list of United States counties,
parses the page to extract county, state, and URL information,
and writes the data to a CSV file.
"""
print("Processing Counties...")
# get the response in the form of html
wikiurl = "https://en.wikipedia.org/wiki/List_of_United_States_counties_and_county_equivalents"
table_class = "wikitable sortable jquery-tablesorter"
response = requests.get(wikiurl)
try:
response = requests.get(wikiurl, timeout=5)
except requests.exceptions.Timeout:
print("The request timed out")

# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(response.text, "html.parser")
@@ -24,7 +43,7 @@ def pull_counties():
try:
county_pieces = link.get("title").split(", ")
# OPEN WIKIPEDIA PAGE UP
x = requests.get("https://en.wikipedia.org/" + link.get("href"))
x = requests.get("https://en.wikipedia.org/" + link.get("href"), timeout=5)

# PULL WEBSITE FROM WIKIPEDIA PAGE
w = re.findall(
@@ -43,6 +62,7 @@
}
)
except Exception as e:
print(f"Error: {e}")
pass

time.sleep(1)
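Adding `print(f"Error: {e}")` inside the `except Exception as e` handler surfaces failures that were previously swallowed, while still letting system-exiting exceptions like KeyboardInterrupt propagate (which a bare `except:` would trap). A contrived illustration with hypothetical values, mirroring the loop above:

link_title = None  # e.g. an <a> tag with no title attribute

try:
    county_pieces = link_title.split(", ")  # raises AttributeError on None
except Exception as e:
    # The failure is logged rather than silently discarded, and Ctrl-C
    # (KeyboardInterrupt) is not caught, unlike with a bare `except:`.
    print(f"Error: {e}")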
33 changes: 32 additions & 1 deletion backend/scripts/populateCountiesCities/main.py
@@ -1,22 +1,53 @@
import typer
"""
This module contains the main script for populating counties and cities data.

It includes commands for processing cities and counties data separately or both at once.
"""

# Third-Party Libraries
import cities
import counties
import typer

app = typer.Typer()


@app.command()
def process_cities():
"""
Process and pull cities data from Wikipedia.

This function calls the pull_cities function from the cities module,
which reads the Wikipedia US cities data from a JSON file, processes each entry,
fetches the corresponding Wikipedia page, parses the page to extract city, county, and URL information,
and writes the data to a CSV file.
"""
cities.pull_cities()


@app.command()
def process_counties():
"""
Process and pull counties data from Wikipedia.

This function calls the pull_counties function from the counties module,
which fetches the Wikipedia page for the list of United States counties,
parses the page to extract county, state, and URL information,
and writes the data to a CSV file.
"""
counties.pull_counties()


@app.command()
def process_both():
"""
Process and pull both cities and counties data from Wikipedia.

This function calls both the pull_cities function from the cities module and the pull_counties function from the counties module,
which fetches the Wikipedia pages for the list of United States cities and counties,
parses the pages to extract city, county, state, and URL information,
and writes the data to CSV files.
"""
counties.pull_counties()
cities.pull_cities()

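Typer turns each decorated function into a subcommand, converting underscores in function names to dashes. Assuming the module ends with the usual entry-point guard (not shown in this diff), the script would be invoked like so:

if __name__ == "__main__":
    app()

# Expected usage, e.g.:
#   python main.py process-cities
#   python main.py process-counties
#   python main.py process-both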
2 changes: 1 addition & 1 deletion backend/scripts/populateCountiesCities/requirements.txt
@@ -1,4 +1,4 @@
beautifulsoup4==4.11.2
pandas==1.5.1
requests==2.28.2
beautifulsoup4==4.11.2
typer==0.7.0
5 changes: 5 additions & 0 deletions backend/worker/__init__.py
@@ -0,0 +1,5 @@
"""
This package contains the worker tasks for the backend.

It includes modules for processing data, interacting with databases, and other backend tasks.
"""