Fix Python Issues Flagged by pre-commit #82

Merged (13 commits) on Mar 19, 2024
Changes from 10 commits
7 changes: 4 additions & 3 deletions .bandit.yml
@@ -6,8 +6,9 @@
# If `tests` is empty, all tests are considered included.

tests:
# - B101
# - B102

skips:
# - B101 # skip "assert used" check since assertions are required in pytests
- B101 # skip "assert used" check since assertions are required in pytests

exclude:
- '**/test_*.py'
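Bandit's B101 rule flags every use of `assert`, but pytest is built around bare assert statements, so the check has to be skipped for this codebase. A minimal illustration of the conflict (a hypothetical test, not one from this PR):

# test_title_parse.py
from urllib.parse import unquote

def test_title_parse():
    # pytest rewrites bare asserts into rich failure messages;
    # Bandit's B101 would flag this line if the check were not skipped
    assert unquote("San%20Antonio") == "San Antonio"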
4 changes: 4 additions & 0 deletions .flake8
@@ -23,3 +23,7 @@ select = C,D,E,F,W,B,B950
# https://github.com/ambv/black/issues/21. Guido agrees here:
# https://github.com/python/peps/commit/c59c4376ad233a62ca4b3a6060c81368bd21e85b.
ignore = E501,W503
# Ignore D100 and D103, which check for docstrings in modules and functions, in all test files
per-file-ignores =
# Ignore D100 and D103 in all test files
*/test_*.py: D100, D103
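The per-file-ignores entry lets test files omit the docstrings that the docstring checks would otherwise demand. For example, a file like the following (hypothetical) would previously fail D100 (missing module docstring) and D103 (missing function docstring), and now passes:

# backend/scripts/test_example.py -- no module or function docstrings needed
def test_addition():
    assert 1 + 1 == 2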
2 changes: 2 additions & 0 deletions .github/workflows/backend.yml
@@ -111,6 +111,8 @@ jobs:
uses: actions/[email protected]
with:
python-version: '3.10'
- name: Copy .env file
run: cp ../dev.env.example .env
- uses: actions/cache@v3
with:
path: ~/.cache/pip
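The new workflow step copies the example environment file into place before the tests run, presumably because the backend expects a `.env` file to exist. A sketch of how such a file is typically consumed, assuming the backend loads it with python-dotenv (the variable name below is hypothetical):

# Standard Python Libraries
import os

# Third-Party Libraries
from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from .env into the process environment
database_url = os.getenv("DATABASE_URL")  # hypothetical key from dev.env.example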
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -103,6 +103,8 @@ repos:
rev: v1.5.1
hooks:
- id: mypy
additional_dependencies:
- types-requests
- repo: https://github.com/asottile/pyupgrade
rev: v3.10.1
hooks:
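mypy needs the `types-requests` stub package to type-check code that calls into `requests`, since requests itself ships without type annotations; without the stubs, mypy reports the import as missing library stubs. A small example of the kind of code the stubs make checkable (a sketch, not code from this PR):

import requests

def fetch_text(url: str) -> str:
    # With types-requests installed, mypy knows this returns requests.Response
    response = requests.get(url, timeout=5)
    return response.text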
1 change: 1 addition & 0 deletions .python-version
@@ -0,0 +1 @@
XFD
48 changes: 40 additions & 8 deletions backend/scripts/populateCountiesCities/cities.py
@@ -1,18 +1,44 @@
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import re
"""
This module contains the script for populating cities data.

It includes functions for parsing titles, pulling cities data from Wikipedia,
and writing the data to a CSV file.
"""

# Standard Python Libraries
import json
import re
import time
from urllib.parse import unquote

# Third-Party Libraries
from bs4 import BeautifulSoup
import pandas as pd
import requests


def title_parse(title):
"""
Parse the title by unquoting it.

Args:
title (str): The title to be parsed.

Returns:
str: The parsed title.
"""
title = unquote(title)
return title


def pull_cities():
"""
Process and pull cities data from Wikipedia.

This function reads the Wikipedia US cities data from a JSON file, processes each entry,
fetches the corresponding Wikipedia page, parses the page to extract city, county, and URL information,
and writes the data to a CSV file.
"""
print("Processing Cities...")
with open("wikipedia_US_cities.json") as f:
wikipedia_us_city_data = json.load(f)
@@ -23,7 +49,10 @@ def pull_cities():
print(entry["name"])
# get the response in the form of html
wikiurl = "https://en.wikipedia.org/wiki/" + entry["url"]
response = requests.get(wikiurl)
try:
response = requests.get(wikiurl, timeout=5)
except requests.exceptions.Timeout:
print("The request timed out")

# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(response.text, "html.parser")
@@ -52,7 +81,9 @@
if "," in link.get("title"):
county_pieces = link.get("title").split(",")
# OPEN WIKIPEDIA PAGE UP
x = requests.get("https://en.wikipedia.org/" + link.get("href"))
x = requests.get(
"https://en.wikipedia.org/" + link.get("href"), timeout=5
)

# PULL COUNTY OR PARISH FROM WIKIPEDIA PAGE
county_parish_matches = re.findall(
@@ -85,7 +116,8 @@
}
)
time.sleep(1)
except:
except Exception as e:
print(f"Error: {e}")
pass

df = pd.DataFrame(holding_pen, columns=["State", "County", "City", "URL"])
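The pattern added above catches `requests.exceptions.Timeout` and logs it; one defensive variant, sketched here with a hypothetical `fetch_page` helper rather than the merged code, returns None on any request failure so callers can skip that entry:

def fetch_page(url: str) -> str | None:
    """Return the page HTML, or None if the request fails or times out."""
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()  # treat HTTP error statuses as failures too
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None
    return response.text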
32 changes: 26 additions & 6 deletions backend/scripts/populateCountiesCities/counties.py
@@ -1,16 +1,35 @@
"""
This module contains the script for populating counties data.

It includes functions for pulling counties data from Wikipedia,
and writing the data to a CSV file.
"""

# Standard Python Libraries
import re
import time

# Third-Party Libraries
from bs4 import BeautifulSoup
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import re


def pull_counties():
"""
Process and pull counties data from Wikipedia.

This function fetches the Wikipedia page for the list of United States counties,
parses the page to extract county, state, and URL information,
and writes the data to a CSV file.
"""
print("Processing Counties...")
# get the response in the form of html
wikiurl = "https://en.wikipedia.org/wiki/List_of_United_States_counties_and_county_equivalents"
table_class = "wikitable sortable jquery-tablesorter"
response = requests.get(wikiurl)
try:
response = requests.get(wikiurl, timeout=5)
except requests.exceptions.Timeout:
print("The request timed out")

# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(response.text, "html.parser")
@@ -24,7 +43,7 @@ def pull_counties():
try:
county_pieces = link.get("title").split(", ")
# OPEN WIKIPEDIA PAGE UP
x = requests.get("https://en.wikipedia.org/" + link.get("href"))
x = requests.get("https://en.wikipedia.org/" + link.get("href"), timeout=5)

# PULL WEBSITE FROM WIKIPEDIA PAGE
w = re.findall(
@@ -43,6 +62,7 @@
}
)
except Exception as e:
print(f"Error: {e}")
pass

time.sleep(1)
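Adding `print(f"Error: {e}")` inside the `except Exception as e` handler surfaces failures that were previously swallowed, while still letting system-exiting exceptions like KeyboardInterrupt propagate (which a bare `except:` would trap). A contrived illustration with hypothetical values, mirroring the loop above:

link_title = None  # e.g. an <a> tag with no title attribute

try:
    county_pieces = link_title.split(", ")  # raises AttributeError on None
except Exception as e:
    # The failure is logged rather than silently discarded, and Ctrl-C
    # (KeyboardInterrupt) is not caught, unlike with a bare `except:`.
    print(f"Error: {e}")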
33 changes: 32 additions & 1 deletion backend/scripts/populateCountiesCities/main.py
@@ -1,22 +1,53 @@
import typer
"""
This module contains the main script for populating counties and cities data.

It includes commands for processing cities and counties data separately or both at once.
"""

# Third-Party Libraries
import cities
import counties
import typer

app = typer.Typer()


@app.command()
def process_cities():
"""
Process and pull cities data from Wikipedia.

This function calls the pull_cities function from the cities module,
which reads the Wikipedia US cities data from a JSON file, processes each entry,
fetches the corresponding Wikipedia page, parses the page to extract city, county, and URL information,
and writes the data to a CSV file.
"""
cities.pull_cities()


@app.command()
def process_counties():
"""
Process and pull counties data from Wikipedia.

This function calls the pull_counties function from the counties module,
which fetches the Wikipedia page for the list of United States counties,
parses the page to extract county, state, and URL information,
and writes the data to a CSV file.
"""
counties.pull_counties()


@app.command()
def process_both():
"""
Process and pull both cities and counties data from Wikipedia.

This function calls both the pull_cities function from the cities module and the pull_counties function from the counties module,
which fetches the Wikipedia pages for the list of United States cities and counties,
parses the pages to extract city, county, state, and URL information,
and writes the data to CSV files.
"""
counties.pull_counties()
cities.pull_cities()

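Typer turns each decorated function into a subcommand, converting underscores in function names to dashes. Assuming the module ends with the usual entry-point guard (not shown in this diff), the script would be invoked like so:

if __name__ == "__main__":
    app()

# Expected usage, e.g.:
#   python main.py process-cities
#   python main.py process-counties
#   python main.py process-both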
2 changes: 1 addition & 1 deletion backend/scripts/populateCountiesCities/requirements.txt
@@ -1,4 +1,4 @@
beautifulsoup4==4.11.2
pandas==1.5.1
requests==2.28.2
beautifulsoup4==4.11.2
typer==0.7.0
5 changes: 5 additions & 0 deletions backend/worker/__init__.py
@@ -0,0 +1,5 @@
"""
This package contains the worker tasks for the backend.

It includes modules for processing data, interacting with databases, and other backend tasks.
"""