Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove inactive person links #2364

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from unittest import TestCase

import pytest
from candidates.models.popolo_extra import Ballot
from candidates.tests.factories import (
ElectionFactory,
MembershipFactory,
PostFactory,
)
from django.core.management import call_command
from parties.tests.factories import PartyFactory
from people.tests.factories import PersonFactory, PersonIdentifierFactory


class TestPersonIdentifiers(TestCase):
    """
    Exercise the ``remove_inactive_person_links`` management command.

    A person standing on a ``parl.2024-07-04`` ballot is given one link
    that should be kept and one that should be removed as dead.
    """

    LIVE_URL = "https://en.wikipedia.org/wiki/Rishi_Sunak"
    DEAD_URL = "http://www.conservatives.com/about/our-team/example.com"

    def setUp(self):
        self.person = PersonFactory.create()
        # 200 example
        # NOTE(review): this value_type looks like a mangled "wikipedia_url";
        # left unchanged because result ordering may depend on it - confirm.
        PersonIdentifierFactory.create(
            person=self.person,
            value=self.LIVE_URL,
            value_type="https://en.wikipedia_url",
        )
        # 404 example
        PersonIdentifierFactory.create(
            person=self.person,
            value=self.DEAD_URL,
            value_type="party_ppc_page_url",
        )
        post = PostFactory.create(slug="parl.2024-07-04")

        election = ElectionFactory.create(
            slug="parl.2024-07-04",
            election_date="2024-07-04",
            name="2024 General Election",
        )
        ballot = Ballot.objects.create(
            election=election, post=post, ballot_paper_id="parl.2024-07-04"
        )
        party = PartyFactory.create()
        MembershipFactory.create(
            person=self.person,
            post=post,
            party=party,
            ballot=ballot,
        )

    @pytest.mark.django_db
    def test_remove_inactive_person_identifiers(self):
        # Imported locally so this test needs no change to the module imports.
        from unittest import mock

        def fake_get(url, **kwargs):
            # Stand-in for requests.get: only DEAD_URL reports 404, so the
            # test never touches the network and cannot flake on site changes.
            status_code = 404 if url == self.DEAD_URL else 200
            return mock.Mock(status_code=status_code)

        self.assertEqual(len(self.person.get_all_identifiers), 2)
        self.assertEqual(
            self.person.get_all_identifiers[0].value,
            self.LIVE_URL,
        )
        self.assertEqual(
            self.person.get_all_identifiers[1].value,
            self.DEAD_URL,
        )
        with mock.patch("requests.get", side_effect=fake_get):
            call_command("remove_inactive_person_links")
        self.person.refresh_from_db()
        self.assertEqual(len(self.person.get_all_identifiers), 1)
        self.assertEqual(
            self.person.get_all_identifiers[0].value,
            self.LIVE_URL,
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from typing import List
from urllib.parse import urlparse

import requests
from django.core.management.base import BaseCommand
from people.models import Person
from popolo.models import Membership


def get_domain(url):
    """Return the network-location (``netloc``) component of *url*.

    The value is returned exactly as ``urlparse`` reports it, so it may
    include userinfo and a port, and is empty when *url* has no ``//`` part.
    """
    return urlparse(url).netloc


def is_facebook_url(url):
    """Return True when *url* is hosted on facebook.com or fb.com.

    The host must equal one of those domains or be a subdomain of it.
    A plain substring test would also accept look-alikes such as
    ``notfacebook.com`` or ``facebook.com.example.org``.
    """
    # .hostname is lower-cased and excludes userinfo/port; None if absent.
    host = urlparse(url).hostname or ""
    return any(
        host == base or host.endswith("." + base)
        for base in ("facebook.com", "fb.com")
    )
Fixed Show fixed Hide fixed
Fixed Show fixed Hide fixed


class Command(BaseCommand):
    """
    Test and remove dead links from Person objects.

    Only people with a membership on a ballot of the ``parl.2024-07-04``
    election are checked. An identifier is deleted when its URL answers
    with HTTP 404 and it is not a Facebook URL.
    """

    help = "Remove person identifier links that return HTTP 404"

    def handle(self, *args, **options):
        """
        Check each http(s) identifier of the selected people and delete
        those whose URL returns a 404 status code.

        Other statuses and request failures (timeouts, connection
        errors) leave the identifier in place. Progress and a closing
        summary are written to ``self.stdout``.
        """
        inactive_links: List[List] = []
        memberships = Membership.objects.filter(
            ballot__election__slug="parl.2024-07-04"
        )

        # distinct(): the join would otherwise yield a person once per
        # matching membership, repeating every HTTP request for them.
        people = Person.objects.filter(memberships__in=memberships).distinct()
        for person in people:
            url_identifiers = [
                identifier
                for identifier in person.get_all_identifiers
                if identifier.value.startswith("http")
            ]
            for identifier in url_identifiers:
                # Facebook links are never removed, so skip the request.
                if is_facebook_url(identifier.value):
                    continue
                status_code = self._fetch_status_code(identifier.value, person)
                if status_code != 404:
                    continue
                self.stdout.write(
                    f"Status code: {status_code} for {person.name} {identifier.value}"
                )
                inactive_links.append(
                    [
                        str(person.pk),
                        person.name,
                        identifier.value,
                        str(status_code),
                    ]
                )
                # delete the identifier from the person identifiers
                # NOTE(review): presumably keeps an in-memory identifier
                # list in sync with the database - confirm it is needed.
                identifier.person.get_all_identifiers.remove(identifier)
                identifier.delete()
                identifier.person.save()
                self.stdout.write(
                    f"Deleted {identifier.value_type}:{identifier.value} from {person.name}"
                )
        self.stdout.write(f"Removed {len(inactive_links)} inactive link(s)")

    def _fetch_status_code(self, url, person):
        """Return the status code of a GET for *url*, or None if the request fails."""
        try:
            return requests.get(url, timeout=2).status_code
        except requests.exceptions.RequestException as e:
            self.stdout.write(f"Request exception: {e} for {person.name}")
            return None
Loading