diff --git a/ynr/apps/candidatebot/helpers.py b/ynr/apps/candidatebot/helpers.py
index 46bf2ac64..a9eec573b 100644
--- a/ynr/apps/candidatebot/helpers.py
+++ b/ynr/apps/candidatebot/helpers.py
@@ -1,3 +1,4 @@
+import contextlib
 import re
 
 import pypandoc
@@ -196,3 +197,20 @@ def add_theyworkforyou_id(self, twfy_id):
         value = f"https://www.theyworkforyou.com/mp/{twfy_id}/"
         internal_id = f"uk.org.publicwhip/person/{twfy_id}"
         self.edit_field("theyworkforyou", value, internal_id=internal_id)
+
+    def remove_person_identifier(self, identifier):
+        """
+        Delete ``identifier`` and remove it from
+        ``self.person.get_all_identifiers``.
+
+        ``self.person`` is returned on every path, including the one where
+        ``PersonIdentifier.DoesNotExist`` is raised and suppressed.
+        """
+        with contextlib.suppress(PersonIdentifier.DoesNotExist):
+            current_identifiers = self.person.get_all_identifiers
+            # Guard the removal so an identifier that is missing from the
+            # list cannot raise before the delete below has run.
+            if identifier in current_identifiers:
+                current_identifiers.remove(identifier)
+            self.person.save()
+            identifier.delete()
+        return self.person
diff --git a/ynr/apps/candidatebot/management/commands/candidatebot_remove_inactive_person_links.py b/ynr/apps/candidatebot/management/commands/candidatebot_remove_inactive_person_links.py
new file mode 100644
index 000000000..7fa83f887
--- /dev/null
+++ b/ynr/apps/candidatebot/management/commands/candidatebot_remove_inactive_person_links.py
@@ -0,0 +1,95 @@
+from typing import List
+from urllib.parse import urlparse
+
+import requests
+from candidatebot.helpers import CandidateBot
+from django.core.management.base import BaseCommand
+from people.models import Person
+from popolo.models import Membership
+
+
+def get_domain(url):
+    """Return the network location (netloc) component of ``url``."""
+    parsed_url = urlparse(url)
+    return parsed_url.netloc
+
+
+def is_facebook_url(url):
+    """
+    Return True when the domain of ``url`` contains "facebook.com"
+    or "fb.com" (a plain substring match).
+    """
+    domain = get_domain(url)
+    return "facebook.com" in domain or "fb.com" in domain
+
+
+class Command(BaseCommand):
+    """
+    Test and remove inactive or dead links from Person objects.
+    """
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--person-id",
+            help="Person ID to test",
+        )
+
+    def handle(self, *args, **options):
+        """
+        Request every http(s) identifier of the people standing in the
+        parl.2024-07-04 election and delete the ones that return a 404.
+
+        Facebook URLs are never deleted and failed requests are only
+        reported. Passing --person-id limits the run to that person.
+        """
+        inactive_links: List[List] = []
+        memberships = Membership.objects.filter(
+            ballot__election__slug="parl.2024-07-04"
+        )
+
+        # distinct() stops a person with several matching memberships
+        # from being checked more than once
+        people = Person.objects.filter(memberships__in=memberships).distinct()
+        # honour --person-id so that a single person can be checked
+        person_id = options.get("person_id")
+        if person_id:
+            people = people.filter(pk=person_id)
+
+        for person in people:
+            person_identifiers = [
+                identifier
+                for identifier in person.get_all_identifiers
+                if identifier.value.startswith("http")
+            ]
+
+            for identifier in person_identifiers:
+                try:
+                    resp = requests.get(identifier.value, timeout=2).status_code
+                except requests.exceptions.RequestException as e:
+                    self.stdout.write(
+                        f"Request exception: {e} for {person.name}"
+                    )
+                    continue
+                # only a definite 404 counts as dead; a facebook url is
+                # any url with facebook or fb in the domain and is skipped
+                if resp != 404 or is_facebook_url(identifier.value):
+                    continue
+                self.stdout.write(
+                    f"Status code: {resp} for {person.name} {identifier.value}"
+                )
+                inactive_links.append(
+                    [
+                        str(person.pk),
+                        person.name,
+                        identifier.value,
+                        str(resp),
+                    ]
+                )
+                # delete the identifier from the person identifiers
+                bot = CandidateBot(person.pk, ignore_errors=True)
+                bot.remove_person_identifier(identifier)
+                self.stdout.write(
+                    f"Candidatebot deleted {identifier.value_type}:{identifier.value} from {person.name}"
+                )
+
+        self.stdout.write(f"Removed {len(inactive_links)} inactive links")
diff --git a/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py b/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py
new file mode 100644
index 000000000..fe572d47f
--- /dev/null
+++ b/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py
@@ -0,0 +1,80 @@
+from unittest import TestCase
+from unittest.mock import Mock, patch
+
+import pytest
+from candidates.models.popolo_extra import Ballot
+from candidates.tests.factories import (
+    ElectionFactory,
+    MembershipFactory,
+    PostFactory,
+)
+from django.core.management import call_command
+from parties.tests.factories import PartyFactory
+from people.models import Person
+from people.tests.factories import PersonFactory, PersonIdentifierFactory
+
+ACTIVE_URL = "https://en.wikipedia.org/wiki/Rishi_Sunak"
+INACTIVE_URL = "http://www.conservatives.com/about/our-team/example.com"
+
+
+def fake_get(url, **kwargs):
+    """Stand in for requests.get: 404 for INACTIVE_URL, otherwise 200."""
+    return Mock(status_code=404 if url == INACTIVE_URL else 200)
+
+
+class TestPersonIdentifiers(TestCase):
+    def setUp(self):
+        self.person = PersonFactory.create()
+        # 200 example
+        PersonIdentifierFactory.create(
+            person=self.person,
+            value=ACTIVE_URL,
+            value_type="https://en.wikipedia_url",
+        )
+        # 404 example
+        PersonIdentifierFactory.create(
+            person=self.person,
+            value=INACTIVE_URL,
+            value_type="party_ppc_page_url",
+        )
+        post = PostFactory.create(slug="parl.2024-07-04")
+
+        election = ElectionFactory.create(
+            slug="parl.2024-07-04",
+            election_date="2024-07-04",
+            name="2024 General Election",
+        )
+        ballot = Ballot.objects.create(
+            election=election, post=post, ballot_paper_id="parl.2024-07-04"
+        )
+        party = PartyFactory.create()
+        MembershipFactory.create(
+            person=self.person,
+            post=post,
+            party=party,
+            ballot=ballot,
+        )
+
+    @pytest.mark.django_db
+    def test_remove_inactive_person_identifiers(self):
+        """The 404 link is deleted and the working link is kept."""
+        self.assertCountEqual(
+            [i.value for i in self.person.get_all_identifiers],
+            [ACTIVE_URL, INACTIVE_URL],
+        )
+
+        # patch requests.get so the test never touches the network
+        with patch("requests.get", side_effect=fake_get):
+            call_command(
+                "candidatebot_remove_inactive_person_links",
+                "--person-id",
+                self.person.id,
+            )
+
+        # load a fresh Person so that no identifiers cached on
+        # self.person can mask the deletion
+        person = Person.objects.get(pk=self.person.pk)
+        self.assertEqual(
+            [i.value for i in person.get_all_identifiers],
+            [ACTIVE_URL],
+        )