From 8ead4783df8503fecf9e700134b3d4468be62d43 Mon Sep 17 00:00:00 2001 From: jzonthemtn Date: Sun, 25 Aug 2024 13:48:53 -0400 Subject: [PATCH] #130 Adding option to ignore credit cards when in a Unix timestamp. --- .../filters/regex/CreditCardFilter.java | 19 ++++++++++--------- .../test/phileas/services/EndToEndTests.java | 6 +++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/phileas-core/src/main/java/ai/philterd/phileas/services/filters/regex/CreditCardFilter.java b/phileas-core/src/main/java/ai/philterd/phileas/services/filters/regex/CreditCardFilter.java index d6729cb5..cba22653 100644 --- a/phileas-core/src/main/java/ai/philterd/phileas/services/filters/regex/CreditCardFilter.java +++ b/phileas-core/src/main/java/ai/philterd/phileas/services/filters/regex/CreditCardFilter.java @@ -25,6 +25,7 @@ import ai.philterd.phileas.model.policy.Policy; import org.apache.commons.validator.routines.checkdigit.LuhnCheckDigit; +import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -37,6 +38,8 @@ public class CreditCardFilter extends RegexFilter { private final LuhnCheckDigit luhnCheckDigit; private final boolean ignoreWhenInUnixTimestamp; + private final String UNIX_TIMESTAMP_REGEX = "1[5-8][0-9]{11}"; + public CreditCardFilter(FilterConfiguration filterConfiguration, boolean onlyValidCreditCardNumbers, boolean ignoreWhenInUnixTimestamp) { @@ -71,27 +74,25 @@ public FilterResult filter(Policy policy, String context, String documentId, int if (ignoreWhenInUnixTimestamp) { - spans.removeAll( + final Collection spansInUnixTimestamps = spans .stream() - .filter(s -> s.getText().matches("1[5-8][0-9]{11}")) - .toList() - ); + .filter(s -> s.getText().matches(UNIX_TIMESTAMP_REGEX)) + .toList(); + + spans.removeAll(spansInUnixTimestamps); } if (onlyValidCreditCardNumbers) { - final Iterator i = spans.iterator(); - while(i.hasNext()) { - - final Span span = i.next(); + for(final Span span : spans) { final String creditCardNumber = input.substring(span.getCharacterStart(), span.getCharacterEnd()) .replaceAll(" ", "") .replaceAll("-", ""); - if(!luhnCheckDigit.isValid(creditCardNumber)) { + if (!luhnCheckDigit.isValid(creditCardNumber)) { spans.remove(span); } diff --git a/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTests.java b/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTests.java index 51ae829c..53797d94 100644 --- a/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTests.java +++ b/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTests.java @@ -843,7 +843,7 @@ public void endToEndJustCreditCard() throws Exception { } @Test - public void endToEndJustCreditCardInUnixTimstamp() throws Exception { + public void endToEndJustCreditCardInUnixTimestamp() throws Exception { final Path temp = Files.createTempDirectory("philter"); @@ -858,10 +858,10 @@ public void endToEndJustCreditCardInUnixTimstamp() throws Exception { final PhileasConfiguration phileasConfiguration = new PhileasConfiguration(properties); final PhileasFilterService service = new PhileasFilterService(phileasConfiguration); - final FilterResponse response = service.filter(Arrays.asList("justcreditcard"), "context", "documentid", "My cc is 1647725122227", MimeType.TEXT_PLAIN); + final FilterResponse response = service.filter(List.of("justcreditcard"), "context", "documentid", "My cc is 1647725122227", MimeType.TEXT_PLAIN); LOGGER.info(response.filteredText()); - + showSpans(response.explanation().identifiedSpans()); Assertions.assertEquals("My cc is 1647725122227", response.filteredText()); }