diff --git a/docs/FilterPolicies/Filters/CommonFilters/Filters-CreditCards.md b/docs/FilterPolicies/Filters/CommonFilters/Filters-CreditCards.md index fd12a14a..4a5217cc 100644 --- a/docs/FilterPolicies/Filters/CommonFilters/Filters-CreditCards.md +++ b/docs/FilterPolicies/Filters/CommonFilters/Filters-CreditCards.md @@ -16,6 +16,7 @@ This filter has no required parameters. | `enabled` | When set to false, the filter will be disabled and not applied | `true` | | `ignored` | A list of terms to be ignored by the filter. | None | | `onlyValidCreditCardNumbers` | When set to true, only valid credit card numbers will be filtered. | `true` | +| `ignoreWhenInUnixTimestamp` | When set to true, only credit card numbers that do not match the pattern for a Unix timestamp will be filtered. | `false` | ### Filter Strategies diff --git a/phileas-core/src/main/java/ai/philterd/phileas/services/FilterPolicyLoader.java b/phileas-core/src/main/java/ai/philterd/phileas/services/FilterPolicyLoader.java index 1d5372d0..37645b92 100644 --- a/phileas-core/src/main/java/ai/philterd/phileas/services/FilterPolicyLoader.java +++ b/phileas-core/src/main/java/ai/philterd/phileas/services/FilterPolicyLoader.java @@ -188,8 +188,9 @@ public List getFiltersForPolicy(final Policy policy, final Map confidenceModifiers = List.of( @@ -50,8 +57,9 @@ public CreditCardFilter(FilterConfiguration filterConfiguration, boolean onlyVal new ConfidenceModifier(0.5, ConfidenceModifier.ConfidenceCondition.CHARACTER_SEQUENCE_SURROUNDING, "-")); // See http://regular-expressions.info/creditcard.html - final Pattern creditCardPattern = Pattern.compile("\\b(?:\\d[ -]*?){13,16}\\b", Pattern.CASE_INSENSITIVE); - final FilterPattern creditcard = new FilterPattern.FilterPatternBuilder(creditCardPattern, 0.90, confidenceModifiers).build(); + final Pattern creditCard = Pattern.compile("\\b(?:\\d[ -]*?){13,16}\\b", Pattern.CASE_INSENSITIVE); + final FilterPattern creditCardPattern = new FilterPattern.FilterPatternBuilder(creditCard, 0.90) + .withConfidenceModifiers(confidenceModifiers).build(); this.contextualTerms = new HashSet<>(); this.contextualTerms.add("credit"); @@ -62,7 +70,7 @@ public CreditCardFilter(FilterConfiguration filterConfiguration, boolean onlyVal this.contextualTerms.add("jcb"); this.contextualTerms.add("diners"); - this.analyzer = new Analyzer(contextualTerms, creditcard); + this.analyzer = new Analyzer(contextualTerms, creditCardPattern); } @@ -71,28 +79,36 @@ public FilterResult filter(Policy policy, String context, String documentId, int final List spans = findSpans(policy, analyzer, input, context, documentId, attributes); - final List validSpans = new LinkedList<>(); + if (ignoreWhenInUnixTimestamp) { + + final Collection spansInUnixTimestamps = + spans + .stream() + .filter(s -> s.getText().matches(UNIX_TIMESTAMP_REGEX)) + .toList(); + + spans.removeAll(spansInUnixTimestamps); + + } - for(final Span span : spans) { + if (onlyValidCreditCardNumbers) { - final String creditCardNumber = input.substring(span.getCharacterStart(), span.getCharacterEnd()) - .replaceAll(" ", "") - .replaceAll("-", ""); + for(final Span span : spans) { - if(onlyValidCreditCardNumbers) { + final String creditCardNumber = input.substring(span.getCharacterStart(), span.getCharacterEnd()) + .replaceAll(" ", "") + .replaceAll("-", ""); - if(luhnCheckDigit.isValid(creditCardNumber)) { - validSpans.add(span); + if (!luhnCheckDigit.isValid(creditCardNumber)) { + spans.remove(span); } - } else { - validSpans.add(span); } } - return new FilterResult(context, documentId, validSpans); + return new FilterResult(context, documentId, spans); } -} +} \ No newline at end of file diff --git a/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTests.java b/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTests.java index 302f2404..53797d94 100644 --- a/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTests.java +++ b/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTests.java @@ -842,6 +842,30 @@ public void endToEndJustCreditCard() throws Exception { } + @Test + public void endToEndJustCreditCardInUnixTimestamp() throws Exception { + + final Path temp = Files.createTempDirectory("philter"); + + final File file2 = Paths.get(temp.toFile().getAbsolutePath(), "justcreditcard.json").toFile(); + LOGGER.info("Writing policy to {}", file2.getAbsolutePath()); + FileUtils.writeStringToFile(file2, gson.toJson(getPolicyJustCreditCardNotInUnixTimestamps("justcreditcard")), Charset.defaultCharset()); + + Properties properties = new Properties(); + properties.setProperty("indexes.directory", INDEXES_DIRECTORY); + properties.setProperty("filter.policies.directory", temp.toFile().getAbsolutePath()); + + final PhileasConfiguration phileasConfiguration = new PhileasConfiguration(properties); + + final PhileasFilterService service = new PhileasFilterService(phileasConfiguration); + final FilterResponse response = service.filter(List.of("justcreditcard"), "context", "documentid", "My cc is 1647725122227", MimeType.TEXT_PLAIN); + + LOGGER.info(response.filteredText()); + showSpans(response.explanation().identifiedSpans()); + Assertions.assertEquals("My cc is 1647725122227", response.filteredText()); + + } + @Test public void endToEndJustCreditCardWithIgnoredTerms() throws Exception { diff --git a/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTestsHelper.java b/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTestsHelper.java index dcfb6b34..e9ff7a0c 100644 --- a/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTestsHelper.java +++ b/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTestsHelper.java @@ -172,11 +172,12 @@ public static Policy getPdfFilterWithPersonPolicy(String policyName) throws URIS } - public static Policy getPolicyJustCreditCard(String policyName) throws IOException { + public static Policy getPolicyJustCreditCardNotInUnixTimestamps(String policyName) { CreditCardFilterStrategy creditCardFilterStrategy = new CreditCardFilterStrategy(); CreditCard creditCard = new CreditCard(); + creditCard.setIgnoreWhenInUnixTimestamp(true); creditCard.setCreditCardFilterStrategies(Arrays.asList(creditCardFilterStrategy)); Identifiers identifiers = new Identifiers(); @@ -188,7 +189,29 @@ public static Policy getPolicyJustCreditCard(String policyName) throws IOExcepti Policy policy = new Policy(); policy.setName(policyName); policy.setIdentifiers(identifiers); - policy.setIgnored(Arrays.asList(ignored)); + policy.setIgnored(List.of(ignored)); + + return policy; + + } + + public static Policy getPolicyJustCreditCard(String policyName) { + + CreditCardFilterStrategy creditCardFilterStrategy = new CreditCardFilterStrategy(); + + CreditCard creditCard = new CreditCard(); + creditCard.setCreditCardFilterStrategies(Arrays.asList(creditCardFilterStrategy)); + + Identifiers identifiers = new Identifiers(); + identifiers.setCreditCard(creditCard); + + Ignored ignored = new Ignored(); + ignored.setTerms(Arrays.asList("4121742025464400", "12341341234", "2423543545")); + + Policy policy = new Policy(); + policy.setName(policyName); + policy.setIdentifiers(identifiers); + policy.setIgnored(List.of(ignored)); return policy; diff --git a/phileas-core/src/test/java/ai/philterd/test/phileas/services/filters/CreditCardFilterTest.java b/phileas-core/src/test/java/ai/philterd/test/phileas/services/filters/CreditCardFilterTest.java index 7de267b3..01addfe7 100644 --- a/phileas-core/src/test/java/ai/philterd/test/phileas/services/filters/CreditCardFilterTest.java +++ b/phileas-core/src/test/java/ai/philterd/test/phileas/services/filters/CreditCardFilterTest.java @@ -31,7 +31,7 @@ public class CreditCardFilterTest extends AbstractFilterTest { - private AlertService alertService = Mockito.mock(AlertService.class); + private final AlertService alertService = Mockito.mock(AlertService.class); @Test public void filterCreditCardOnlyValid() throws Exception { @@ -43,7 +43,7 @@ public void filterCreditCardOnlyValid() throws Exception { .withWindowSize(windowSize) .build(); - final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, true); + final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, true, false); // VISA @@ -107,7 +107,7 @@ public void filterCreditCardValidAndInvalid() throws Exception { .withWindowSize(windowSize) .build(); - final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, false); + final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, false, false); // VISA @@ -170,7 +170,7 @@ public void filterCreditCardBorderedByDashes() throws Exception { .withWindowSize(windowSize) .build(); - final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, false); + final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, false, false); final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "the payment method is 1234567812345678- visa.", attributes); Assertions.assertEquals(1, filterResult.getSpans().size()); diff --git a/phileas-model/src/main/java/ai/philterd/phileas/model/policy/filters/CreditCard.java b/phileas-model/src/main/java/ai/philterd/phileas/model/policy/filters/CreditCard.java index 062f7d73..456dbf1c 100644 --- a/phileas-model/src/main/java/ai/philterd/phileas/model/policy/filters/CreditCard.java +++ b/phileas-model/src/main/java/ai/philterd/phileas/model/policy/filters/CreditCard.java @@ -31,6 +31,9 @@ public class CreditCard extends AbstractFilter { @Expose private List creditCardFilterStrategies; + @SerializedName("ignoreWhenInUnixTimestamp") + @Expose private boolean ignoreWhenInUnixTimestamp = false; + public List getCreditCardFilterStrategies() { return creditCardFilterStrategies; } @@ -47,4 +50,12 @@ public void setOnlyValidCreditCardNumbers(boolean onlyValidCreditCardNumbers) { this.onlyValidCreditCardNumbers = onlyValidCreditCardNumbers; } + public boolean isIgnoreWhenInUnixTimestamp() { + return ignoreWhenInUnixTimestamp; + } + + public void setIgnoreWhenInUnixTimestamp(boolean ignoreWhenInUnixTimestamp) { + this.ignoreWhenInUnixTimestamp = ignoreWhenInUnixTimestamp; + } + } \ No newline at end of file