Skip to content

Commit

Permalink
Adding flag for removing credit cards in Unix timestamp (#137)
Browse files Browse the repository at this point in the history
* #130 Adding option to ignore credit cards when in a Unix timestamp.

* #130 Adding option to ignore credit cards when in a Unix timestamp.

* #130 Adding property to docs.
  • Loading branch information
jzonthemtn authored Sep 3, 2024
1 parent 45c567d commit 0a1aab1
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This filter has no required parameters.
| `enabled` | When set to false, the filter will be disabled and not applied | `true` |
| `ignored` | A list of terms to be ignored by the filter. | None |
| `onlyValidCreditCardNumbers` | When set to true, only valid credit card numbers will be filtered. | `true` |
| `ignoreWhenInUnixTimestamp` | When set to true, text that matches the pattern for a Unix timestamp is ignored and will not be filtered as a credit card number. | `false` |

### Filter Strategies

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,9 @@ public List<Filter> getFiltersForPolicy(final Policy policy, final Map<String, M
.build();

final boolean onlyValidCreditCardNumbers = policy.getIdentifiers().getCreditCard().isOnlyValidCreditCardNumbers();
final boolean ignoreWhenInUnixTimestamp = policy.getIdentifiers().getCreditCard().isIgnoreWhenInUnixTimestamp();

final Filter filter = new CreditCardFilter(filterConfiguration, onlyValidCreditCardNumbers);
final Filter filter = new CreditCardFilter(filterConfiguration, onlyValidCreditCardNumbers, ignoreWhenInUnixTimestamp);
enabledFilters.add(filter);
filterCache.get(policy.getName()).put(FilterType.CREDIT_CARD, filter);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
import ai.philterd.phileas.model.policy.Policy;
import org.apache.commons.validator.routines.checkdigit.LuhnCheckDigit;

import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
Expand All @@ -36,12 +37,18 @@ public class CreditCardFilter extends RegexFilter {

private final boolean onlyValidCreditCardNumbers;
private final LuhnCheckDigit luhnCheckDigit;
private final boolean ignoreWhenInUnixTimestamp;

private final String UNIX_TIMESTAMP_REGEX = "1[5-8][0-9]{11}";

public CreditCardFilter(FilterConfiguration filterConfiguration, boolean onlyValidCreditCardNumbers,
boolean ignoreWhenInUnixTimestamp) {

public CreditCardFilter(FilterConfiguration filterConfiguration, boolean onlyValidCreditCardNumbers) {
super(FilterType.CREDIT_CARD, filterConfiguration);

this.onlyValidCreditCardNumbers = onlyValidCreditCardNumbers;
this.luhnCheckDigit = new LuhnCheckDigit();
this.ignoreWhenInUnixTimestamp = ignoreWhenInUnixTimestamp;

// Modify the confidence based on the characters around the span.
final List<ConfidenceModifier> confidenceModifiers = List.of(
Expand All @@ -50,8 +57,9 @@ public CreditCardFilter(FilterConfiguration filterConfiguration, boolean onlyVal
new ConfidenceModifier(0.5, ConfidenceModifier.ConfidenceCondition.CHARACTER_SEQUENCE_SURROUNDING, "-"));

// See http://regular-expressions.info/creditcard.html
final Pattern creditCardPattern = Pattern.compile("\\b(?:\\d[ -]*?){13,16}\\b", Pattern.CASE_INSENSITIVE);
final FilterPattern creditcard = new FilterPattern.FilterPatternBuilder(creditCardPattern, 0.90, confidenceModifiers).build();
final Pattern creditCard = Pattern.compile("\\b(?:\\d[ -]*?){13,16}\\b", Pattern.CASE_INSENSITIVE);
final FilterPattern creditCardPattern = new FilterPattern.FilterPatternBuilder(creditCard, 0.90)
.withConfidenceModifiers(confidenceModifiers).build();

this.contextualTerms = new HashSet<>();
this.contextualTerms.add("credit");
Expand All @@ -62,7 +70,7 @@ public CreditCardFilter(FilterConfiguration filterConfiguration, boolean onlyVal
this.contextualTerms.add("jcb");
this.contextualTerms.add("diners");

this.analyzer = new Analyzer(contextualTerms, creditcard);
this.analyzer = new Analyzer(contextualTerms, creditCardPattern);

}

Expand All @@ -71,28 +79,36 @@ public FilterResult filter(Policy policy, String context, String documentId, int

final List<Span> spans = findSpans(policy, analyzer, input, context, documentId, attributes);

final List<Span> validSpans = new LinkedList<>();
if (ignoreWhenInUnixTimestamp) {

final Collection<Span> spansInUnixTimestamps =
spans
.stream()
.filter(s -> s.getText().matches(UNIX_TIMESTAMP_REGEX))
.toList();

spans.removeAll(spansInUnixTimestamps);

}

for(final Span span : spans) {
if (onlyValidCreditCardNumbers) {

final String creditCardNumber = input.substring(span.getCharacterStart(), span.getCharacterEnd())
.replaceAll(" ", "")
.replaceAll("-", "");
for(final Span span : spans) {

if(onlyValidCreditCardNumbers) {
final String creditCardNumber = input.substring(span.getCharacterStart(), span.getCharacterEnd())
.replaceAll(" ", "")
.replaceAll("-", "");

if(luhnCheckDigit.isValid(creditCardNumber)) {
validSpans.add(span);
if (!luhnCheckDigit.isValid(creditCardNumber)) {
spans.remove(span);
}

} else {
validSpans.add(span);
}

}

return new FilterResult(context, documentId, validSpans);
return new FilterResult(context, documentId, spans);

}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,30 @@ public void endToEndJustCreditCard() throws Exception {

}

/**
 * End-to-end check that a number shaped like a Unix timestamp is returned unmodified when the
 * policy is built by {@code getPolicyJustCreditCardNotInUnixTimestamps}.
 */
@Test
public void endToEndJustCreditCardInUnixTimestamp() throws Exception {

    // Write the policy as JSON into a fresh temporary policies directory.
    final Path policiesDirectory = Files.createTempDirectory("philter");
    final String policiesPath = policiesDirectory.toFile().getAbsolutePath();

    final File policyFile = Paths.get(policiesPath, "justcreditcard.json").toFile();
    LOGGER.info("Writing policy to {}", policyFile.getAbsolutePath());
    final String policyJson = gson.toJson(getPolicyJustCreditCardNotInUnixTimestamps("justcreditcard"));
    FileUtils.writeStringToFile(policyFile, policyJson, Charset.defaultCharset());

    // Point the service at the indexes and at the directory holding the policy just written.
    final Properties serviceProperties = new Properties();
    serviceProperties.setProperty("indexes.directory", INDEXES_DIRECTORY);
    serviceProperties.setProperty("filter.policies.directory", policiesPath);

    final PhileasFilterService filterService =
            new PhileasFilterService(new PhileasConfiguration(serviceProperties));

    final FilterResponse filterResponse = filterService.filter(
            List.of("justcreditcard"),
            "context",
            "documentid",
            "My cc is 1647725122227",
            MimeType.TEXT_PLAIN);

    LOGGER.info(filterResponse.filteredText());
    showSpans(filterResponse.explanation().identifiedSpans());

    // The input must come back unchanged.
    Assertions.assertEquals("My cc is 1647725122227", filterResponse.filteredText());

}

@Test
public void endToEndJustCreditCardWithIgnoredTerms() throws Exception {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,12 @@ public static Policy getPdfFilterWithPersonPolicy(String policyName) throws URIS

}

public static Policy getPolicyJustCreditCard(String policyName) throws IOException {
public static Policy getPolicyJustCreditCardNotInUnixTimestamps(String policyName) {

CreditCardFilterStrategy creditCardFilterStrategy = new CreditCardFilterStrategy();

CreditCard creditCard = new CreditCard();
creditCard.setIgnoreWhenInUnixTimestamp(true);
creditCard.setCreditCardFilterStrategies(Arrays.asList(creditCardFilterStrategy));

Identifiers identifiers = new Identifiers();
Expand All @@ -188,7 +189,29 @@ public static Policy getPolicyJustCreditCard(String policyName) throws IOExcepti
Policy policy = new Policy();
policy.setName(policyName);
policy.setIdentifiers(identifiers);
policy.setIgnored(Arrays.asList(ignored));
policy.setIgnored(List.of(ignored));

return policy;

}

public static Policy getPolicyJustCreditCard(String policyName) {

CreditCardFilterStrategy creditCardFilterStrategy = new CreditCardFilterStrategy();

CreditCard creditCard = new CreditCard();
creditCard.setCreditCardFilterStrategies(Arrays.asList(creditCardFilterStrategy));

Identifiers identifiers = new Identifiers();
identifiers.setCreditCard(creditCard);

Ignored ignored = new Ignored();
ignored.setTerms(Arrays.asList("4121742025464400", "12341341234", "2423543545"));

Policy policy = new Policy();
policy.setName(policyName);
policy.setIdentifiers(identifiers);
policy.setIgnored(List.of(ignored));

return policy;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

public class CreditCardFilterTest extends AbstractFilterTest {

private AlertService alertService = Mockito.mock(AlertService.class);
private final AlertService alertService = Mockito.mock(AlertService.class);

@Test
public void filterCreditCardOnlyValid() throws Exception {
Expand All @@ -43,7 +43,7 @@ public void filterCreditCardOnlyValid() throws Exception {
.withWindowSize(windowSize)
.build();

final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, true);
final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, true, false);

// VISA

Expand Down Expand Up @@ -107,7 +107,7 @@ public void filterCreditCardValidAndInvalid() throws Exception {
.withWindowSize(windowSize)
.build();

final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, false);
final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, false, false);

// VISA

Expand Down Expand Up @@ -170,7 +170,7 @@ public void filterCreditCardBorderedByDashes() throws Exception {
.withWindowSize(windowSize)
.build();

final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, false);
final CreditCardFilter filter = new CreditCardFilter(filterConfiguration, false, false);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "the payment method is 1234567812345678- visa.", attributes);
Assertions.assertEquals(1, filterResult.getSpans().size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ public class CreditCard extends AbstractFilter {
@Expose
private List<CreditCardFilterStrategy> creditCardFilterStrategies;

@SerializedName("ignoreWhenInUnixTimestamp")
@Expose private boolean ignoreWhenInUnixTimestamp = false;

public List<CreditCardFilterStrategy> getCreditCardFilterStrategies() {
return creditCardFilterStrategies;
}
Expand All @@ -47,4 +50,12 @@ public void setOnlyValidCreditCardNumbers(boolean onlyValidCreditCardNumbers) {
this.onlyValidCreditCardNumbers = onlyValidCreditCardNumbers;
}

/**
 * Returns the value of the {@code ignoreWhenInUnixTimestamp} policy property.
 *
 * @return the current flag value; the field is initialized to {@code false}
 */
public boolean isIgnoreWhenInUnixTimestamp() {
return ignoreWhenInUnixTimestamp;
}

/**
 * Sets the {@code ignoreWhenInUnixTimestamp} policy property.
 *
 * @param ignoreWhenInUnixTimestamp the new flag value
 */
public void setIgnoreWhenInUnixTimestamp(boolean ignoreWhenInUnixTimestamp) {
this.ignoreWhenInUnixTimestamp = ignoreWhenInUnixTimestamp;
}

}

0 comments on commit 0a1aab1

Please sign in to comment.