Skip to content

Commit

Permalink
Add an adhoc check in the HtmlTagIntegrityChecker
Browse files Browse the repository at this point in the history
This is very ad hoc and will require a better implementation if more issues of this type show up.
Most of them are also expected to go away.
  • Loading branch information
ja-openai committed Oct 1, 2024
1 parent 1a71427 commit 02837b6
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ public String getRegex() {
public void check(String sourceContent, String targetContent) throws IntegrityCheckException {
logger.debug("Get Html tags of the target");
List<String> targetHtmlTags = getHtmlTags(targetContent);
logger.debug("Target Html tags: {}", targetHtmlTags);
logger.info("Target Html tags: {}", targetHtmlTags);

logger.debug("Get Html tags of the source");
List<String> sourceHtmlTags = getHtmlTags(sourceContent);
logger.debug("Source Html tags: {}", sourceHtmlTags);
logger.info("Source Html tags: {}", sourceHtmlTags);

Map<String, Long> sourceTagCount =
sourceHtmlTags.stream()
Expand Down Expand Up @@ -73,6 +73,19 @@ public void check(String sourceContent, String targetContent) throws IntegrityCh
if (!isValidTagOrder(targetHtmlTags)) {
throw new HtmlTagIntegrityCheckerException("HTML tags in target are not in valid order");
}

logger.debug("Ad-hoc checks");
checkDoubleAnnotationElements(sourceContent, targetContent);
}

/**
 * Ad-hoc guard added to unblock; eventually needs a better solution.
 *
 * <p>Rejects a target that contains a doubled {@code <annotation} opening when the source does
 * not contain that same doubled opening.
 *
 * @param sourceContent the source string
 * @param targetContent the target string to validate against the source
 * @throws HtmlTagIntegrityCheckerException if only the target contains the doubled opening
 */
void checkDoubleAnnotationElements(String sourceContent, String targetContent) {
  final String duplicatedOpening = "<annotation <annotation";

  // The source already carries the doubled opening: nothing to enforce on the target.
  if (sourceContent.contains(duplicatedOpening)) {
    return;
  }

  // The target is clean: nothing to report.
  if (!targetContent.contains(duplicatedOpening)) {
    return;
  }

  throw new HtmlTagIntegrityCheckerException(
      String.format("Target must not contain: '%s'", duplicatedOpening));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,14 @@ public void testHtmlTagCheckCloseTagsSame() {
checker.check(source, target);
}

/**
 * A target that repeats the {@code <annotation} opening, while the source does not, must be
 * rejected with an {@link HtmlTagIntegrityCheckerException}.
 */
@Test(expected = HtmlTagIntegrityCheckerException.class)
public void testHtmlTagCheckWithDuplicatedOpening() {
  String source = "<annotation url=\"https://something.com\">text</annotation>";
  // The second opening uses a raw '<' (not the "&lt;" entity) so the target really contains the
  // "<annotation <annotation" sequence that checkDoubleAnnotationElements looks for.
  // NOTE(review): the "&lt;" seen previously looks like an HTML-escaping artifact — confirm.
  String target = "<annotation <annotation url=\"https://something.com\">text</annotation>";

  checker.check(source, target);
}

@Test
public void testHtmlTagCheckNonTagLessThanDoesntConfuseThings() {
String source = "Upload is <10% complete.";
Expand Down

0 comments on commit 02837b6

Please sign in to comment.