Skip to content

Commit

Permalink
(feat) regex: Pattern.split()
Browse files Browse the repository at this point in the history
  • Loading branch information
alexey-pelykh committed Jun 20, 2024
1 parent dba146f commit 25f986e
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 46 deletions.
131 changes: 85 additions & 46 deletions regex/src/main/java/org/pcre4j/regex/Pattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.pcre4j.Pcre2CompileError;
import org.pcre4j.Pcre2CompileOption;

import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -178,52 +179,90 @@ public String pattern() {

// TODO: quote(String s)

// /**
// * Splits the given input around matches of this pattern.
// *
// * @param input the input to split
// * @return the array of strings computed by splitting the input around matches of this pattern
// */
// public String[] split(CharSequence input) {
// return split(input, 0, false);
// }
//
// /**
// * Splits the given input around matches of this pattern.
// *
// * @param input the input to split
// * @param limit the maximum number of items to return
// * @return the array of strings computed by splitting the input around matches of this pattern
// */
// public String[] split(CharSequence input, int limit) {
// return split(input, limit, false);
// }
//
// /**
// * Splits the given input around matches of this pattern and returns both the strings and the matching delimiters.
// *
// * @param input the input to split
// * @param limit the maximum number of items to return
// * @return the array of strings and matching delimiters computed by splitting the input around matches of this
// * pattern
// */
// public String[] splitWithDelimiters(CharSequence input, int limit) {
// return split(input, limit, true);
// }
//
// /**
// * Splits the given input around matches of this pattern and returns either just the strings or both the strings
// * and the matching delimiters.
// *
// * @param input the input to split
// * @param limit the maximum number of items to return
// * @param includeDelimiters whether to include the matching delimiters in the result
// * @return the array of strings and optionally matching delimiters computed by splitting the input around matches
// * of this pattern
// */
// public String[] split(CharSequence input, int limit, boolean includeDelimiters) {
// // TODO:
// }
/**
 * Splits the given input around matches of this pattern, applying the pattern as many times as possible and
 * dropping trailing empty strings.
 * <p>
 * Shorthand for {@code split(input, 0, false)}.
 *
 * @param input the input to split
 * @return the array of strings computed by splitting the input around matches of this pattern
 */
public String[] split(CharSequence input) {
    final int noLimit = 0;
    final boolean withDelimiters = false;
    return split(input, noLimit, withDelimiters);
}

/**
 * Splits the given input around matches of this pattern, returning only the substrings found between matches.
 * <p>
 * Shorthand for {@code split(input, limit, false)}.
 *
 * @param input the input to split
 * @param limit when positive, the maximum number of items to return; when zero or negative, the pattern is
 *              applied as many times as possible
 * @return the array of strings computed by splitting the input around matches of this pattern
 */
public String[] split(CharSequence input, int limit) {
    final boolean withDelimiters = false;
    return split(input, limit, withDelimiters);
}

/**
 * Splits the given input around matches of this pattern and returns the substrings interleaved with the
 * delimiters that matched between them.
 * <p>
 * Shorthand for {@code split(input, limit, true)}.
 *
 * @param input the input to split
 * @param limit when positive, the pattern is applied at most {@code limit - 1} times; when zero or negative,
 *              the pattern is applied as many times as possible
 * @return the array of strings and matching delimiters computed by splitting the input around matches of this
 * pattern
 */
public String[] splitWithDelimiters(CharSequence input, int limit) {
    final boolean withDelimiters = true;
    return split(input, limit, withDelimiters);
}

/**
 * Splits the given input around matches of this pattern and returns either just the strings or both the strings
 * and the matching delimiters.
 * <p>
 * The behaviour mirrors {@code java.util.regex.Pattern#split(CharSequence, int)} and
 * {@code java.util.regex.Pattern#splitWithDelimiters(CharSequence, int)}:
 * <ul>
 * <li>a positive {@code limit} applies the pattern at most {@code limit - 1} times; the last entry holds the
 * whole remainder of the input past the last applied match;</li>
 * <li>a {@code limit} of zero applies the pattern as many times as possible and discards trailing empty
 * strings;</li>
 * <li>a negative {@code limit} applies the pattern as many times as possible and keeps trailing empty
 * strings.</li>
 * </ul>
 * A zero-width match at the very beginning of the input never produces an empty leading substring, and a
 * zero-width match never contributes an (empty) delimiter entry. If the pattern does not match at all, the
 * result is a single-element array holding the input.
 *
 * @param input             the input to split
 * @param limit             the result threshold, as described above
 * @param includeDelimiters whether to include the matching delimiters in the result
 * @return the array of strings and optionally matching delimiters computed by splitting the input around matches
 * of this pattern
 */
public String[] split(CharSequence input, int limit, boolean includeDelimiters) {
    final var matcher = matcher(input);
    final var result = new ArrayList<String>();
    final var limited = limit > 0;
    var numMatches = 0;
    var offset = 0;
    while (matcher.find()) {
        final var start = matcher.start();
        final var end = matcher.end();
        if (!limited || numMatches < limit - 1) {
            if (offset == 0 && start == 0 && end == 0) {
                // A zero-width match at the beginning must not yield an empty leading substring.
                continue;
            }
            result.add(input.subSequence(offset, start).toString());
            offset = end;
            // Only non-empty delimiters are reported; a zero-width match has nothing to show.
            if (includeDelimiters && end > start) {
                result.add(input.subSequence(start, end).toString());
            }
            numMatches += 1;
        } else {
            // Limit reached: the final entry is everything that is left, unsplit. Further matches cannot
            // change the result, so stop scanning.
            result.add(input.subSequence(offset, input.length()).toString());
            numMatches += 1;
            break;
        }
    }

    // Nothing was split off (no match, or only a skipped zero-width match at the start).
    if (result.isEmpty()) {
        return new String[]{input.toString()};
    }

    // Append the tail after the last match unless the limit branch already consumed it.
    if (!limited || numMatches < limit) {
        result.add(input.subSequence(offset, input.length()).toString());
    }

    // Trailing empty strings are dropped only for limit == 0; a negative limit keeps them.
    var resultSize = result.size();
    if (limit == 0) {
        while (resultSize > 0 && result.get(resultSize - 1).isEmpty()) {
            resultSize--;
        }
    }
    return result.subList(0, resultSize).toArray(new String[resultSize]);
}

// TODO: splitAsStream(CharSequence input)

Expand Down
13 changes: 13 additions & 0 deletions regex/src/test/java/org/pcre4j/regex/PatternTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.pcre4j.Pcre4j;
import org.pcre4j.jna.Pcre2;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;

/**
Expand All @@ -36,4 +37,16 @@ void namedGroups() {

assertEquals(javaPattern.namedGroups(), pcre4jPattern.namedGroups());
}

@Test
void split() {
    // The JDK implementation serves as the reference the PCRE4J implementation must agree with.
    final var regex = "\\D+";
    final var reference = java.util.regex.Pattern.compile(regex);
    final var subject = Pattern.compile(regex);

    final var text = "0, 1, 1, 2, 3, 5, 8, ..., 144, ...";

    // Unlimited split
    final String[] expectedPlain = reference.split(text);
    assertArrayEquals(expectedPlain, subject.split(text));

    // Split with a limit of 2
    final String[] expectedLimited = reference.split(text, 2);
    assertArrayEquals(expectedLimited, subject.split(text, 2));

    // Split that also reports the delimiters, with a limit of 0
    final String[] expectedWithDelimiters = reference.splitWithDelimiters(text, 0);
    assertArrayEquals(expectedWithDelimiters, subject.splitWithDelimiters(text, 0));
}
}

0 comments on commit 25f986e

Please sign in to comment.