Skip to content

Commit

Permalink
(feat) JIT
Browse files Browse the repository at this point in the history
  • Loading branch information
alexey-pelykh committed Jun 23, 2024
1 parent 3f656b0 commit 10ab7bd
Show file tree
Hide file tree
Showing 11 changed files with 370 additions and 55 deletions.
4 changes: 2 additions & 2 deletions PCRE2_API.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ Here's the list of the PCRE2 API functions exposed via `org.pcre4j.api.IPcre2` a
|| [pcre2_get_ovector_count](https://www.pcre.org/current/doc/html/pcre2_get_ovector_count.html) | Get the ovector count |
|| [pcre2_get_ovector_pointer](https://www.pcre.org/current/doc/html/pcre2_get_ovector_pointer.html) | Get a pointer to the ovector |
| | [pcre2_get_startchar](https://www.pcre.org/current/doc/html/pcre2_get_startchar.html) | Get the starting character offset |
| | [pcre2_jit_compile](https://www.pcre.org/current/doc/html/pcre2_jit_compile.html) | Process a compiled pattern with the JIT compiler |
| | [pcre2_jit_compile](https://www.pcre.org/current/doc/html/pcre2_jit_compile.html) | Process a compiled pattern with the JIT compiler |
| | [pcre2_jit_free_unused_memory](https://www.pcre.org/current/doc/html/pcre2_jit_free_unused_memory.html) | Free unused JIT memory |
| | [pcre2_jit_match](https://www.pcre.org/current/doc/html/pcre2_jit_match.html) | Fast path interface to JIT matching |
| | [pcre2_jit_match](https://www.pcre.org/current/doc/html/pcre2_jit_match.html) | Fast path interface to JIT matching |
| | [pcre2_jit_stack_assign](https://www.pcre.org/current/doc/html/pcre2_jit_stack_assign.html) | Assign stack for JIT matching |
| | [pcre2_jit_stack_create](https://www.pcre.org/current/doc/html/pcre2_jit_stack_create.html) | Create a stack for JIT matching |
| | [pcre2_jit_stack_free](https://www.pcre.org/current/doc/html/pcre2_jit_stack_free.html) | Free a JIT matching stack |
Expand Down
31 changes: 18 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,7 @@ Add the following dependencies to your `pom.xml` file:
Proceed using the PCRE4J library in your Java code:

```java
import org.pcre4j.Pcre2Code;
import org.pcre4j.Pcre2CompileOption;
import org.pcre4j.Pcre2MatchData;
import org.pcre4j.Pcre2MatchOption;
import org.pcre4j.Pcre4j;
import org.pcre4j.Pcre4jUtils;
import org.pcre4j.*;
// TODO: Select one of the following imports for the backend you want to use:
import org.pcre4j.jna.Pcre2;
// import org.pcre4j.ffm.Pcre2;
Expand All @@ -107,11 +102,21 @@ public class Usage {
}

public static String[] example(String pattern, String subject) {
final var code = new Pcre2Code(
pattern,
EnumSet.noneOf(Pcre2CompileOption.class),
null
);
final Pcre2Code code;
if (Pcre4jUtils.isJitSupported(Pcre4j.api())) {
code = new Pcre2JitCode(
pattern,
EnumSet.noneOf(Pcre2CompileOption.class),
null,
null
);
} else {
code = new Pcre2Code(
pattern,
EnumSet.noneOf(Pcre2CompileOption.class),
null
);
}
final var matchData = new Pcre2MatchData(code);
code.match(
subject,
Expand Down Expand Up @@ -175,8 +180,8 @@ The PCRE4J library supports several backends to invoke the `pcre2` API.
### `jna`

The `jna` backend uses the [Java Native Access](https://github.com/java-native-access/jna) library to invoke the `pcre2`
shared library. For this backend to work, the `pcre2` shared library must be installed on the system and be visible to
the JNA.
shared library. For this backend to work, the `pcre2` shared library must be installed on the system and be visible via
`jna.library.path`.

### `ffm`

Expand Down
23 changes: 23 additions & 0 deletions api/src/main/java/org/pcre4j/api/IPcre2.java
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,29 @@ public interface IPcre2 {
*/
public int patternInfo(long code, int what, ByteBuffer where);

/**
 * JIT-compile a compiled pattern.
 * <p>
 * This is the backend-neutral entry point for the PCRE2 {@code pcre2_jit_compile} function.
 *
 * @param code    the compiled pattern handle
 * @param options option bits selecting which matching modes to JIT-compile for
 * @return 0 on success, otherwise a negative error code
 */
public int jitCompile(long code, int options);

/**
 * Match a JIT-compiled pattern against a subject string.
 * <p>
 * This is the backend-neutral entry point for the PCRE2 {@code pcre2_jit_match} function, the fast path interface
 * to JIT matching. The pattern is expected to have been processed by {@link #jitCompile(long, int)} beforehand.
 *
 * @param code        the compiled pattern handle
 * @param subject     the subject string
 * @param startoffset the starting offset in the subject string; callers are expected to pass the offset in code
 *                    units of the encoded subject (bytes for UTF-8) rather than a Java character index
 * @param options     option bits
 * @param matchData   the match data handle
 * @param mcontext    the match context handle, or 0 when no match context is used
 * @return the number of captures plus one, zero if the {@code matchData} is too small, or a negative value if there
 * was no match or an actual error occurred
 */
public int jitMatch(long code, String subject, int startoffset, int options, long matchData, long mcontext);

/**
* Create a new match data block.
*
Expand Down
66 changes: 66 additions & 0 deletions ffm/src/main/java/org/pcre4j/ffm/Pcre2.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ public class Pcre2 implements IPcre2 {
private final MethodHandle pcre2_get_error_message;
private final MethodHandle pcre2_pattern_info;

private final MethodHandle pcre2_jit_compile;
private final MethodHandle pcre2_jit_match;

private final MethodHandle pcre2_match_data_create;
private final MethodHandle pcre2_match_data_create_from_pattern;
private final MethodHandle pcre2_match_data_free;
Expand Down Expand Up @@ -178,6 +181,27 @@ public Pcre2(String library, String suffix) {
)
);

pcre2_jit_compile = LINKER.downcallHandle(
SYMBOL_LOOKUP.find("pcre2_jit_compile" + suffix).orElseThrow(),
FunctionDescriptor.of(ValueLayout.JAVA_INT, // int
ValueLayout.ADDRESS, // pcre2_code*
ValueLayout.JAVA_INT // int
)
);

pcre2_jit_match = LINKER.downcallHandle(
SYMBOL_LOOKUP.find("pcre2_jit_match" + suffix).orElseThrow(),
FunctionDescriptor.of(ValueLayout.JAVA_INT, // int
ValueLayout.ADDRESS, // pcre2_code*
ValueLayout.ADDRESS, // PCRE2_SPTR
ValueLayout.ADDRESS, // PCRE2_SIZE
ValueLayout.ADDRESS, // PCRE2_SIZE
ValueLayout.JAVA_INT, // int
ValueLayout.ADDRESS, // pcre2_match_data*
ValueLayout.ADDRESS // pcre2_match_context*
)
);

pcre2_match_data_create = LINKER.downcallHandle(
SYMBOL_LOOKUP.find("pcre2_match_data_create" + suffix).orElseThrow(),
FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_match_data*
Expand Down Expand Up @@ -569,6 +593,48 @@ public int patternInfo(long code, int what, ByteBuffer where) {
}
}

/**
 * JIT-compile a compiled pattern by invoking the native {@code pcre2_jit_compile} function.
 *
 * @param code    the compiled pattern handle (native address of the {@code pcre2_code})
 * @param options option bits
 * @return 0 on success, otherwise a negative error code
 * @throws RuntimeException wrapping anything thrown while invoking the native function
 */
@Override
public int jitCompile(long code, int options) {
    // No native memory is allocated for this call, so no Arena is required: the handle is simply
    // re-wrapped as a zero-length segment carrying the raw address.
    final MemorySegment pCode = MemorySegment.ofAddress(code);

    try {
        return (int) pcre2_jit_compile.invokeExact(
                pCode,
                options
        );
    } catch (Throwable e) {
        throw new RuntimeException(e);
    }
}

/**
 * Match a JIT-compiled pattern against a subject string by invoking the native {@code pcre2_jit_match} function.
 *
 * @param code        the compiled pattern handle
 * @param subject     the subject string, must not be null
 * @param startoffset the starting offset in the subject string
 * @param options     option bits
 * @param matchData   the match data handle
 * @param mcontext    the match context handle
 * @return the value returned by the native function
 * @throws IllegalArgumentException if {@code subject} is null
 * @throws RuntimeException         wrapping anything thrown while preparing or invoking the native call
 */
@Override
public int jitMatch(long code, String subject, int startoffset, int options, long matchData, long mcontext) {
    if (subject == null) {
        throw new IllegalArgumentException("subject must not be null");
    }

    try (var arena = Arena.ofConfined()) {
        // The subject lives in native memory owned by the arena and is released when the call returns.
        final MemorySegment nativeSubject = arena.allocateUtf8String(subject);
        // The allocated segment carries one trailing terminator byte that is not part of the subject.
        final long subjectByteCount = nativeSubject.byteSize() - 1;

        // The downcall descriptor declares both PCRE2_SIZE arguments as ADDRESS, so the numeric values
        // are passed wrapped in address-only segments.
        return (int) pcre2_jit_match.invokeExact(
                MemorySegment.ofAddress(code),
                nativeSubject,
                MemorySegment.ofAddress(subjectByteCount),
                MemorySegment.ofAddress(startoffset),
                options,
                MemorySegment.ofAddress(matchData),
                MemorySegment.ofAddress(mcontext)
        );
    } catch (Throwable e) {
        throw new RuntimeException(e);
    }
}

@Override
public long matchDataCreate(int ovecsize, long gcontext) {
try (var arena = Arena.ofConfined()) {
Expand Down
36 changes: 36 additions & 0 deletions jna/src/main/java/org/pcre4j/jna/Pcre2.java
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,30 @@ public int patternInfo(long code, int what, ByteBuffer where) {
return result;
}

/**
 * JIT-compile a compiled pattern by delegating to the native {@code pcre2_jit_compile} binding.
 *
 * @param code    the compiled pattern handle
 * @param options option bits
 * @return the value returned by the native function
 */
@Override
public int jitCompile(long code, int options) {
    final Pointer pCode = new Pointer(code);
    return library.pcre2_jit_compile(pCode, options);
}

/**
 * Match a JIT-compiled pattern against a subject string by delegating to the native {@code pcre2_jit_match}
 * binding.
 *
 * @param code        the compiled pattern handle
 * @param subject     the subject string, must not be null
 * @param startoffset the starting offset in the subject string
 * @param options     option bits
 * @param matchData   the match data handle
 * @param mcontext    the match context handle
 * @return the value returned by the native function
 * @throws IllegalArgumentException if {@code subject} is null
 */
@Override
public int jitMatch(long code, String subject, int startoffset, int options, long matchData, long mcontext) {
    if (subject == null) {
        throw new IllegalArgumentException("subject must not be null");
    }

    // The subject is handed over as raw UTF-8 bytes together with its explicit byte length.
    final byte[] subjectUtf8 = subject.getBytes(StandardCharsets.UTF_8);
    final Pointer pCode = new Pointer(code);
    final Pointer pMatchData = new Pointer(matchData);
    final Pointer pMatchContext = new Pointer(mcontext);

    return library.pcre2_jit_match(
            pCode,
            subjectUtf8,
            subjectUtf8.length,
            startoffset,
            options,
            pMatchData,
            pMatchContext
    );
}

@Override
public long matchDataCreate(int ovecsize, long gcontext) {
Pointer matchData = library.pcre2_match_data_create(ovecsize, new Pointer(gcontext));
Expand Down Expand Up @@ -336,6 +360,18 @@ Pointer pcre2_compile(

int pcre2_pattern_info(Pointer code, int what, Pointer where);

/**
 * Binding for the native {@code pcre2_jit_compile} function.
 *
 * @param code    pointer to the compiled pattern ({@code pcre2_code*})
 * @param options option bits
 * @return the native return value; the public API documents it as 0 on success and negative on error
 */
int pcre2_jit_compile(Pointer code, int options);

/**
 * Binding for the native {@code pcre2_jit_match} function.
 *
 * @param code        pointer to the compiled pattern ({@code pcre2_code*})
 * @param subject     the subject as raw bytes ({@code PCRE2_SPTR})
 * @param length      the length of the subject ({@code PCRE2_SIZE})
 * @param startoffset the offset in the subject at which to start matching ({@code PCRE2_SIZE})
 * @param options     option bits
 * @param matchData   pointer to the match data block ({@code pcre2_match_data*})
 * @param mcontext    pointer to the match context ({@code pcre2_match_context*})
 * @return the native return value
 */
int pcre2_jit_match(
Pointer code,
byte[] subject,
long length,
long startoffset,
int options,
Pointer matchData,
Pointer mcontext
);

Pointer pcre2_match_data_create(int ovecsize, Pointer gcontext);

Pointer pcre2_match_data_create_from_pattern(Pointer code, Pointer gcontext);
Expand Down
36 changes: 18 additions & 18 deletions lib/src/main/java/org/pcre4j/Pcre2Code.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@

import org.pcre4j.api.IPcre2;

/**
* A compiled pattern.
*/
public class Pcre2Code {

private static final Cleaner cleaner = Cleaner.create();
Expand All @@ -45,10 +48,22 @@ public class Pcre2Code {
* Constructor for Pcre2Code
*
* @param pattern the pattern to compile
* @param options the flags to compile the pattern with, see {@link Pcre2CompileOption}
* @param options the flags to compile the pattern with, see {@link Pcre2CompileOption} or null for default
* options
* @param compileContext the compile context to use or null
*/
public Pcre2Code(String pattern, EnumSet<Pcre2CompileOption> options, Pcre2CompileContext compileContext) {
public Pcre2Code(
String pattern,
EnumSet<Pcre2CompileOption> options,
Pcre2CompileContext compileContext
) {
if (pattern == null) {
throw new IllegalArgumentException("pattern cannot be null");
}
if (options == null) {
options = EnumSet.noneOf(Pcre2CompileOption.class);
}

final var api = Pcre4j.api();

final var errorcode = new int[1];
Expand Down Expand Up @@ -413,25 +428,10 @@ public int match(
throw new IllegalArgumentException("matchData must not be null");
}

// For the UTF-8, convert the startOffset from characters to bytes
var startOffsetInBytes = 0;
for (var charIndex = 0; charIndex < startOffset; charIndex++) {
final var theChar = subject.charAt(charIndex);
if (theChar <= 0x007F) {
startOffsetInBytes += 1;
} else if (theChar <= 0x07FF) {
startOffsetInBytes += 2;
} else if (Character.isHighSurrogate(theChar) || Character.isLowSurrogate(theChar)) {
startOffsetInBytes += 2;
} else {
startOffsetInBytes += 3;
}
}

return api.match(
handle,
subject,
startOffsetInBytes,
Pcre4jUtils.convertCharacterIndexToByteOffset(subject, startOffset),
options
.stream()
.mapToInt(Pcre2MatchOption::value)
Expand Down
98 changes: 98 additions & 0 deletions lib/src/main/java/org/pcre4j/Pcre2JitCode.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package org.pcre4j;

import java.util.EnumSet;

/**
 * A JIT-compiled pattern.
 * <p>
 * The pattern is first compiled by the {@link Pcre2Code} constructor and then handed to the backend's
 * {@code jitCompile}. Matching is dispatched to the backend's {@code jitMatch} instead of the regular match call.
 */
public class Pcre2JitCode extends Pcre2Code {

    /**
     * The supported match options for JIT-compiled patterns.
     */
    private static final EnumSet<Pcre2MatchOption> SUPPORTED_MATCH_OPTIONS = EnumSet.of(
            Pcre2MatchOption.NOTBOL,
            Pcre2MatchOption.NOTEOL,
            Pcre2MatchOption.NOTEMPTY,
            Pcre2MatchOption.NOTEMPTY_ATSTART,
            Pcre2MatchOption.PARTIAL_HARD,
            Pcre2MatchOption.PARTIAL_SOFT
    );

    /**
     * Get the supported match options for JIT-compiled patterns.
     *
     * @return a new set holding the supported match options; modifying it does not affect this class
     */
    public static EnumSet<Pcre2MatchOption> getSupportedMatchOptions() {
        return EnumSet.copyOf(SUPPORTED_MATCH_OPTIONS);
    }

    /**
     * Constructor for Pcre2JitCode
     *
     * @param pattern        the pattern to compile
     * @param options        the flags to compile the pattern with, see {@link Pcre2CompileOption} or null for
     *                       default options
     * @param jitOptions     the flags to compile the pattern with JIT, see {@link Pcre2JitOption} or null for
     *                       default options (complete, partial-soft and partial-hard matching)
     * @param compileContext the compile context to use or null
     * @throws IllegalStateException if the JIT compilation reports a non-zero result
     */
    public Pcre2JitCode(
            String pattern,
            EnumSet<Pcre2CompileOption> options,
            EnumSet<Pcre2JitOption> jitOptions,
            Pcre2CompileContext compileContext
    ) {
        super(pattern, options, compileContext);

        if (jitOptions == null) {
            jitOptions = EnumSet.of(
                    Pcre2JitOption.COMPLETE,
                    Pcre2JitOption.PARTIAL_SOFT,
                    Pcre2JitOption.PARTIAL_HARD
            );
        }

        // An EnumSet holds each option at most once, so the sum equals the bitwise OR of the values
        // (assuming every option is a distinct bit flag).
        final var jitOptionBits = jitOptions
                .stream()
                .mapToInt(Pcre2JitOption::value)
                .sum();

        final var jitResult = api.jitCompile(handle, jitOptionBits);
        if (jitResult != 0) {
            throw new IllegalStateException(Pcre4jUtils.getErrorMessage(api, jitResult));
        }
    }

    /**
     * Match this JIT-compiled pattern against a subject string.
     *
     * @param subject      the subject string, must not be null
     * @param startOffset  the character index in the subject at which to start matching; may be equal to the
     *                     length of the subject (this is the only valid value for an empty subject)
     * @param options      the match options, see {@link #getSupportedMatchOptions()}, or null for no options
     * @param matchData    the match data to store the results in, must not be null
     * @param matchContext the match context to use or null
     * @return the number of captures plus one, zero if the {@code matchData} is too small, or a negative value if
     * there was no match or an actual error occurred
     * @throws IllegalArgumentException if {@code subject} or {@code matchData} is null, or {@code startOffset} is
     *                                  negative or greater than the length of the subject
     */
    @Override
    public int match(
            String subject,
            int startOffset,
            EnumSet<Pcre2MatchOption> options,
            Pcre2MatchData matchData,
            Pcre2MatchContext matchContext
    ) {
        if (subject == null) {
            throw new IllegalArgumentException("subject must not be null");
        }
        if (startOffset < 0) {
            throw new IllegalArgumentException("startOffset must be greater than or equal to zero");
        }
        // An offset equal to the subject length is legal: it is required to match an empty subject and to
        // attempt a match at the very end of the subject. Only offsets past the end are rejected; the check
        // is done here because the native call is a fast path.
        if (startOffset > subject.length()) {
            throw new IllegalArgumentException("startOffset must be less than or equal to the length of the subject");
        }
        if (matchData == null) {
            throw new IllegalArgumentException("matchData must not be null");
        }
        if (options == null) {
            // Same convention as the constructors: null means "no options".
            options = EnumSet.noneOf(Pcre2MatchOption.class);
        }

        return api.jitMatch(
                handle,
                subject,
                Pcre4jUtils.convertCharacterIndexToByteOffset(subject, startOffset),
                options.stream().mapToInt(Pcre2MatchOption::value).sum(),
                matchData.handle,
                matchContext != null ? matchContext.handle : 0
        );
    }
}
Loading

0 comments on commit 10ab7bd

Please sign in to comment.