From 2bfdf899f6a29ad5ceaf3bcd1288ffed82fec5d5 Mon Sep 17 00:00:00 2001 From: Osiris Team Date: Tue, 18 Jun 2024 14:38:35 +0200 Subject: [PATCH] 3.0.2 - NEW: support for existingChromeBrowserUrl - NEW: hardcoded versions for playwright, puppeteer, their extra variants, puppeteer-extra-plugin-stealth and nodejs - NEW: added convenience static class HB --- README.md | 15 +++--- pom.xml | 2 +- .../java/com/osiris/headlessbrowser/HB.java | 33 +++++++++++++ .../com/osiris/headlessbrowser/Versions.java | 10 ++++ .../js/contexts/NodeContext.java | 20 +++++--- .../windows/PlaywrightWindow.java | 46 +++++++++++-------- .../windows/PuppeteerWindow.java | 7 +-- .../windows/WindowBuilder.java | 18 +++++++- .../js/contexts/NodeContextTest.java | 3 +- 9 files changed, 115 insertions(+), 39 deletions(-) create mode 100644 src/main/java/com/osiris/headlessbrowser/HB.java create mode 100644 src/main/java/com/osiris/headlessbrowser/Versions.java diff --git a/README.md b/README.md index feb9e06..05a1491 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,7 @@ Add this to your project with [Maven/Gradle/Sbt/Leinigen](https://jitpack.io/#Os (Java 8 or higher required). ```java -HBrowser browser = new HBrowser(); -try(PlaywrightWindow window = browser.openWindow()){ +try(PlaywrightWindow window = HB.newWin()){ window.load("https://example.com"); // ... } @@ -22,8 +21,7 @@ Note that the first run may take a bit because Node.js and its modules get insta - simulating real user input. - **Integrated evasions for headless detection:** ```java -HBrowser b = new HBrowser(); -try (PlaywrightWindow w = b.openCustomWindow() +try (PlaywrightWindow w = HB.newWinBuilder() .headless(true).makeUndetectable(true).buildPlaywrightWindow()) { w.load("https://infosimples.github.io/detect-headless/"); @@ -42,11 +40,12 @@ Playwright is the default and recommended browser driver to use, since it suppor and more of its features were ported to Java. 
Checkout [JG-Browser](https://github.com/Osiris-Team/JG-Browser) for a browser completely written in Java. -| Name | Version| JS-Engine | Downloads | -| :-----: | :-----: | :-----: | :-----: -| [Playwright](https://github.com/microsoft/playwright)| Latest | Node.js/V8 | Yes | No | -| [Puppeteer](https://github.com/puppeteer/puppeteer) | Latest | Node.js/V8 | No | No | +| Name | JS-Engine | Downloads | +| :-----: |:---------------:| :-----: +| [Playwright](https://github.com/microsoft/playwright)| Node.js/V8 | Yes | No | +| [Puppeteer](https://github.com/puppeteer/puppeteer) | Node.js/V8 | No | No | +You can find their versions in [this class](src/main/java/com/osiris/headlessbrowser/Versions.java), which also allows you to set custom versions. (JS = JavaScript; Downloads = If the browser is able to download files other than html/xml/pdf;) ### Contribute/Build diff --git a/pom.xml b/pom.xml index 7f275ba..4bd9087 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.osiris.headlessbrowser Headless-Browser - 3.0.1 + 3.0.2 jitpack diff --git a/src/main/java/com/osiris/headlessbrowser/HB.java b/src/main/java/com/osiris/headlessbrowser/HB.java new file mode 100644 index 0000000..b167915 --- /dev/null +++ b/src/main/java/com/osiris/headlessbrowser/HB.java @@ -0,0 +1,33 @@ +package com.osiris.headlessbrowser; + +import com.osiris.headlessbrowser.exceptions.NodeJsCodeException; +import com.osiris.headlessbrowser.windows.HWindow; +import com.osiris.headlessbrowser.windows.PlaywrightWindow; +import com.osiris.headlessbrowser.windows.WindowBuilder; + +import java.io.IOException; + +public class HB { + public static HBrowser globalHBrowserInstace = new HBrowser(); + + /** + * Builds a {@link PlaywrightWindow} with default {@link WindowBuilder} settings on {@link #globalHBrowserInstace}; see {@link HBrowser#openWindow()}. + */ + public static PlaywrightWindow newWin() { + return new WindowBuilder(globalHBrowserInstace).buildPlaywrightWindow(); + } + + /** + * Opens a window on {@link #globalHBrowserInstace} and loads the given url into it; see {@link HBrowser#openWindowAndLoad(String)}. + */ + public static PlaywrightWindow newWin(String url) throws IOException, NodeJsCodeException { + return globalHBrowserInstace.openWindow().load(url); + } + 
+ /** + * Returns a new {@link WindowBuilder} bound to {@link #globalHBrowserInstace}; see {@link HBrowser#openCustomWindow()}. + */ + public static WindowBuilder newWinBuilder() { + return new WindowBuilder(globalHBrowserInstace); + } +} diff --git a/src/main/java/com/osiris/headlessbrowser/Versions.java b/src/main/java/com/osiris/headlessbrowser/Versions.java new file mode 100644 index 0000000..75908d5 --- /dev/null +++ b/src/main/java/com/osiris/headlessbrowser/Versions.java @@ -0,0 +1,10 @@ +package com.osiris.headlessbrowser; + +public class Versions { + public static String NODEJS = "22.3.0"; + public static String PLAYWRIGHT = "1.44.1"; + public static String PLAYWRIGHT_EXTRA = "4.3.6"; + public static String PUPPETEER = "22.11.1"; + public static String PUPPETEER_EXTRA = "3.3.6"; + public static String PUPPETEER_EXTRA_STEALTH_PLUGIN = "2.11.2"; +} diff --git a/src/main/java/com/osiris/headlessbrowser/js/contexts/NodeContext.java b/src/main/java/com/osiris/headlessbrowser/js/contexts/NodeContext.java index fd57ff8..09d6858 100644 --- a/src/main/java/com/osiris/headlessbrowser/js/contexts/NodeContext.java +++ b/src/main/java/com/osiris/headlessbrowser/js/contexts/NodeContext.java @@ -2,11 +2,13 @@ import com.osiris.autoplug.core.UtilsFiles; import com.osiris.betterthread.BThreadManager; +import com.osiris.headlessbrowser.Versions; import com.osiris.headlessbrowser.exceptions.NodeJsCodeException; import com.osiris.headlessbrowser.utils.*; import net.lingala.zip4j.ZipFile; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; +import org.jetbrains.annotations.Nullable; import org.jline.utils.OSUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -78,7 +80,7 @@ public NodeContext(File parentNodeDir, OutputStream debugOutput, int timeout) { try { // Download and install NodeJS into current working directory if no installation found - install(false); + install(Versions.NODEJS, false); File nodeExeFile, npmExeFile, npxExeFile; if (TYPE.equals(OS.Type.WINDOWS)) { @@ -183,7 +185,7 @@ private void 
updatePath(ProcessBuilder processBuilder, File exeFile) { * * @param force if true force-installs the latest release. */ - public void install(boolean force) throws Exception { + public void install(@Nullable String version, boolean force) throws Exception { if (!force) { // Don't install if already done. if (installationDir.listFiles() != null && installationDir.listFiles().length != 0) { @@ -194,14 +196,20 @@ public void install(boolean force) throws Exception { close(); // If this node context is still running if(installationDir.exists()) FileUtils.deleteDirectory(installationDir); installationDir.mkdirs(); - String url = "https://nodejs.org/dist/latest/"; - debugOutput.println("Installing latest NodeJS release from '" + url + "'..."); + + String url; + if(version != null){ + url = "https://nodejs.org/dist/v"+version+"/"; + } else{ + url = "https://nodejs.org/dist/latest/"; + } + debugOutput.println("Installing NodeJS release from '" + url + "'..."); debugOutput.println("This devices' details: "+TYPE.name+" "+ ARCH.name()+" ("+ Utils.toString(ARCH.altNames)+")"); - Document docLatest = Jsoup.connect(url).get(); + Document doc = Jsoup.connect(url).get(); String downloadUrl = null; for (Element e : - docLatest.getElementsByTag("a")) { + doc.getElementsByTag("a")) { String attr = e.attr("href"); if (isCorrectFileForOs(attr.replace(url, ""))) { downloadUrl = url + attr; diff --git a/src/main/java/com/osiris/headlessbrowser/windows/PlaywrightWindow.java b/src/main/java/com/osiris/headlessbrowser/windows/PlaywrightWindow.java index 417434f..0d1b2d9 100644 --- a/src/main/java/com/osiris/headlessbrowser/windows/PlaywrightWindow.java +++ b/src/main/java/com/osiris/headlessbrowser/windows/PlaywrightWindow.java @@ -5,6 +5,7 @@ import com.google.gson.JsonArray; import com.google.gson.JsonObject; import com.osiris.headlessbrowser.HBrowser; +import com.osiris.headlessbrowser.Versions; import com.osiris.headlessbrowser.data.chrome.ChromeHeaders; import 
com.osiris.headlessbrowser.exceptions.NodeJsCodeException; import com.osiris.headlessbrowser.js.contexts.NodeContext; @@ -44,7 +45,7 @@ public class PlaywrightWindow implements HWindow { * Use the {@link WindowBuilder} instead. The {@link HBrowser} has a shortcut method for creating custom windows: {@link HBrowser#openCustomWindow()}. */ public PlaywrightWindow(HBrowser parentBrowser, boolean enableJavaScript, OutputStream debugOutput, int jsTimeout, - boolean isHeadless, File userDataDir, boolean isDevTools, boolean makeUndetectable, boolean temporaryUserDataDir) { + boolean isHeadless, File userDataDir, boolean isDevTools, boolean makeUndetectable, boolean temporaryUserDataDir, String existingChromeBrowserUrl) { this.parentBrowser = parentBrowser; if (debugOutput == null) debugOutput = new TrashOutput(); @@ -55,14 +56,14 @@ public PlaywrightWindow(HBrowser parentBrowser, boolean enableJavaScript, Output this.temporaryUserDataDir = temporaryUserDataDir; this.jsContext = new NodeContext(new File(userDataDir.getParentFile() + "/node-js"), debugOutput, jsTimeout); try { - jsContext.npmInstall("playwright"); - jsContext.executeNpxWithArgs("playwright", "install"); + jsContext.npmInstall("playwright@"+Versions.PLAYWRIGHT); + jsContext.executeNpxWithArgs("playwright@"+Versions.PLAYWRIGHT, "install"); // TODO this installs all browsers (firefox and webkit), but we only need chrome // Define global variables/constants if (makeUndetectable) { - jsContext.npmInstall("playwright-extra"); - jsContext.npmInstall("puppeteer-extra-plugin-stealth"); + jsContext.npmInstall("playwright-extra@"+ Versions.PLAYWRIGHT_EXTRA); + jsContext.npmInstall("puppeteer-extra-plugin-stealth@"+ Versions.PUPPETEER_EXTRA_STEALTH_PLUGIN); } jsContext.executeJavaScript( "const { chromium } = require('" + (makeUndetectable ? 
"playwright-extra" : "playwright") + "');\n" + @@ -88,20 +89,27 @@ public PlaywrightWindow(HBrowser parentBrowser, boolean enableJavaScript, Output downloadTempDir = new File(userDataDir + "/downloads-temp"); if (!downloadTempDir.exists()) downloadTempDir.mkdirs(); - jsContext.executeJavaScript( - "browserCtx = await chromium.launchPersistentContext('" + userDataDir.getAbsolutePath().replace("\\", "/") + "', {\n" + - " acceptDownloads: true,\n" + - " headless : " + isHeadless + ",\n" + - " javaScriptEnabled: " + enableJavaScript + ",\n" + - //" downloadsPath: '" + downloadTempDir.getAbsolutePath().replace("\\", "/") + "',\n" + // Active issue at: https://github.com/microsoft/playwright/issues/9279 - " devtools: " + isDevTools + ",\n" + - //" ignoreDefaultArgs: true,\n" + - " args: ['--disable-blink-features=AutomationControlled'],\n" + // '--enable-automation=false' - " extraHTTPHeaders: " + new GsonBuilder().setPrettyPrinting().create().toJson(new ChromeHeaders().getJson()) + ",\n" + - " userAgent: '" + new ChromeHeaders().user_agent + "'\n" + // Just to make sure... 
- "});\n" + - "browser = browserCtx.browser();\n" + - "page = await browserCtx.newPage();\n", 30, false); + if(existingChromeBrowserUrl != null){ + jsContext.executeJavaScript( + "browser = await chromium.connectOverCDP('"+existingChromeBrowserUrl+"');\n" + + "browserCtx = browser.contexts()[0];\n" + + "page = await browserCtx.newPage();\n", 30, false); + }else{ + jsContext.executeJavaScript( + "browserCtx = await chromium.launchPersistentContext('" + userDataDir.getAbsolutePath().replace("\\", "/") + "', {\n" + + " acceptDownloads: true,\n" + + " headless : " + isHeadless + ",\n" + + " javaScriptEnabled: " + enableJavaScript + ",\n" + + //" downloadsPath: '" + downloadTempDir.getAbsolutePath().replace("\\", "/") + "',\n" + // Active issue at: https://github.com/microsoft/playwright/issues/9279 + " devtools: " + isDevTools + ",\n" + + //" ignoreDefaultArgs: true,\n" + + " args: ['--disable-blink-features=AutomationControlled'],\n" + // '--enable-automation=false' + " extraHTTPHeaders: " + new GsonBuilder().setPrettyPrinting().create().toJson(new ChromeHeaders().getJson()) + ",\n" + + " userAgent: '" + new ChromeHeaders().user_agent + "'\n" + // Just to make sure... 
+ "});\n" + + "browser = browserCtx.browser();\n" + + "page = await browserCtx.newPage();\n", 30, false); + } } catch (Exception e) { throw new RuntimeException(e); diff --git a/src/main/java/com/osiris/headlessbrowser/windows/PuppeteerWindow.java b/src/main/java/com/osiris/headlessbrowser/windows/PuppeteerWindow.java index c0c9820..4ac81c6 100644 --- a/src/main/java/com/osiris/headlessbrowser/windows/PuppeteerWindow.java +++ b/src/main/java/com/osiris/headlessbrowser/windows/PuppeteerWindow.java @@ -5,6 +5,7 @@ import com.google.gson.JsonArray; import com.google.gson.JsonObject; import com.osiris.headlessbrowser.HBrowser; +import com.osiris.headlessbrowser.Versions; import com.osiris.headlessbrowser.data.chrome.ChromeHeaders; import com.osiris.headlessbrowser.exceptions.NodeJsCodeException; import com.osiris.headlessbrowser.js.contexts.NodeContext; @@ -69,10 +70,10 @@ public PuppeteerWindow(HBrowser parentBrowser, boolean enableJavaScript, OutputS } else { } - jsContext.npmInstall("puppeteer"); + jsContext.npmInstall("puppeteer@"+ Versions.PUPPETEER); if (makeUndetectable) { - jsContext.npmInstall("puppeteer-extra"); - jsContext.npmInstall("puppeteer-extra-plugin-stealth"); + jsContext.npmInstall("puppeteer-extra@"+Versions.PUPPETEER_EXTRA); + jsContext.npmInstall("puppeteer-extra-plugin-stealth@"+Versions.PUPPETEER_EXTRA_STEALTH_PLUGIN); } jsContext.executeJavaScript( "const puppeteer = require('" + (makeUndetectable ? "puppeteer-extra" : "puppeteer") + "');\n" + diff --git a/src/main/java/com/osiris/headlessbrowser/windows/WindowBuilder.java b/src/main/java/com/osiris/headlessbrowser/windows/WindowBuilder.java index d5334a8..2462143 100644 --- a/src/main/java/com/osiris/headlessbrowser/windows/WindowBuilder.java +++ b/src/main/java/com/osiris/headlessbrowser/windows/WindowBuilder.java @@ -66,6 +66,13 @@ public class WindowBuilder { * The directory will get deleted on {@link HWindow#close()}.
*/ public boolean temporaryUserDataDir = false; + /** + * Instead of creating a new browser this will connect to an existing browser + * via the chrome dev tools protocol, if the url is provided.
+ * CURRENTLY ONLY SUPPORTED BY PLAYWRIGHT!
+ * const browser = await chromium.connectOverCDP('http://localhost:9222'); + */ + public String existingChromeBrowserUrl = null; public WindowBuilder(HBrowser parentBrowser) { this.parentBrowser = parentBrowser; @@ -79,7 +86,8 @@ public PuppeteerWindow buildPuppeteerWindow() { public PlaywrightWindow buildPlaywrightWindow() { return new PlaywrightWindow(this.parentBrowser, this.enableJavaScript, this.debugOutputStream, this.jsTimeout, - this.isHeadless, this.userDataDir, this.isDevTools, this.makeUndetectable, this.temporaryUserDataDir); + this.isHeadless, this.userDataDir, this.isDevTools, this.makeUndetectable, this.temporaryUserDataDir, + this.existingChromeBrowserUrl); } public LightWindow buildLightWindow() { @@ -175,4 +183,12 @@ public WindowBuilder temporaryUserDataDir(boolean val) { return this; } + /** + * For details see {@link #existingChromeBrowserUrl}. + */ + public WindowBuilder existingChromeBrowserUrl(String val){ + this.existingChromeBrowserUrl = val; + return this; + } + } diff --git a/src/test/java/com/osiris/headlessbrowser/js/contexts/NodeContextTest.java b/src/test/java/com/osiris/headlessbrowser/js/contexts/NodeContextTest.java index ce8aa1a..e9b6c8c 100644 --- a/src/test/java/com/osiris/headlessbrowser/js/contexts/NodeContextTest.java +++ b/src/test/java/com/osiris/headlessbrowser/js/contexts/NodeContextTest.java @@ -1,5 +1,6 @@ package com.osiris.headlessbrowser.js.contexts; +import com.osiris.headlessbrowser.Versions; import org.junit.jupiter.api.Test; import java.io.IOException; @@ -11,6 +12,6 @@ class NodeContextTest { @Test void install() throws Exception { NodeContext ctx = new NodeContext(null, System.out, 30); // Installs and starts Node.js if not exists - ctx.install(true); + ctx.install(Versions.NODEJS, true); } } \ No newline at end of file