Skip to content

Commit

Permalink
3.0.2
Browse files Browse the repository at this point in the history
- NEW: support for existingChromeBrowserUrl
- NEW: hardcoded versions for playwright, puppeteer, their extra variants, puppeteer-extra-plugin-stealth and nodejs
- NEW: added convenience static class HB
  • Loading branch information
Osiris-Team committed Jun 18, 2024
1 parent 0a820f5 commit 2bfdf89
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 39 deletions.
15 changes: 7 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ Add this to your project with [Maven/Gradle/Sbt/Leinigen](https://jitpack.io/#Os
(Java 8 or higher required).

```java
HBrowser browser = new HBrowser();
try(PlaywrightWindow window = browser.openWindow()){
try(PlaywrightWindow window = HB.newWin()){
window.load("https://example.com");
// ...
}
Expand All @@ -22,8 +21,7 @@ Note that the first run may take a bit because Node.js and its modules get insta
- simulating real user input.
- **Integrated evasions for headless detection:**
```java
HBrowser b = new HBrowser();
try (PlaywrightWindow w = b.openCustomWindow()
try (PlaywrightWindow w = HB.newWinBuilder()
.headless(true).makeUndetectable(true).buildPlaywrightWindow())
{
w.load("https://infosimples.github.io/detect-headless/");
Expand All @@ -42,11 +40,12 @@ Playwright is the default and recommended browser driver to use, since it suppor
and more of its features were ported to Java.
Check out [JG-Browser](https://github.com/Osiris-Team/JG-Browser) for a browser completely written in Java.

| Name | Version| JS-Engine | Downloads |
| :-----: | :-----: | :-----: | :-----:
| [Playwright](https://github.com/microsoft/playwright)| Latest | Node.js/V8 | Yes | No |
| [Puppeteer](https://github.com/puppeteer/puppeteer) | Latest | Node.js/V8 | No | No |
| Name | JS-Engine | Downloads |
| :-----: |:---------------:| :-----:
| [Playwright](https://github.com/microsoft/playwright)| Node.js/V8 | Yes | No |
| [Puppeteer](https://github.com/puppeteer/puppeteer) | Node.js/V8 | No | No |

You can find their versions in [this class](src/main/java/com/osiris/headlessbrowser/Versions.java), which also allows you to set custom versions.
(JS = JavaScript; Downloads = If the browser is able to download files other than html/xml/pdf;)

### Contribute/Build
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>com.osiris.headlessbrowser</groupId>
<artifactId>Headless-Browser</artifactId>
<version>3.0.1</version>
<version>3.0.2</version>
<repositories>
<repository>
<id>jitpack</id>
Expand Down
33 changes: 33 additions & 0 deletions src/main/java/com/osiris/headlessbrowser/HB.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package com.osiris.headlessbrowser;

import com.osiris.headlessbrowser.exceptions.NodeJsCodeException;
import com.osiris.headlessbrowser.windows.HWindow;
import com.osiris.headlessbrowser.windows.PlaywrightWindow;
import com.osiris.headlessbrowser.windows.WindowBuilder;

import java.io.IOException;

/**
 * Static convenience shortcuts for opening browser windows without having to
 * create and keep a {@link HBrowser} instance yourself.
 * <p>
 * All methods operate on the shared {@link #globalHBrowserInstace}.
 */
public class HB {
    /**
     * The shared browser instance used by every shortcut method in this class. <br>
     * Note: the field name is misspelled ("Instace"); it is kept as-is so that
     * existing code referencing it continues to compile.
     */
    public static HBrowser globalHBrowserInstace = new HBrowser();

    /**
     * Builds a new {@link PlaywrightWindow} using the default settings of a fresh {@link WindowBuilder}.
     *
     * @return the newly created window. The caller is responsible for closing it.
     * @see HBrowser#openWindow()
     */
    public static PlaywrightWindow newWin() {
        return new WindowBuilder(globalHBrowserInstace).buildPlaywrightWindow();
    }

    /**
     * Opens a new window on the shared browser instance and loads the provided url in it.
     *
     * @param url the url to load in the new window.
     * @return the newly created window. The caller is responsible for closing it.
     * @throws IOException         declared by the underlying load call.
     * @throws NodeJsCodeException declared by the underlying load call.
     * @see HBrowser#openWindowAndLoad(String)
     */
    public static PlaywrightWindow newWin(String url) throws IOException, NodeJsCodeException {
        // Static, so that it can be called as HB.newWin(url) like the other shortcuts.
        return globalHBrowserInstace.openWindow().load(url);
    }

    /**
     * Creates a {@link WindowBuilder} bound to the shared browser instance,
     * for building a window with custom settings.
     *
     * @return a new builder.
     * @see HBrowser#openCustomWindow()
     */
    public static WindowBuilder newWinBuilder() {
        // Static, so that HB.newWinBuilder() (as shown in the README) compiles.
        return new WindowBuilder(globalHBrowserInstace);
    }
}
10 changes: 10 additions & 0 deletions src/main/java/com/osiris/headlessbrowser/Versions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.osiris.headlessbrowser;

/**
 * Central place for the versions of Node.js and the npm packages that get installed.
 * <p>
 * The fields are intentionally not final, so that a custom version can be assigned.
 * The values are read when a Node.js context or a window is constructed,
 * thus custom versions must be assigned before that happens.
 */
public class Versions {

    /**
     * Node.js version, without the leading "v".
     */
    public static String NODEJS = "22.3.0";

    /**
     * Version of the "playwright" npm package.
     */
    public static String PLAYWRIGHT = "1.44.1";

    /**
     * Version of the "playwright-extra" npm package.
     */
    public static String PLAYWRIGHT_EXTRA = "4.3.6";

    /**
     * Version of the "puppeteer" npm package.
     */
    public static String PUPPETEER = "22.11.1";

    /**
     * Version of the "puppeteer-extra" npm package.
     */
    public static String PUPPETEER_EXTRA = "3.3.6";

    /**
     * Version of the "puppeteer-extra-plugin-stealth" npm package.
     */
    public static String PUPPETEER_EXTRA_STEALTH_PLUGIN = "2.11.2";
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import com.osiris.autoplug.core.UtilsFiles;
import com.osiris.betterthread.BThreadManager;
import com.osiris.headlessbrowser.Versions;
import com.osiris.headlessbrowser.exceptions.NodeJsCodeException;
import com.osiris.headlessbrowser.utils.*;
import net.lingala.zip4j.ZipFile;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.Nullable;
import org.jline.utils.OSUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
Expand Down Expand Up @@ -78,7 +80,7 @@ public NodeContext(File parentNodeDir, OutputStream debugOutput, int timeout) {

try {
// Download and install NodeJS into current working directory if no installation found
install(false);
install(Versions.NODEJS, false);

File nodeExeFile, npmExeFile, npxExeFile;
if (TYPE.equals(OS.Type.WINDOWS)) {
Expand Down Expand Up @@ -183,7 +185,7 @@ private void updatePath(ProcessBuilder processBuilder, File exeFile) {
*
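* @param version the Node.js version to install (for example "22.3.0"), or null to install the latest release.
* @param force if true, installs even when an installation already exists.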
*/
public void install(boolean force) throws Exception {
public void install(@Nullable String version, boolean force) throws Exception {
if (!force) {
// Don't install if already done.
if (installationDir.listFiles() != null && installationDir.listFiles().length != 0) {
Expand All @@ -194,14 +196,20 @@ public void install(boolean force) throws Exception {
close(); // If this node context is still running
if(installationDir.exists()) FileUtils.deleteDirectory(installationDir);
installationDir.mkdirs();
String url = "https://nodejs.org/dist/latest/";
debugOutput.println("Installing latest NodeJS release from '" + url + "'...");

String url;
if(version != null){
url = "https://nodejs.org/dist/v"+version+"/";
} else{
url = "https://nodejs.org/dist/latest/";
}
debugOutput.println("Installing NodeJS release from '" + url + "'...");
debugOutput.println("This devices' details: "+TYPE.name+" "+ ARCH.name()+" ("+ Utils.toString(ARCH.altNames)+")");
Document docLatest = Jsoup.connect(url).get();
Document doc = Jsoup.connect(url).get();

String downloadUrl = null;
for (Element e :
docLatest.getElementsByTag("a")) {
doc.getElementsByTag("a")) {
String attr = e.attr("href");
if (isCorrectFileForOs(attr.replace(url, ""))) {
downloadUrl = url + attr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.osiris.headlessbrowser.HBrowser;
import com.osiris.headlessbrowser.Versions;
import com.osiris.headlessbrowser.data.chrome.ChromeHeaders;
import com.osiris.headlessbrowser.exceptions.NodeJsCodeException;
import com.osiris.headlessbrowser.js.contexts.NodeContext;
Expand Down Expand Up @@ -44,7 +45,7 @@ public class PlaywrightWindow implements HWindow {
* Use the {@link WindowBuilder} instead. The {@link HBrowser} has a shortcut method for creating custom windows: {@link HBrowser#openCustomWindow()}.
*/
public PlaywrightWindow(HBrowser parentBrowser, boolean enableJavaScript, OutputStream debugOutput, int jsTimeout,
boolean isHeadless, File userDataDir, boolean isDevTools, boolean makeUndetectable, boolean temporaryUserDataDir) {
boolean isHeadless, File userDataDir, boolean isDevTools, boolean makeUndetectable, boolean temporaryUserDataDir, String existingChromeBrowserUrl) {
this.parentBrowser = parentBrowser;
if (debugOutput == null)
debugOutput = new TrashOutput();
Expand All @@ -55,14 +56,14 @@ public PlaywrightWindow(HBrowser parentBrowser, boolean enableJavaScript, Output
this.temporaryUserDataDir = temporaryUserDataDir;
this.jsContext = new NodeContext(new File(userDataDir.getParentFile() + "/node-js"), debugOutput, jsTimeout);
try {
jsContext.npmInstall("playwright");
jsContext.executeNpxWithArgs("playwright", "install");
jsContext.npmInstall("playwright@"+Versions.PLAYWRIGHT);
jsContext.executeNpxWithArgs("playwright@"+Versions.PLAYWRIGHT, "install");
// TODO this installs all browsers (firefox and webkit), but we only need chrome

// Define global variables/constants
if (makeUndetectable) {
jsContext.npmInstall("playwright-extra");
jsContext.npmInstall("puppeteer-extra-plugin-stealth");
jsContext.npmInstall("playwright-extra@"+ Versions.PLAYWRIGHT_EXTRA);
jsContext.npmInstall("puppeteer-extra-plugin-stealth@"+ Versions.PUPPETEER_EXTRA_STEALTH_PLUGIN);
}
jsContext.executeJavaScript(
"const { chromium } = require('" + (makeUndetectable ? "playwright-extra" : "playwright") + "');\n" +
Expand All @@ -88,20 +89,27 @@ public PlaywrightWindow(HBrowser parentBrowser, boolean enableJavaScript, Output
downloadTempDir = new File(userDataDir + "/downloads-temp");
if (!downloadTempDir.exists()) downloadTempDir.mkdirs();

jsContext.executeJavaScript(
"browserCtx = await chromium.launchPersistentContext('" + userDataDir.getAbsolutePath().replace("\\", "/") + "', {\n" +
" acceptDownloads: true,\n" +
" headless : " + isHeadless + ",\n" +
" javaScriptEnabled: " + enableJavaScript + ",\n" +
//" downloadsPath: '" + downloadTempDir.getAbsolutePath().replace("\\", "/") + "',\n" + // Active issue at: https://github.com/microsoft/playwright/issues/9279
" devtools: " + isDevTools + ",\n" +
//" ignoreDefaultArgs: true,\n" +
" args: ['--disable-blink-features=AutomationControlled'],\n" + // '--enable-automation=false'
" extraHTTPHeaders: " + new GsonBuilder().setPrettyPrinting().create().toJson(new ChromeHeaders().getJson()) + ",\n" +
" userAgent: '" + new ChromeHeaders().user_agent + "'\n" + // Just to make sure...
"});\n" +
"browser = browserCtx.browser();\n" +
"page = await browserCtx.newPage();\n", 30, false);
if(existingChromeBrowserUrl != null){
jsContext.executeJavaScript(
"browser = await chromium.connectOverCDP('"+existingChromeBrowserUrl+"');\n" +
"browserCtx = browser.contexts()[0];\n" +
"page = await browserCtx.newPage();\n", 30, false);
}else{
jsContext.executeJavaScript(
"browserCtx = await chromium.launchPersistentContext('" + userDataDir.getAbsolutePath().replace("\\", "/") + "', {\n" +
" acceptDownloads: true,\n" +
" headless : " + isHeadless + ",\n" +
" javaScriptEnabled: " + enableJavaScript + ",\n" +
//" downloadsPath: '" + downloadTempDir.getAbsolutePath().replace("\\", "/") + "',\n" + // Active issue at: https://github.com/microsoft/playwright/issues/9279
" devtools: " + isDevTools + ",\n" +
//" ignoreDefaultArgs: true,\n" +
" args: ['--disable-blink-features=AutomationControlled'],\n" + // '--enable-automation=false'
" extraHTTPHeaders: " + new GsonBuilder().setPrettyPrinting().create().toJson(new ChromeHeaders().getJson()) + ",\n" +
" userAgent: '" + new ChromeHeaders().user_agent + "'\n" + // Just to make sure...
"});\n" +
"browser = browserCtx.browser();\n" +
"page = await browserCtx.newPage();\n", 30, false);
}

} catch (Exception e) {
throw new RuntimeException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.osiris.headlessbrowser.HBrowser;
import com.osiris.headlessbrowser.Versions;
import com.osiris.headlessbrowser.data.chrome.ChromeHeaders;
import com.osiris.headlessbrowser.exceptions.NodeJsCodeException;
import com.osiris.headlessbrowser.js.contexts.NodeContext;
Expand Down Expand Up @@ -69,10 +70,10 @@ public PuppeteerWindow(HBrowser parentBrowser, boolean enableJavaScript, OutputS
} else {

}
jsContext.npmInstall("puppeteer");
jsContext.npmInstall("puppeteer@"+ Versions.PUPPETEER);
if (makeUndetectable) {
jsContext.npmInstall("puppeteer-extra");
jsContext.npmInstall("puppeteer-extra-plugin-stealth");
jsContext.npmInstall("puppeteer-extra@"+Versions.PUPPETEER_EXTRA);
jsContext.npmInstall("puppeteer-extra-plugin-stealth@"+Versions.PUPPETEER_EXTRA_STEALTH_PLUGIN);
}
jsContext.executeJavaScript(
"const puppeteer = require('" + (makeUndetectable ? "puppeteer-extra" : "puppeteer") + "');\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ public class WindowBuilder {
* The directory will get deleted on {@link HWindow#close()}. <br>
*/
public boolean temporaryUserDataDir = false;
/**
* Instead of creating a new browser this will connect to an existing browser
* via the chrome dev tools protocol, if the url is provided. <br>
* CURRENTLY ONLY SUPPORTED BY PLAYWRIGHT! <br>
* const browser = await chromium.connectOverCDP('http://localhost:9222');
*/
public String existingChromeBrowserUrl = null;

public WindowBuilder(HBrowser parentBrowser) {
this.parentBrowser = parentBrowser;
Expand All @@ -79,7 +86,8 @@ public PuppeteerWindow buildPuppeteerWindow() {

public PlaywrightWindow buildPlaywrightWindow() {
return new PlaywrightWindow(this.parentBrowser, this.enableJavaScript, this.debugOutputStream, this.jsTimeout,
this.isHeadless, this.userDataDir, this.isDevTools, this.makeUndetectable, this.temporaryUserDataDir);
this.isHeadless, this.userDataDir, this.isDevTools, this.makeUndetectable, this.temporaryUserDataDir,
this.existingChromeBrowserUrl);
}

public LightWindow buildLightWindow() {
Expand Down Expand Up @@ -175,4 +183,12 @@ public WindowBuilder temporaryUserDataDir(boolean val) {
return this;
}

/**
 * Sets the url of an already running Chrome browser to connect to
 * (via the chrome dev tools protocol), instead of creating a new browser. <br>
 * Example value: http://localhost:9222 <br>
 * Currently only passed on to Playwright windows.
 *
 * @param val the url of the existing browser, or null to create a new browser.
 * @return this builder, to allow chaining.
 * @see #existingChromeBrowserUrl
 */
public WindowBuilder existingChromeBrowserUrl(String val) {
    existingChromeBrowserUrl = val;
    return this;
}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.osiris.headlessbrowser.js.contexts;

import com.osiris.headlessbrowser.Versions;
import org.junit.jupiter.api.Test;

import java.io.IOException;
Expand All @@ -11,6 +12,6 @@ class NodeContextTest {
@Test
void install() throws Exception {
NodeContext ctx = new NodeContext(null, System.out, 30); // Installs and starts Node.js if not exists
ctx.install(true);
ctx.install(Versions.NODEJS, true);
}
}

0 comments on commit 2bfdf89

Please sign in to comment.