Skip to content

Commit

Permalink
Add CSV export feature (#217)
Browse files Browse the repository at this point in the history
* POC: Add CSV export

* Export CSV refactor

- Wait for file to be downloaded
- Delete tmp file and folder after

* fix for gRPC mode

* remove unnecessary dep

* return CSV file name

* fix wait function

* fix for Linux

* fix download timeout

* delete tmp folder if a filePath is provided

* undo dev change

* separate render and renderCSV features + fix reusable and clustered mode for csv

* fix RenderCSVRequest proto

* fixes after merge

* add await

* fix function name
  • Loading branch information
AgnesToulet authored May 19, 2021
1 parent 38a111c commit 452528b
Show file tree
Hide file tree
Showing 9 changed files with 378 additions and 43 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"@grpc/grpc-js": "^1.0",
"@grpc/proto-loader": "^0.5.4",
"@hapi/boom": "^9.1.0",
"chokidar": "^3.5.1",
"eslint": "^7.13.0",
"express": "^4.16.3",
"express-prom-bundle": "^5.1.5",
Expand Down
16 changes: 16 additions & 0 deletions proto/rendererv2.proto
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,22 @@ message RenderResponse {
string error = 1;
}

// Request message for the Renderer.RenderCSV RPC.
// NOTE(review): the field names line up with RenderCSVOptions in
// src/browser/browser.ts; the code that maps one onto the other is not
// visible here — confirm before relying on it.
message RenderCSVRequest {
string url = 1;
string filePath = 2;
string renderKey = 3;
string domain = 4;
// NOTE(review): presumably seconds, like RenderCSVOptions.timeout — confirm.
int32 timeout = 5;
string timezone = 6;
// Header name -> list of values; StringList is declared elsewhere in this file.
map<string, StringList> headers = 7;
}

// Response message for the Renderer.RenderCSV RPC.
message RenderCSVResponse {
// Same field name and number as RenderResponse.error.
string error = 1;
// NOTE(review): presumably the name of the exported CSV file (cf.
// RenderCSVResponse.fileName in src/browser/browser.ts) — confirm.
string fileName = 2;
}

// Rendering service: one RPC per supported output kind.
service Renderer {
// Takes a RenderRequest and answers with a RenderResponse.
rpc Render(RenderRequest) returns (RenderResponse);
// Takes a RenderCSVRequest and answers with a RenderCSVResponse.
rpc RenderCSV(RenderCSVRequest) returns (RenderCSVResponse);
}
154 changes: 127 additions & 27 deletions src/browser/browser.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import * as os from 'os';
import * as uniqueFilename from 'unique-filename';
import * as puppeteer from 'puppeteer';
import * as chokidar from 'chokidar';
import * as path from 'path';
import * as fs from 'fs';
import { Logger } from '../logger';
import { RenderingConfig } from '../config';

Expand All @@ -23,10 +26,26 @@ export interface RenderOptions {
headers?: HTTPHeaders;
}

/**
 * Options accepted by the CSV export entry points (`renderCSV` / `exportCSV`).
 */
export interface RenderCSVOptions {
// Page to navigate to; `socket://` URLs are rejected by validateRenderOptions.
url: string;
// When non-empty, the downloaded file is moved to this path; otherwise it is
// left in the temporary download folder created by exportCSV.
filePath: string;
// In seconds (multiplied by 1000 before use); validateRenderOptions coerces it
// to an integer and falls back to 30.
timeout: string | number;
// Value of the `renderKey` cookie set on the page before navigation.
renderKey: string;
// Domain the `renderKey` cookie is set for.
domain: string;
// Passed to page.emulateTimezone by the reusable and clustered browsers when set.
timezone?: string;
// Not read by any code visible in this diff.
encoding?: string;
// validateRenderOptions keeps only `Accept-Language`; other headers are dropped.
headers?: HTTPHeaders;
}

/**
 * Result of an image render.
 */
export interface RenderResponse {
// `render` returns the `filePath` taken from the options it was given.
filePath: string;
}

/**
 * Result of a CSV export.
 */
export interface RenderCSVResponse {
// Final location of the file: `options.filePath` when one was provided,
// otherwise the path inside the temporary download folder.
filePath: string;
// Base name of the file as it was downloaded by the browser.
fileName?: string;
}

export class Browser {
constructor(protected config: RenderingConfig, protected log: Logger) {
this.log.debug('Browser initialized', 'config', this.config);
Expand All @@ -48,15 +67,31 @@ export class Browser {

// No-op in this base class.
// NOTE(review): subclasses presumably override this to launch their browser or cluster — confirm.
async start(): Promise<void> {}

validateOptions(options: RenderOptions) {
/**
 * Normalises the options shared by image and CSV rendering, mutating `options` in place.
 *
 * - Rejects `socket://` URLs.
 * - Replaces `options.headers` with an object holding at most `Accept-Language`
 *   (the caller's value first, then the configured default).
 * - Coerces `options.timeout` to an integer, falling back to 30.
 *
 * @throws Error when the URL uses the `socket://` scheme.
 */
validateRenderOptions(options: RenderOptions | RenderCSVOptions) {
  const usesSocketScheme = options.url.startsWith(`socket://`);
  if (usesSocketScheme) {
    // Puppeteer doesn't support socket:// URLs
    throw new Error(`Image rendering in socket mode is not supported`);
  }

  // Only Accept-Language is carried over; every other incoming header is discarded.
  const incoming = options.headers || {};
  const acceptLanguage = incoming['Accept-Language'] || this.config.acceptLanguage;

  const sanitized = {};
  if (acceptLanguage) {
    sanitized['Accept-Language'] = acceptLanguage;
  }
  options.headers = sanitized;

  const parsedTimeout = parseInt(options.timeout as string, 10);
  options.timeout = parsedTimeout || 30;
}

validateImageOptions(options: RenderOptions) {
this.validateRenderOptions(options);

options.width = parseInt(options.width as string, 10) || this.config.width;
options.height = parseInt(options.height as string, 10) || this.config.height;
options.timeout = parseInt(options.timeout as string, 10) || 30;

if (options.width < 10) {
options.width = this.config.width;
Expand All @@ -79,17 +114,6 @@ export class Browser {
if (options.deviceScaleFactor > this.config.maxDeviceScaleFactor) {
options.deviceScaleFactor = this.config.deviceScaleFactor;
}

options.headers = options.headers || {};
const headers = {};

if (options.headers['Accept-Language']) {
headers['Accept-Language'] = options.headers['Accept-Language'];
} else if (this.config.acceptLanguage) {
headers['Accept-Language'] = this.config.acceptLanguage;
}

options.headers = headers;
}

getLauncherOptions(options) {
Expand All @@ -111,12 +135,28 @@ export class Browser {
return launcherOptions;
}

/**
 * Applies the per-request state a page needs before navigation: the `renderKey`
 * cookie and, when any are present, the extra HTTP headers.
 *
 * @param page Puppeteer page to configure.
 * @param options Render options providing `renderKey`, `domain` and `headers`.
 */
async preparePage(page: any, options: any) {
  const { renderKey, domain, headers } = options;

  if (this.config.verboseLogging) {
    this.log.debug('Setting cookie for page', 'renderKey', renderKey, 'domain', domain);
  }
  await page.setCookie({ name: 'renderKey', value: renderKey, domain });

  // Nothing more to do when no headers were supplied.
  const hasHeaders = Boolean(headers) && Object.keys(headers).length > 0;
  if (!hasHeaders) {
    return;
  }

  this.log.debug(`Setting extra HTTP headers for page`, 'headers', headers);
  await page.setExtraHTTPHeaders(headers);
}

async render(options: RenderOptions): Promise<RenderResponse> {
let browser;
let page: any;

try {
this.validateOptions(options);
this.validateImageOptions(options);
const launcherOptions = this.getLauncherOptions(options);
browser = await puppeteer.launch(launcherOptions);
page = await browser.newPage();
Expand Down Expand Up @@ -152,19 +192,7 @@ export class Browser {
deviceScaleFactor: options.deviceScaleFactor,
});

if (this.config.verboseLogging) {
this.log.debug('Setting cookie for page', 'renderKey', options.renderKey, 'domain', options.domain);
}
await page.setCookie({
name: 'renderKey',
value: options.renderKey,
domain: options.domain,
});

if (options.headers && Object.keys(options.headers).length > 0) {
this.log.debug(`Setting extra HTTP headers for page`, 'headers', options.headers);
await page.setExtraHTTPHeaders(options.headers);
}
await this.preparePage(page, options);

if (this.config.verboseLogging) {
this.log.debug('Moving mouse on page', 'x', options.width, 'y', options.height);
Expand Down Expand Up @@ -202,6 +230,78 @@ export class Browser {
return { filePath: options.filePath };
}

/**
 * Exports a CSV using a browser launched for this single request.
 *
 * Validates `options`, launches a browser, opens a page, delegates the export
 * to `exportCSV`, then closes the page and the browser.
 *
 * @param options CSV export options; mutated in place by `validateRenderOptions`.
 * @returns Location and name of the exported file, as produced by `exportCSV`.
 * @throws Whatever validation, the browser launch, or `exportCSV` throws.
 */
async renderCSV(options: RenderCSVOptions): Promise<RenderCSVResponse> {
  let browser;
  let page: any;

  try {
    this.validateRenderOptions(options);
    const launcherOptions = this.getLauncherOptions(options);
    browser = await puppeteer.launch(launcherOptions);
    page = await browser.newPage();
    this.addPageListeners(page);

    return await this.exportCSV(page, options);
  } finally {
    try {
      if (page) {
        this.removePageListeners(page);
        await page.close();
      }
    } finally {
      // Runs even when closing the page fails, so the launched browser
      // process is never left behind.
      if (browser) {
        await browser.close();
      }
    }
  }
}

/**
 * Navigates `page` to `options.url`, waits for the browser to download a file
 * and reports where that file ended up.
 *
 * Downloads are directed to a fresh temporary folder watched with chokidar.
 * When `options.filePath` is set, the file is moved there and the temporary
 * folder is removed; otherwise the file stays in the temporary folder and that
 * path is returned.
 *
 * The watcher is always closed, and the temporary folder is removed
 * (best effort) whenever the export fails.
 *
 * @param page Puppeteer page to drive.
 * @param options Validated CSV options; `timeout` is in seconds.
 * @returns Final file path plus the base name of the downloaded file.
 * @throws Error when no finished download appears within `options.timeout`
 *   seconds after navigation, or when navigation / file operations fail.
 */
async exportCSV(page: any, options: any): Promise<RenderCSVResponse> {
  await this.preparePage(page, options);

  const downloadPath = uniqueFilename(os.tmpdir());
  fs.mkdirSync(downloadPath);

  let downloadFilePath = '';
  let completed = false;

  try {
    const watcher = chokidar.watch(downloadPath);
    try {
      watcher.on('add', file => {
        // Ignore in-progress `.crdownload` files; only a finished download counts.
        if (!file.endsWith('.crdownload')) {
          downloadFilePath = file;
        }
      });

      // `_client` is a private Puppeteer handle to the DevTools session.
      await page._client.send('Page.setDownloadBehavior', { behavior: 'allow', downloadPath: downloadPath });

      if (this.config.verboseLogging) {
        this.log.debug('Navigating and waiting for all network requests to finish', 'url', options.url);
      }

      await page.goto(options.url, { waitUntil: 'networkidle0', timeout: options.timeout * 1000 });

      if (this.config.verboseLogging) {
        this.log.debug('Waiting for download to end');
      }

      // Poll until the watcher has seen a finished file or the timeout elapses.
      const startDate = Date.now();
      while (Date.now() - startDate <= options.timeout * 1000) {
        if (downloadFilePath !== '') {
          break;
        }
        await new Promise(resolve => setTimeout(resolve, 500));
      }

      if (downloadFilePath === '') {
        throw new Error(`Timeout exceeded while waiting for download to end`);
      }
    } finally {
      // Close the watcher on every path so failed exports do not leak file-system watchers.
      await watcher.close();
    }

    let filePath = downloadFilePath;
    if (options.filePath) {
      try {
        fs.renameSync(downloadFilePath, options.filePath);
      } catch (err) {
        // A rename cannot cross filesystems; fall back to copy + delete in that case only.
        if ((err as NodeJS.ErrnoException).code !== 'EXDEV') {
          throw err;
        }
        fs.copyFileSync(downloadFilePath, options.filePath);
        fs.unlinkSync(downloadFilePath);
      }
      filePath = options.filePath;
      fs.rmdirSync(path.dirname(downloadFilePath));
    }

    completed = true;
    return { filePath, fileName: path.basename(downloadFilePath) };
  } finally {
    if (!completed) {
      // Best-effort cleanup: a failure here must not mask the original error.
      try {
        for (const entry of fs.readdirSync(downloadPath)) {
          fs.unlinkSync(path.join(downloadPath, entry));
        }
        fs.rmdirSync(downloadPath);
      } catch (cleanupError) {
        this.log.debug('Failed to remove temporary download folder', 'path', downloadPath, 'err', cleanupError);
      }
    }
  }
}

addPageListeners(page: any) {
page.on('error', this.logError);
page.on('pageerror', this.logPageError);
Expand Down
37 changes: 30 additions & 7 deletions src/browser/clustered.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,22 @@
import { Cluster } from 'puppeteer-cluster';
import { Browser, RenderResponse, RenderOptions } from './browser';
import { Browser, RenderResponse, RenderOptions, RenderCSVOptions, RenderCSVResponse } from './browser';
import { Logger } from '../logger';
import { RenderingConfig, ClusteringConfig } from '../config';

// Kind of output a cluster task should produce; used by the task handler to
// choose between exportCSV and takeScreenshot.
enum RenderType {
CSV = 'csv',
PNG = 'png',
}

// Job payload handed to cluster.execute and received by the task handler as `data`.
interface ClusterOptions {
// Render options forwarded to exportCSV or takeScreenshot.
options: RenderOptions | RenderCSVOptions;
// Selects which of the two the task handler runs.
renderType: RenderType;
}

// Result type of a cluster task: an image render result or a CSV export result.
type ClusterResponse = RenderResponse | RenderCSVResponse;

export class ClusteredBrowser extends Browser {
cluster: Cluster<any, RenderResponse>;
cluster: Cluster<ClusterOptions, ClusterResponse>;
clusteringConfig: ClusteringConfig;
concurrency: number;

Expand All @@ -27,22 +39,33 @@ export class ClusteredBrowser extends Browser {
puppeteerOptions: launcherOptions,
});
await this.cluster.task(async ({ page, data }) => {
if (data.timezone) {
if (data.options.timezone) {
// set timezone
await page.emulateTimezone(data.timezone);
await page.emulateTimezone(data.options.timezone);
}

try {
this.addPageListeners(page);
return await this.takeScreenshot(page, data);
switch (data.renderType) {
case RenderType.CSV:
return await this.exportCSV(page, data.options);
case RenderType.PNG:
default:
return await this.takeScreenshot(page, data.options);
}
} finally {
this.removePageListeners(page);
}
});
}

/**
 * Renders an image through the cluster.
 *
 * Validates the image options, then queues a PNG job; the cluster task
 * handler runs `takeScreenshot` for it.
 *
 * @param options Image render options; mutated in place by `validateImageOptions`.
 * @returns The result produced by the cluster task.
 */
async render(options: RenderOptions): Promise<RenderResponse> {
  this.validateImageOptions(options);
  return this.cluster.execute({ options, renderType: RenderType.PNG });
}

/**
 * Exports a CSV through the cluster.
 *
 * Validates the shared render options, then queues a CSV job; the cluster
 * task handler runs `exportCSV` for it.
 *
 * @param options CSV export options; mutated in place by `validateRenderOptions`.
 * @returns The result produced by the cluster task.
 */
async renderCSV(options: RenderCSVOptions): Promise<RenderCSVResponse> {
  this.validateRenderOptions(options);

  const job: ClusterOptions = { options, renderType: RenderType.CSV };
  return this.cluster.execute(job);
}
}
32 changes: 30 additions & 2 deletions src/browser/reusable.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import * as puppeteer from 'puppeteer';
import { Browser, RenderResponse, RenderOptions } from './browser';
import { Browser, RenderResponse, RenderOptions, RenderCSVResponse, RenderCSVOptions } from './browser';
import { Logger } from '../logger';
import { RenderingConfig } from '../config';

Expand All @@ -20,7 +20,7 @@ export class ReusableBrowser extends Browser {
let page: puppeteer.Page | undefined;

try {
this.validateOptions(options);
this.validateImageOptions(options);
context = await this.browser.createIncognitoBrowserContext();
page = await context.newPage();

Expand All @@ -42,4 +42,32 @@ export class ReusableBrowser extends Browser {
}
}
}

/**
 * Exports a CSV using the shared browser instance.
 *
 * Validates `options`, opens a page in a fresh incognito context, applies the
 * requested timezone, delegates to `exportCSV`, then closes the page and the
 * context.
 *
 * @param options CSV export options; mutated in place by `validateRenderOptions`.
 * @returns Location and name of the exported file, as produced by `exportCSV`.
 * @throws Whatever validation, context/page creation, or `exportCSV` throws.
 */
async renderCSV(options: RenderCSVOptions): Promise<RenderCSVResponse> {
  let context: puppeteer.BrowserContext | undefined;
  let page: puppeteer.Page | undefined;

  try {
    this.validateRenderOptions(options);
    context = await this.browser.createIncognitoBrowserContext();
    page = await context.newPage();

    if (options.timezone) {
      // set timezone
      await page.emulateTimezone(options.timezone);
    }

    this.addPageListeners(page);

    return await this.exportCSV(page, options);
  } finally {
    try {
      if (page) {
        this.removePageListeners(page);
        await page.close();
      }
    } finally {
      // Runs even when closing the page fails, so incognito contexts do not
      // pile up in the shared browser.
      if (context) {
        await context.close();
      }
    }
  }
}
}
Loading

0 comments on commit 452528b

Please sign in to comment.