Skip to content

Commit

Permalink
Adding dataset images
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed Oct 30, 2019
1 parent 0369446 commit a959082
Show file tree
Hide file tree
Showing 9 changed files with 376 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ LICENSE
README.md
tox.ini
*.secret.env
./data/
node_modules/
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
FROM frictionlessdata/datapackage-pipelines:2.1.8

RUN apk --update --no-cache add bash wget
# Install bash and wget, plus Node.js and npm (screenshot.js is run with `node`).
RUN apk --update --no-cache add bash wget nodejs npm
# Upgrade the globally installed npm to its latest release.
RUN npm install -g npm@latest
# Install puppeteer (required by screenshot.js) under /pipelines/.
RUN cd /pipelines/ && npm install puppeteer

COPY docker-dpp-run.sh /dpp/docker/run.sh

Expand Down
29 changes: 29 additions & 0 deletions datapackage_pipelines_migdar/flows/dataset_assets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os
import subprocess

import dataflows as DF

# Path of the node screenshot script, resolved relative to this module.
_FLOWS_DIR = os.path.dirname(__file__)
SCREENSHOT = os.path.join(_FLOWS_DIR, 'node', 'screenshot.js')


def do_screenshot():
    """Build a row processor that screenshots the card page of each row.

    Returns:
        A function that takes a row with a ``doc_id`` key, runs the node
        screenshot script against ``https://yodaat.org/card/<doc_id>`` with
        the ``.card`` selector, and asks it to write ``data/<doc_id>.png``.
        The function returns ``None``.
    """
    def func(row):
        doc_id = row['doc_id']
        url = f'https://yodaat.org/card/{doc_id}'
        outpath = os.path.join('data', doc_id + '.png')
        # doc_id may carry directory components, so create the parent
        # directory of the output file before the script writes to it.
        os.makedirs(os.path.dirname(outpath), exist_ok=True)
        print('running', doc_id)
        status = subprocess.call(['node', SCREENSHOT, url, outpath, '.card'])
        if status != 0:
            # Stay best-effort (keep processing the remaining rows), but do
            # not let a failed screenshot pass unnoticed.
            print('screenshot failed', doc_id, 'exit status', status)
    return func


def flow(*_, path='data/datasets_in_es'):
    """Assemble the screenshot flow.

    Positional arguments are accepted and ignored.

    Args:
        path: Base location under which ``datapackage.json`` is loaded.

    Returns:
        A ``DF.Flow`` that loads ``<path>/datapackage.json`` and passes it
        through the ``do_screenshot()`` step.
    """
    datapackage_location = f'{path}/datapackage.json'
    steps = (
        DF.load(datapackage_location),
        do_screenshot(),
    )
    return DF.Flow(*steps)


if __name__ == '__main__':
    # Script entry point: run the flow against the published data package.
    remote_flow = flow(path='https://api.yodaat.org/data/datasets_in_es')
    remote_flow.process()
1 change: 1 addition & 0 deletions datapackage_pipelines_migdar/flows/node/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node_modules/
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
315 changes: 315 additions & 0 deletions datapackage_pipelines_migdar/flows/node/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions datapackage_pipelines_migdar/flows/node/screenshot.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
const puppeteer = require('puppeteer');

// Usage: node screenshot.js <url> <filename> <selector>
//
// Opens <url> in a headless browser, waits for the first element matching
// <selector>, and saves a screenshot clipped to that element's bounding
// rectangle into <filename>. Exits with a non-zero status on failure.
(async () => {
  const [url, filename, selector] = process.argv.slice(2);
  if (!url || !filename || !selector) {
    console.error('usage: node screenshot.js <url> <filename> <selector>');
    process.exitCode = 1;
    return;
  }

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // setViewport returns a promise; wait for it so the page is laid out
    // at the requested size before navigating.
    await page.setViewport({width: 1300, height: 1200});
    await page.goto(url);
    await page.waitForSelector(selector);

    // Measure the element inside the page; only plain numbers are returned
    // because the result has to be serialised back to this process.
    const rect = await page.evaluate(sel => {
      const element = document.querySelector(sel);
      const {x, y, width, height} = element.getBoundingClientRect();
      return {x, y, width, height};
    }, selector);
    await page.screenshot({path: filename, clip: rect});
  } finally {
    // Always shut the browser down, even when navigation or the selector
    // wait fails, so no browser process is left behind.
    await browser.close();
  }
})().catch(err => {
  // Report the failure and signal it to the caller through the exit code.
  console.error(err);
  process.exitCode = 1;
});
Loading

0 comments on commit a959082

Please sign in to comment.