Skip to content

Commit

Permalink
Merge pull request #1898 from openzim/1887-e2e-modularized
Browse files Browse the repository at this point in the history
Modularizing e2e tests
  • Loading branch information
kelson42 committed Oct 5, 2023
2 parents fc79938 + 3c5a0d0 commit af4f200
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 64 deletions.
2 changes: 1 addition & 1 deletion src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ async function execute(argv: any) {
RedisStore.setOptions(argv.redis || config.defaults.redisPath)
await RedisStore.connect()
const { articleDetailXId, filesToDownloadXPath, filesToRetryXPath, redirectsXId } = RedisStore

await downloader.setBaseUrls(forceRender)
// Output directory
const outputDirectory = path.isAbsolute(_outputDirectory || '') ? _outputDirectory : path.join(process.cwd(), _outputDirectory || 'out')
await mkdirPromise(outputDirectory)
Expand Down
4 changes: 2 additions & 2 deletions src/sanitize-argument.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { isValidEmail } from './util/index.js'
import * as path from 'path'
import { fileURLToPath } from 'url'
import { parameterDescriptions } from './parameterList.js'
import { RENDERERS_LIST } from './util/const.js'

const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
Expand Down Expand Up @@ -192,11 +193,10 @@ export function sanitize_customFlavour(customFlavour: string): string {
}

/**
 * Validate the renderer name requested by the user.
 *
 * @param renderName - Renderer name to check.
 * @returns The same `renderName`, unchanged, when it is one of the known renderers.
 * @throws Error with message `Invalid render name: <renderName>` when the name is not recognised.
 */
export function sanitize_forceRender(renderName: string): string {
const renderNames = ['VisualEditor', 'WikimediaDesktop', 'WikimediaMobile']
// Exact (strict-equality, therefore case-sensitive) membership test.
const checkRenderName = (arr: string[], val: string) => {
return arr.some((arrVal) => val === arrVal)
}
if (checkRenderName(renderNames, renderName)) {
if (checkRenderName(RENDERERS_LIST, renderName)) {
return renderName
}
throw new Error(`Invalid render name: ${renderName}`)
Expand Down
1 change: 1 addition & 0 deletions src/util/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ export const RULE_TO_REDIRECT = /window\.top !== window\.self/
// URL of the `web-handler.js` script, pinned to a specific gist revision.
export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js'
// NOTE(review): presumably the maximum number of retries for a failed file download — confirm at the usage site.
export const MAX_FILE_DOWNLOAD_RETRIES = 5
export const BLACKLISTED_NS = ['Story'] // 'Story' Wikipedia namespace is content, but not ingestible by Parsoid https://github.com/openzim/mwoffliner/issues/1853
// Names of the renderers; imported by the argument sanitizer and by the tests that run once per renderer.
export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor']
45 changes: 45 additions & 0 deletions test/e2e/en.e2e.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { testAllRenders } from '../testAllRenders.js'
import domino from 'domino'
import { zimdump } from '../util.js'
import 'dotenv/config.js'
import { jest } from '@jest/globals'
import rimraf from 'rimraf'

jest.setTimeout(60000)

/**
 * Check the integrity of img elements between zim file and article html taken from it.
 *
 * This is an "at least one match" check: it returns `true` as soon as any image's
 * `src` attribute contains any of the given file names, and `false` when no
 * image/file pair matches (including when either collection is empty).
 *
 * @param imgFilesArr - File names (or paths) of the media entries listed in the zim file.
 * @param imgElements - `<img>` elements taken from the article HTML.
 * @returns `true` if at least one image references one of the files, otherwise `false`.
 */
const verifyImgElements = (imgFilesArr: ReadonlyArray<string>, imgElements: Iterable<{ getAttribute(name: string): string | null }>): boolean => {
  for (const img of imgElements) {
    const src = img.getAttribute('src')
    // An <img> without a src attribute yields null; skip it instead of crashing on `.includes`.
    if (src == null) {
      continue
    }
    if (imgFilesArr.some((imgFile) => src.includes(imgFile))) {
      return true
    }
  }
  return false
}

const mwUrl = 'https://en.wikipedia.org'
const articleList = 'User:Kelson/MWoffliner_CI_reference'
const format = ''

// testAllRenders invokes the callback with the output files it obtained for each
// renderer in turn; every invocation registers its own describe block.
await testAllRenders(mwUrl, articleList, format, async (outFiles) => {
  const firstOutFile = outFiles[0]
  // Dump the reference article's HTML from the generated zim file.
  const articleFromDump = await zimdump(`show --url A/${articleList} ${firstOutFile.outFile}`)

  describe('e2e test for en.wikipedia.org', () => {
    const articleDoc = domino.createDocument(articleFromDump)

    test(`test article header for ${firstOutFile?.renderer} renderer`, async () => {
      const header = articleDoc.querySelector('h1.article-header')
      expect(header).toBeTruthy()
    })

    test(`test article image integrity for ${firstOutFile?.renderer} renderer`, async () => {
      // List the entries of the zim file's I namespace, one per line.
      const mediaListing = await zimdump(`list --ns I ${firstOutFile.outFile}`)
      const imgFilesArr = mediaListing.split('\n').filter((entry) => ['pdf', 'png', 'jpg'].some((suffix) => entry.endsWith(suffix)))
      const imgElements = Array.from(articleDoc.querySelectorAll('img'))
      const anyImageMatches = verifyImgElements(imgFilesArr, imgElements)
      expect(anyImageMatches).toBe(true)
    })

    // Remove the per-run output directory once this renderer's tests are done.
    afterAll(() => {
      rimraf.sync(`./${firstOutFile.testId}`)
    })
  })
})
59 changes: 59 additions & 0 deletions test/testAllRenders.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import * as logger from '../src/Logger.js'
import * as mwoffliner from '../src/mwoffliner.lib.js'
import { execa } from 'execa'
import { RENDERERS_LIST } from '../src/util/const.js'
import { zimcheckAvailable, zimdumpAvailable } from './util.js'

/*
This is the template for e2e tests of different wikis:
1. Verifies zimcheck and zimdump availability and caches the result
2. Gets the output file and checks its integrity
3. Returns the output file for each renderer through the callback function
*/

// Remembers that the tool check already succeeded so it is not repeated.
let zimToolsChecked = false

/**
 * Make sure both zimcheck and zimdump are available before running e2e tests.
 *
 * The check is performed at most once per module instance; later calls return
 * immediately. When a tool is missing, an error is logged and the process exits
 * with code 1 (zimcheck is the one reported if both are missing).
 */
async function checkZimTools() {
  if (!zimToolsChecked) {
    const hasZimcheck = await zimcheckAvailable()
    const hasZimdump = await zimdumpAvailable()

    if (!(hasZimcheck && hasZimdump)) {
      const missingTool = hasZimcheck ? 'Zimdump' : 'Zimcheck'
      logger.error(`${missingTool} not installed, exiting test`)
      process.exit(1)
    }

    zimToolsChecked = true
  }
}

/**
 * Run mwoffliner once with the given renderer and return whatever `execute` resolves to.
 *
 * @param renderName - Renderer name, passed as the `forceRender` option.
 * @param testId - Passed as the `outputDirectory` option.
 * @param articleList - Passed through as the `articleList` option.
 * @param mwUrl - Passed through as the `mwUrl` option.
 * @param format - Optional format(s), passed through as the `format` option.
 * @returns The value resolved by `mwoffliner.execute`.
 */
async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string, format?: string | string[]): Promise<any> {
  const scrapeOptions = {
    mwUrl,
    adminEmail: '[email protected]',
    outputDirectory: testId,
    redis: process.env.REDIS,
    articleList,
    forceRender: renderName,
    format,
  }

  // Flush Redis through the CLI before starting the run.
  await execa('redis-cli flushall', { shell: true })

  return mwoffliner.execute(scrapeOptions)
}

/**
 * Run mwoffliner once per entry of RENDERERS_LIST and hand each result to `callback`.
 *
 * Renderers are processed one after another. Before the callback is awaited, the
 * first output file entry is tagged with `testId` (also used as the output
 * directory name) and `renderer`.
 *
 * @param mwUrl - Forwarded to the scrape as the `mwUrl` option.
 * @param articleList - Forwarded to the scrape as the `articleList` option.
 * @param format - Forwarded to the scrape as the `format` option.
 * @param callback - Awaited once per renderer with that run's output files.
 */
export async function testAllRenders(mwUrl: string, articleList: string, format: string | string[], callback) {
  await checkZimTools()

  for (const renderer of RENDERERS_LIST) {
    // Millisecond timestamp used to build the run identifier.
    const testId = `mwo-test-${Date.now()}`
    const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl, format)

    const firstOutFile = outFiles[0]
    firstOutFile.testId = testId
    firstOutFile.renderer = renderer

    await callback(outFiles)
  }
}
103 changes: 42 additions & 61 deletions test/unit/saveArticles.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { jest } from '@jest/globals'
import { getArticleUrl } from '../../src/util/saveArticles.js'
import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js'
import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js'
import { RENDERERS_LIST } from '../../src/util/const.js'

jest.setTimeout(40000)

Expand Down Expand Up @@ -79,69 +80,49 @@ describe('saveArticles', () => {
expect(articleDoc.querySelector('h1.article-header')).toBeTruthy()
})

test('Check nodet article for en.wikipedia.org using Visual Editor renderer', async () => {
const visualEditorRenderer = new VisualEditorRenderer()
const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
await downloader.setBaseUrls('VisualEditor')
const articleId = 'Canada'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
const { articleDetailXId } = RedisStore
const articleDetail = { title: articleId, timestamp: '2023-09-10T17:36:04Z' }
const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
articleDetailXId.setMany(articlesDetail)
const result = await downloader.getArticle(
downloader.webp,
_moduleDependencies,
articleId,
articleDetailXId,
visualEditorRenderer,
articleUrl,
dump,
articleDetail,
dump.isMainPage(articleId),
)

const articleDoc = domino.createDocument(result[0].html)

const sections = Array.from(articleDoc.querySelectorAll('section'))
const leadSection = sections[0]
expect(sections.length).toEqual(1)
expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0')
})

test('Check nodet article for en.wikipedia.org using Wikimedia Desktop renderer', async () => {
const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
await downloader.setBaseUrls('WikimediaDesktop')
const articleId = 'London'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
const { articleDetailXId } = RedisStore
const articleDetail = { title: articleId }
const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
articleDetailXId.setMany(articlesDetail)
const result = await downloader.getArticle(
downloader.webp,
_moduleDependencies,
articleId,
articleDetailXId,
wikimediaDesktopRenderer,
articleUrl,
dump,
articleDetail,
dump.isMainPage(articleId),
)
for (const renderer of RENDERERS_LIST) {
test(`Check nodet article for en.wikipedia.org using ${renderer} renderer`, async () => {
let rendererInstance
switch (renderer) {
case 'VisualEditor':
rendererInstance = new VisualEditorRenderer()
break
case 'WikimediaDesktop':
rendererInstance = new WikimediaDesktopRenderer()
break
default:
throw new Error(`Unknown renderer: ${renderer}`)
}
const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
await downloader.setBaseUrls(renderer)
const articleId = 'Canada'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
const { articleDetailXId } = RedisStore
const articleDetail = { title: articleId, timestamp: '2023-09-10T17:36:04Z' }
const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
articleDetailXId.setMany(articlesDetail)
const result = await downloader.getArticle(
downloader.webp,
_moduleDependencies,
articleId,
articleDetailXId,
rendererInstance,
articleUrl,
dump,
articleDetail,
dump.isMainPage(articleId),
)

const articleDoc = domino.createDocument(result[0].html)
const articleDoc = domino.createDocument(result[0].html)

const sections = Array.from(articleDoc.querySelectorAll('section'))
const leadSection = sections[0]
expect(sections.length).toEqual(1)
expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0')
})
const sections = Array.from(articleDoc.querySelectorAll('section'))
const leadSection = sections[0]
expect(sections.length).toEqual(1)
expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0')
})
}

test('Load main page and check that it is without header', async () => {
const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
Expand Down

0 comments on commit af4f200

Please sign in to comment.