From 3f29399c1103d17a86767674185ce07889361733 Mon Sep 17 00:00:00 2001 From: Martial Maillot Date: Wed, 2 Oct 2024 16:18:51 +0200 Subject: [PATCH 1/5] test: fiches MT en DSFR (ne pas merger) --- targets/ingester/package.json | 2 + targets/ingester/src/cli.ts | 63 ++++++++++++++++++- .../src/transform/fiche-travail-emploi.ts | 1 - yarn.lock | 55 +++++++++++++++- 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/targets/ingester/package.json b/targets/ingester/package.json index bfabbb7bd..45cd043b6 100644 --- a/targets/ingester/package.json +++ b/targets/ingester/package.json @@ -19,6 +19,7 @@ "unist-util-flat-filter": "1.0.0", "unist-util-parents": "1.0.3", "unist-util-select": "4.0.0", + "unzipper": "^0.12.3", "winston": "3.3.3", "yargs": "17.1.1" }, @@ -36,6 +37,7 @@ "@types/semver": "^7.3.8", "@types/tar-fs": "^2.0.1", "@types/unist": "^2.0.6", + "@types/unzipper": "^0.10.10", "@vercel/ncc": "0.34.0", "jest": "^27.1.1", "lint-staged": "^12.0.0", diff --git a/targets/ingester/src/cli.ts b/targets/ingester/src/cli.ts index be2db53d9..c2bf6d8d7 100644 --- a/targets/ingester/src/cli.ts +++ b/targets/ingester/src/cli.ts @@ -6,6 +6,9 @@ import path from "path"; import * as semver from "semver"; import * as tar from "tar-fs"; import yargs from "yargs"; +import * as unzipper from "unzipper"; +import { promises as fs } from "fs"; +import { createWriteStream } from "fs"; import type { CdtnDocument } from "."; import { updateKaliArticles, updateLegiArticles } from "./articles"; @@ -58,7 +61,12 @@ async function download(pkgName: string, url: string) { tar.extract(getPkgPath(pkgName), { map: function mapHeader(header) { // npm tarball have a root directory called /package so we remove it when extracting - header.name = header.name.replace("package/", ""); + header.name = header.name.replace( + pkgName === "@socialgouv/fiches-travail-data" + ? "fiches-travail-data-test-mt-dsfr/" + : "package/", + "" + ); return header; }, }) @@ -68,6 +76,44 @@ async function download(pkgName: string, url: string) { }); } +async function downloadZip(pkgName: string, url: string) { + return new Promise((resolve, reject) => { + getUri(url, async function (err, rs) { + if (err || !rs) { + reject(`Error while downloading package ${pkgName} - ${url}`); + return; + } + + const outputDir = getPkgPath(pkgName); + + await fs.mkdir(outputDir, { recursive: true }); + + rs.pipe(unzipper.Parse()) + .on("entry", function (entry) { + const filePath = entry.path.replace( + "fiches-travail-data-test-mt-dsfr/", + "" + ); + + const outputPath = path.join(outputDir, filePath); + + if (entry.type === "Directory") { + fs.mkdir(outputPath, { recursive: true }).catch(reject); + entry.autodrain(); + } else { + entry.pipe(createWriteStream(outputPath)).on("error", reject); + } + }) + .on("finish", resolve) + .on("error", (error) => { + reject( + `Error while extracting package ${pkgName} - ${error.message}` + ); + }); + }); + }); +} + const dataPackages = [ { getDocuments: getCdtDocuments, pkgName: "@socialgouv/legi-data" }, { @@ -87,6 +133,12 @@ const dataPackages = [ ]; async function getPackageInfo(pkgName: string) { + if (pkgName === "@socialgouv/fiches-travail-data") { + return { + url: "https://github.com/SocialGouv/fiches-travail-data/archive/refs/heads/test-mt-dsfr.zip", + version: "v4.700.1", + }; + } const pkgInfo: PackageInfo = await got( `http://registry.npmjs.org/${pkgName}/latest` ).json(); @@ -108,7 +160,11 @@ async function main() { >(); for (const { pkgName, disableSlugUpdate, getDocuments } of dataPackages) { const pkgInfo = await getPackageInfo(pkgName); - await download(pkgName, pkgInfo.url); + if (pkgName === "@socialgouv/fiches-travail-data") { + await downloadZip(pkgName, pkgInfo.url); + } else { + await download(pkgName, pkgInfo.url); + } const ingestedVersion = await getLastIngestedVersion(pkgName); if ( @@ -132,7 +188,9 @@ async function main() { pkgName, { version, getDocuments, disableSlugUpdate }, ] of packagesToUpdate) { + console.log(`Start package: ${pkgName}`); if (!getDocuments) { + console.log(`Get document not defined, skip it !`); continue; } console.time(`update ${pkgName}`); @@ -185,6 +243,7 @@ main() console.log(`Finish ingest ${data.length} documents`); }) .catch((err) => { + console.info("Failed :("); console.error(err); process.exit(1); }); diff --git a/targets/ingester/src/transform/fiche-travail-emploi.ts b/targets/ingester/src/transform/fiche-travail-emploi.ts index 5009624d9..dca5060ef 100644 --- a/targets/ingester/src/transform/fiche-travail-emploi.ts +++ b/targets/ingester/src/transform/fiche-travail-emploi.ts @@ -82,7 +82,6 @@ const fetchSections = async ( `No html found for this section : ${JSON.stringify(section)}` ); } - return { ...section, htmlWithGlossary, diff --git a/yarn.lock b/yarn.lock index 99e5488c7..f9f9d65e6 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7148,6 +7148,15 @@ __metadata: languageName: node linkType: hard +"@types/unzipper@npm:^0.10.10": + version: 0.10.10 + resolution: "@types/unzipper@npm:0.10.10" + dependencies: + "@types/node": "*" + checksum: 4ba5f6c4c5a892f5f5ce7724a4c3f2ea772a29043e296a28b725162ffff8fb25d2d0995c5536705e13bfbde7765d085f0da5408b25946457d050ac4b75aaefee + languageName: node + linkType: hard + "@types/uuid@npm:^9.0.5": version: 9.0.7 resolution: "@types/uuid@npm:9.0.7" @@ -8421,6 +8430,13 @@ __metadata: languageName: node linkType: hard +"bluebird@npm:~3.7.2": + version: 3.7.2 + resolution: "bluebird@npm:3.7.2" + checksum: 869417503c722e7dc54ca46715f70e15f4d9c602a423a02c825570862d12935be59ed9c7ba34a9b31f186c017c23cac6b54e35446f8353059c101da73eac22ef + languageName: node + linkType: hard + "body-parser@npm:1.20.1": version: 1.20.1 resolution: "body-parser@npm:1.20.1" @@ -10726,6 +10742,15 @@ __metadata: languageName: node linkType: hard +"duplexer2@npm:~0.1.4": + version: 0.1.4 + resolution: "duplexer2@npm:0.1.4" + dependencies: + readable-stream: ^2.0.2 + checksum: 744961f03c7f54313f90555ac20284a3fb7bf22fdff6538f041a86c22499560eb6eac9d30ab5768054137cb40e6b18b40f621094e0261d7d8c35a37b7a5ad241 + languageName: node + linkType: hard + "duplexer3@npm:^0.1.4": version: 0.1.5 resolution: "duplexer3@npm:0.1.5" @@ -12265,6 +12290,17 @@ __metadata: languageName: node linkType: hard +"fs-extra@npm:^11.2.0": + version: 11.2.0 + resolution: "fs-extra@npm:11.2.0" + dependencies: + graceful-fs: ^4.2.0 + jsonfile: ^6.0.1 + universalify: ^2.0.0 + checksum: b12e42fa40ba47104202f57b8480dd098aa931c2724565e5e70779ab87605665594e76ee5fb00545f772ab9ace167fe06d2ab009c416dc8c842c5ae6df7aa7e8 + languageName: node + linkType: hard + "fs-extra@npm:^8.1.0": version: 8.1.0 resolution: "fs-extra@npm:8.1.0" @@ -12861,7 +12897,7 @@ __metadata: languageName: node linkType: hard -"graceful-fs@npm:^4.1.11, graceful-fs@npm:^4.1.15, graceful-fs@npm:^4.1.2, graceful-fs@npm:^4.1.6, graceful-fs@npm:^4.2.0, graceful-fs@npm:^4.2.11, graceful-fs@npm:^4.2.4, graceful-fs@npm:^4.2.6, graceful-fs@npm:^4.2.9": +"graceful-fs@npm:^4.1.11, graceful-fs@npm:^4.1.15, graceful-fs@npm:^4.1.2, graceful-fs@npm:^4.1.6, graceful-fs@npm:^4.2.0, graceful-fs@npm:^4.2.11, graceful-fs@npm:^4.2.2, graceful-fs@npm:^4.2.4, graceful-fs@npm:^4.2.6, graceful-fs@npm:^4.2.9": version: 4.2.11 resolution: "graceful-fs@npm:4.2.11" checksum: ac85f94da92d8eb6b7f5a8b20ce65e43d66761c55ce85ac96df6865308390da45a8d3f0296dd3a663de65d30ba497bd46c696cc1e248c72b13d6d567138a4fc7 @@ -13520,6 +13556,7 @@ __metadata: "@types/semver": ^7.3.8 "@types/tar-fs": ^2.0.1 "@types/unist": ^2.0.6 + "@types/unzipper": ^0.10.10 "@vercel/ncc": 0.34.0 get-uri: 3.0.2 got: 11.8.5 @@ -13539,6 +13576,7 @@ __metadata: unist-util-flat-filter: 1.0.0 unist-util-parents: 1.0.3 unist-util-select: 4.0.0 + unzipper: ^0.12.3 winston: 3.3.3 yargs: 17.1.1 languageName: unknown @@ -20058,7 +20096,7 @@ __metadata: languageName: node linkType: hard -"readable-stream@npm:^2.0.0, readable-stream@npm:^2.1.4, readable-stream@npm:~2.3.6": +"readable-stream@npm:^2.0.0, readable-stream@npm:^2.0.2, readable-stream@npm:^2.1.4, readable-stream@npm:~2.3.6": version: 2.3.8 resolution: "readable-stream@npm:2.3.8" dependencies: @@ -22815,6 +22853,19 @@ __metadata: languageName: node linkType: hard +"unzipper@npm:^0.12.3": + version: 0.12.3 + resolution: "unzipper@npm:0.12.3" + dependencies: + bluebird: ~3.7.2 + duplexer2: ~0.1.4 + fs-extra: ^11.2.0 + graceful-fs: ^4.2.2 + node-int64: ^0.4.0 + checksum: 2e3296d1fad307b02b3d0f3e9c4ac1bdd56047e66fe5108a9e580b417f4ac9b07c31e9ded3e006e01edaaba3e20b13c638bd3c893600f75c589b6e0f778d9ffd + languageName: node + linkType: hard + "upath@npm:^2.0.1": version: 2.0.1 resolution: "upath@npm:2.0.1" From 224b00eee627f26e30765f0c4c81aaaccdfe30e2 Mon Sep 17 00:00:00 2001 From: Martial Maillot Date: Thu, 3 Oct 2024 13:46:34 +0200 Subject: [PATCH 2/5] add logs --- targets/export-elasticsearch/src/services/export.ts | 3 ++- targets/export-elasticsearch/src/workers/ingester-preprod.ts | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/targets/export-elasticsearch/src/services/export.ts b/targets/export-elasticsearch/src/services/export.ts index cce1a26dd..482550d37 100644 --- a/targets/export-elasticsearch/src/services/export.ts +++ b/targets/export-elasticsearch/src/services/export.ts @@ -70,7 +70,7 @@ export class ExportService { const exportEsDone = await this.exportRepository.getOne(id); const message = `**${envName}:** mise à jour terminée (${exportEsDone.documentsCount?.total} documents) 🎉`; - logger.info(message); + logger.info(message); await sendMattermostMessage( message, process.env.MATTERMOST_CHANNEL_EXPORT @@ -82,6 +82,7 @@ export class ExportService { new Date() ); } catch (e: any) { + console.log("Error during process: ", e); await sendMattermostMessage( `⚠️ **${envName}:** La mise à jour a échouée. ⚠️`, process.env.MATTERMOST_CHANNEL_EXPORT diff --git a/targets/export-elasticsearch/src/workers/ingester-preprod.ts b/targets/export-elasticsearch/src/workers/ingester-preprod.ts index 995034a46..937a96c33 100644 --- a/targets/export-elasticsearch/src/workers/ingester-preprod.ts +++ b/targets/export-elasticsearch/src/workers/ingester-preprod.ts @@ -18,6 +18,7 @@ const ingester = async (): Promise => { ); resolve("Export elasticsearch completed successfully"); } catch (error: unknown) { + console.error("Failure during ingest", error); reject(error); } }); From a15d56c7d867a117a59f9a3278c27fe313d63cf4 Mon Sep 17 00:00:00 2001 From: Martial Maillot Date: Thu, 3 Oct 2024 14:49:36 +0200 Subject: [PATCH 3/5] add logs --- .kontinuous/env/dev/templates/export.configmap.yaml | 2 ++ targets/ingester/src/transform/fiche-travail-emploi.ts | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.kontinuous/env/dev/templates/export.configmap.yaml b/.kontinuous/env/dev/templates/export.configmap.yaml index f47d5ade8..1271641bf 100644 --- a/.kontinuous/env/dev/templates/export.configmap.yaml +++ b/.kontinuous/env/dev/templates/export.configmap.yaml @@ -19,3 +19,5 @@ data: ELASTICSEARCH_INDEX_PROD: "cdtn-dev-v2" MATTERMOST_CHANNEL_EXPORT: "s-cdtn-administration-veille-dev" DISABLE_LIMIT_EXPORT: "true" + DISABLE_SITEMAP: "true" + DISABLE_AGREEMENTS: "true" diff --git a/targets/ingester/src/transform/fiche-travail-emploi.ts b/targets/ingester/src/transform/fiche-travail-emploi.ts index dca5060ef..ac876e635 100644 --- a/targets/ingester/src/transform/fiche-travail-emploi.ts +++ b/targets/ingester/src/transform/fiche-travail-emploi.ts @@ -53,8 +53,9 @@ const fetchSections = async ( return await pMap( sections, async ({ references, ...section }) => { - let htmlWithGlossary = section.html; + const htmlWithGlossary = section.html; if (section.html && section.html !== "") { + /* const fetchResult: any = await got .post(`${URL_EXPORT}/glossary`, { json: { @@ -77,6 +78,7 @@ const fetchSections = async ( } else { htmlWithGlossary = fetchResult.result; } + */ } else { console.warn( `No html found for this section : ${JSON.stringify(section)}` From 3c6479d7f6c41b4c2c955ac9d835651186d75e3c Mon Sep 17 00:00:00 2001 From: Martial Maillot Date: Tue, 15 Oct 2024 15:34:43 +0200 Subject: [PATCH 4/5] increase ressources --- .kontinuous/env/dev/values.yaml | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/.kontinuous/env/dev/values.yaml b/.kontinuous/env/dev/values.yaml index edba2986c..d9c67d9fb 100644 --- a/.kontinuous/env/dev/values.yaml +++ b/.kontinuous/env/dev/values.yaml @@ -5,7 +5,7 @@ jobs: buildArgs: NEXT_PUBLIC_BASE_PATH: https://www-{{.Values.global.host}} post-restore: - ~needs: [pg, hasura] + ~needs: [ pg, hasura ] use: psql with: sqlFile: .kontinuous/sql/post-restore.sql @@ -13,11 +13,20 @@ jobs: hasura: imagePackage: hasura - ~needs: [build-hasura] + ~needs: [ build-hasura ] ~preDeploy.cleaner: match: kind: Deployment value: true + autoscale: + enabled: true + resources: + limits: + cpu: "1000m" + memory: "1.5Gi" + requests: + cpu: "500m" + memory: "1Gi" envFrom: - secretRef: name: pg-app @@ -27,6 +36,14 @@ hasura: name: hasura export: + replicas: 1 + resources: + limits: + cpu: "2000m" + memory: "4096Mi" + requests: + cpu: "1500m" + memory: "2Gi" env: - name: "BRANCH_NAME_SLUG" value: "{{.Values.global.branchSlug32}}" @@ -39,6 +56,12 @@ pg: kind: Cluster value: true cnpg-cluster: + resources: + requests: + memory: 1Gi + limits: + cpu: "1" + memory: 1Gi backup: # don't enable backup because we would need a new path each time as the cluster is recreated ~tpl~enabled: "false" From 0924562a775bfa413ec1f18cb36897a274b73f09 Mon Sep 17 00:00:00 2001 From: Martial Maillot Date: Tue, 15 Oct 2024 16:18:30 +0200 Subject: [PATCH 5/5] increase ressources --- .kontinuous/env/dev/values.yaml | 22 --------------------- .kontinuous/templates/ingester.cronjob.yaml | 3 +++ .kontinuous/templates/ingester.job.tpl | 7 ------- 3 files changed, 3 insertions(+), 29 deletions(-) diff --git a/.kontinuous/env/dev/values.yaml b/.kontinuous/env/dev/values.yaml index d9c67d9fb..83f194126 100644 --- a/.kontinuous/env/dev/values.yaml +++ b/.kontinuous/env/dev/values.yaml @@ -18,15 +18,6 @@ hasura: match: kind: Deployment value: true - autoscale: - enabled: true - resources: - limits: - cpu: "1000m" - memory: "1.5Gi" - requests: - cpu: "500m" - memory: "1Gi" envFrom: - secretRef: name: pg-app @@ -37,13 +28,6 @@ hasura: export: replicas: 1 - resources: - limits: - cpu: "2000m" - memory: "4096Mi" - requests: - cpu: "1500m" - memory: "2Gi" env: - name: "BRANCH_NAME_SLUG" value: "{{.Values.global.branchSlug32}}" @@ -56,12 +40,6 @@ pg: kind: Cluster value: true cnpg-cluster: - resources: - requests: - memory: 1Gi - limits: - cpu: "1" - memory: 1Gi backup: # don't enable backup because we would need a new path each time as the cluster is recreated ~tpl~enabled: "false" diff --git a/.kontinuous/templates/ingester.cronjob.yaml b/.kontinuous/templates/ingester.cronjob.yaml index 19a40da61..563392de5 100644 --- a/.kontinuous/templates/ingester.cronjob.yaml +++ b/.kontinuous/templates/ingester.cronjob.yaml @@ -2,6 +2,9 @@ apiVersion: batch/v1 kind: CronJob metadata: name: cron-ingester + annotations: + oblik.socialgouv.io/min-request-cpu: 1500m + oblik.socialgouv.io/min-request-memory: 3Gi spec: concurrencyPolicy: Forbid successfulJobsHistoryLimit: 3 diff --git a/.kontinuous/templates/ingester.job.tpl b/.kontinuous/templates/ingester.job.tpl index 9c1c0237b..76f590002 100644 --- a/.kontinuous/templates/ingester.job.tpl +++ b/.kontinuous/templates/ingester.job.tpl @@ -11,13 +11,6 @@ spec: containers: - name: update-ingester image: "{{ or .Values.registry .Values.global.registry }}/{{ .Values.global.imageProject }}/{{ .Values.global.imageRepository }}/ingester:{{ .Values.global.imageTag }}" - resources: - requests: - cpu: 1500m - memory: 2.5Gi - limits: - cpu: 2000m - memory: 3Gi workingDir: /app env: - name: PRODUCTION