Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: fiches MT en DSFR (ne pas merger) #1490

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .kontinuous/env/dev/templates/export.configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ data:
ELASTICSEARCH_INDEX_PROD: "cdtn-dev-v2"
MATTERMOST_CHANNEL_EXPORT: "s-cdtn-administration-veille-dev"
DISABLE_LIMIT_EXPORT: "true"
DISABLE_SITEMAP: "true"
DISABLE_AGREEMENTS: "true"
3 changes: 2 additions & 1 deletion targets/export-elasticsearch/src/services/export.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ export class ExportService {
const exportEsDone = await this.exportRepository.getOne(id);

const message = `**${envName}:** mise à jour terminée (${exportEsDone.documentsCount?.total} documents) 🎉`;
logger.info(message);
logger.info(message);
await sendMattermostMessage(
message,
process.env.MATTERMOST_CHANNEL_EXPORT
Expand All @@ -82,6 +82,7 @@ export class ExportService {
new Date()
);
} catch (e: any) {
console.log("Error during process: ", e);
await sendMattermostMessage(
`⚠️ **${envName}:** La mise à jour a échouée. ⚠️`,
process.env.MATTERMOST_CHANNEL_EXPORT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const ingester = async (): Promise<string> => {
);
resolve("Export elasticsearch completed successfully");
} catch (error: unknown) {
console.error("Failure during ingest", error);
reject(error);
}
});
Expand Down
2 changes: 2 additions & 0 deletions targets/ingester/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"unist-util-flat-filter": "1.0.0",
"unist-util-parents": "1.0.3",
"unist-util-select": "4.0.0",
"unzipper": "^0.12.3",
"winston": "3.3.3",
"yargs": "17.1.1"
},
Expand All @@ -36,6 +37,7 @@
"@types/semver": "^7.3.8",
"@types/tar-fs": "^2.0.1",
"@types/unist": "^2.0.6",
"@types/unzipper": "^0.10.10",
"@vercel/ncc": "0.34.0",
"jest": "^27.1.1",
"lint-staged": "^12.0.0",
Expand Down
63 changes: 61 additions & 2 deletions targets/ingester/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import * as semver from "semver";
import * as tar from "tar-fs";
import yargs from "yargs";
import * as unzipper from "unzipper";
import { promises as fs } from "fs";
import { createWriteStream } from "fs";

import type { CdtnDocument } from ".";
import { updateKaliArticles, updateLegiArticles } from "./articles";
Expand Down Expand Up @@ -58,7 +61,12 @@
tar.extract(getPkgPath(pkgName), {
map: function mapHeader(header) {
// npm tarball have a root directory called /package so we remove it when extracting
header.name = header.name.replace("package/", "");
header.name = header.name.replace(
pkgName === "@socialgouv/fiches-travail-data"
? "fiches-travail-data-test-mt-dsfr/"
: "package/",
""
);
return header;
},
})
Expand All @@ -68,6 +76,44 @@
});
}

async function downloadZip(pkgName: string, url: string) {
return new Promise((resolve, reject) => {
getUri(url, async function (err, rs) {
if (err || !rs) {
reject(`Error while downloading package ${pkgName} - ${url}`);
return;
}

const outputDir = getPkgPath(pkgName);

await fs.mkdir(outputDir, { recursive: true });

rs.pipe(unzipper.Parse())
.on("entry", function (entry) {
const filePath = entry.path.replace(

Check failure

Code scanning / CodeQL

Arbitrary file access during archive extraction ("Zip Slip") High

Unsanitized archive entry, which may contain '..', is used in a file system operation.

Copilot Autofix AI 6 days ago

To fix the problem, we need to ensure that `entry.path` does not contain any directory traversal sequences such as `..` before it is used to construct `outputPath`. This can be achieved by validating `filePath` and ensuring that the resulting path stays within the intended directory.

  • We will add a check to ensure that `filePath` does not contain `..` and that the resulting path is within `outputDir`.
  • If the filePath is invalid, we will skip processing that entry and log a warning message.
  • This change will be made in the downloadZip function, specifically around the construction and usage of outputPath.
Suggested changeset 1
targets/ingester/src/cli.ts

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/targets/ingester/src/cli.ts b/targets/ingester/src/cli.ts
--- a/targets/ingester/src/cli.ts
+++ b/targets/ingester/src/cli.ts
@@ -97,4 +97,17 @@
 
+          // Validate filePath to prevent directory traversal
+          if (filePath.includes("..")) {
+            console.log('skipping bad path', filePath);
+            entry.autodrain();
+            return;
+          }
+
           const outputPath = path.join(outputDir, filePath);
 
+          if (!outputPath.startsWith(outputDir)) {
+            console.log('skipping bad path', filePath);
+            entry.autodrain();
+            return;
+          }
+
           if (entry.type === "Directory") {
EOF
@@ -97,4 +97,17 @@

// Validate filePath to prevent directory traversal
if (filePath.includes("..")) {
console.log('skipping bad path', filePath);
entry.autodrain();
return;
}

const outputPath = path.join(outputDir, filePath);

if (!outputPath.startsWith(outputDir)) {
console.log('skipping bad path', filePath);
entry.autodrain();
return;
}

if (entry.type === "Directory") {
Copilot is powered by AI and may make mistakes. Always verify output.
Positive Feedback
Negative Feedback

Provide additional feedback

Please help us improve GitHub Copilot by sharing more details about this comment.

Please select one or more of the options
"fiches-travail-data-test-mt-dsfr/",
""
);

const outputPath = path.join(outputDir, filePath);

Check warning on line 98 in targets/ingester/src/cli.ts

View workflow job for this annotation

GitHub Actions / Lint (ingester)

Unsafe argument of type `any` assigned to a parameter of type `string`

if (entry.type === "Directory") {
fs.mkdir(outputPath, { recursive: true }).catch(reject);
entry.autodrain();
} else {
entry.pipe(createWriteStream(outputPath)).on("error", reject);
}
})
.on("finish", resolve)
.on("error", (error) => {
reject(
`Error while extracting package ${pkgName} - ${error.message}`
);
});
});
});
}

const dataPackages = [
{ getDocuments: getCdtDocuments, pkgName: "@socialgouv/legi-data" },
{
Expand All @@ -87,6 +133,12 @@
];

async function getPackageInfo(pkgName: string) {
if (pkgName === "@socialgouv/fiches-travail-data") {
return {
url: "https://github.com/SocialGouv/fiches-travail-data/archive/refs/heads/test-mt-dsfr.zip",
version: "v4.700.1",
};
}
const pkgInfo: PackageInfo = await got(
`http://registry.npmjs.org/${pkgName}/latest`
).json();
Expand All @@ -108,7 +160,11 @@
>();
for (const { pkgName, disableSlugUpdate, getDocuments } of dataPackages) {
const pkgInfo = await getPackageInfo(pkgName);
await download(pkgName, pkgInfo.url);
if (pkgName === "@socialgouv/fiches-travail-data") {
await downloadZip(pkgName, pkgInfo.url);
} else {
await download(pkgName, pkgInfo.url);
}

const ingestedVersion = await getLastIngestedVersion(pkgName);
if (
Expand All @@ -132,7 +188,9 @@
pkgName,
{ version, getDocuments, disableSlugUpdate },
] of packagesToUpdate) {
console.log(`Start package: ${pkgName}`);
if (!getDocuments) {
console.log(`Get document not defined, skip it !`);
continue;
}
console.time(`update ${pkgName}`);
Expand All @@ -141,7 +199,7 @@
console.timeEnd(` getDocuments ${pkgName}`);
console.log(` ${pkgName}: ${documents.length} documents`);
if (!args.dryRun && documents.length > 0) {
await initDocAvailabity(documents[0].source);

Check warning on line 202 in targets/ingester/src/cli.ts

View workflow job for this annotation

GitHub Actions / Lint (ingester)

Unsafe argument of type `any` assigned to a parameter of type `string`
console.log(
` ready to ingest ${documents.length} documents from ${pkgName}`
);
Expand Down Expand Up @@ -185,6 +243,7 @@
console.log(`Finish ingest ${data.length} documents`);
})
.catch((err) => {
console.info("Failed :(");
console.error(err);
process.exit(1);
});
5 changes: 3 additions & 2 deletions targets/ingester/src/transform/fiche-travail-emploi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ const fetchSections = async (
return await pMap(
sections,
async ({ references, ...section }) => {
let htmlWithGlossary = section.html;
const htmlWithGlossary = section.html;
if (section.html && section.html !== "") {
/*
const fetchResult: any = await got
.post(`${URL_EXPORT}/glossary`, {
json: {
Expand All @@ -77,12 +78,12 @@ const fetchSections = async (
} else {
htmlWithGlossary = fetchResult.result;
}
*/
} else {
console.warn(
`No html found for this section : ${JSON.stringify(section)}`
);
}

return {
...section,
htmlWithGlossary,
Expand Down
55 changes: 53 additions & 2 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7148,6 +7148,15 @@ __metadata:
languageName: node
linkType: hard

"@types/unzipper@npm:^0.10.10":
version: 0.10.10
resolution: "@types/unzipper@npm:0.10.10"
dependencies:
"@types/node": "*"
checksum: 4ba5f6c4c5a892f5f5ce7724a4c3f2ea772a29043e296a28b725162ffff8fb25d2d0995c5536705e13bfbde7765d085f0da5408b25946457d050ac4b75aaefee
languageName: node
linkType: hard

"@types/uuid@npm:^9.0.5":
version: 9.0.7
resolution: "@types/uuid@npm:9.0.7"
Expand Down Expand Up @@ -8421,6 +8430,13 @@ __metadata:
languageName: node
linkType: hard

"bluebird@npm:~3.7.2":
version: 3.7.2
resolution: "bluebird@npm:3.7.2"
checksum: 869417503c722e7dc54ca46715f70e15f4d9c602a423a02c825570862d12935be59ed9c7ba34a9b31f186c017c23cac6b54e35446f8353059c101da73eac22ef
languageName: node
linkType: hard

"body-parser@npm:1.20.1":
version: 1.20.1
resolution: "body-parser@npm:1.20.1"
Expand Down Expand Up @@ -10726,6 +10742,15 @@ __metadata:
languageName: node
linkType: hard

"duplexer2@npm:~0.1.4":
version: 0.1.4
resolution: "duplexer2@npm:0.1.4"
dependencies:
readable-stream: ^2.0.2
checksum: 744961f03c7f54313f90555ac20284a3fb7bf22fdff6538f041a86c22499560eb6eac9d30ab5768054137cb40e6b18b40f621094e0261d7d8c35a37b7a5ad241
languageName: node
linkType: hard

"duplexer3@npm:^0.1.4":
version: 0.1.5
resolution: "duplexer3@npm:0.1.5"
Expand Down Expand Up @@ -12265,6 +12290,17 @@ __metadata:
languageName: node
linkType: hard

"fs-extra@npm:^11.2.0":
version: 11.2.0
resolution: "fs-extra@npm:11.2.0"
dependencies:
graceful-fs: ^4.2.0
jsonfile: ^6.0.1
universalify: ^2.0.0
checksum: b12e42fa40ba47104202f57b8480dd098aa931c2724565e5e70779ab87605665594e76ee5fb00545f772ab9ace167fe06d2ab009c416dc8c842c5ae6df7aa7e8
languageName: node
linkType: hard

"fs-extra@npm:^8.1.0":
version: 8.1.0
resolution: "fs-extra@npm:8.1.0"
Expand Down Expand Up @@ -12861,7 +12897,7 @@ __metadata:
languageName: node
linkType: hard

"graceful-fs@npm:^4.1.11, graceful-fs@npm:^4.1.15, graceful-fs@npm:^4.1.2, graceful-fs@npm:^4.1.6, graceful-fs@npm:^4.2.0, graceful-fs@npm:^4.2.11, graceful-fs@npm:^4.2.4, graceful-fs@npm:^4.2.6, graceful-fs@npm:^4.2.9":
"graceful-fs@npm:^4.1.11, graceful-fs@npm:^4.1.15, graceful-fs@npm:^4.1.2, graceful-fs@npm:^4.1.6, graceful-fs@npm:^4.2.0, graceful-fs@npm:^4.2.11, graceful-fs@npm:^4.2.2, graceful-fs@npm:^4.2.4, graceful-fs@npm:^4.2.6, graceful-fs@npm:^4.2.9":
version: 4.2.11
resolution: "graceful-fs@npm:4.2.11"
checksum: ac85f94da92d8eb6b7f5a8b20ce65e43d66761c55ce85ac96df6865308390da45a8d3f0296dd3a663de65d30ba497bd46c696cc1e248c72b13d6d567138a4fc7
Expand Down Expand Up @@ -13520,6 +13556,7 @@ __metadata:
"@types/semver": ^7.3.8
"@types/tar-fs": ^2.0.1
"@types/unist": ^2.0.6
"@types/unzipper": ^0.10.10
"@vercel/ncc": 0.34.0
get-uri: 3.0.2
got: 11.8.5
Expand All @@ -13539,6 +13576,7 @@ __metadata:
unist-util-flat-filter: 1.0.0
unist-util-parents: 1.0.3
unist-util-select: 4.0.0
unzipper: ^0.12.3
winston: 3.3.3
yargs: 17.1.1
languageName: unknown
Expand Down Expand Up @@ -20058,7 +20096,7 @@ __metadata:
languageName: node
linkType: hard

"readable-stream@npm:^2.0.0, readable-stream@npm:^2.1.4, readable-stream@npm:~2.3.6":
"readable-stream@npm:^2.0.0, readable-stream@npm:^2.0.2, readable-stream@npm:^2.1.4, readable-stream@npm:~2.3.6":
version: 2.3.8
resolution: "readable-stream@npm:2.3.8"
dependencies:
Expand Down Expand Up @@ -22815,6 +22853,19 @@ __metadata:
languageName: node
linkType: hard

"unzipper@npm:^0.12.3":
version: 0.12.3
resolution: "unzipper@npm:0.12.3"
dependencies:
bluebird: ~3.7.2
duplexer2: ~0.1.4
fs-extra: ^11.2.0
graceful-fs: ^4.2.2
node-int64: ^0.4.0
checksum: 2e3296d1fad307b02b3d0f3e9c4ac1bdd56047e66fe5108a9e580b417f4ac9b07c31e9ded3e006e01edaaba3e20b13c638bd3c893600f75c589b6e0f778d9ffd
languageName: node
linkType: hard

"upath@npm:^2.0.1":
version: 2.0.1
resolution: "upath@npm:2.0.1"
Expand Down
Loading