From 94fdc30192b3326cea3741be92cae32a680095ec Mon Sep 17 00:00:00 2001 From: Giang Bui Date: Tue, 15 Feb 2022 19:29:02 -0500 Subject: [PATCH 1/2] implement syllabi parser api --- package-lock.json | 148 +++++++++++++++++++++++++++++++++++++++++- package.json | 5 +- src/index.js | 4 +- src/routes/manager.js | 54 +++++++++++++++ 4 files changed, 207 insertions(+), 4 deletions(-) create mode 100644 src/routes/manager.js diff --git a/package-lock.json b/package-lock.json index 53e4fb7..630cf89 100644 --- a/package-lock.json +++ b/package-lock.json @@ -25,6 +25,15 @@ "integrity": "sha512-rr+OQyAjxze7GgWrSaJwydHStIhHq2lvY3BOC2Mj7KnzI7XK0Uw1TOOdI9lDoajEbSWLiYgoo4f1R51erQfhPQ==", "dev": true }, + "@types/express-rate-limit": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@types/express-rate-limit/-/express-rate-limit-6.0.0.tgz", + "integrity": "sha512-nZxo3nwU20EkTl/f2eGdndQkDIJYwkXIX4S3Vrp2jMdSdFJ6AWtIda8gOz0wiMuOFoeH/UUlCAiacz3x3eWNFA==", + "dev": true, + "requires": { + "express-rate-limit": "*" + } + }, "abbrev": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", @@ -69,6 +78,11 @@ "picomatch": "^2.0.4" } }, + "append-field": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz", + "integrity": "sha1-HjRA6RXwsSA9I3SOeO3XubW0PlY=" + }, "array-flatten": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", @@ -180,6 +194,43 @@ "resolved": "https://registry.npmjs.org/bson/-/bson-1.1.4.tgz", "integrity": "sha512-S/yKGU1syOMzO86+dGpg2qGoDL0zvzcb262G+gqEy6TgP6rt6z6qxSFX/8X6vLC91P7G7C3nLs0+bvDzmvBA3Q==" }, + "buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==" + }, + "busboy": { + "version": "0.2.14", + "resolved": 
"https://registry.npmjs.org/busboy/-/busboy-0.2.14.tgz", + "integrity": "sha1-bCpiLvz0fFe7vh4qnDetNseSVFM=", + "requires": { + "dicer": "0.2.5", + "readable-stream": "1.1.x" + }, + "dependencies": { + "isarray": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", + "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=" + }, + "readable-stream": { + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz", + "integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.1", + "isarray": "0.0.1", + "string_decoder": "~0.10.x" + } + }, + "string_decoder": { + "version": "0.10.31", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", + "integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=" + } + } + }, "bytes": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz", @@ -308,6 +359,17 @@ "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", "dev": true }, + "concat-stream": { + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz", + "integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==", + "requires": { + "buffer-from": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^2.2.2", + "typedarray": "^0.0.6" + } + }, "configstore": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/configstore/-/configstore-5.0.1.tgz", @@ -423,6 +485,38 @@ "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" }, + "dicer": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/dicer/-/dicer-0.2.5.tgz", + "integrity": "sha1-WZbAhrszIYyBLAkL3cCc0S+stw8=", + "requires": { + "readable-stream": "1.1.x", + "streamsearch": "0.1.2" + }, + "dependencies": { + "isarray": { + "version": "0.0.1", + "resolved": 
"https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", + "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=" + }, + "readable-stream": { + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz", + "integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.1", + "isarray": "0.0.1", + "string_decoder": "~0.10.x" + } + }, + "string_decoder": { + "version": "0.10.31", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", + "integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=" + } + } + }, "dot-prop": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/dot-prop/-/dot-prop-5.2.0.tgz", @@ -924,8 +1018,15 @@ "minimist": { "version": "1.2.5", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", - "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", - "dev": true + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" + }, + "mkdirp": { + "version": "0.5.5", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.5.tgz", + "integrity": "sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==", + "requires": { + "minimist": "^1.2.5" + } }, "mongodb": { "version": "3.5.9", @@ -999,6 +1100,21 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "multer": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/multer/-/multer-1.4.4.tgz", + "integrity": "sha512-2wY2+xD4udX612aMqMcB8Ws2Voq6NIUPEtD1be6m411T4uDH/VtL9i//xvcyFlTVfRdaBsk7hV5tgrGQqhuBiw==", + "requires": { + "append-field": "^1.0.0", + "busboy": "^0.2.11", + "concat-stream": "^1.5.2", + "mkdirp": "^0.5.4", + "object-assign": "^4.1.1", + "on-finished": "^2.3.0", + 
"type-is": "^1.6.4", + "xtend": "^4.0.0" + } + }, "negotiator": { "version": "0.6.2", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", @@ -1112,6 +1228,19 @@ "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" }, + "pdfdataextract": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/pdfdataextract/-/pdfdataextract-3.2.0.tgz", + "integrity": "sha512-t4W7h+cdr/aefdftzxmf+3w4ntVO70OlOFAGgH2zrbc+lDmYKIzAUXJhP+zpIqK6SnkRnZrQOC0fv/sejUJnrg==", + "requires": { + "pdfjs-dist": "2.10.377" + } + }, + "pdfjs-dist": { + "version": "2.10.377", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-2.10.377.tgz", + "integrity": "sha512-i0jRShtvgfsVQUNCoFYH4SVhPO3U0yhtiFLfZ0RR0B+68N+Vnwq+8B3cjWjLEwWGh8wg1XQ/sYMYKUlHn/Qpsw==" + }, "picomatch": { "version": "2.2.2", "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.2.tgz", @@ -1401,6 +1530,11 @@ "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz", "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=" }, + "streamsearch": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-0.1.2.tgz", + "integrity": "sha1-gIudDlb8Jz2Am6VzOOkpkZoanxo=" + }, "string-width": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.0.tgz", @@ -1521,6 +1655,11 @@ "mime-types": "~2.1.24" } }, + "typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=" + }, "typedarray-to-buffer": { "version": "3.1.5", "resolved": "https://registry.npmjs.org/typedarray-to-buffer/-/typedarray-to-buffer-3.1.5.tgz", @@ -1647,6 +1786,11 @@ "resolved": "https://registry.npmjs.org/xdg-basedir/-/xdg-basedir-4.0.0.tgz", "integrity": "sha512-PSNhEJDejZYV7h50BohL09Er9VaIefr2LMAf3OEmpCkjOi34eYyQYAXUTjEQtZJTKcF0E2UKTh+osDLsgNim9Q==", 
"dev": true + }, + "xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==" } } } diff --git a/package.json b/package.json index 0e45c6b..5bab56b 100644 --- a/package.json +++ b/package.json @@ -16,9 +16,12 @@ "express": "^4.17.1", "express-rate-limit": "^5.1.3", "mongodb": "^3.5.9", - "mongoose": "^5.9.20" + "mongoose": "^5.9.20", + "multer": "^1.4.4", + "pdfdataextract": "^3.2.0" }, "devDependencies": { + "@types/express-rate-limit": "^6.0.0", "nodemon": "^2.0.4" } } diff --git a/src/index.js b/src/index.js index 7834d89..28cf669 100644 --- a/src/index.js +++ b/src/index.js @@ -1,6 +1,7 @@ require("./db/mongoose") const express = require("express") -const courseRouter = require("./routes/course") +const courseRouter = require("./routes/course"); +const managerRouter = require("./routes/manager") const rateLimit = require("express-rate-limit"); const cors = require("cors") const axios = require("axios") @@ -16,6 +17,7 @@ app.set('trust proxy', 1); app.use(express.json()) // parse request as json app.use(courseRouter) +app.use(managerRouter) if (process.env.NODE_ENV === 'production') { app.use((req, res, next) => { diff --git a/src/routes/manager.js b/src/routes/manager.js new file mode 100644 index 0000000..1a3d9e9 --- /dev/null +++ b/src/routes/manager.js @@ -0,0 +1,54 @@ +const express = require("express") +const cors = require("cors") +const rateLimit = require("express-rate-limit"); +const router = new express.Router() +const { + PdfData, + VerbosityLevel +} = require('pdfdataextract'); +const multer = require('multer'); +const upload = multer(); + +let corsOptions = { + origin: 'https://timetable.viaplanner.ca', // allow only viaplanner to use the api + optionsSuccessStatus: 200 +} + +const limiter = rateLimit({ + windowMs: 15 * 60 * 1000, // 15 minutes + max: 100 // limit each IP to 100 requests per 15 
minutes, i.e. about 1 request every 9 seconds +}); + +const re = /(Class Participation|Lab|Assignment|Term Test|Final Exam|Other|Quiz|Presentations|Final Exam Changed To)(.*|(?:.*\n+)*)(On-going|TBA|TBD|\d{4}-\d{2}-\d{2})+ (\d{1,3}%)\n/gmi; + +router.post('/manager/parser', [upload.single('syllabus'), limiter, cors(corsOptions)], (req, res) => { + try { + PdfData.extract(req.file.buffer, { + verbosity: VerbosityLevel.ERRORS, // set the verbosity level for parsing + get: { // enable or disable data extraction (all are optional and enabled by default) + text: true, // get text of each page + }, + }).then((data) => { + let result = [] + let content = data.text.join(); + let matches = [...content.matchAll(re)]; + for (const match of matches) { + const item = { + type: match[1], + description: match[2].trim(), + deadline: /\d{4}-\d{2}-\d{2}/.test(match[3]) ? match[3] : null, + on_going: match[3] === 'On-going', + weight: match[4] + } + result.push(item); + } + res.status(200).send(result); + }); + } catch (e) { + res.status(500).send({ + message: e.message + }) + } +}) + +module.exports = router \ No newline at end of file From 1cef1268b6c2d7e57eec5eebb2ffe0df6497def6 Mon Sep 17 00:00:00 2001 From: Giang Bui Date: Sat, 9 Apr 2022 23:46:03 -0400 Subject: [PATCH 2/2] implement error handling --- src/routes/manager.js | 48 +++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/src/routes/manager.js b/src/routes/manager.js index 1a3d9e9..3d4af68 100644 --- a/src/routes/manager.js +++ b/src/routes/manager.js @@ -22,33 +22,37 @@ const limiter = rateLimit({ const re = /(Class Participation|Lab|Assignment|Term Test|Final Exam|Other|Quiz|Presentations|Final Exam Changed To)(.*|(?:.*\n+)*)(On-going|TBA|TBD|\d{4}-\d{2}-\d{2})+ (\d{1,3}%)\n/gmi; router.post('/manager/parser', [upload.single('syllabus'), limiter, cors(corsOptions)], (req, res) => { - try { - PdfData.extract(req.file.buffer, { - verbosity: VerbosityLevel.ERRORS, // set the
verbosity level for parsing - get: { // enable or disable data extraction (all are optional and enabled by default) - text: true, // get text of each page - }, - }).then((data) => { - let result = [] - let content = data.text.join(); - let matches = [...content.matchAll(re)]; - for (const match of matches) { - const item = { - type: match[1], - description: match[2].trim(), - deadline: /\d{4}-\d{2}-\d{2}/.test(match[3]) ? match[3] : null, - on_going: match[3] === 'On-going', - weight: match[4] - } - result.push(item); + PdfData.extract(req.file.buffer, { + verbosity: VerbosityLevel.ERRORS, // set the verbosity level for parsing + get: { // enable or disable data extraction (all are optional and enabled by default) + text: true, // get text of each page + }, + }).then((data) => { + let result = []; + let content = data.text.join(); + let matches = [...content.matchAll(re)]; + for (const match of matches) { + const item = { + type: match[1], + description: match[2].trim(), + deadline: /\d{4}-\d{2}-\d{2}/.test(match[3]) ? match[3] : null, + on_going: match[3] === 'On-going', + weight: match[4] } + result.push(item); + } + if (result.length === 0) { + res.status(400).send({ + message: 'Invalid syllabus format. Cannot parse the uploaded syllabus.' + }) + } else { res.status(200).send(result); - }); - } catch (e) { + } + }).catch(e => { res.status(500).send({ message: e.message }) - } + }) }) module.exports = router \ No newline at end of file