From c439282adaf068fc70dea3e56325312e5c481a1e Mon Sep 17 00:00:00 2001 From: Matthew <38759997+friendlymatthew@users.noreply.github.com> Date: Tue, 30 Jan 2024 16:52:45 -0500 Subject: [PATCH] Add query validation to library (#77) * add query validation * refactor --- examples/client/index.html | 131 ++++----------------------- src/{ => db}/database.ts | 27 +++--- src/db/query-validation.ts | 178 +++++++++++++++++++++++++++++++++++++ src/index-file.ts | 14 +-- src/index.ts | 4 +- src/tests/database.test.ts | 119 ------------------------- src/tests/query.test.ts | 150 +++++++++++++++++++++++++++++++ 7 files changed, 363 insertions(+), 260 deletions(-) rename src/{ => db}/database.ts (93%) create mode 100644 src/db/query-validation.ts delete mode 100644 src/tests/database.test.ts create mode 100644 src/tests/query.test.ts diff --git a/examples/client/index.html b/examples/client/index.html index b48a3e2f..d05f9e06 100644 --- a/examples/client/index.html +++ b/examples/client/index.html @@ -13,15 +13,13 @@ "green_tripdata_2023-01.csv.index", Appendable.FormatType.Csv ).then(async (db) => { - let dbFields = new Set(); - let fieldTypes = {}; + let dbFields = []; let queryHeaders = []; // populate fields db.fields().then((fields) => { fields.map((field) => { - dbFields.add(field.fieldName); - fieldTypes[field.fieldName] = field.fieldType; + dbFields.push(field.fieldName); }); document.getElementById("fields").innerHTML = JSON.stringify( @@ -38,129 +36,30 @@ }); // then execute the query - document.getElementById("execute").onclick = () => { + document.getElementById("execute").onclick = async () => { document.getElementById("results").innerHTML = ""; const queryJson = JSON.parse(editor.getValue()); - const validationResult = validateQuery( - queryJson, - dbFields, - fieldTypes - ); - - if (validationResult !== "Valid Query") { - document.getElementById("results").innerHTML = validationResult; - return; + try { + const query = await db.query(queryJson); + let queryHeaders 
= queryJson.select ?? dbFields; + await bindQuery(query, queryHeaders); + } catch (error) { + console.log("error: ", error); + document.getElementById("results-header").innerHTML = ""; + document.getElementById("results").innerHTML = error.message; } - - const query = db.query(queryJson); - let queryHeaders = queryJson.select ?? Array.from(dbFields); - bindQuery(query, queryHeaders); }; document.getElementById("results").innerHTML = ""; const queryJson = JSON.parse(editor.getValue()); - queryHeaders = queryJson.select ?? Array.from(dbFields); + queryHeaders = queryJson.select ?? dbFields; const query = db.query(JSON.parse(editor.getValue())); - bindQuery(query, queryHeaders); + await bindQuery(query, queryHeaders); }); - function validateQuery(query, dbFields, fieldTypes) { - if ( - !query.where || - !Array.isArray(query.where) || - query.where.length === 0 - ) { - return "Error: Missing 'where' clause."; - } - - // validate the `where` clause - for (const whereNode of query.where) { - if (!["<", "<=", "==", ">=", ">"].includes(whereNode.operation)) { - return "Error: Invalid operation in 'where' clause."; - } - if (typeof whereNode.key !== "string") { - return "Error: 'key' in 'where' clause must be a string."; - } - - if (!dbFields.has(whereNode.key)) { - return `Error: key: ${whereNode.key} in 'where' clause does not exist in dataset.`; - } - - if (typeof whereNode.value === "undefined") { - return "Error: 'value' in 'where' clause is missing."; - } - - const fieldType = fieldTypes[whereNode.key]; - - if (whereNode.value === null) { - if ( - !Appendable.containsType(fieldType, Appendable.FieldType.Null) - ) { - return `Error: 'key: ${whereNode.key} does not have type: null.`; - } - } - if (typeof whereNode.value === "boolean") { - if ( - !Appendable.containsType(fieldType, Appendable.FieldType.Boolean) - ) { - return `Error: 'key: ${whereNode.key} does not have type: boolean.`; - } - } - if ( - typeof whereNode.value === "number" || - typeof whereNode.value === 
"bigint" - ) { - if ( - !Appendable.containsType(fieldType, Appendable.FieldType.Number) - ) { - return `Error: 'key: ${whereNode.key} does not have type: number.`; - } - } - if (typeof whereNode.value === "string") { - if ( - !Appendable.containsType(fieldType, Appendable.FieldType.String) - ) { - return `Error: 'key: ${whereNode.key} does not have type: string.`; - } - } - } - - if (query.orderBy) { - // validate the `orderBy` clause - if (!Array.isArray(query.orderBy) || query.orderBy.length === 0) { - return "Error: Invalid 'orderby' clause."; - } - - const orderBy = query.orderBy[0]; - - if (!["ASC", "DESC"].includes(orderBy.direction)) { - return "Error: Invalid direction in `orderBy`."; - } - - if (orderBy.key !== query.where[0].key) { - return "Error: 'key' in `orderBy` must match `key` in `where` clause"; - } - } - - if (query.select) { - // validate the `selectFields` clause - if (!Array.isArray(query.select) || query.select.length === 0) { - return "Error: Invalid 'selectFields' clause."; - } - - for (const field of query.select) { - if (!dbFields.has(field)) { - return `Error: 'key': ${field} in 'selectFields' clause does not exist in dataset.`; - } - } - } - - return "Valid Query"; - } - async function bindQuery(query, headers) { const resultsHeaderElement = document.getElementById("results-header"); resultsHeaderElement.innerHTML = ""; @@ -237,6 +136,10 @@ max-height: calc(100vh - 50px); overflow-y: auto; } + #results { + overflow-y: auto; + max-height: calc(100vh - 670px); + } #results-header { width: max-content; } diff --git a/src/database.ts b/src/db/database.ts similarity index 93% rename from src/database.ts rename to src/db/database.ts index 4d309334..8cf90c50 100644 --- a/src/database.ts +++ b/src/db/database.ts @@ -1,23 +1,24 @@ -import { FormatType } from "."; -import { DataFile } from "./data-file"; -import { IndexFile, VersionedIndexFile } from "./index-file"; +import { FormatType } from ".."; +import { DataFile } from "../data-file"; 
+import { IndexFile, VersionedIndexFile } from "../index-file"; +import { validateQuery } from "./query-validation"; -type Schema = { +export type Schema = { [key: string]: {}; }; -type WhereNode = { +export type WhereNode = { operation: "<" | "<=" | "==" | ">=" | ">"; key: keyof T; value: T[K]; }; -type OrderBy = { +export type OrderBy = { key: keyof T; direction: "ASC" | "DESC"; }; -type SelectField = keyof T; +export type SelectField = keyof T; export type Query = { where?: WhereNode[]; @@ -32,11 +33,6 @@ export enum FieldType { Null = 1 << 5, } -// given a fieldType and the desired type, this function performs a bitwise operation to test membership -export function containsType(fieldType: bigint, desiredType: FieldType) { - return (fieldType & BigInt(desiredType)) !== BigInt(0); -} - function parseIgnoringSuffix( x: string, format: FormatType, @@ -151,6 +147,13 @@ export class Database { // convert each of the where nodes into a range of field values. const headers = await this.indexFile.indexHeaders(); const headerFields = headers.map((header) => header.fieldName); + + try { + await validateQuery(query, headers); + } catch (error) { + throw new Error(`Query validation failed: ${(error as Error).message}`); + } + const fieldRanges = await Promise.all( (query.where ?? []).map(async ({ key, value, operation }) => { const header = headers.find((header) => header.fieldName === key); diff --git a/src/db/query-validation.ts b/src/db/query-validation.ts new file mode 100644 index 00000000..d000c27d --- /dev/null +++ b/src/db/query-validation.ts @@ -0,0 +1,178 @@ +import { Header } from "../index-file"; +import { + FieldType, + OrderBy, + Query, + Schema, + SelectField, + WhereNode, +} from "./database"; + +/** + * containsType checks if the given compositeType includes the specified singleType. + * It uses a bitwise AND operation to determine if the singleType's bit is set in the compositeType. 
+ *
+ * @param {bigint} compositeType - A bigint representing a composite of multiple types.
+ * @param {FieldType} singleType - The specific type to check for within the compositeType.
+ * @returns {boolean} - Returns true if singleType is included in compositeType, false otherwise.
+ */
+function containsType(compositeType: bigint, singleType: FieldType): boolean {
+  return (compositeType & BigInt(singleType)) !== BigInt(0);
+}
+
+/**
+ * validateWhere validates the 'where' clause of the query.
+ *
+ * @param {WhereNode[] | undefined} where - The 'where' clause to validate.
+ * @param {Header[]} headers - List of headers to check field existence and type compatibility.
+ * @throws {Error} - Throws an error if the 'where' clause is missing, invalid, refers to non-existent fields, or holds a value whose type the field does not contain.
+ */
+function validateWhere(
+  where: WhereNode[] | undefined,
+  headers: Header[]
+): void {
+  if (!where || !Array.isArray(where) || where.length === 0) {
+    throw new Error("Missing 'where' clause.");
+  }
+
+  for (const whereNode of where) {
+    if (!["<", "<=", "==", ">=", ">"].includes(whereNode.operation)) {
+      throw new Error("Invalid operation in 'where' clause.");
+    }
+
+    if (typeof whereNode.key !== "string") {
+      throw new Error("'key' in 'where' clause must be a string.");
+    }
+
+    const header = headers.find((h) => h.fieldName === whereNode.key);
+
+    if (!header) {
+      throw new Error(
+        `key: ${whereNode.key} in 'where' clause does not exist in dataset.`
+      );
+    }
+
+    if (typeof whereNode.value === "undefined") {
+      throw new Error("'value' in 'where' clause is missing.");
+    }
+
+    const headerType = header.fieldType;
+
+    if (whereNode.value === null) {
+      if (!containsType(headerType, FieldType.Null)) {
+        throw new Error(`key: ${whereNode.key} does not have type: null.`);
+      }
+    } else {
+      function fieldTypeError(
+        key: string,
+        actual: FieldType,
+        expected: bigint
+      ): string {
+        return `key: ${key} does not have type: ${actual}. Expected: ${expected}`;
+      }
+
+      switch (typeof whereNode.value) {
+        case "bigint":
+        case "number":
+          if (!containsType(headerType, FieldType.Number)) {
+            throw new Error(
+              fieldTypeError(whereNode.key, FieldType.Number, headerType)
+            );
+          }
+          break;
+        case "boolean":
+          if (!containsType(headerType, FieldType.Boolean)) {
+            throw new Error(
+              fieldTypeError(whereNode.key, FieldType.Boolean, headerType)
+            );
+          }
+          break;
+        case "string":
+          if (!containsType(headerType, FieldType.String)) {
+            throw new Error(
+              fieldTypeError(whereNode.key, FieldType.String, headerType)
+            );
+          }
+          break;
+        default:
+          throw new Error(`Unsupported type for key: ${whereNode.key}`);
+      }
+    }
+  }
+}
+
+/**
+ * validateOrderBy validates the 'orderBy' clause of the query.
+ * Currently supports strictly one 'orderBy' condition that must match the 'where' clause key.
+ *
+ * @param {OrderBy[] | undefined} orderBy - The 'orderBy' clause to validate.
+ * @param {string} whereKey - The key used in the 'where' clause for compatibility.
+ * @throws {Error} Throws an error if the 'orderBy' clause is invalid or doesn't match the 'where' clause key.
+ */
+function validateOrderBy(
+  orderBy: OrderBy[] | undefined,
+  whereKey: string
+): void {
+  if (orderBy) {
+    if (!Array.isArray(orderBy) || orderBy.length === 0) {
+      throw new Error("Invalid 'orderBy' clause.");
+    }
+
+    // Note: only the first orderBy entry is checked, and its key must equal the where-clause key passed in. When we add composite indexes and complex querying, refactor.
+    const orderByObj = orderBy[0];
+
+    if (!["ASC", "DESC"].includes(orderByObj.direction)) {
+      throw new Error("Invalid direction in `orderBy`.");
+    }
+
+    if (orderByObj.key !== whereKey) {
+      throw new Error("'key' in `orderBy` must match `key` in `where` clause");
+    }
+  }
+}
+
+/**
+ * validateSelect validates the 'select' fields of a query.
+ *
+ * @param {SelectField[] | undefined} select - The 'select' clause to validate.
+ * @param {Header[]} headers - List of headers to check for field existence.
+ * @throws {Error} Throws an error if the 'select' clause is not a non-empty array or names a field that doesn't exist in headers.
+ */
+function validateSelect(
+  select: SelectField[] | undefined,
+  headers: Header[]
+): void {
+  if (select) {
+    if (!Array.isArray(select) || select.length === 0) {
+      throw new Error("Invalid 'select' clause");
+    }
+
+    for (const field of select) {
+      const header = headers.find((h) => h.fieldName === field);
+
+      if (!header) {
+        throw new Error(
+          `'key': ${field as string} in 'select' clause does not exist in dataset.`
+        );
+      }
+    }
+  }
+}
+
+/**
+ * validateQuery checks the structure and syntax of the query against the provided headers.
+ * It ensures that the fields specified in the query are valid and exist in the headers.
+ * If any part of the query is invalid (e.g., a field doesn't exist), it throws an error.
+ *
+ * @param {Query} query - The query object to validate.
+ * @param {Header[]} headers - The headers against which to validate the query fields.
+ * @throws {Error} Throws an error if query is invalid.
+ */ +export async function validateQuery( + query: Query, + headers: Header[] +): Promise { + validateWhere(query.where, headers); + validateOrderBy(query.orderBy, query.where![0].key as string); + validateSelect(query.select, headers); +} diff --git a/src/index-file.ts b/src/index-file.ts index d0e40b89..f7e35bbe 100644 --- a/src/index-file.ts +++ b/src/index-file.ts @@ -60,13 +60,7 @@ export interface VersionedIndexFile { indexLength: number; dataCount: number; }>; - indexHeaders(): Promise< - { - fieldName: string; - fieldType: bigint; - indexRecordCount: bigint; - }[] - >; + indexHeaders(): Promise; indexRecord( field: keyof T, offset: number @@ -85,11 +79,7 @@ class IndexFileV1 implements VersionedIndexFile { indexLength: number; dataCount: number; }; - private _indexHeaders?: { - fieldName: string; - fieldType: bigint; - indexRecordCount: bigint; - }[]; + private _indexHeaders?: Header[]; private static INDEX_RECORD_SIZE = 18; diff --git a/src/index.ts b/src/index.ts index 410b53df..9702bb83 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,5 @@ import { DataFile } from "./data-file"; -import { Database, FieldType, containsType } from "./database"; +import { Database, FieldType} from "./db/database"; import { IndexFile } from "./index-file"; import { RangeResolver } from "./resolver"; @@ -28,7 +28,6 @@ interface GlobalMap { Appendable: { init: Function; FieldType: typeof FieldType; - containsType: typeof containsType; FormatType: typeof FormatType; }; } @@ -40,6 +39,5 @@ declare global { globalThis.Appendable = { init, FieldType, - containsType, FormatType, }; diff --git a/src/tests/database.test.ts b/src/tests/database.test.ts deleted file mode 100644 index d2fe07f5..00000000 --- a/src/tests/database.test.ts +++ /dev/null @@ -1,119 +0,0 @@ -import { Database, FieldType, Query, containsType } from "../database"; -import { DataFile } from "../data-file"; -import { VersionedIndexFile } from "../index-file"; -import { FormatType } from ".."; - 
-jest.mock("../data-file"); -jest.mock("../index-file"); - -describe("test query relation", () => { - let mockDataFile: jest.Mocked; - let mockIndexFile: jest.Mocked>; - let database: Database; - beforeEach(() => { - (DataFile.forUrl as jest.Mock).mockReturnValue({ - get: jest.fn().mockResolvedValue("mocked response"), - }); - mockDataFile = DataFile.forUrl( - "http://example.com/data" - ) as jest.Mocked; - - mockIndexFile = { - indexFileHeader: jest.fn(), - indexHeaders: jest.fn(), - indexRecord: jest.fn(), - dataRecord: jest.fn(), - } as jest.Mocked>; - - // instantiate a Database object with given mocked data file and index file - database = Database.forDataFileAndIndexFile( - mockDataFile, - mockIndexFile, - FormatType.Jsonl - ); - }); - - /* - This test case tests the query function in `database.ts`. - */ - it("should handle a simple query", async () => { - mockIndexFile.indexHeaders.mockResolvedValue([ - { - fieldName: "weight", - fieldType: BigInt(4), - indexRecordCount: BigInt(1), - }, - { - fieldName: "age", - fieldType: BigInt(4), - indexRecordCount: BigInt(1), - }, - ]); - - mockIndexFile.indexRecord.mockResolvedValue({ - dataNumber: 1, - fieldStartByteOffset: 0, - fieldLength: 10, - }); - - mockIndexFile.dataRecord.mockResolvedValue({ - startByteOffset: 0, - endByteOffset: 10, - }); - - // Adjust the mocked DataFile.get to return a string that represents a valid JSON object - mockDataFile.get.mockImplementation( - async (startByteOffset, endByteOffset) => { - const mockData = { weight: 25, age: 30 }; // Mock data - const field = "weight"; // Field being queried - return JSON.stringify(mockData[field]); - } - ); - - type PersonSchema = { - weight: number; - age: number; - }; - - const query: Query = { - where: [ - { - operation: "<", - key: "weight", - value: 30, - }, - ], - orderBy: [ - { - key: "weight", - direction: "ASC", - }, - ], - }; - - const results = []; - for await (const item of database.query(query)) { - results.push(item); - } - - 
expect(results).toEqual([25]);
-  });
-});
-
-describe("test field type", () => {
-  it("check valid type", async () => {
-    const testCases = [
-      { fieldType: BigInt(2), desiredType: FieldType.Number, expected: true },
-      { fieldType: BigInt(34), desiredType: FieldType.Null, expected: true },
-      { fieldType: BigInt(2), desiredType: FieldType.Null, expected: false },
-      { fieldType: BigInt(1), desiredType: FieldType.String, expected: true },
-    ];
-
-    testCases.forEach(({ fieldType, desiredType, expected }) => {
-      const result = containsType(fieldType, desiredType);
-
-      expect(result).toEqual(expected);
-    });
-  });
-});
-
diff --git a/src/tests/query.test.ts b/src/tests/query.test.ts
new file mode 100644
index 00000000..4514721f
--- /dev/null
+++ b/src/tests/query.test.ts
@@ -0,0 +1,150 @@
+import { Query } from "../db/database";
+import { validateQuery } from "../db/query-validation";
+import { Header } from "../index-file";
+
+describe("test validate queries", () => {
+  interface MockSchema {
+    [key: string]: {};
+    VendorID: {};
+    store_and_fwd_flag: {};
+    fare_amount: {};
+    payment_type: {};
+  }
+
+  const headers: Header[] = [
+    {
+      fieldName: "VendorID",
+      fieldType: BigInt(2),
+      indexRecordCount: BigInt(683211),
+    },
+    {
+      fieldName: "store_and_fwd_flag",
+      fieldType: BigInt(33),
+      indexRecordCount: BigInt(423),
+    },
+    {
+      fieldName: "fare_amount",
+      fieldType: BigInt(2),
+      indexRecordCount: BigInt(68211),
+    },
+    {
+      fieldName: "payment_type",
+      fieldType: BigInt(34),
+      indexRecordCount: BigInt(63887),
+    },
+  ];
+
+  const validQueries: Query[] = [
+    {
+      where: [
+        {
+          operation: "==",
+          key: "VendorID",
+          value: 1,
+        },
+      ],
+    },
+    {
+      where: [
+        {
+          operation: "<",
+          key: "fare_amount",
+          value: 100,
+        },
+      ],
+      orderBy: [
+        {
+          key: "fare_amount",
+          direction: "ASC",
+        },
+      ],
+    },
+    {
+      where: [
+        {
+          operation: ">=",
+          key: "payment_type",
+          value: 300,
+        },
+      ],
+      orderBy: [
+        {
+          key: "payment_type",
+          direction: "DESC",
+        },
+      ],
+      select: ["payment_type", "fare_amount"],
+    },
+    {
+      where: [
+        {
+          operation: "==",
+          key: "store_and_fwd_flag",
+          value: "",
+        },
+      ],
+      select: ["fare_amount", "payment_type"],
+    },
+  ];
+
+  validQueries.forEach((query, index) => {
+    it(`test valid query ${index}`, async () => {
+      // validateQuery is async, so assert on the promise itself; wrapping it
+      // in `expect(async () => ...).not.toThrow()` can never fail.
+      await expect(validateQuery(query, headers)).resolves.toBeUndefined();
+    });
+  });
+
+  const notValidQueries: Query[] = [
+    {
+      where: [
+        {
+          operation: "<=",
+          key: "vendorid",
+          value: 1,
+        },
+      ],
+    },
+    {
+      where: [
+        {
+          operation: "==",
+          key: "store_and_fwd_flag",
+          value: 10,
+        },
+      ],
+      orderBy: [
+        {
+          key: "store_and_flag",
+          direction: "ASC",
+        },
+      ],
+    },
+    {
+      where: [
+        {
+          operation: "<",
+          key: "payment_type",
+          value: 100,
+        },
+      ],
+      select: ["payment_type", "vendorid", "store_and_fwd_flag"],
+    },
+    {
+      where: [
+        {
+          operation: "==",
+          key: "payment_type",
+          value: "",
+        }
+      ],
+      select: ["payment_type"]
+    }
+  ];
+
+  notValidQueries.forEach((query, index) => {
+    it(`test invalid query ${index}`, async () => {
+      await expect(validateQuery(query, headers)).rejects.toThrow();
+    });
+  });
+});