Skip to content

Commit

Permalink
feat: attempt fix for bm25
Browse files Browse the repository at this point in the history
  • Loading branch information
benjaminshafii committed Sep 24, 2024
1 parent 09d491b commit d5aafa3
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 26 deletions.
41 changes: 41 additions & 0 deletions web/app/api/(newai)/folders/embeddings/bm25.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import BM25TextSearch from "wink-bm25-text-search";

/**
 * Process-wide cache for a single BM25 index built from folder names.
 *
 * The cached index is tied to the exact folder list it was built from:
 * asking for an instance with a different list rebuilds the index, so search
 * results never refer to a folder set other than the one the caller passed.
 */
class BM25Singleton {
  /** Cached index, or null until the first build. */
  private static instance: BM25TextSearch | null = null;

  /** JSON fingerprint of the folder list the cached index was built from. */
  private static indexedFolders: string | null = null;

  /**
   * Builds a fresh index with one document per folder name and caches it.
   * Each document is `{ folder }` and uses the folder name as its id.
   *
   * The cache is only updated after the build has completed, so an exception
   * while adding documents or finalizing never leaves a half-built index
   * behind for later callers.
   *
   * NOTE(review): the published wink-bm25-text-search README describes
   * `defineConfig` / `definePrepTasks` / `consolidate` rather than constructor
   * options and `finalize` — confirm these calls against the installed version.
   *
   * @param folders Array of folder names to index
   * @returns The newly built index
   */
  private static rebuild(folders: string[]): BM25TextSearch {
    const index = BM25TextSearch({
      fieldsToIndex: ['folder'],
    });
    for (const folder of folders) {
      index.addDoc({ folder }, folder);
    }
    index.finalize();

    BM25Singleton.instance = index;
    BM25Singleton.indexedFolders = JSON.stringify(folders);
    return index;
  }

  /**
   * Returns the cached index for `folders`, building it when there is no
   * cached index yet or when the cached one was built from a different list.
   * The comparison is order-sensitive; a reordered list triggers a rebuild.
   *
   * @param folders Array of folder names
   * @returns BM25TextSearch instance indexed over exactly `folders`
   */
  static getInstance(folders: string[]): BM25TextSearch {
    const cached = BM25Singleton.instance;
    if (
      cached !== null &&
      BM25Singleton.indexedFolders === JSON.stringify(folders)
    ) {
      return cached;
    }
    return BM25Singleton.rebuild(folders);
  }

  /**
   * Discards any cached index and unconditionally rebuilds it from `folders`.
   *
   * @param folders New array of folder names
   */
  static resetInstance(folders: string[]): void {
    BM25Singleton.rebuild(folders);
  }
}

export default BM25Singleton;
34 changes: 8 additions & 26 deletions web/app/api/(newai)/folders/embeddings/route.ts
Original file line number Diff line number Diff line change
@@ -1,42 +1,24 @@
// import { NextRequest, NextResponse } from "next/server";
import { NextRequest, NextResponse } from "next/server";
import { handleAuthorization } from "@/lib/handleAuthorization";
import { incrementAndLogTokenUsage } from "@/lib/incrementAndLogTokenUsage";
import { openai } from "@ai-sdk/openai";
import { cosineSimilarity, embed, embedMany } from "ai";
import BM25TextSearch from "wink-bm25-text-search";
import { NextRequest, NextResponse } from "next/server";
import BM25Singleton from "./bm25";

// Weights for the keyword (BM25) score and the embedding score; both are 0.5,
// i.e. equal. NOTE(review): presumably used to blend the two scores further
// down in the handler — that usage is outside this view; confirm.
const KEYWORD_WEIGHT = 0.5;
const EMBEDDING_WEIGHT = 0.5;

// Module-level BM25 text-search instance, created once when this module is
// evaluated and shared by every function in the file that references `bm25`.
// NOTE(review): the published wink-bm25-text-search README describes
// configuring the engine via `defineConfig` / `definePrepTasks` rather than
// constructor options — confirm this call against the installed version.
const bm25 = BM25TextSearch({
fieldsToIndex: ['folder'], // Index only the 'folder' field of each document
// Other parameters such as 'k1' and 'b' can optionally be customized here
});

/**
 * Loads folder names into the module-level `bm25` index.
 * Every folder becomes one document of the form `{ folder: name }`, stored
 * under the folder name as its id; `finalize()` is called once at the end.
 * The index is shared module state, so each call adds to the same instance.
 * @param folders Array of folder names
 */
function initializeBM25(folders: string[]) {
  const addFolder = (name: string): void => {
    // Document body is { folder: 'FolderName' }; its id is 'FolderName'.
    bm25.addDoc({ folder: name }, name);
  };
  folders.forEach(addFolder);
  // All documents are in; finalize the index.
  bm25.finalize();
}

/**
 * Runs `query` against a BM25 engine and collects the hits into a lookup
 * from document id to score.
 *
 * Two hit shapes are accepted so the function works regardless of how the
 * engine reports results: `{ ref, score }` objects and `[id, score]` pairs.
 * NOTE(review): the wink-bm25-text-search README appears to document the
 * pair form — confirm against the installed version.
 *
 * Hits without a string/number id or without a numeric score are skipped,
 * and a non-array search result is treated as "no hits".
 *
 * @param query The search query string
 * @param bm25 Engine exposing `search(query)`
 * @returns A Map of document ids (folder names) to their BM25 scores
 */
function computeBM25Scores(
  query: string,
  bm25: { search(query: string): unknown },
): Map<string, number> {
  const scoreMap = new Map<string, number>();

  const hits = bm25.search(query);
  if (!Array.isArray(hits)) {
    return scoreMap;
  }

  for (const hit of hits as unknown[]) {
    let ref: unknown;
    let score: unknown;

    if (Array.isArray(hit)) {
      [ref, score] = hit;
    } else if (typeof hit === "object" && hit !== null) {
      ({ ref, score } = hit as { ref?: unknown; score?: unknown });
    }

    const hasId = typeof ref === "string" || typeof ref === "number";
    if (hasId && typeof score === "number") {
      // Ids are normalized to strings so lookups by folder name always match.
      scoreMap.set(String(ref), score);
    }
  }

  return scoreMap;
}
Expand All @@ -53,11 +35,11 @@ export async function POST(request: NextRequest) {
console.log("content", content);
console.log("folders", folders);

// Initialize BM25 with folder names
initializeBM25(folders);
// Get BM25 instance (singleton)
const bm25 = BM25Singleton.getInstance(folders);

// Compute BM25 scores based on input content
const bm25ScoresMap = computeBM25Scores(content);
const bm25ScoresMap = computeBM25Scores(content, bm25);
const bm25Scores = folders.map(folder => bm25ScoresMap.get(folder) || 0);

// Generate embedding for the input content
Expand Down

0 comments on commit d5aafa3

Please sign in to comment.