From 98b5fdee93f1e79e6183b9e773213a5a60ea3040 Mon Sep 17 00:00:00 2001 From: Bharat Ramanathan Date: Thu, 25 Apr 2024 11:02:05 +0530 Subject: [PATCH 1/9] feat: add weave documentation in ingestion pipeline --- src/wandbot/ingestion/config.py | 17 +++++++++- src/wandbot/ingestion/prepare_data.py | 49 ++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/src/wandbot/ingestion/config.py b/src/wandbot/ingestion/config.py index 91c9683..410d2fc 100644 --- a/src/wandbot/ingestion/config.py +++ b/src/wandbot/ingestion/config.py @@ -18,7 +18,6 @@ from pydantic import BaseModel, Field, model_validator from pydantic_settings import BaseSettings - from wandbot.utils import get_logger logger = get_logger(__name__) @@ -184,6 +183,22 @@ class WeaveExamplesStoreConfig(DataStoreConfig): docstore_dir: pathlib.Path = pathlib.Path("weave_examples") +class WeaveDocStoreConfig(DataStoreConfig): + name: str = "Weave Documentation" + source_type: str = "documentation" + data_source: DataSource = DataSource( + remote_path="https://wandb.github.io/weave/", + repo_path="https://github.com/wandb/weave", + base_path="docs/docs", + file_patterns=[ + "*.md", + ], + is_git_repo=True, + ) + language: str = "en" + docstore_dir: pathlib.Path = pathlib.Path("weave_documentation") + + class WandbEduCodeStoreConfig(DataStoreConfig): name: str = "Wandb Edu code" source_type: str = "code" diff --git a/src/wandbot/ingestion/prepare_data.py b/src/wandbot/ingestion/prepare_data.py index c98a852..6b553e2 100644 --- a/src/wandbot/ingestion/prepare_data.py +++ b/src/wandbot/ingestion/prepare_data.py @@ -23,13 +23,12 @@ import nbformat import pandas as pd +import wandb from google.cloud import bigquery from langchain.schema import Document from langchain_community.document_loaders import TextLoader from langchain_community.document_loaders.base import BaseLoader from nbconvert import MarkdownExporter - -import wandb from wandbot.ingestion.config import ( DataStoreConfig, DocodileEnglishStoreConfig, @@ -41,6 +40,7 @@ SDKTestsStoreConfig, WandbEduCodeStoreConfig, WeaveCodeStoreConfig, + WeaveDocStoreConfig, WeaveExamplesStoreConfig, ) from wandbot.ingestion.utils import ( @@ -253,7 +253,7 @@ def lazy_load( document.metadata["source"] )[-1] document.metadata["source"] = document_files[ - document.metadata["source"] + pathlib.Path(document.metadata["source"]) ] document.metadata["language"] = self.config.language document.metadata["description"] = self.extract_description( @@ -270,6 +270,24 @@ def lazy_load( ) +class WeaveDocsDataLoader(DocodileDataLoader): + def generate_site_url( + self, base_path: pathlib.Path, file_path: pathlib.Path + ) -> str: + chapter = "" + slug = "" + file_loc = "" + + file_name = file_path.stem + if file_path.name in ("introduction.md",): + file_name = "" + site_relative_path = os.path.join(chapter, slug, file_loc, file_name) + site_url = urljoin( + str(self.config.data_source.remote_path), str(site_relative_path) + ) + return site_url + + class CodeDataLoader(DataLoader): def lazy_load(self) -> Iterator[Document]: """A lazy loader for code documents. @@ -788,15 +806,35 @@ def lazy_load(self) -> Iterator[Document]: SOURCE_TYPE_TO_LOADER_MAP = { - "documentation": DocodileDataLoader, + "wandb_documentation": DocodileDataLoader, + "weave_documentation": WeaveDocsDataLoader, "code": CodeDataLoader, "notebook": CodeDataLoader, "report": FCReportsDataLoader, } +def get_loader_from_config(config: DataStoreConfig) -> DataLoader: + """Get the DataLoader class based on the source type. + + Args: + config: The configuration for the data store. + + Returns: + The DataLoader class. + """ + source_type = config.source_type + if source_type == "documentation": + if "weave" in config.name.lower(): + source_type = "weave_documentation" + else: + source_type = "wandb_documentation" + + return SOURCE_TYPE_TO_LOADER_MAP[source_type](config) + + def load_from_config(config: DataStoreConfig) -> pathlib.Path: - loader = SOURCE_TYPE_TO_LOADER_MAP[config.source_type](config) + loader = get_loader_from_config(config) loader.config.docstore_dir.mkdir(parents=True, exist_ok=True) with (loader.config.docstore_dir / "config.json").open("w") as f: @@ -848,6 +886,7 @@ def load( ExampleNotebookStoreConfig(), SDKCodeStoreConfig(), SDKTestsStoreConfig(), + WeaveDocStoreConfig(), WeaveCodeStoreConfig(), WeaveExamplesStoreConfig(), WandbEduCodeStoreConfig(), From f7dec4693b7f81eeed12fef8cf1da218238c38f9 Mon Sep 17 00:00:00 2001 From: Bharat Ramanathan Date: Thu, 25 Apr 2024 11:02:30 +0530 Subject: [PATCH 2/9] fix: remove js documentation from indexing --- src/wandbot/ingestion/config.py | 14 -------------- src/wandbot/ingestion/prepare_data.py | 2 -- 2 files changed, 16 deletions(-) diff --git a/src/wandbot/ingestion/config.py b/src/wandbot/ingestion/config.py index 410d2fc..5f7687a 100644 --- a/src/wandbot/ingestion/config.py +++ b/src/wandbot/ingestion/config.py @@ -91,20 +91,6 @@ class DocodileEnglishStoreConfig(DataStoreConfig): docstore_dir: pathlib.Path = pathlib.Path("wandb_documentation_en") -class DocodileJapaneseStoreConfig(DataStoreConfig): - name: str = "Japanese Documentation" - source_type: str = "documentation" - data_source: DataSource = DataSource( - remote_path="https://docs.wandb.ai/ja/", - repo_path="https://github.com/wandb/docodile", - base_path="i18n/ja/docusaurus-plugin-content-docs/current", - file_patterns=["*.md"], - is_git_repo=True, - ) - language: str = "ja" - docstore_dir: pathlib.Path = pathlib.Path("wandb_documentation_ja") - - class ExampleCodeStoreConfig(DataStoreConfig): name: str = "Examples code" source_type: str = "code" diff --git a/src/wandbot/ingestion/prepare_data.py b/src/wandbot/ingestion/prepare_data.py index 6b553e2..1e1ce83 100644 --- a/src/wandbot/ingestion/prepare_data.py +++ b/src/wandbot/ingestion/prepare_data.py @@ -32,7 +32,6 @@ from wandbot.ingestion.config import ( DataStoreConfig, DocodileEnglishStoreConfig, - DocodileJapaneseStoreConfig, ExampleCodeStoreConfig, ExampleNotebookStoreConfig, FCReportsStoreConfig, @@ -881,7 +880,6 @@ def load( configs = [ DocodileEnglishStoreConfig(), - DocodileJapaneseStoreConfig(), ExampleCodeStoreConfig(), ExampleNotebookStoreConfig(), SDKCodeStoreConfig(), From 7eed339a32859f0e4c466f49d86734568b5fcfb2 Mon Sep 17 00:00:00 2001 From: Bharat Ramanathan Date: Thu, 25 Apr 2024 11:55:44 +0530 Subject: [PATCH 3/9] chore: run formatters and linters --- src/wandbot/ingestion/config.py | 1 + src/wandbot/ingestion/prepare_data.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/wandbot/ingestion/config.py b/src/wandbot/ingestion/config.py index 5f7687a..c3d4808 100644 --- a/src/wandbot/ingestion/config.py +++ b/src/wandbot/ingestion/config.py @@ -18,6 +18,7 @@ from pydantic import BaseModel, Field, model_validator from pydantic_settings import BaseSettings + from wandbot.utils import get_logger logger = get_logger(__name__) diff --git a/src/wandbot/ingestion/prepare_data.py b/src/wandbot/ingestion/prepare_data.py index 1e1ce83..12920b4 100644 --- a/src/wandbot/ingestion/prepare_data.py +++ b/src/wandbot/ingestion/prepare_data.py @@ -23,12 +23,13 @@ import nbformat import pandas as pd -import wandb from google.cloud import bigquery from langchain.schema import Document from langchain_community.document_loaders import TextLoader from langchain_community.document_loaders.base import BaseLoader from nbconvert import MarkdownExporter + +import wandb from wandbot.ingestion.config import ( DataStoreConfig, DocodileEnglishStoreConfig, From e1069951ec8f1b732cbdff0beb72262d47805948 Mon Sep 17 00:00:00 2001 From: Bharat Ramanathan Date: Thu, 25 Apr 2024 14:58:23 +0530 Subject: [PATCH 4/9] Revert "fix: remove js documentation from indexing" This reverts commit f7dec4693b7f81eeed12fef8cf1da218238c38f9. --- src/wandbot/ingestion/config.py | 14 ++++++++++++++ src/wandbot/ingestion/prepare_data.py | 2 ++ 2 files changed, 16 insertions(+) diff --git a/src/wandbot/ingestion/config.py b/src/wandbot/ingestion/config.py index c3d4808..eab1e1b 100644 --- a/src/wandbot/ingestion/config.py +++ b/src/wandbot/ingestion/config.py @@ -92,6 +92,20 @@ class DocodileEnglishStoreConfig(DataStoreConfig): docstore_dir: pathlib.Path = pathlib.Path("wandb_documentation_en") +class DocodileJapaneseStoreConfig(DataStoreConfig): + name: str = "Japanese Documentation" + source_type: str = "documentation" + data_source: DataSource = DataSource( + remote_path="https://docs.wandb.ai/ja/", + repo_path="https://github.com/wandb/docodile", + base_path="i18n/ja/docusaurus-plugin-content-docs/current", + file_patterns=["*.md"], + is_git_repo=True, + ) + language: str = "ja" + docstore_dir: pathlib.Path = pathlib.Path("wandb_documentation_ja") + + class ExampleCodeStoreConfig(DataStoreConfig): name: str = "Examples code" source_type: str = "code" diff --git a/src/wandbot/ingestion/prepare_data.py b/src/wandbot/ingestion/prepare_data.py index 12920b4..218af83 100644 --- a/src/wandbot/ingestion/prepare_data.py +++ b/src/wandbot/ingestion/prepare_data.py @@ -33,6 +33,7 @@ from wandbot.ingestion.config import ( DataStoreConfig, DocodileEnglishStoreConfig, + DocodileJapaneseStoreConfig, ExampleCodeStoreConfig, ExampleNotebookStoreConfig, FCReportsStoreConfig, @@ -881,6 +882,7 @@ def load( configs = [ DocodileEnglishStoreConfig(), + DocodileJapaneseStoreConfig(), ExampleCodeStoreConfig(), ExampleNotebookStoreConfig(), SDKCodeStoreConfig(), From 43ea0a9ea6039eb347089c9f268d2956d46d8559 Mon Sep 17 00:00:00 2001 From: Bharat Ramanathan Date: Thu, 25 Apr 2024 14:59:51 +0530 Subject: [PATCH 5/9] chore: run formatters and linters --- src/wandbot/api/app.py | 2 +- src/wandbot/api/routers/database.py | 2 +- src/wandbot/chat/chat.py | 2 +- src/wandbot/evaluation/eval/async_main.py | 2 +- src/wandbot/ingestion/prepare_data.py | 2 +- src/wandbot/ingestion/preprocess_data.py | 2 +- src/wandbot/ingestion/report.py | 3 +-- src/wandbot/ingestion/vectorstores.py | 2 +- src/wandbot/retriever/base.py | 2 +- src/wandbot/utils.py | 3 +-- 10 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/wandbot/api/app.py b/src/wandbot/api/app.py index 3e6c0c2..7d5fb49 100644 --- a/src/wandbot/api/app.py +++ b/src/wandbot/api/app.py @@ -33,9 +33,9 @@ from datetime import datetime, timezone import pandas as pd +import wandb from fastapi import FastAPI -import wandb from wandbot.api.routers import chat as chat_router from wandbot.api.routers import database as database_router from wandbot.api.routers import retrieve as retrieve_router diff --git a/src/wandbot/api/routers/database.py b/src/wandbot/api/routers/database.py index b45b710..a0b5bb8 100644 --- a/src/wandbot/api/routers/database.py +++ b/src/wandbot/api/routers/database.py @@ -1,8 +1,8 @@ +import wandb from fastapi import APIRouter from starlette import status from starlette.responses import Response -import wandb from wandbot.database.client import DatabaseClient from wandbot.database.database import engine from wandbot.database.models import Base diff --git a/src/wandbot/chat/chat.py b/src/wandbot/chat/chat.py index 55ec833..974850d 100644 --- a/src/wandbot/chat/chat.py +++ b/src/wandbot/chat/chat.py @@ -26,9 +26,9 @@ """ from typing import List +import wandb from weave.monitoring import StreamTable -import wandb from wandbot.chat.config import ChatConfig from wandbot.chat.rag import RAGPipeline, RAGPipelineOutput from wandbot.chat.schemas import ChatRequest, ChatResponse diff --git a/src/wandbot/evaluation/eval/async_main.py b/src/wandbot/evaluation/eval/async_main.py index d219d5c..c9166b2 100644 --- a/src/wandbot/evaluation/eval/async_main.py +++ b/src/wandbot/evaluation/eval/async_main.py @@ -7,11 +7,11 @@ import aiofiles import httpx import pandas as pd +import wandb from llama_index.llms.openai import OpenAI from tenacity import retry, stop_after_attempt, wait_random_exponential from tqdm import tqdm -import wandb from wandbot.evaluation.config import EvalConfig from wandbot.evaluation.eval.correctness import ( CORRECTNESS_EVAL_TEMPLATE, diff --git a/src/wandbot/ingestion/prepare_data.py b/src/wandbot/ingestion/prepare_data.py index 218af83..6117936 100644 --- a/src/wandbot/ingestion/prepare_data.py +++ b/src/wandbot/ingestion/prepare_data.py @@ -23,13 +23,13 @@ import nbformat import pandas as pd +import wandb from google.cloud import bigquery from langchain.schema import Document from langchain_community.document_loaders import TextLoader from langchain_community.document_loaders.base import BaseLoader from nbconvert import MarkdownExporter -import wandb from wandbot.ingestion.config import ( DataStoreConfig, DocodileEnglishStoreConfig, diff --git a/src/wandbot/ingestion/preprocess_data.py b/src/wandbot/ingestion/preprocess_data.py index af883f9..2cc96f2 100644 --- a/src/wandbot/ingestion/preprocess_data.py +++ b/src/wandbot/ingestion/preprocess_data.py @@ -24,9 +24,9 @@ from typing import Any, List, Sequence import tiktoken +import wandb from langchain_core.documents import BaseDocumentTransformer, Document -import wandb from wandbot.ingestion.preprocessors.markdown import MarkdownTextTransformer from wandbot.ingestion.preprocessors.source_code import CodeTextTransformer from wandbot.utils import ( diff --git a/src/wandbot/ingestion/report.py b/src/wandbot/ingestion/report.py index 68b7a79..760fa6c 100644 --- a/src/wandbot/ingestion/report.py +++ b/src/wandbot/ingestion/report.py @@ -19,9 +19,8 @@ import pathlib from datetime import datetime -import wandb.apis.reports as wr - import wandb +import wandb.apis.reports as wr def log_raw_counts(metadata: dict[str, dict[str, int]]) -> list[str]: diff --git a/src/wandbot/ingestion/vectorstores.py b/src/wandbot/ingestion/vectorstores.py index 5498ed1..f3ef115 100644 --- a/src/wandbot/ingestion/vectorstores.py +++ b/src/wandbot/ingestion/vectorstores.py @@ -16,12 +16,12 @@ import pathlib from typing import List +import wandb from langchain_community.vectorstores.chroma import Chroma from langchain_core.documents import Document from langchain_openai import OpenAIEmbeddings from tqdm import trange -import wandb from wandbot.ingestion.config import VectorStoreConfig from wandbot.utils import get_logger diff --git a/src/wandbot/retriever/base.py b/src/wandbot/retriever/base.py index 5b8a479..990150d 100644 --- a/src/wandbot/retriever/base.py +++ b/src/wandbot/retriever/base.py @@ -1,12 +1,12 @@ from operator import itemgetter from typing import List +import wandb from langchain_community.document_transformers import EmbeddingsRedundantFilter from langchain_community.vectorstores.chroma import Chroma from langchain_core.documents import Document from langchain_core.runnables import RunnableLambda, RunnableParallel -import wandb from wandbot.ingestion.config import VectorStoreConfig from wandbot.retriever.reranking import CohereRerankChain from wandbot.retriever.utils import OpenAIEmbeddingsModel diff --git a/src/wandbot/utils.py b/src/wandbot/utils.py index 3a7ccd3..3d2d793 100644 --- a/src/wandbot/utils.py +++ b/src/wandbot/utils.py @@ -36,12 +36,11 @@ import fasttext import nest_asyncio import tiktoken +import wandb from langchain_core.documents import Document from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict -import wandb - def get_logger(name: str) -> logging.Logger: """Creates and returns a logger with the specified name. From 44f00f8db199328335a38c549cad6af64e30486f Mon Sep 17 00:00:00 2001 From: Bharat Ramanathan Date: Thu, 25 Apr 2024 16:16:44 +0530 Subject: [PATCH 6/9] feat: add support for git repo data sourcing by branch --- src/wandbot/ingestion/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/wandbot/ingestion/utils.py b/src/wandbot/ingestion/utils.py index 8548efe..8f63ff4 100644 --- a/src/wandbot/ingestion/utils.py +++ b/src/wandbot/ingestion/utils.py @@ -38,7 +38,6 @@ import markdownify from bs4 import BeautifulSoup, Comment from git import Repo - from wandbot.utils import get_logger logger = get_logger(__name__) @@ -131,6 +130,8 @@ def fetch_git_repo(paths: Any, id_file: Path) -> Dict[str, str]: f"Repo {paths.local_path} already exists... Pulling changes from {repo.remotes.origin.url}" ) with repo.git.custom_environment(GIT_SSH_COMMAND=git_command): + if paths.branch is not None: + repo.git.checkout(paths.branch) repo.remotes.origin.pull() else: remote_url = giturlparse.parse(f"{paths.repo_path}").urls.get("ssh") @@ -139,6 +140,8 @@ def fetch_git_repo(paths: Any, id_file: Path) -> Dict[str, str]: repo = Repo.clone_from( remote_url, paths.local_path, env=dict(GIT_SSH_COMMAND=git_command) ) + if paths.branch is not None: + repo.git.checkout(paths.branch) return fetch_repo_metadata(repo) From 684fd7a178e3feb9d0bfb771bd7baec2e179edaa Mon Sep 17 00:00:00 2001 From: Bharat Ramanathan Date: Thu, 25 Apr 2024 16:18:06 +0530 Subject: [PATCH 7/9] fix: japanese docs data sourcing from branch --- src/wandbot/ingestion/config.py | 5 +++-- src/wandbot/ingestion/prepare_data.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wandbot/ingestion/config.py b/src/wandbot/ingestion/config.py index eab1e1b..6b1f712 100644 --- a/src/wandbot/ingestion/config.py +++ b/src/wandbot/ingestion/config.py @@ -18,7 +18,6 @@ from pydantic import BaseModel, Field, model_validator from pydantic_settings import BaseSettings - from wandbot.utils import get_logger logger = get_logger(__name__) @@ -32,6 +31,7 @@ class DataSource(BaseSettings): remote_path: str = "" repo_path: str = "" local_path: Optional[pathlib.Path] = None + branch: Optional[str] = None base_path: Optional[str] = "" file_patterns: List[str] = ["*.*"] is_git_repo: bool = False @@ -98,9 +98,10 @@ class DocodileJapaneseStoreConfig(DataStoreConfig): data_source: DataSource = DataSource( remote_path="https://docs.wandb.ai/ja/", repo_path="https://github.com/wandb/docodile", - base_path="i18n/ja/docusaurus-plugin-content-docs/current", + base_path="docs", file_patterns=["*.md"], is_git_repo=True, + branch="japanese_docs", ) language: str = "ja" docstore_dir: pathlib.Path = pathlib.Path("wandb_documentation_ja") diff --git a/src/wandbot/ingestion/prepare_data.py b/src/wandbot/ingestion/prepare_data.py index 6117936..6b553e2 100644 --- a/src/wandbot/ingestion/prepare_data.py +++ b/src/wandbot/ingestion/prepare_data.py @@ -29,7 +29,6 @@ from langchain_community.document_loaders import TextLoader from langchain_community.document_loaders.base import BaseLoader from nbconvert import MarkdownExporter - from wandbot.ingestion.config import ( DataStoreConfig, DocodileEnglishStoreConfig, From 8ea83e46068d333b2f2d1f7b05e2779bb1df6526 Mon Sep 17 00:00:00 2001 From: Bharat Ramanathan Date: Thu, 25 Apr 2024 16:18:27 +0530 Subject: [PATCH 8/9] feat: add support for korean docs ingestion --- src/wandbot/ingestion/config.py | 15 +++++++++++++++ src/wandbot/ingestion/prepare_data.py | 2 ++ 2 files changed, 17 insertions(+) diff --git a/src/wandbot/ingestion/config.py b/src/wandbot/ingestion/config.py index 6b1f712..9cd4654 100644 --- a/src/wandbot/ingestion/config.py +++ b/src/wandbot/ingestion/config.py @@ -107,6 +107,21 @@ class DocodileJapaneseStoreConfig(DataStoreConfig): docstore_dir: pathlib.Path = pathlib.Path("wandb_documentation_ja") +class DocodileKoreanStoreConfig(DataStoreConfig): + name: str = "Korean Documentation" + source_type: str = "documentation" + data_source: DataSource = DataSource( + remote_path="https://docs.wandb.ai/ko/", + repo_path="https://github.com/wandb/docodile", + base_path="docs", + file_patterns=["*.md"], + is_git_repo=True, + branch="korean_docs", + ) + language: str = "ko" + docstore_dir: pathlib.Path = pathlib.Path("wandb_documentation_ko") + + class ExampleCodeStoreConfig(DataStoreConfig): name: str = "Examples code" source_type: str = "code" diff --git a/src/wandbot/ingestion/prepare_data.py b/src/wandbot/ingestion/prepare_data.py index 6b553e2..ad66a21 100644 --- a/src/wandbot/ingestion/prepare_data.py +++ b/src/wandbot/ingestion/prepare_data.py @@ -33,6 +33,7 @@ DataStoreConfig, DocodileEnglishStoreConfig, DocodileJapaneseStoreConfig, + DocodileKoreanStoreConfig, ExampleCodeStoreConfig, ExampleNotebookStoreConfig, FCReportsStoreConfig, @@ -882,6 +883,7 @@ def load( configs = [ DocodileEnglishStoreConfig(), DocodileJapaneseStoreConfig(), + DocodileKoreanStoreConfig(), ExampleCodeStoreConfig(), ExampleNotebookStoreConfig(), SDKCodeStoreConfig(), From d9126ddba4b39e1ff10c362705bcbde5b92b1582 Mon Sep 17 00:00:00 2001 From: Bharat Ramanathan Date: Thu, 25 Apr 2024 16:19:31 +0530 Subject: [PATCH 9/9] fix: change retrieval to non-parent retrieval initial eval on parent retrieval doesn't yield good results --- src/wandbot/rag/retrieval.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/wandbot/rag/retrieval.py b/src/wandbot/rag/retrieval.py index 1a72ac9..0af9c7e 100644 --- a/src/wandbot/rag/retrieval.py +++ b/src/wandbot/rag/retrieval.py @@ -3,7 +3,6 @@ from langchain.retrievers.document_compressors import CohereRerank from langchain_core.documents import Document from langchain_core.runnables import Runnable, RunnablePassthrough - from wandbot.rag.utils import get_web_contexts from wandbot.retriever.base import VectorStore from wandbot.retriever.web_search import YouSearch, YouSearchConfig @@ -64,7 +63,7 @@ def __init__( self.top_k = top_k self.search_type = search_type - self.retriever = self.vectorstore.as_parent_retriever( + self.retriever = self.vectorstore.as_retriever( search_type=self.search_type, search_kwargs={"k": self.top_k} )