feat: update llama-index + dependencies #2092

Merged · 8 commits · Sep 26, 2024
6 changes: 5 additions & 1 deletion docker-compose.yaml
@@ -61,7 +61,7 @@ services:
ollama:
image: traefik:v2.10
ports:
- "11434:11434"
- "8080:8080"
command:
- "--providers.file.filename=/etc/router.yml"
- "--log.level=ERROR"
@@ -83,6 +83,8 @@ services:
# Ollama service for the CPU mode
ollama-cpu:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ./models:/root/.ollama
profiles:
@@ -92,6 +94,8 @@
# Ollama service for the CUDA mode
ollama-cuda:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ./models:/root/.ollama
deploy:
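
After this change the traefik-based ollama proxy publishes 8080 on the host, while the ollama-cpu and ollama-cuda containers expose the Ollama API port 11434 directly. A hedged way to check that the API answers after docker compose up (the /api/tags endpoint and default port are assumptions based on stock Ollama, not part of this PR):

    # Hedged sketch: poll the Ollama HTTP API published by the compose services above.
    import json
    import urllib.request

    def ollama_is_up(base_url: str = "http://localhost:11434") -> bool:
        try:
            with urllib.request.urlopen(f"{base_url}/api/tags", timeout=5) as resp:
                models = json.load(resp).get("models", [])
                print(f"Ollama is up with {len(models)} pulled model(s)")
                return True
        except OSError:
            return False

    if __name__ == "__main__":
        print(ollama_is_up())
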
5,015 changes: 2,680 additions & 2,335 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion private_gpt/components/ingest/ingest_component.py
@@ -403,7 +403,7 @@ def _doc_to_node_worker(self, file_name: str, documents: list[Document]) -> None
self.transformations,
show_progress=self.show_progress,
)
self.node_q.put(("process", file_name, documents, nodes))
self.node_q.put(("process", file_name, documents, list(nodes)))
finally:
self.doc_semaphore.release()
self.doc_q.task_done() # unblock Q joins
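
The new list(nodes) call materializes whatever node sequence the transformation step returns before it crosses the thread boundary; if that value were a lazily evaluated iterable, the consumer of node_q could only read it once. A small, hypothetical illustration of that failure mode (not PrivateGPT code):

    # A generator placed on a queue is exhausted after the first read;
    # materializing it with list() makes the payload safe to reuse.
    from queue import Queue

    def fake_transform():
        yield from ("node-1", "node-2")

    q: Queue = Queue()
    q.put(fake_transform())           # lazy payload
    lazy = q.get()
    print(list(lazy))                 # ['node-1', 'node-2']
    print(list(lazy))                 # [] -- already exhausted

    q.put(list(fake_transform()))     # materialized payload
    nodes = q.get()
    print(list(nodes), list(nodes))   # same contents both times
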
9 changes: 4 additions & 5 deletions private_gpt/components/llm/llm_component.py
@@ -120,7 +120,6 @@ def __init__(self, settings: Settings) -> None:
api_version="",
temperature=settings.llm.temperature,
context_window=settings.llm.context_window,
max_new_tokens=settings.llm.max_new_tokens,
messages_to_prompt=prompt_style.messages_to_prompt,
completion_to_prompt=prompt_style.completion_to_prompt,
tokenizer=settings.llm.tokenizer,
@@ -184,10 +183,10 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:

return wrapper

Ollama.chat = add_keep_alive(Ollama.chat)
Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)
Ollama.complete = add_keep_alive(Ollama.complete)
Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
Ollama.chat = add_keep_alive(Ollama.chat) # type: ignore
Ollama.stream_chat = add_keep_alive(Ollama.stream_chat) # type: ignore
Ollama.complete = add_keep_alive(Ollama.complete) # type: ignore
Ollama.stream_complete = add_keep_alive(Ollama.stream_complete) # type: ignore

self.llm = llm

8 changes: 5 additions & 3 deletions private_gpt/components/llm/prompt_helper.py
@@ -40,7 +40,8 @@ def messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
logger.debug("Got for messages='%s' the prompt='%s'", messages, prompt)
return prompt

def completion_to_prompt(self, completion: str) -> str:
def completion_to_prompt(self, prompt: str) -> str:
completion = prompt # Fix: Llama-index parameter has to be named as prompt
prompt = self._completion_to_prompt(completion)
logger.debug("Got for completion='%s' the prompt='%s'", completion, prompt)
return prompt
@@ -285,8 +286,9 @@ def _completion_to_prompt(self, completion: str) -> str:


def get_prompt_style(
prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"]
| None
prompt_style: (
Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
)
) -> AbstractPromptStyle:
"""Get the prompt style to use from the given string.

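
For orientation, a hypothetical usage sketch that exercises both touched pieces of this file; the style name is arbitrary and the snippet is not part of the PR:

    # Assumes the private_gpt package is importable from the repo root.
    from private_gpt.components.llm.prompt_helper import get_prompt_style

    style = get_prompt_style("llama3")
    prompt = style.completion_to_prompt("Summarize the ingested document in one sentence.")
    print(prompt)
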
5 changes: 3 additions & 2 deletions private_gpt/components/node_store/node_store_component.py
@@ -38,10 +38,10 @@ def __init__(self, settings: Settings) -> None:

case "postgres":
try:
from llama_index.core.storage.docstore.postgres_docstore import (
from llama_index.storage.docstore.postgres import ( # type: ignore
PostgresDocumentStore,
)
from llama_index.core.storage.index_store.postgres_index_store import (
from llama_index.storage.index_store.postgres import ( # type: ignore
PostgresIndexStore,
)
except ImportError:
@@ -55,6 +55,7 @@ def __init__(self, settings: Settings) -> None:
self.index_store = PostgresIndexStore.from_params(
**settings.postgres.model_dump(exclude_none=True)
)

self.doc_store = PostgresDocumentStore.from_params(
**settings.postgres.model_dump(exclude_none=True)
)
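
The imports now target the integration packages' own namespace rather than llama_index.core; the packages themselves are the same optional extras listed in the pyproject.toml diff below. The same guarded-import pattern in isolation, as a hedged sketch with our own error message:

    try:
        from llama_index.storage.docstore.postgres import PostgresDocumentStore
        from llama_index.storage.index_store.postgres import PostgresIndexStore
    except ImportError as e:
        raise ImportError(
            "Postgres node store requires the llama-index-storage-docstore-postgres "
            "and llama-index-storage-index-store-postgres packages."
        ) from e
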
17 changes: 10 additions & 7 deletions private_gpt/components/vector_store/batched_chroma.py
@@ -1,14 +1,17 @@
from collections.abc import Generator
from typing import Any
from collections.abc import Generator, Sequence
from typing import TYPE_CHECKING, Any

from llama_index.core.schema import BaseNode, MetadataMode
from llama_index.core.vector_stores.utils import node_to_metadata_dict
from llama_index.vector_stores.chroma import ChromaVectorStore # type: ignore

if TYPE_CHECKING:
from collections.abc import Mapping


def chunk_list(
lst: list[BaseNode], max_chunk_size: int
) -> Generator[list[BaseNode], None, None]:
lst: Sequence[BaseNode], max_chunk_size: int
) -> Generator[Sequence[BaseNode], None, None]:
"""Yield successive max_chunk_size-sized chunks from lst.

Args:
@@ -60,7 +63,7 @@ def __init__(
)
self.chroma_client = chroma_client

def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:
"""Add nodes to index, batching the insertion to avoid issues.

Args:
@@ -78,8 +81,8 @@ def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:

all_ids = []
for node_chunk in node_chunks:
embeddings = []
metadatas = []
embeddings: list[Sequence[float]] = []
metadatas: list[Mapping[str, Any]] = []
ids = []
documents = []
for node in node_chunk:
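
The body of chunk_list is collapsed in this view. A hedged reconstruction that matches its docstring (str elements stand in for BaseNode so the snippet is self-contained; the real file may differ):

    from collections.abc import Generator, Sequence

    def chunk_list(
        lst: Sequence[str], max_chunk_size: int
    ) -> Generator[Sequence[str], None, None]:
        """Yield successive max_chunk_size-sized chunks from lst."""
        for i in range(0, len(lst), max_chunk_size):
            yield lst[i : i + max_chunk_size]

    print(list(chunk_list(["a", "b", "c", "d", "e"], 2)))  # [['a', 'b'], ['c', 'd'], ['e']]
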
15 changes: 11 additions & 4 deletions private_gpt/server/chat/chat_service.py
@@ -1,4 +1,5 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING

from injector import inject, singleton
from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine
@@ -26,6 +27,9 @@
from private_gpt.server.chunks.chunks_service import Chunk
from private_gpt.settings.settings import Settings

if TYPE_CHECKING:
from llama_index.core.postprocessor.types import BaseNodePostprocessor


class Completion(BaseModel):
response: str
@@ -114,12 +118,15 @@ def _chat_engine(
context_filter=context_filter,
similarity_top_k=self.settings.rag.similarity_top_k,
)
node_postprocessors = [
node_postprocessors: list[BaseNodePostprocessor] = [
MetadataReplacementPostProcessor(target_metadata_key="window"),
SimilarityPostprocessor(
similarity_cutoff=settings.rag.similarity_value
),
]
if settings.rag.similarity_value:
node_postprocessors.append(
SimilarityPostprocessor(
similarity_cutoff=settings.rag.similarity_value
)
)

if settings.rag.rerank.enabled:
rerank_postprocessor = SentenceTransformerRerank(
6 changes: 3 additions & 3 deletions private_gpt/server/recipes/summarize/summarize_service.py
@@ -90,9 +90,9 @@ def _summarize(
# Add context documents to summarize
if use_context:
# 1. Recover all ref docs
ref_docs: dict[
str, RefDocInfo
] | None = self.storage_context.docstore.get_all_ref_doc_info()
ref_docs: dict[str, RefDocInfo] | None = (
self.storage_context.docstore.get_all_ref_doc_info()
)
if ref_docs is None:
raise ValueError("No documents have been ingested yet.")

26 changes: 13 additions & 13 deletions private_gpt/settings/settings.py
@@ -136,19 +136,19 @@ class LLMSettings(BaseModel):
0.1,
description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
)
prompt_style: Literal[
"default", "llama2", "llama3", "tag", "mistral", "chatml"
] = Field(
"llama2",
description=(
"The prompt style to use for the chat engine. "
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
"If `llama3` - use the llama3 prompt style from the llama_index."
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
"If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
"`llama2` is the historic behaviour. `default` might work better with your custom models."
),
prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = (
Field(
"llama2",
description=(
"The prompt style to use for the chat engine. "
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
"If `llama3` - use the llama3 prompt style from the llama_index."
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
"If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
"`llama2` is the historic behaviour. `default` might work better with your custom models."
),
)
)


1 change: 1 addition & 0 deletions private_gpt/ui/ui.py
@@ -1,4 +1,5 @@
"""This file should be imported if and only if you want to run the UI locally."""

import base64
import logging
import time
93 changes: 42 additions & 51 deletions pyproject.toml
@@ -7,63 +7,54 @@ authors = ["Zylon <[email protected]>"]
[tool.poetry.dependencies]
python = ">=3.11,<3.12"
# PrivateGPT
fastapi = { extras = ["all"], version = "^0.111.0" }
python-multipart = "^0.0.9"
injector = "^0.21.0"
pyyaml = "^6.0.1"
fastapi = { extras = ["all"], version = "^0.115.0" }
python-multipart = "^0.0.10"
injector = "^0.22.0"
pyyaml = "^6.0.2"
watchdog = "^4.0.1"
transformers = "^4.42.3"
transformers = "^4.44.2"
docx2txt = "^0.8"
cryptography = "^3.1"
# LlamaIndex core libs
llama-index-core = "^0.10.52"
llama-index-readers-file = "^0.1.27"
llama-index-core = ">=0.11.2,<0.12.0"
llama-index-readers-file = "*"
# Optional LlamaIndex integration libs
llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
llama-index-llms-openai = {version = "^0.1.25", optional = true}
llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
llama-index-llms-ollama = {version ="^0.2.2", optional = true}
llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
llama-index-llms-gemini = {version ="^0.1.11", optional = true}
llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true}
llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true}
llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true}
llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
llama-index-llms-llama-cpp = {version = "*", optional = true}
llama-index-llms-openai = {version ="*", optional = true}
llama-index-llms-openai-like = {version ="*", optional = true}
llama-index-llms-ollama = {version ="*", optional = true}
llama-index-llms-azure-openai = {version ="*", optional = true}
llama-index-llms-gemini = {version ="*", optional = true}
llama-index-embeddings-ollama = {version ="*", optional = true}
llama-index-embeddings-huggingface = {version ="*", optional = true}
llama-index-embeddings-openai = {version ="*", optional = true}
llama-index-embeddings-azure-openai = {version ="*", optional = true}
llama-index-embeddings-gemini = {version ="*", optional = true}
llama-index-embeddings-mistralai = {version ="*", optional = true}
llama-index-vector-stores-qdrant = {version ="*", optional = true}
llama-index-vector-stores-milvus = {version ="*", optional = true}
llama-index-vector-stores-chroma = {version ="*", optional = true}
llama-index-vector-stores-postgres = {version ="*", optional = true}
llama-index-vector-stores-clickhouse = {version ="*", optional = true}
llama-index-storage-docstore-postgres = {version ="*", optional = true}
llama-index-storage-index-store-postgres = {version ="*", optional = true}
# Postgres
psycopg2-binary = {version ="^2.9.9", optional = true}
asyncpg = {version="^0.29.0", optional = true}

# ClickHouse
clickhouse-connect = {version = "^0.7.15", optional = true}
clickhouse-connect = {version = "^0.7.19", optional = true}

# Optional Sagemaker dependency
boto3 = {version ="^1.34.139", optional = true}

# Optional Qdrant client
qdrant-client = {version ="^1.9.0", optional = true}
boto3 = {version ="^1.35.26", optional = true}

# Optional Reranker dependencies
torch = {version ="^2.3.1", optional = true}
sentence-transformers = {version ="^3.0.1", optional = true}
torch = {version ="^2.4.1", optional = true}
sentence-transformers = {version ="^3.1.1", optional = true}

# Optional UI
gradio = {version ="^4.37.2", optional = true}
ffmpy = "0.4.0"

# Optional Google Gemini dependency
google-generativeai = {version ="^0.5.4", optional = true}

# Optional Ollama client
ollama = {version ="^0.3.0", optional = true}
gradio = {version ="^4.44.0", optional = true}
ffmpy = {version ="^0.4.0", optional = true}

# Optional HF Transformers
einops = {version = "^0.8.0", optional = true}
@@ -74,11 +65,11 @@ ui = ["gradio", "ffmpy"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
llms-ollama = ["llama-index-llms-ollama", "ollama"]
llms-ollama = ["llama-index-llms-ollama"]
llms-sagemaker = ["boto3"]
llms-azopenai = ["llama-index-llms-azure-openai"]
llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
llms-gemini = ["llama-index-llms-gemini"]
embeddings-ollama = ["llama-index-embeddings-ollama"]
embeddings-huggingface = ["llama-index-embeddings-huggingface", "einops"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
@@ -94,14 +85,14 @@ storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-ind
rerank-sentence-transformers = ["torch", "sentence-transformers"]

[tool.poetry.group.dev.dependencies]
black = "^22"
mypy = "^1.2"
pre-commit = "^2"
pytest = "^7"
pytest-cov = "^3"
black = "^24"
mypy = "^1.11"
pre-commit = "^3"
pytest = "^8"
pytest-cov = "^5"
ruff = "^0"
pytest-asyncio = "^0.21.1"
types-pyyaml = "^6.0.12.12"
pytest-asyncio = "^0.24.0"
types-pyyaml = "^6.0.12.20240917"

[build-system]
requires = ["poetry-core>=1.0.0"]
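
With the integration constraints relaxed to "*", the llama-index-core pin (">=0.11.2,<0.12.0") is what effectively drives dependency resolution. A hedged way to see which versions Poetry actually locked in your environment (package names are taken from this diff; availability depends on the extras you installed):

    from importlib.metadata import PackageNotFoundError, version

    for pkg in ("llama-index-core", "llama-index-llms-ollama", "fastapi", "gradio"):
        try:
            print(f"{pkg}=={version(pkg)}")
        except PackageNotFoundError:
            print(f"{pkg} is not installed")
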
2 changes: 1 addition & 1 deletion tests/fixtures/fast_api_test_client.py
@@ -5,7 +5,7 @@
from tests.fixtures.mock_injector import MockInjector


@pytest.fixture()
@pytest.fixture
def test_client(request: pytest.FixtureRequest, injector: MockInjector) -> TestClient:
if request is not None and hasattr(request, "param"):
injector.bind_settings(request.param or {})
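
Dropping the empty parentheses here and in the fixtures below is purely cosmetic: pytest treats a bare @pytest.fixture exactly like @pytest.fixture() when no arguments are passed. A minimal standalone illustration (not project code):

    import pytest

    @pytest.fixture
    def answer() -> int:
        return 42

    def test_answer(answer: int) -> None:
        assert answer == 42
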
2 changes: 1 addition & 1 deletion tests/fixtures/ingest_helper.py
@@ -19,6 +19,6 @@ def ingest_file(self, path: Path) -> IngestResponse:
return ingest_result


@pytest.fixture()
@pytest.fixture
def ingest_helper(test_client: TestClient) -> IngestHelper:
return IngestHelper(test_client)
2 changes: 1 addition & 1 deletion tests/fixtures/mock_injector.py
@@ -37,6 +37,6 @@ def get(self, interface: type[T]) -> T:
return self.test_injector.get(interface)


@pytest.fixture()
@pytest.fixture
def injector() -> MockInjector:
return MockInjector()
2 changes: 1 addition & 1 deletion tests/server/ingest/test_local_ingest.py
@@ -6,7 +6,7 @@
from fastapi.testclient import TestClient


@pytest.fixture()
@pytest.fixture
def file_path() -> str:
return "test.txt"
