From a802baaa5ba83cd4051f1ef8eacb76fc6517c398 Mon Sep 17 00:00:00 2001 From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:28:47 -0400 Subject: [PATCH] Add chroma default --- docs/guides/default-memory.mdx | 33 +++++++--------------- docs/patterns/memory.mdx | 20 +++++++++---- src/controlflow/defaults.py | 11 ++++---- src/controlflow/memory/memory.py | 18 ++++++++---- src/controlflow/memory/providers/chroma.py | 6 ++-- src/controlflow/settings.py | 2 +- 6 files changed, 46 insertions(+), 44 deletions(-) diff --git a/docs/guides/default-memory.mdx b/docs/guides/default-memory.mdx index 520a26c..44ae38c 100644 --- a/docs/guides/default-memory.mdx +++ b/docs/guides/default-memory.mdx @@ -5,39 +5,40 @@ description: Set up a default persistent memory provider for your agents icon: brain --- -This guide focuses on configuring a default memory provider for ControlFlow. For detailed information on using memory, including custom providers and available configurations, please refer to the [Memory doc](/patterns/memory). - ControlFlow's [memory](/patterns/memory) feature allows agents to store and retrieve information across multiple workflows. Memory modules are backed by a vector database, configured using a `MemoryProvider`. Setting up a default provider simplifies the process of creating memory objects throughout your application. Once configured, you can create memory objects without specifying a provider each time. + +While ControlFlow does not include any vector database dependencies by default, the default provider is set to `"chroma-db"`. This means that if you install the `chromadb` package, your memory modules will work without any additional configuration. + + ## Install dependencies To use a provider, you must first install its dependencies. Please refer to the [Memory doc](/patterns/memory) to see all supported providers and their required dependencies. -For example, to use the [Chroma](https://trychroma.com/) provider, you need to install `chromadb`: +For example, to use the default [Chroma](https://trychroma.com/) provider, you need to install `chromadb`: ```bash pip install chromadb ``` - -## Configure default provider +## Configure a default provider There are two ways to set up a default provider: using a string setting for common defaults, or instantiating a custom provider. Here, we'll use a persistent Chroma database as our example. ### String configurations -For simple provider setups, you can use a string value with the `provider` parameter: +For simple provider setups, you can modify ControlFlow's default settings using a string value. The default value is `"chroma-db"`, which will create a persistent Chroma database. To change it: ```bash Environment variable -export CONTROLFLOW_MEMORY_PROVIDER="chroma-db" +export CONTROLFLOW_MEMORY_PROVIDER="chroma-ephemeral" ``` ```python Runtime import controlflow as cf -cf.settings.memory_provider = "chroma-db" +cf.settings.memory_provider = "chroma-ephemeral" ``` @@ -50,24 +51,10 @@ For more advanced setups, instantiate a provider with custom settings and assign ```python import controlflow as cf from controlflow.memory.providers.chroma import ChromaMemory +import chromadb # Set the default provider cf.defaults.memory_provider = ChromaMemory( client=chromadb.PersistentClient(path="/custom/path"), - collection_name="my-collection-{key}" -) -``` - -## Using the default provider - -Once you've set a default provider, you can create memory objects that use it automatically: - -```python -import controlflow as cf - -weather_memory = cf.Memory( - key="weather", - instructions="Store information about the weather." ) ``` - diff --git a/docs/patterns/memory.mdx b/docs/patterns/memory.mdx index eddcf31..0c558f6 100644 --- a/docs/patterns/memory.mdx +++ b/docs/patterns/memory.mdx @@ -29,7 +29,8 @@ To create a memory object, you need to provide a `key` and `instructions`. The ` ControlFlow does not include any vector database dependencies by default to keep the library lightweight, so you must [install and configure](#provider) a provider before creating a memory object. -The examples below assume you have [configured a default provider](/guides/default-memory), so they do not specify a provider explicitly. +The examples in this document will work if you install `chromadb`, as Chroma is the default provider. You can also configure a different default provider, as described in the [default provider guide](/guides/default-memory). + ```python @@ -112,8 +113,13 @@ project_memory = cf.Memory( The `provider` is the underlying storage mechanism for the memory. It is responsible for storing and retrieving the memory objects. + #### Installing provider dependencies -To configure a provider, you need to install its package and either configure the provider with a string value or create an instance of the provider and pass it to the memory module. ControlFlow does not include any vector database dependencies by default, in order to keep the library lightweight. +To configure a provider, you need to install its package and either configure the provider with a string value or create an instance of the provider and pass it to the memory module. + +ControlFlow does not include any vector database dependencies by default, in order to keep the library lightweight. + + This table shows the supported providers and their respective dependencies: @@ -129,18 +135,20 @@ For straightforward provider configurations, you can pass a string value to the |Provider | Provider string | Description | | -------- | -------- | ----------------- | -| Chroma | `chroma` | An ephemeral (in-memory) database. | +| Chroma | `chroma-ephemeral` | An ephemeral (in-memory) database. | | Chroma | `chroma-db` | A persistent, local-file-based database, with a default path of `~/.controlflow/memory/chroma`. | - -For example, if `chromadb` is installed, the following code will create a memory module that uses a persistent Chroma database in the default location: +For example, if `chromadb` is installed, the following code will create a memory module that uses an ephemeral Chroma database: ```python import controlflow as cf -cf.Memory(..., provider="chroma-db") +cf.Memory(..., provider="chroma-ephemeral") ``` + +Though ControlFlow does not include any vector database dependencies by default, the default provider is `"chroma-db"`. + #### Configuring a Provider instance For more complex configurations, you can instantiate a provider directly and pass it to the memory module. diff --git a/src/controlflow/defaults.py b/src/controlflow/defaults.py index 50edaa2..39e52ad 100644 --- a/src/controlflow/defaults.py +++ b/src/controlflow/defaults.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Any, Optional, Union from pydantic import field_validator @@ -7,7 +7,6 @@ import controlflow.utilities.logging from controlflow.llm.models import BaseChatModel from controlflow.memory.memory import MemoryProvider, get_memory_provider -from controlflow.memory.providers.chroma import ChromaMemory from controlflow.utilities.general import ControlFlowModel from .agents import Agent @@ -21,10 +20,10 @@ _default_model = _get_initial_default_model() _default_history = InMemoryHistory() _default_agent = Agent(name="Marvin") -if controlflow.settings.memory_provider is not None: +try: _default_memory_provider = get_memory_provider(controlflow.settings.memory_provider) -else: - _default_memory_provider = None +except Exception: + _default_memory_provider = controlflow.settings.memory_provider class Defaults(ControlFlowModel): @@ -40,7 +39,7 @@ class Defaults(ControlFlowModel): model: Optional[Any] history: History agent: Agent - memory_provider: Optional[MemoryProvider] + memory_provider: Optional[Union[MemoryProvider, str]] # add more defaults here def __repr__(self) -> str: diff --git a/src/controlflow/memory/memory.py b/src/controlflow/memory/memory.py index 8852ece..cccf55f 100644 --- a/src/controlflow/memory/memory.py +++ b/src/controlflow/memory/memory.py @@ -1,7 +1,6 @@ import abc import re -import textwrap -from typing import Dict, List, Optional, Self +from typing import Dict, List, Optional, Union from pydantic import Field, field_validator, model_validator @@ -56,7 +55,9 @@ def __hash__(self) -> int: @field_validator("provider", mode="before") @classmethod - def validate_provider(cls, v: Optional[MemoryProvider]) -> MemoryProvider: + def validate_provider( + cls, v: Optional[Union[MemoryProvider, str]] + ) -> MemoryProvider: if isinstance(v, str): return get_memory_provider(v) if v is None: @@ -93,7 +94,7 @@ def validate_key(cls, v: str) -> str: return sanitized @model_validator(mode="after") - def _configure_provider(self) -> Self: + def _configure_provider(self): self.provider.configure(self.key) return self @@ -130,11 +131,16 @@ def get_memory_provider(provider: str) -> MemoryProvider: # --- CHROMA --- if provider.startswith("chroma"): - import chromadb + try: + import chromadb + except ImportError: + raise ImportError( + "To use Chroma as a memory provider, please install the `chromadb` package." + ) import controlflow.memory.providers.chroma as chroma_providers - if provider == "chroma": + if provider == "chroma-ephemeral": return chroma_providers.EphemeralChromaMemory() elif provider == "chroma-db": return chroma_providers.PersistentChromaMemory() diff --git a/src/controlflow/memory/providers/chroma.py b/src/controlflow/memory/providers/chroma.py index 5bf4ab1..139cb59 100644 --- a/src/controlflow/memory/providers/chroma.py +++ b/src/controlflow/memory/providers/chroma.py @@ -13,7 +13,8 @@ def get_client(**kwargs) -> chromadb.Client: class ChromaMemory(MemoryProvider): - client: chromadb.Client = Field( + model_config = dict(arbitrary_types_allowed=True) + client: chromadb.ClientAPI = Field( default_factory=lambda: chromadb.PersistentClient( path=str(controlflow.settings.home_path / "memory/chroma") ) @@ -58,6 +59,7 @@ def EphemeralChromaMemory() -> ChromaMemory: def PersistentChromaMemory(path: str = None, **kwargs) -> ChromaMemory: return ChromaMemory( client=chromadb.PersistentClient( - path=path or str(controlflow.settings.home_path / "memory/chroma") + path=path or str(controlflow.settings.home_path / "memory/chroma"), + **kwargs, ) ) diff --git a/src/controlflow/settings.py b/src/controlflow/settings.py index 6402a9c..3f962aa 100644 --- a/src/controlflow/settings.py +++ b/src/controlflow/settings.py @@ -84,7 +84,7 @@ class Settings(ControlFlowSettings): # ------------ Memory settings ------------ memory_provider: Optional[str] = Field( - default=None, + default="chroma-db", description="The default memory provider for agents.", )