From a802baaa5ba83cd4051f1ef8eacb76fc6517c398 Mon Sep 17 00:00:00 2001
From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com>
Date: Mon, 23 Sep 2024 18:28:47 -0400
Subject: [PATCH] Add chroma default
---
docs/guides/default-memory.mdx | 33 +++++++---------------
docs/patterns/memory.mdx | 20 +++++++++----
src/controlflow/defaults.py | 11 ++++----
src/controlflow/memory/memory.py | 18 ++++++++----
src/controlflow/memory/providers/chroma.py | 6 ++--
src/controlflow/settings.py | 2 +-
6 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/docs/guides/default-memory.mdx b/docs/guides/default-memory.mdx
index 520a26c..44ae38c 100644
--- a/docs/guides/default-memory.mdx
+++ b/docs/guides/default-memory.mdx
@@ -5,39 +5,40 @@ description: Set up a default persistent memory provider for your agents
icon: brain
---
-This guide focuses on configuring a default memory provider for ControlFlow. For detailed information on using memory, including custom providers and available configurations, please refer to the [Memory doc](/patterns/memory).
-
ControlFlow's [memory](/patterns/memory) feature allows agents to store and retrieve information across multiple workflows. Memory modules are backed by a vector database, configured using a `MemoryProvider`.
Setting up a default provider simplifies the process of creating memory objects throughout your application. Once configured, you can create memory objects without specifying a provider each time.
+
+While ControlFlow does not include any vector database dependencies by default, the default provider is set to `"chroma-db"`. This means that if you install the `chromadb` package, your memory modules will work without any additional configuration.
+
+
## Install dependencies
To use a provider, you must first install its dependencies. Please refer to the [Memory doc](/patterns/memory) to see all supported providers and their required dependencies.
-For example, to use the [Chroma](https://trychroma.com/) provider, you need to install `chromadb`:
+For example, to use the default [Chroma](https://trychroma.com/) provider, you need to install `chromadb`:
```bash
pip install chromadb
```
-
-## Configure default provider
+## Configure a default provider
There are two ways to set up a default provider: using a string setting for common defaults, or instantiating a custom provider. Here, we'll use a persistent Chroma database as our example.
### String configurations
-For simple provider setups, you can use a string value with the `provider` parameter:
+For simple provider setups, you can modify ControlFlow's default settings using a string value. The default value is `"chroma-db"`, which will create a persistent Chroma database. To change it, for example to an ephemeral (in-memory) Chroma database:
```bash Environment variable
-export CONTROLFLOW_MEMORY_PROVIDER="chroma-db"
+export CONTROLFLOW_MEMORY_PROVIDER="chroma-ephemeral"
```
```python Runtime
import controlflow as cf
-cf.settings.memory_provider = "chroma-db"
+cf.settings.memory_provider = "chroma-ephemeral"
```
@@ -50,24 +51,10 @@ For more advanced setups, instantiate a provider with custom settings and assign
```python
import controlflow as cf
from controlflow.memory.providers.chroma import ChromaMemory
+import chromadb
# Set the default provider
cf.defaults.memory_provider = ChromaMemory(
client=chromadb.PersistentClient(path="/custom/path"),
- collection_name="my-collection-{key}"
-)
-```
-
-## Using the default provider
-
-Once you've set a default provider, you can create memory objects that use it automatically:
-
-```python
-import controlflow as cf
-
-weather_memory = cf.Memory(
- key="weather",
- instructions="Store information about the weather."
)
```
-
diff --git a/docs/patterns/memory.mdx b/docs/patterns/memory.mdx
index eddcf31..0c558f6 100644
--- a/docs/patterns/memory.mdx
+++ b/docs/patterns/memory.mdx
@@ -29,7 +29,8 @@ To create a memory object, you need to provide a `key` and `instructions`. The `
ControlFlow does not include any vector database dependencies by default to keep the library lightweight, so you must [install and configure](#provider) a provider before creating a memory object.
-The examples below assume you have [configured a default provider](/guides/default-memory), so they do not specify a provider explicitly.
+The examples in this document will work if you install `chromadb`, as Chroma is the default provider. You can also configure a different default provider, as described in the [default provider guide](/guides/default-memory).
+
```python
@@ -112,8 +113,13 @@ project_memory = cf.Memory(
The `provider` is the underlying storage mechanism for the memory. It is responsible for storing and retrieving the memory objects.
+
#### Installing provider dependencies
-To configure a provider, you need to install its package and either configure the provider with a string value or create an instance of the provider and pass it to the memory module. ControlFlow does not include any vector database dependencies by default, in order to keep the library lightweight.
+To use a provider, you need to install its package and then either specify the provider with a string value or create an instance of the provider and pass it to the memory module.
+
+ControlFlow does not include any vector database dependencies by default, in order to keep the library lightweight.
+
+
This table shows the supported providers and their respective dependencies:
@@ -129,18 +135,20 @@ For straightforward provider configurations, you can pass a string value to the
|Provider | Provider string | Description |
| -------- | -------- | ----------------- |
-| Chroma | `chroma` | An ephemeral (in-memory) database. |
+| Chroma | `chroma-ephemeral` | An ephemeral (in-memory) database. |
| Chroma | `chroma-db` | A persistent, local-file-based database, with a default path of `~/.controlflow/memory/chroma`. |
-
-For example, if `chromadb` is installed, the following code will create a memory module that uses a persistent Chroma database in the default location:
+For example, if `chromadb` is installed, the following code will create a memory module that uses an ephemeral Chroma database:
```python
import controlflow as cf
-cf.Memory(..., provider="chroma-db")
+cf.Memory(..., provider="chroma-ephemeral")
```
+
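+Though ControlFlow does not include any vector database dependencies by default, the default provider is `"chroma-db"`, so memory modules work without any additional configuration once the `chromadb` package is installed.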
+
#### Configuring a Provider instance
For more complex configurations, you can instantiate a provider directly and pass it to the memory module.
diff --git a/src/controlflow/defaults.py b/src/controlflow/defaults.py
index 50edaa2..39e52ad 100644
--- a/src/controlflow/defaults.py
+++ b/src/controlflow/defaults.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any, Optional, Union
from pydantic import field_validator
@@ -7,7 +7,6 @@
import controlflow.utilities.logging
from controlflow.llm.models import BaseChatModel
from controlflow.memory.memory import MemoryProvider, get_memory_provider
-from controlflow.memory.providers.chroma import ChromaMemory
from controlflow.utilities.general import ControlFlowModel
from .agents import Agent
@@ -21,10 +20,10 @@
_default_model = _get_initial_default_model()
_default_history = InMemoryHistory()
_default_agent = Agent(name="Marvin")
-if controlflow.settings.memory_provider is not None:
+try:
_default_memory_provider = get_memory_provider(controlflow.settings.memory_provider)
-else:
- _default_memory_provider = None
+except Exception:
+ _default_memory_provider = controlflow.settings.memory_provider
class Defaults(ControlFlowModel):
@@ -40,7 +39,7 @@ class Defaults(ControlFlowModel):
model: Optional[Any]
history: History
agent: Agent
- memory_provider: Optional[MemoryProvider]
+ memory_provider: Optional[Union[MemoryProvider, str]]
# add more defaults here
def __repr__(self) -> str:
diff --git a/src/controlflow/memory/memory.py b/src/controlflow/memory/memory.py
index 8852ece..cccf55f 100644
--- a/src/controlflow/memory/memory.py
+++ b/src/controlflow/memory/memory.py
@@ -1,7 +1,6 @@
import abc
import re
-import textwrap
-from typing import Dict, List, Optional, Self
+from typing import Dict, List, Optional, Union
from pydantic import Field, field_validator, model_validator
@@ -56,7 +55,9 @@ def __hash__(self) -> int:
@field_validator("provider", mode="before")
@classmethod
- def validate_provider(cls, v: Optional[MemoryProvider]) -> MemoryProvider:
+ def validate_provider(
+ cls, v: Optional[Union[MemoryProvider, str]]
+ ) -> MemoryProvider:
if isinstance(v, str):
return get_memory_provider(v)
if v is None:
@@ -93,7 +94,7 @@ def validate_key(cls, v: str) -> str:
return sanitized
@model_validator(mode="after")
- def _configure_provider(self) -> Self:
+ def _configure_provider(self):
self.provider.configure(self.key)
return self
@@ -130,11 +131,16 @@ def get_memory_provider(provider: str) -> MemoryProvider:
# --- CHROMA ---
if provider.startswith("chroma"):
- import chromadb
+ try:
+ import chromadb
+ except ImportError:
+ raise ImportError(
+ "To use Chroma as a memory provider, please install the `chromadb` package."
+ )
import controlflow.memory.providers.chroma as chroma_providers
- if provider == "chroma":
+ if provider == "chroma-ephemeral":
return chroma_providers.EphemeralChromaMemory()
elif provider == "chroma-db":
return chroma_providers.PersistentChromaMemory()
diff --git a/src/controlflow/memory/providers/chroma.py b/src/controlflow/memory/providers/chroma.py
index 5bf4ab1..139cb59 100644
--- a/src/controlflow/memory/providers/chroma.py
+++ b/src/controlflow/memory/providers/chroma.py
@@ -13,7 +13,8 @@ def get_client(**kwargs) -> chromadb.Client:
class ChromaMemory(MemoryProvider):
- client: chromadb.Client = Field(
+ model_config = dict(arbitrary_types_allowed=True)
+ client: chromadb.ClientAPI = Field(
default_factory=lambda: chromadb.PersistentClient(
path=str(controlflow.settings.home_path / "memory/chroma")
)
@@ -58,6 +59,7 @@ def EphemeralChromaMemory() -> ChromaMemory:
def PersistentChromaMemory(path: str = None, **kwargs) -> ChromaMemory:
return ChromaMemory(
client=chromadb.PersistentClient(
- path=path or str(controlflow.settings.home_path / "memory/chroma")
+ path=path or str(controlflow.settings.home_path / "memory/chroma"),
+ **kwargs,
)
)
diff --git a/src/controlflow/settings.py b/src/controlflow/settings.py
index 6402a9c..3f962aa 100644
--- a/src/controlflow/settings.py
+++ b/src/controlflow/settings.py
@@ -84,7 +84,7 @@ class Settings(ControlFlowSettings):
# ------------ Memory settings ------------
memory_provider: Optional[str] = Field(
- default=None,
+ default="chroma-db",
description="The default memory provider for agents.",
)