feat: update with instrumentation module #27

Merged
merged 27 commits on Oct 7, 2024
e969d2b
fix: update LICENSE_HEADER
sdiazlor Sep 17, 2024
1b2aa4a
feat: update with instrumentation
sdiazlor Sep 17, 2024
6a6bcba
feat: update helpers
sdiazlor Sep 17, 2024
c431cd7
feat: update init
sdiazlor Sep 17, 2024
0166acd
feat: update versions
sdiazlor Sep 17, 2024
26977fd
feat: update tests for handler
sdiazlor Sep 17, 2024
8d6dc1a
feat: add separated test for helpers and update it
sdiazlor Sep 17, 2024
d11cf63
docs: update documentation
sdiazlor Sep 17, 2024
4708a38
Update src/argilla_llama_index/llama_index_handler.py
sdiazlor Sep 18, 2024
dc84a44
Update src/argilla_llama_index/llama_index_handler.py
sdiazlor Sep 18, 2024
459d0a0
Update src/argilla_llama_index/llama_index_handler.py
sdiazlor Sep 18, 2024
ad8b954
feat: bump argilla version (for chatfield)
sdiazlor Sep 18, 2024
4f9daa3
fix: update to ArgillaHandler
sdiazlor Sep 18, 2024
ff85b22
fix: add number_of_retrievals message and update default
sdiazlor Sep 18, 2024
105faf7
feat: add independent field for scores
sdiazlor Sep 18, 2024
67cea7c
feat: add ChatField
sdiazlor Sep 20, 2024
31e4385
docs: modify images
sdiazlor Sep 20, 2024
bcdb4c3
feat: remove chat_to_html from tests
sdiazlor Sep 20, 2024
396a59f
feat: add logic to handle events
sdiazlor Oct 2, 2024
c57c330
feat: add events to tree and show in a different color
sdiazlor Oct 2, 2024
76de037
typo
sdiazlor Oct 2, 2024
1295ecf
feat: update helper tests
sdiazlor Oct 2, 2024
f7fc322
feat: update main tests
sdiazlor Oct 2, 2024
e20ef1d
docs: update previous documentation
sdiazlor Oct 2, 2024
d761540
docs: add new tutorial
sdiazlor Oct 2, 2024
aa5c466
update license header
sdiazlor Oct 2, 2024
7e98a74
docs: add initial context
sdiazlor Oct 7, 2024
20 changes: 10 additions & 10 deletions LICENSE_HEADER
@@ -1,13 +1,13 @@
Copyright 2023-present, Argilla, Inc.
Copyright 2021-present, the Recognai S.L. team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
18 changes: 9 additions & 9 deletions README.md
@@ -22,7 +22,7 @@ If you already have deployed Argilla, you can skip this step. Otherwise, you can

## Basic Usage

To easily log your data into Argilla within your LlamaIndex workflow, you only need a simple step. Just call the Argilla global handler for Llama Index before starting production with your LLM.
To easily log your data into Argilla within your LlamaIndex workflow, you only need to initialize the span handler and attach it to the LlamaIndex dispatcher. This ensures that the predictions obtained using LlamaIndex are automatically logged to the Argilla instance.

- `dataset_name`: The name of the dataset. If the dataset does not exist, it will be created with the specified name. Otherwise, it will be updated.
- `api_url`: The URL to connect to the Argilla instance.
@@ -33,21 +33,21 @@ To easily log your data into Argilla within your LlamaIndex workflow, you only n
> For more information about the credentials, check the documentation for [users](https://docs.argilla.io/latest/how_to_guides/user/) and [workspaces](https://docs.argilla.io/latest/how_to_guides/workspace/).

```python
from llama_index.core import set_global_handler
import llama_index.core.instrumentation as instrument
from argilla_llama_index import ArgillaSpanHandler

set_global_handler(
"argilla",
dataset_name="query_model",
span_handler = ArgillaSpanHandler(
dataset_name="query_llama_index",
api_url="http://localhost:6900",
api_key="argilla.apikey",
number_of_retrievals=2,
)

dispatcher = instrument.get_dispatcher().add_span_handler(span_handler)
```
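The wiring above follows a dispatcher/span-handler pattern: a handler registered on the dispatcher is notified when spans open and close, and that is the hook this integration uses to log predictions. The pattern can be sketched in plain Python as a stand-in; all class and method names below are illustrative, not the real LlamaIndex API:

```python
# Minimal stand-in for the dispatcher/span-handler pattern described above.
# Names are illustrative only; the actual LlamaIndex instrumentation API differs.

class RecordingSpanHandler:
    """Collects (event, span_id) pairs, standing in for a handler that logs to Argilla."""

    def __init__(self):
        self.events = []

    def span_enter(self, span_id: str) -> None:
        self.events.append(("enter", span_id))

    def span_exit(self, span_id: str) -> None:
        self.events.append(("exit", span_id))


class Dispatcher:
    """Fans span notifications out to every registered handler."""

    def __init__(self):
        self.span_handlers = []

    def add_span_handler(self, handler):
        self.span_handlers.append(handler)
        return handler

    def span(self, span_id: str, fn, *args):
        # Notify handlers on entry, run the wrapped call, notify on exit.
        for h in self.span_handlers:
            h.span_enter(span_id)
        try:
            return fn(*args)
        finally:
            for h in self.span_handlers:
                h.span_exit(span_id)


dispatcher = Dispatcher()
handler = RecordingSpanHandler()
dispatcher.add_span_handler(handler)

result = dispatcher.span("query", lambda q: q.upper(), "what is argilla?")
print(result)          # WHAT IS ARGILLA?
print(handler.events)  # [('enter', 'query'), ('exit', 'query')]
```

In the real integration, `ArgillaSpanHandler` plays the role of `RecordingSpanHandler`, pushing each completed span to the Argilla dataset instead of appending to a list.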

Let's log some data into Argilla. With the code below, you can create a basic LlamaIndex workflow. We will use GPT-3.5 from OpenAI as our LLM ([OpenAI API key](https://openai.com/blog/openai-api)). Moreover, we will use an example `.txt` file obtained from the [Llama Index documentation](https://docs.llamaindex.ai/en/stable/getting_started/starter_example.html).



```python
import os

@@ -63,8 +63,8 @@ Settings.llm = OpenAI(
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Create the query engine
query_engine = index.as_query_engine()
# Create the query engine with the same similarity top k as the number of retrievals
query_engine = index.as_query_engine(similarity_top_k=2)
```

Now, let's run the `query_engine` to get a response from the model. The generated response will be logged into Argilla.
25 changes: 14 additions & 11 deletions docs/tutorials/getting_started.ipynb
@@ -6,7 +6,7 @@
"source": [
"# ✨🦙 Getting started with Argilla's LlamaIndex Integration\n",
"\n",
"In this tutorial, we will show the basic usage of this integration that allows the user to include the feedback loop that Argilla offers into the LlamaIndex ecosystem. It's based on a callback handler to be run within the LlamaIndex workflow. \n",
"In this tutorial, we will show the basic usage of this integration that allows the user to include the feedback loop that Argilla offers into the LlamaIndex ecosystem. It's based on a span handler to be run within the LlamaIndex workflow. \n",
"\n",
"Don't hesitate to check out both [LlamaIndex](https://github.com/run-llama/llama_index) and [Argilla](https://github.com/argilla-io/argilla)."
]
@@ -37,7 +37,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install argilla-llama-index"
"%pip install \"argilla-llama-index>=2.1.0\""
]
},
{
@@ -53,13 +53,15 @@
"metadata": {},
"outputs": [],
"source": [
"import llama_index.core.instrumentation as instrument\n",
"from llama_index.core import (\n",
" Settings,\n",
" VectorStoreIndex,\n",
" SimpleDirectoryReader,\n",
" set_global_handler,\n",
")\n",
"from llama_index.llms.openai import OpenAI"
"from llama_index.llms.openai import OpenAI\n",
"\n",
"from argilla_llama_index import ArgillaSpanHandler"
]
},
{
@@ -87,7 +89,7 @@
"source": [
"## Set up Argilla's LlamaIndex handler\n",
"\n",
"To easily log your data into Argilla within your LlamaIndex workflow, you only need a simple step. Just call the Argilla global handler for Llama Index before starting production with your LLM. This ensured that the predictions obtained using Llama Index are automatically logged to the Argilla instance.\n",
"To easily log your data into Argilla within your LlamaIndex workflow, you only need to initialize the span handler and attach it to the LlamaIndex dispatcher. This ensures that the predictions obtained using LlamaIndex are automatically logged to the Argilla instance.\n",
"\n",
"- `dataset_name`: The name of the dataset. If the dataset does not exist, it will be created with the specified name. Otherwise, it will be updated.\n",
"- `api_url`: The URL to connect to the Argilla instance.\n",
@@ -104,13 +106,14 @@
"metadata": {},
"outputs": [],
"source": [
"set_global_handler(\n",
" \"argilla\",\n",
" dataset_name=\"query_model\",\n",
"span_handler = ArgillaSpanHandler(\n",
" dataset_name=\"query_llama_index\",\n",
" api_url=\"http://localhost:6900\",\n",
" api_key=\"argilla.apikey\",\n",
" number_of_retrievals=2,\n",
")"
")\n",
"\n",
"dispatcher = instrument.get_dispatcher().add_span_handler(span_handler)"
]
},
{
@@ -151,8 +154,8 @@
"documents = SimpleDirectoryReader(\"../../data\").load_data()\n",
"index = VectorStoreIndex.from_documents(documents)\n",
"\n",
"# Create the query engine\n",
"query_engine = index.as_query_engine()"
"# Create the query engine with the same similarity top k as the number of retrievals\n",
"query_engine = index.as_query_engine(similarity_top_k=2)"
]
},
{
43 changes: 23 additions & 20 deletions docs/tutorials/github_rag_llamaindex_argilla.ipynb
@@ -9,7 +9,7 @@
"In this tutorial, we'll show you how to create a RAG system that can answer questions about a specific GitHub repository. As an example, we will target the [Argilla repository](https://github.com/argilla-io/argilla). This RAG system will focus on the docs of the repository, as that's where most of the natural language information about the repository can be found.\n",
"\n",
"This tutorial includes the following steps:\n",
"- Setting up the Argilla callback handler for LlamaIndex.\n",
"- Setting up the Argilla span handler for LlamaIndex.\n",
"- Initializing a GitHub client.\n",
"- Creating an index with a specific set of files from the GitHub repository of our choice.\n",
"- Creating a RAG system out of the Argilla repository, asking questions, and automatically logging the answers to Argilla.\n",
@@ -43,8 +43,8 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install \"argilla-llama-index>=2.0.0\"\n",
"!pip install \"llama-index-readers-github==0.1.9\""
"%pip install \"argilla-llama-index>=2.1.0\"\n",
"%pip install \"llama-index-readers-github==0.1.9\""
]
},
{
@@ -60,16 +60,18 @@
"metadata": {},
"outputs": [],
"source": [
"import llama_index.core.instrumentation as instrument\n",
"from llama_index.core import (\n",
" Settings,\n",
" VectorStoreIndex,\n",
" set_global_handler,\n",
")\n",
"from llama_index.llms.openai import OpenAI\n",
"from llama_index.readers.github import (\n",
" GithubClient,\n",
" GithubRepositoryReader,\n",
")"
")\n",
"\n",
"from argilla_llama_index import ArgillaSpanHandler"
]
},
{
@@ -81,7 +83,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -90,7 +92,7 @@
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n",
"openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
"\n",
"os.environ[\"GITHUB_TOKEN\"] = \"ghp_...\"\n",
"os.environ[\"GITHUB_TOKEN\"] = \"github_pat_....\"\n",
"github_token = os.getenv(\"GITHUB_TOKEN\")"
]
},
@@ -100,7 +102,7 @@
"source": [
"## Set up Argilla's LlamaIndex handler\n",
"\n",
"To easily log your data into Argilla within your LlamaIndex workflow, you only need a simple step. Just call the Argilla global handler for Llama Index before starting production with your LLM. This ensured that the predictions obtained using Llama Index are automatically logged to the Argilla instance.\n",
"To easily log your data into Argilla within your LlamaIndex workflow, you only need to initialize the span handler and attach it to the LlamaIndex dispatcher. This ensures that the predictions obtained using LlamaIndex are automatically logged to the Argilla instance.\n",
"\n",
"- `dataset_name`: The name of the dataset. If the dataset does not exist, it will be created with the specified name. Otherwise, it will be updated.\n",
"- `api_url`: The URL to connect to the Argilla instance.\n",
@@ -117,13 +119,14 @@
"metadata": {},
"outputs": [],
"source": [
"set_global_handler(\n",
" \"argilla\",\n",
" dataset_name=\"github_query_model\",\n",
"span_handler = ArgillaSpanHandler(\n",
" dataset_name=\"github_query_llama_index\",\n",
" api_url=\"http://localhost:6900\",\n",
" api_key=\"argilla.apikey\",\n",
" number_of_retrievals=2,\n",
")"
")\n",
"\n",
"dispatcher = instrument.get_dispatcher().add_span_handler(span_handler)"
]
},
{
@@ -137,7 +140,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -153,7 +156,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -171,7 +174,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -230,8 +233,8 @@
"# Load the data and create the index\n",
"index = VectorStoreIndex.from_documents(documents)\n",
"\n",
"# Create the query engine\n",
"query_engine = index.as_query_engine()"
"# Create the query engine with the same similarity top k as the number of retrievals\n",
"query_engine = index.as_query_engine(similarity_top_k=2)"
]
},
{
@@ -270,16 +273,16 @@
"output_type": "stream",
"text": [
"Question: How can I list the available datasets?\n",
"Answer: You can list all the datasets available in a workspace by utilizing the `datasets` attribute of the `Workspace` class. Additionally, you can determine the number of datasets in a workspace by using `len(workspace.datasets)`. To list the datasets, you can iterate over them and print out each dataset. Remember that dataset settings are not preloaded when listing datasets, and if you need to work with settings, you must load them explicitly for each dataset.\n",
"Answer: To list the available datasets, you can utilize the `datasets` attribute of the `Workspace` class. By importing `argilla as rg` and setting up the `client` with your API URL and key, you can access the datasets in a workspace. Simply loop through the datasets and print each one to display the list of available datasets. Remember that when listing datasets, the dataset settings are not preloaded, so you may need to load them separately if you want to work with settings while listing datasets.\n",
"----------------------------\n",
"Question: Which are the user credentials?\n",
"Answer: The user credentials in Argilla consist of a username, password, and API key.\n",
"Answer: The user credentials typically consist of a username, password, and an API key in Argilla.\n",
"----------------------------\n",
"Question: Can I use markdown in Argilla?\n",
"Answer: Yes, you can use Markdown in Argilla.\n",
"----------------------------\n",
"Question: Could you explain how to annotate datasets in Argilla?\n",
"Answer: To annotate datasets in Argilla, users can manage their data annotation projects by setting up `Users`, `Workspaces`, `Datasets`, and `Records`. By deploying Argilla on the Hugging Face Hub or with `Docker`, installing the Python SDK with `pip`, and creating the first project, users can get started in just 5 minutes. The tool allows for interacting with data in a more engaging way through features like quick labeling with filters, AI feedback suggestions, and semantic search, enabling users to focus on training models and monitoring their performance effectively.\n",
"Answer: To annotate datasets in Argilla, users can deploy the tool for free on the Hugging Face Hub or with Docker. They can then install the Python SDK with pip and create their first project. By managing Users, Workspaces, Datasets, and Records, users can set up their data annotation projects in Argilla. Additionally, users can interact with their data through engaging labeling processes that involve filters, AI feedback suggestions, and semantic search to efficiently label the data while focusing on training models and monitoring their performance.\n",
"----------------------------\n"
]
},
3 changes: 1 addition & 2 deletions pyproject.toml
@@ -18,8 +18,7 @@ classifiers = [
]
dependencies = [
"argilla >= 2.0.0, < 3.0.0",
"llama-index >= 0.10.0, < 1.0",
"llama-index-callbacks-argilla >= 0.1.4",
"llama-index >= 0.10.20, < 1.0",
"markdown >= 3.6.0",
"packaging >= 23.2",
"typing-extensions >= 4.3.0",
26 changes: 13 additions & 13 deletions src/argilla_llama_index/__init__.py
@@ -1,19 +1,19 @@
# Copyright 2023-present, Argilla, Inc.
# Copyright 2021-present, the Recognai S.L. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "2.0.0"
__version__ = "2.1.0"

from argilla_llama_index.llama_index_handler import ArgillaCallbackHandler
from argilla_llama_index.llama_index_handler import ArgillaSpanHandler

__all__ = ["ArgillaCallbackHandler"]
__all__ = ["ArgillaSpanHandler"]