From 17884a738a11626386096e293acdabf379f39b6b Mon Sep 17 00:00:00 2001
From: ayulockin
Date: Thu, 9 May 2024 12:26:27 +0530
Subject: [PATCH] add weave Model as evaluator

---
 src/wandbot/evaluation/weave_eval/main.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/wandbot/evaluation/weave_eval/main.py b/src/wandbot/evaluation/weave_eval/main.py
index 0e485e9..d4ab641 100644
--- a/src/wandbot/evaluation/weave_eval/main.py
+++ b/src/wandbot/evaluation/weave_eval/main.py
@@ -6,6 +6,7 @@ import weave
 import asyncio
 
 from weave import Evaluation
+from weave import Model
 from llama_index.llms.openai import OpenAI
 
 from wandbot.evaluation.config import EvalConfig
@@ -47,7 +48,6 @@ async def get_answer(question: str, application: str = "api-eval") -> str:
 @weave.op()
 async def get_eval_record(
     question: str,
-    ground_truth: str,
 ) -> dict:
     response = await get_answer(question)
     response = json.loads(response)
@@ -63,6 +63,16 @@ async def get_eval_record(
     }
 
 
+class EvaluatorModel(Model):
+    eval_judge_model: str = config.eval_judge_model
+
+    @weave.op()
+    async def predict(self, question: str) -> dict:
+        # Model logic goes here
+        prediction = await get_eval_record(question)
+        return prediction
+
+
 @weave.op()
 async def get_answer_correctness(
     question: str,
@@ -100,4 +110,4 @@ async def get_answer_correctness(
 )
 
 if __name__ == "__main__":
-    asyncio.run(evaluation.evaluate(get_eval_record))
+    asyncio.run(evaluation.evaluate(EvaluatorModel()))
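
For reference, a minimal sketch (not part of the commit) of how the new EvaluatorModel subclass could be driven by a weave Evaluation from outside this module. The import path, project name, dataset row, and scorer wiring below are illustrative assumptions, not wandbot's actual configuration:

    # Hypothetical driver script; assumes the patched module is importable as
    # wandbot.evaluation.weave_eval.main and that get_answer_correctness
    # accepts the dataset columns weave passes to it.
    import asyncio

    import weave
    from weave import Evaluation

    from wandbot.evaluation.weave_eval.main import (
        EvaluatorModel,
        get_answer_correctness,
    )

    weave.init("wandbot-eval")  # project name is an assumption

    # Each row's "question" is passed to EvaluatorModel.predict by name;
    # other columns (e.g. "ground_truth") are available to scorers that
    # declare them as parameters.
    dataset = [
        {"question": "How do I log images to W&B?", "ground_truth": "..."},
    ]

    evaluation = Evaluation(dataset=dataset, scorers=[get_answer_correctness])
    asyncio.run(evaluation.evaluate(EvaluatorModel()))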