-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #151 from helixml/evals
User provided feedback
- Loading branch information
Showing
9 changed files
with
264 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package helix | ||
|
||
import ( | ||
"github.com/helixml/helix/api/pkg/evals" | ||
"github.com/spf13/cobra" | ||
) | ||
|
||
// evalTargets is the destination of the "target" string-slice flag that
// newEvalsCommand registers on the evals command.
// NOTE(review): nothing in the visible code reads this value yet.
var evalTargets []string | ||
|
||
func newEvalsCommand() *cobra.Command { | ||
var evalsCmd = &cobra.Command{ | ||
Use: "evals", | ||
Short: "A CLI tool for evaluating finetuned LLMs", | ||
Run: func(cmd *cobra.Command, args []string) { | ||
evals.Run() | ||
}, | ||
} | ||
evalsCmd.Flags().StringSliceVar(&evalTargets, "target", []string{}, | ||
"Target(s) to use, defaults to all", | ||
) | ||
|
||
return evalsCmd | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package evals | ||
|
||
import "log" | ||
|
||
// Run is the entry point of the evals package. It currently only writes a
// greeting through the standard logger.
func Run() {
	const greeting = "hello from evals"
	log.Print(greeting)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# End-to-end testing of helix's ability to learn from documents. | ||
# To cover fine-tuning, RAG and combinations thereof. | ||
|
||
# manually extract these examples from the database and construct qapairs | ||
# manually to be graded automatically by gpt4 for similarity | ||
manual_evals: | ||
- name: junior-doctors | ||
download: | ||
- https://www.theguardian.com/society/2023/dec/05/junior-doctors-in-england-to-stage-more-strikes | ||
question: what are the doctors going to do? | ||
expected_answer: the doctors are going to go on strike | ||
checker: gpt4-similarity | ||
|
||
|
||
# manually extract these urls from the database and have gpt4 construct qapairs | ||
# to feed back into inference mode of the model and then auto-grade the result | ||
automatic_evals: | ||
- name: junior-doctors | ||
download: | ||
- https://www.theguardian.com/society/2023/dec/05/junior-doctors-in-england-to-stage-more-strikes | ||
checker: gpt4-autoqa | ||
|
||
|
||
checkers: | ||
- name: gpt4-similarity | ||
prompt: | | ||
Given the context | ||
[BEGIN_DATA] | ||
{{.Context}} | ||
[END_DATA] | ||
How similar is the real answer: | ||
[BEGIN_REAL_ANSWER] | ||
{{.RealAnswer}} | ||
[END_REAL_ANSWER] | ||
To the expected answer: | ||
[BEGIN_EXPECTED_ANSWER] | ||
{{.ExpectedAnswer}} | ||
[END_EXPECTED_ANSWER] | ||
Answer with one of the following values: Good, OK, Bad | ||
values: | ||
Bad: 0.0 | ||
OK: 0.5 | ||
Good: 1.0 | ||
|
||
|
||
- name: gpt4-autoqa | ||
prompt: | | ||
Given the context | ||
[BEGIN_DATA] | ||
{{.Context}} | ||
[END_DATA] | ||
Construct a list of questions and expected answers about the article. | ||
Answer in the following schema: | ||
```json | ||
[{ | ||
"question": "...", | ||
"answer": "..." | ||
}, | ||
{ | ||
"question": "...", | ||
"answer": "..." | ||
} | ||
] | ||
``` | ||
# using the resulting qapairs, feed them back into inference mode in the | ||
# model and check them with gpt4-similarity | ||
|
||
|
||
finetune_targets: | ||
- name: production | ||
api_url: https://api.tryhelix.ai/v1 | ||
token_from_env: HELIX_API_KEY | ||
|
||
|
||
llm_targets: | ||
- name: openai | ||
api_url: https://api.openai.com/v1 | ||
model: gpt-4-1106-preview | ||
token_from_env: OPENAI_API_KEY | ||
- name: together-mixtral | ||
api_url: https://api.together.xyz/v1 | ||
model: mistralai/Mixtral-8x7B-Instruct-v0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters