
[Sync] Sync with internal codes 2024.06.28 (#1279)
Leymore authored Jun 28, 2024
1 parent 842fb1c commit a32f21a
Showing 284 changed files with 6,238 additions and 1,295 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -102,6 +102,7 @@ configs/sft_cfg/60B/*
configs/sft_cfg/100B/*

configs/cky/
+configs/_internal_legacy*
# in case llama clone in the opencompass
llama/

1 change: 1 addition & 0 deletions .pre-commit-config-zh-cn.yaml
@@ -35,6 +35,7 @@ repos:
exclude: |
(?x)^(
.*\.jsonl|
+opencompass/datasets/subjective/mtbench101.py|
configs/
)
- repo: https://gitee.com/openmmlab/mirrors-pre-commit-hooks
4 changes: 2 additions & 2 deletions configs/dataset_collections/chat_OC15.py
@@ -5,8 +5,8 @@
from ..datasets.cmmlu.cmmlu_gen_c13365 import cmmlu_datasets
from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
from ..datasets.GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import GaokaoBench_datasets
-from ..datasets.triviaqa.triviaqa_wiki_1shot_gen_eaf81e import triviaqa_datasets
-from ..datasets.nq.nq_open_1shot_gen_01cf41 import nq_datasets
+from ..datasets.triviaqa.triviaqa_wiki_1shot_gen_bc5f21 import triviaqa_datasets
+from ..datasets.nq.nq_open_1shot_gen_2e45e5 import nq_datasets
from ..datasets.race.race_gen_69ee4f import race_datasets
from ..datasets.winogrande.winogrande_5shot_gen_b36770 import winogrande_datasets
from ..datasets.hellaswag.hellaswag_10shot_gen_e42710 import hellaswag_datasets
57 changes: 57 additions & 0 deletions configs/datasets/CHARM/charm_reason_ppl_3da4de.py
@@ -0,0 +1,57 @@
import os

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.datasets import CharmDataset
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator

charm_tasks = [
    ['Chinese_Anachronisms_Judgment', 'AB'],
    ['Chinese_Movie_and_Music_Recommendation', 'ABCD'],
    ['Chinese_Natural_Language_Inference', 'ABC'],
    ['Chinese_Reading_Comprehension', 'ABCD'],
    ['Chinese_Sequence_Understanding', 'ABCD'],
    ['Chinese_Sport_Understanding', 'AB'],
    ['Chinese_Time_Understanding', 'ABCD'],
    ['Global_Anachronisms_Judgment', 'AB'],
    ['Global_Movie_and_Music_Recommendation', 'ABCD'],
    ['Global_Natural_Language_Inference', 'ABC'],
    ['Global_Reading_Comprehension', 'ABCD'],
    ['Global_Sequence_Understanding', 'ABCD'],
    ['Global_Sport_Understanding', 'AB'],
    ['Global_Time_Understanding', 'ABCDEF'],
]

charm_reason_datasets = []
for task_name, options in charm_tasks:

    with open(os.path.join(os.path.dirname(__file__), 'few-shot-examples', f'{task_name}_Direct.txt'), 'r') as f:
        few_shot_example = f.read()

    charm_reason_reader_cfg = dict(input_columns=['input'], output_column='target')

    charm_reason_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template={
                f'({opt})': f'{few_shot_example}\n{{input}}\nA: {opt}' for opt in options
            },
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=PPLInferencer),
    )

    charm_reason_eval_cfg = dict(evaluator=dict(type=AccwithDetailsEvaluator))

    charm_reason_datasets.append(
        dict(
            type=CharmDataset,
            abbr=f'charm-reason-{task_name}_Direct',
            path='data/CHARM/reasoning',
            name=task_name,
            reader_cfg=charm_reason_reader_cfg,
            infer_cfg=charm_reason_infer_cfg,
            eval_cfg=charm_reason_eval_cfg,
        )
    )
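
Note: for a two-option task such as Chinese_Anachronisms_Judgment, the template dict comprehension above expands to one PPL candidate per option letter. A standalone sketch of what it produces (the few-shot text below is a placeholder, not the real file contents):

# Standalone illustration, not part of the config.
few_shot_example = '<contents of Chinese_Anachronisms_Judgment_Direct.txt>'
options = 'AB'
template = {
    f'({opt})': f'{few_shot_example}\n{{input}}\nA: {opt}' for opt in options
}
# => {'(A)': '...\n{input}\nA: A', '(B)': '...\n{input}\nA: B'}
# PPLInferencer fills {input} with each question and scores every candidate
# by perplexity; AccwithDetailsEvaluator then counts a hit when the
# lowest-perplexity option matches the 'target' column.
print(sorted(template))  # ['(A)', '(B)']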
@@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HFDataset, HumanEvalEvaluator, humaneval_postprocess

apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
@@ -17,7 +17,7 @@
inferencer=dict(type=GenInferencer))

apps_eval_cfg = dict(
-evaluator=dict(type=HumanEvaluator),
+evaluator=dict(type=HumanEvalEvaluator),
pred_role='BOT',
k=[1, 10, 100], # the parameter only for humaneval
pred_postprocessor=dict(type=humaneval_postprocess),
@@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HFDataset, HumanEvalEvaluator, humaneval_postprocess

apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
@@ -24,7 +24,7 @@
inferencer=dict(type=GenInferencer))

apps_eval_cfg = dict(
-evaluator=dict(type=HumanEvaluator),
+evaluator=dict(type=HumanEvalEvaluator),
pred_role='BOT',
k=[1, 10, 100], # the parameter only for humaneval
pred_postprocessor=dict(type=humaneval_postprocess),
@@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HFDataset, HumanEvalEvaluator, humaneval_postprocess

apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
@@ -15,7 +15,7 @@
inferencer=dict(type=GenInferencer, max_out_len=512))

apps_eval_cfg = dict(
-evaluator=dict(type=HumanEvaluator),
+evaluator=dict(type=HumanEvalEvaluator),
k=[1, 10, 100],
pred_postprocessor=dict(type=humaneval_postprocess),
)
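
Note: the same HumanEvaluator -> HumanEvalEvaluator rename recurs in all three apps configs above. A consolidated sketch of the post-rename eval config, assuming (as these hunks indicate) that HumanEvalEvaluator is exported from opencompass.datasets alongside humaneval_postprocess:

# Sketch of the updated eval config after the rename; mirrors the hunks above.
from opencompass.datasets import HumanEvalEvaluator, humaneval_postprocess

apps_eval_cfg = dict(
    evaluator=dict(type=HumanEvalEvaluator),
    k=[1, 10, 100],  # pass@k cutoffs, only used for humaneval-style scoring
    pred_postprocessor=dict(type=humaneval_postprocess),
)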
2 changes: 1 addition & 1 deletion configs/datasets/collections/base_core.py
@@ -14,7 +14,7 @@
from ..gsm8k.gsm8k_gen_ee684f import gsm8k_datasets
from ..math.math_evaluatorv2_gen_2f4a71 import math_datasets
from ..TheoremQA.TheoremQA_post_v2_gen_2c2583 import TheoremQA_datasets
-from ..humaneval.humaneval_gen_d2537e import humaneval_datasets
+from ..humaneval.deprecated_humaneval_gen_d2537e import humaneval_datasets
from ..mbpp.deprecated_sanitized_mbpp_gen_cb43ef import sanitized_mbpp_datasets

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
2 changes: 1 addition & 1 deletion configs/datasets/collections/base_medium_llama.py
@@ -6,7 +6,7 @@
from ..agieval.agieval_mixed_713d14 import agieval_datasets
from ..GaokaoBench.GaokaoBench_mixed_9af5ee import GaokaoBench_datasets
from ..bbh.bbh_gen_5b92b0 import bbh_datasets
-from ..humaneval.humaneval_gen_a82cae import humaneval_datasets
+from ..humaneval.deprecated_humaneval_gen_a82cae import humaneval_datasets
from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
4 changes: 2 additions & 2 deletions configs/datasets/collections/chat_core.py
@@ -5,8 +5,8 @@
from ..cmmlu.cmmlu_gen_c13365 import cmmlu_datasets
from ..ceval.ceval_internal_gen_2daf24 import ceval_datasets
from ..GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import GaokaoBench_datasets
-from ..triviaqa.triviaqa_wiki_1shot_gen_eaf81e import triviaqa_datasets
-from ..nq.nq_open_1shot_gen_01cf41 import nq_datasets
+from ..triviaqa.triviaqa_wiki_1shot_gen_bc5f21 import triviaqa_datasets
+from ..nq.nq_open_1shot_gen_2e45e5 import nq_datasets
from ..race.race_gen_69ee4f import race_datasets
from ..winogrande.winogrande_5shot_gen_6447e6 import winogrande_datasets
from ..hellaswag.hellaswag_10shot_gen_e42710 import hellaswag_datasets
4 changes: 2 additions & 2 deletions configs/datasets/collections/leaderboard/qwen.py
@@ -42,8 +42,8 @@
from ...siqa.siqa_ppl_e8d8c5 import siqa_datasets
from ...math.math_gen_265cce import math_datasets
from ...gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
-from ...drop.drop_gen_8a9ed9 import drop_datasets
-from ...humaneval.humaneval_gen_a82cae import humaneval_datasets
+from ...drop.deprecated_drop_gen_8a9ed9 import drop_datasets
+from ...humaneval.deprecated_humaneval_gen_a82cae import humaneval_datasets
from ...mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ...bbh.bbh_gen_5bf00b import bbh_datasets

4 changes: 2 additions & 2 deletions configs/datasets/collections/leaderboard/qwen_chat.py
@@ -42,8 +42,8 @@
from ...siqa.siqa_ppl_e8d8c5 import siqa_datasets
from ...math.math_gen_265cce import math_datasets
from ...gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
-from ...drop.drop_gen_8a9ed9 import drop_datasets
-from ...humaneval.humaneval_gen_a82cae import humaneval_datasets
+from ...drop.deprecated_drop_gen_8a9ed9 import drop_datasets
+from ...humaneval.deprecated_humaneval_gen_a82cae import humaneval_datasets
from ...mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
from ...bbh.bbh_gen_5b92b0 import bbh_datasets

@@ -0,0 +1,57 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import AgentInferencer
from opencompass.datasets import CIBenchDataset, CIBenchEvaluator

libs = [
    '/lightgbm',
    '/matplotlib',
    '/nltk',
    '/opencv',
    '/pandas',
    '/pytorch',
    '/scipy',
    '/seaborn',
    '/sklearn',
    '/tensorflow',
    '_chinese/lightgbm',
    '_chinese/matplotlib',
    '_chinese/nltk',
    '_chinese/opencv',
    '_chinese/pandas',
    '_chinese/pytorch',
    '_chinese/scipy',
    '_chinese/seaborn',
    '_chinese/sklearn',
    '_chinese/tensorflow',
]


cibench_datasets = []
for lib in libs:
    cibench_reader_cfg = dict(
        input_columns=['questions'], output_column='references', train_split='test', test_split='test'
    )

    cibench_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template='{questions}',
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=AgentInferencer, infer_mode='every'),
    )

    cibench_eval_cfg = dict(evaluator=dict(type=CIBenchEvaluator), pred_role='BOT')

    cibench_datasets.append(
        dict(
            abbr=f'cibench_template{lib}',
            type=CIBenchDataset,
            path=f'data/compassbench_v1.1/agent-cibench/cibench_template{lib}',
            internet_check=False,
            reader_cfg=cibench_reader_cfg,
            infer_cfg=cibench_infer_cfg,
            eval_cfg=cibench_eval_cfg,
        )
    )
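
Note: each libs entry is a suffix for both the dataset abbreviation and the data path, so the twenty entries above yield twenty cibench datasets (ten English, ten Chinese). A standalone sketch of the naming scheme:

# Standalone illustration of the abbr/path scheme; two of the twenty entries.
for lib in ['/pandas', '_chinese/pandas']:
    print(f'cibench_template{lib}')
    print(f'data/compassbench_v1.1/agent-cibench/cibench_template{lib}')
# cibench_template/pandas
# data/compassbench_v1.1/agent-cibench/cibench_template/pandas
# cibench_template_chinese/pandas
# data/compassbench_v1.1/agent-cibench/cibench_template_chinese/pandas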
@@ -0,0 +1,56 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import ChatInferencer
from opencompass.openicl.icl_evaluator import TEvalEvaluator
from opencompass.datasets import teval_postprocess, TEvalDataset

plugin_eval_subject_mapping = {
    'instruct': ['instruct_v1'],
    'instruct_zh': ['instruct_v1_zh'],
    'plan': ['plan_json_v1', 'plan_str_v1'],
    'plan_zh': ['plan_json_v1_zh', 'plan_str_v1_zh'],
    'review': ['review_str_v1'],
    'review_zh': ['review_str_v1_zh'],
    'reason_retrieve_understand': ['reason_retrieve_understand_json_v1'],
    'reason_retrieve_understand_zh': ['reason_retrieve_understand_json_v1_zh'],
    'reason': ['reason_str_v1'],
    'reason_zh': ['reason_str_v1_zh'],
    'retrieve': ['retrieve_str_v1'],
    'retrieve_zh': ['retrieve_str_v1_zh'],
    'understand': ['understand_str_v1'],
    'understand_zh': ['understand_str_v1_zh'],
}

plugin_eval_datasets = []
for _name in plugin_eval_subject_mapping:
    plugin_eval_reader_cfg = dict(input_columns=['prompt'], output_column='ground_truth')
    plugin_eval_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                round=[
                    dict(role='HUMAN', prompt='{prompt}'),
                ],
            ),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=ChatInferencer),
    )
    plugin_eval_eval_cfg = dict(
        evaluator=dict(type=TEvalEvaluator, subset=_name),
        pred_postprocessor=dict(type=teval_postprocess),
        num_gpus=1,
    )

    for subset in plugin_eval_subject_mapping[_name]:
        plugin_eval_datasets.append(
            dict(
                abbr='plugin_eval-mus-p10-' + subset,
                type=TEvalDataset,
                path='data/compassbench_v1.1/agent-teval-p10',
                name=subset,
                reader_cfg=plugin_eval_reader_cfg,
                infer_cfg=plugin_eval_infer_cfg,
                eval_cfg=plugin_eval_eval_cfg,
            )
        )
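
Note: the evaluator is built once per subject group (subset=_name), while one dataset entry is appended per concrete subset, so 'plan' contributes two datasets sharing one evaluator config. A standalone sketch of the abbreviations the nested loop produces, using a truncated copy of the mapping:

# Standalone illustration; truncated copy of plugin_eval_subject_mapping.
mapping = {
    'plan': ['plan_json_v1', 'plan_str_v1'],
    'plan_zh': ['plan_json_v1_zh', 'plan_str_v1_zh'],
}
for _name, subsets in mapping.items():
    for subset in subsets:
        print('plugin_eval-mus-p10-' + subset)
# plugin_eval-mus-p10-plan_json_v1
# plugin_eval-mus-p10-plan_str_v1
# plugin_eval-mus-p10-plan_json_v1_zh
# plugin_eval-mus-p10-plan_str_v1_zh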
