open-compass · liushz · Oct 8, 2024 · Oct 9, 2024
diff --git a/configs/datasets/gaokao_math/gaokao_math_gen_9b076f.py b/configs/datasets/gaokao_math/gaokao_math_gen_9b076f.py
@@ -0,0 +1,48 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import GaoKaoMATHDataset, GaoKaoMATHEvaluator
+
+# Prompt (in Chinese): asks the model to act as a math grading expert and output only the key answer extracted from a response.
+MATH_CN_PROMPT="""
+你是一个数学阅卷专家，任务是从给定的回答句子中提取精确的关键答案。你必须只提供提取的关键答案，不包括任何额外的文字。
+—
+我将为你提供一个问题、回答句子和问题类型。回答句子是对所提供问题的回应。利用提供的信息，你必须准确而精确地确定并从回答句子中提取预期的关键答案。请不要对问题发表主观看法。
+
+对于单选题，答案应该是选项字母，例如 "A"；
+对于多选题，答案应该是一个选项字母的列表，例如 "A" 或 "A", "B", "C"；
+对于填空题，答案应该是一个填入空白处的答案列表，列表的数量应该与问题中的空白数量相同，例如 ["$$\\frac{{1}}{{2}}$$"] 或 ["$$\\frac{{1}}{{2}}$$", "2"]。
+对于问答题，类似填空题，为每个小问抽出相应答案，例如 ["$$\\frac{{1}}{{2}}$$"] 或 ["$$\\frac{{1}}{{2}}$$", "2"]。
+
+如果回答句子提供了多个不同的答案，请仔细判断后面提供的答案是否是对前面答案的修正或修改。如果是这样，提取这个修正或修改后的答案作为最终答案。相反，如果回答句子在多个答案之间波动而没有明确的最终答案，你应该输出 [No valid answer]。
+—
+问题类型: {question_type}
+原始问题: {question}
+回答: {response}
+提取的关键答案:
+"""
+
+gaokao_math_reader_cfg = dict(input_columns=['question', 'response', 'question_type'], output_column='extract_answer')  # input column names match the {question_type}/{question}/{response} placeholders in MATH_CN_PROMPT
+
+# One HUMAN turn built from MATH_CN_PROMPT, ZeroRetriever as the retriever, GenInferencer with max_out_len=512.
+gaokao_math_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(role='HUMAN', prompt=MATH_CN_PROMPT),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+gaokao_math_eval_cfg = dict(
+    evaluator=dict(type=GaoKaoMATHEvaluator, url='http://0.0.0.0:23333/v1'))  # NOTE(review): url looks like a local placeholder endpoint - confirm/replace before running
+
+gaokao_math_datasets = [
+    dict(
+        type=GaoKaoMATHDataset,
+        abbr='GaoKaoMATH',
+        path='./data/gaokao_math/test_2k.json',  # relative path; NOTE(review): presumably resolved against the working directory - confirm
+        reader_cfg=gaokao_math_reader_cfg,
+        infer_cfg=gaokao_math_infer_cfg,
+        eval_cfg=gaokao_math_eval_cfg)
+]
diff --git a/configs/datasets/math/math_0shot_llm_judge_gen_393424.py b/configs/datasets/math/math_0shot_llm_judge_gen_393424.py
@@ -0,0 +1,78 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess_v2, GaoKaoMATHEvaluator
+from opencompass.utils.model_postprocessors import naive_model_postprocess, xfinder_postprocess
+from opencompass.utils.postprocessors.naive import MATH_NAVIE_PROMPT_TEMPLATE
+
+# ----------------------------- Eval Parameters -----------------------------
+## Postprocess function
+post_func = 're' # 're', 'xfinder_model', 'naive_model'
+
+## Evaluate function
+eval_func = 'naive_model' # 're', 'naive_model'
+
+## Model api url
+xfinder_url = 'http://0.0.0.0:23333/v1' # for 'xFinder-qwen1505' if post_func is 'xfinder_model'
+naive_model_name = 'Qwen/Qwen2.5-72B-Instruct' # replace with your model name
+naive_model_url = ['http://22.8.6.22:23333/v1', 'http://22.8.67.84:23333/v1', 'http://22.8.72.81:23333/v1', 'http://22.9.42.143:23333/v1'] # Multiple API endpoints for acceleration; NOTE(review): these hosts look deployment-specific - replace with your own
+
+# ----------------------------- Detailed Config -----------------------------
+
+math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
+
+math_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(
+            round=[
+                dict(role='HUMAN', prompt='{problem}\nPlease reason step by step, and put your final answer within \\boxed{}.'),
+            ]
+        ),
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=1024),
+)
+# Pick the prediction postprocessor named by post_func. NOTE(review): there is no else branch, so an unrecognized value leaves pred_postprocessor undefined and math_eval_cfg below raises NameError.
+if post_func == 're':
+    pred_postprocessor = dict(type=math_postprocess_v2)
+elif post_func == 'xfinder_model':
+    pred_postprocessor = dict(
+        type=xfinder_postprocess,
+        question_type='math',
+        model_name='xFinder-qwen1505',
+        num_processes=128,
+        api_url=xfinder_url,
+    )
+elif post_func == 'naive_model':
+    pred_postprocessor = dict(
+        type=naive_model_postprocess,
+        custom_instruction=MATH_NAVIE_PROMPT_TEMPLATE,
+        model_name=naive_model_name,
+        num_processes=64,
+        api_url=naive_model_url,
+    )
+# Pick the evaluator named by eval_func; 'naive_model' reuses naive_model_name and naive_model_url. NOTE(review): no else branch here either, so an unrecognized value leaves evaluator undefined.
+if eval_func == 're':
+    evaluator = dict(type=MATHEvaluator, version='v2')
+elif eval_func == 'naive_model':
+    evaluator = dict(
+        type=GaoKaoMATHEvaluator,
+        model_name=naive_model_name,
+        url=naive_model_url,
+    )
+
+math_eval_cfg = dict(
+    evaluator=evaluator, pred_postprocessor=pred_postprocessor,
+)
+
+math_datasets = [
+    dict(
+        type=MATHDataset,
+        abbr='math',
+        path='opencompass/math',
+        reader_cfg=math_reader_cfg,
+        infer_cfg=math_infer_cfg,
+        eval_cfg=math_eval_cfg,
+    )
+]
diff --git a/opencompass/configs/datasets/gaokao_math/gaokao_math_gen_9b076f.py b/opencompass/configs/datasets/gaokao_math/gaokao_math_gen_9b076f.py
@@ -0,0 +1,48 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import GaoKaoMATHDataset, GaoKaoMATHEvaluator
+
+# Prompt (in Chinese): asks the model to act as a math grading expert and output only the key answer extracted from a response.
+MATH_CN_PROMPT="""
+你是一个数学阅卷专家，任务是从给定的回答句子中提取精确的关键答案。你必须只提供提取的关键答案，不包括任何额外的文字。
+—
+我将为你提供一个问题、回答句子和问题类型。回答句子是对所提供问题的回应。利用提供的信息，你必须准确而精确地确定并从回答句子中提取预期的关键答案。请不要对问题发表主观看法。
+
+对于单选题，答案应该是选项字母，例如 "A"；
+对于多选题，答案应该是一个选项字母的列表，例如 "A" 或 "A", "B", "C"；
+对于填空题，答案应该是一个填入空白处的答案列表，列表的数量应该与问题中的空白数量相同，例如 ["$$\\frac{{1}}{{2}}$$"] 或 ["$$\\frac{{1}}{{2}}$$", "2"]。
+对于问答题，类似填空题，为每个小问抽出相应答案，例如 ["$$\\frac{{1}}{{2}}$$"] 或 ["$$\\frac{{1}}{{2}}$$", "2"]。
+
+如果回答句子提供了多个不同的答案，请仔细判断后面提供的答案是否是对前面答案的修正或修改。如果是这样，提取这个修正或修改后的答案作为最终答案。相反，如果回答句子在多个答案之间波动而没有明确的最终答案，你应该输出 [No valid answer]。
+—
+问题类型: {question_type}
+原始问题: {question}
+回答: {response}
+提取的关键答案:
+"""
+
+gaokao_math_reader_cfg = dict(input_columns=['question', 'response', 'question_type'], output_column='extract_answer')  # input column names match the {question_type}/{question}/{response} placeholders in MATH_CN_PROMPT
+
+# One HUMAN turn built from MATH_CN_PROMPT, ZeroRetriever as the retriever, GenInferencer with max_out_len=512.
+gaokao_math_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(role='HUMAN', prompt=MATH_CN_PROMPT),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+gaokao_math_eval_cfg = dict(
+    evaluator=dict(type=GaoKaoMATHEvaluator, url='http://0.0.0.0:23333/v1'))  # NOTE(review): url looks like a local placeholder endpoint - confirm/replace before running
+
+gaokao_math_datasets = [
+    dict(
+        type=GaoKaoMATHDataset,
+        abbr='GaoKaoMATH',
+        path='./data/gaokao_math/test_2k.json',  # relative path; NOTE(review): presumably resolved against the working directory - confirm
+        reader_cfg=gaokao_math_reader_cfg,
+        infer_cfg=gaokao_math_infer_cfg,
+        eval_cfg=gaokao_math_eval_cfg)
+]
diff --git a/opencompass/configs/datasets/math/math_0shot_llm_judge_gen_393424.py b/opencompass/configs/datasets/math/math_0shot_llm_judge_gen_393424.py
@@ -0,0 +1,78 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess_v2, GaoKaoMATHEvaluator
+from opencompass.utils.model_postprocessors import naive_model_postprocess, xfinder_postprocess
+from opencompass.utils.postprocessors.naive import MATH_NAVIE_PROMPT_TEMPLATE
+
+# ----------------------------- Eval Parameters -----------------------------
+## Postprocess function
+post_func = 're' # 're', 'xfinder_model', 'naive_model'
+
+## Evaluate function
+eval_func = 'naive_model' # 're', 'naive_model'
+
+## Model api url
+xfinder_url = 'http://0.0.0.0:23333/v1' # for 'xFinder-qwen1505' if post_func is 'xfinder_model'
+naive_model_name = 'Qwen/Qwen2.5-72B-Instruct' # replace with your model name
+naive_model_url = ['http://22.8.6.22:23333/v1', 'http://22.8.67.84:23333/v1', 'http://22.8.72.81:23333/v1', 'http://22.9.42.143:23333/v1'] # Multiple API endpoints for acceleration; NOTE(review): these hosts look deployment-specific - replace with your own
+
+# ----------------------------- Detailed Config -----------------------------
+
+math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
+
+math_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(
+            round=[
+                dict(role='HUMAN', prompt='{problem}\nPlease reason step by step, and put your final answer within \\boxed{}.'),
+            ]
+        ),
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=1024),
+)
+# Pick the prediction postprocessor named by post_func. NOTE(review): there is no else branch, so an unrecognized value leaves pred_postprocessor undefined and math_eval_cfg below raises NameError.
+if post_func == 're':
+    pred_postprocessor = dict(type=math_postprocess_v2)
+elif post_func == 'xfinder_model':
+    pred_postprocessor = dict(
+        type=xfinder_postprocess,
+        question_type='math',
+        model_name='xFinder-qwen1505',
+        num_processes=128,
+        api_url=xfinder_url,
+    )
+elif post_func == 'naive_model':
+    pred_postprocessor = dict(
+        type=naive_model_postprocess,
+        custom_instruction=MATH_NAVIE_PROMPT_TEMPLATE,
+        model_name=naive_model_name,
+        num_processes=64,
+        api_url=naive_model_url,
+    )
+# Pick the evaluator named by eval_func; 'naive_model' reuses naive_model_name and naive_model_url. NOTE(review): no else branch here either, so an unrecognized value leaves evaluator undefined.
+if eval_func == 're':
+    evaluator = dict(type=MATHEvaluator, version='v2')
+elif eval_func == 'naive_model':
+    evaluator = dict(
+        type=GaoKaoMATHEvaluator,
+        model_name=naive_model_name,
+        url=naive_model_url,
+    )
+
+math_eval_cfg = dict(
+    evaluator=evaluator, pred_postprocessor=pred_postprocessor,
+)
+
+math_datasets = [
+    dict(
+        type=MATHDataset,
+        abbr='math',
+        path='opencompass/math',
+        reader_cfg=math_reader_cfg,
+        infer_cfg=math_infer_cfg,
+        eval_cfg=math_eval_cfg,
+    )
+]
diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py
@@ -44,6 +44,7 @@
 from .flames import *  # noqa: F401, F403
 from .flores import *  # noqa: F401, F403
 from .game24 import *  # noqa: F401, F403
+from .gaokao_math import *  # noqa: F401, F403
 from .GaokaoBench import *  # noqa: F401, F403
 from .govrepcrs import *  # noqa: F401, F403
 from .gpqa import *  # noqa: F401, F403