Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Support MMMLU & MMMLU-lite Benchmark #1565

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
5 changes: 5 additions & 0 deletions configs/summarizers/groups/mmmlu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# MMMLU subset names, one per locale; every entry carries the 'mmlu_' prefix.
categories = [
    'mmlu_AR-XY',
    'mmlu_BN-BD',
    'mmlu_DE-DE',
    'mmlu_ES-LA',
    'mmlu_FR-FR',
    'mmlu_HI-IN',
    'mmlu_ID-ID',
    'mmlu_IT-IT',
    'mmlu_JA-JP',
    'mmlu_KO-KR',
    'mmlu_PT-BR',
    'mmlu_SW-KE',
    'mmlu_YO-NG',
    'mmlu_ZH-CN',
]

# A single summary group named 'mmmlu' spanning all locale subsets.
# Prepending 'openai_m' to a 'mmlu_*' category yields 'openai_mmmlu_*'.
mmmlu_summary_groups = [
    dict(name='mmmlu', subsets=['openai_m' + category for category in categories]),
]
25 changes: 25 additions & 0 deletions configs/summarizers/mmmlu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from mmengine.config import read_base

with read_base():
from .groups.mmmlu import mmmlu_summary_groups

# Summarizer settings: the per-locale dataset abbreviations followed by the
# 'mmmlu' entry, plus every '*_summary_groups' list that is in scope when this
# statement runs, concatenated into one list.
summarizer = dict(
    dataset_abbrs=[
        f'openai_mmmlu_{locale}'
        for locale in (
            'AR-XY',
            'BN-BD',
            'DE-DE',
            'ES-LA',
            'FR-FR',
            'HI-IN',
            'ID-ID',
            'IT-IT',
            'JA-JP',
            'KO-KR',
            'PT-BR',
            'SW-KE',
            'YO-NG',
            'ZH-CN',
        )
    ] + ['mmmlu'],
    # locals() is evaluated in the enclosing (module) scope, so this picks up
    # module-level names ending in '_summary_groups'.
    summary_groups=sum(
        (groups for key, groups in locals().items() if key.endswith('_summary_groups')),
        [],
    ),
)
26 changes: 26 additions & 0 deletions configs/summarizers/mmmlu_lite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# MMMLU-lite subset names, one per locale; every entry carries the
# 'mmlu_lite_' prefix.
categories = [
    'mmlu_lite_AR-XY',
    'mmlu_lite_BN-BD',
    'mmlu_lite_DE-DE',
    'mmlu_lite_ES-LA',
    'mmlu_lite_FR-FR',
    'mmlu_lite_HI-IN',
    'mmlu_lite_ID-ID',
    'mmlu_lite_IT-IT',
    'mmlu_lite_JA-JP',
    'mmlu_lite_KO-KR',
    'mmlu_lite_PT-BR',
    'mmlu_lite_SW-KE',
    'mmlu_lite_YO-NG',
    'mmlu_lite_ZH-CN',
]

# A single summary group named 'mmmlu_lite' spanning all locale subsets.
# Prepending 'openai_m' to a 'mmlu_lite_*' category yields
# 'openai_mmmlu_lite_*'.
mmmlu_summary_groups = [
    dict(name='mmmlu_lite', subsets=['openai_m' + category for category in categories]),
]

# Summarizer settings: the per-locale abbreviations followed by the
# 'mmmlu_lite' entry, plus every '*_summary_groups' list defined in this
# module so far, concatenated into one list.
summarizer = dict(
    dataset_abbrs=['openai_m' + category for category in categories] + ['mmmlu_lite'],
    # locals() is evaluated in the enclosing (module) scope.
    summary_groups=sum(
        (groups for key, groups in locals().items() if key.endswith('_summary_groups')),
        [],
    ),
)
35 changes: 35 additions & 0 deletions opencompass/configs/datasets/mmmlu/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# MMMLU
## Dataset Description
Multilingual Massive Multitask Language Understanding (MMMLU)
The MMLU is a widely recognized benchmark of general knowledge attained by AI models. It covers a broad range of topics from 57 different categories, covering elementary-level knowledge up to advanced professional subjects like law, physics, history, and computer science.

We translated the MMLU’s test set into 14 languages using professional human translators. Relying on human translators for this evaluation increases confidence in the accuracy of the translations, especially for low-resource languages like Yoruba. We are publishing the professional human translations and the code we use to run the evaluations.

This effort reflects our commitment to improving the multilingual capabilities of AI models, ensuring they perform accurately across languages, particularly for underrepresented communities. By prioritizing high-quality translations, we aim to make AI technology more inclusive and effective for users worldwide.
MMMLU contains the MMLU test set translated into the following locales:

- AR_XY (Arabic)
- BN_BD (Bengali)
- DE_DE (German)
- ES_LA (Spanish)
- FR_FR (French)
- HI_IN (Hindi)
- ID_ID (Indonesian)
- IT_IT (Italian)
- JA_JP (Japanese)
- KO_KR (Korean)
- PT_BR (Brazilian Portuguese)
- SW_KE (Swahili)
- YO_NG (Yoruba)
- ZH_CN (Simplified Chinese)


## How to Use
Download file from [link](https://hf-mirror.com/datasets/openai/MMMLU)

```python
from datasets import load_dataset
ds = load_dataset("openai/MMMLU", "default")
from datasets import load_dataset
ds = load_dataset("openai/MMMLU", "by_language")
```
138 changes: 138 additions & 0 deletions opencompass/configs/datasets/mmmlu/mmmlu_5_shot_gen_bcbeb3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
from opencompass.datasets import MMMLUDataset
from opencompass.utils.text_postprocessors import first_option_postprocess
from mmengine.config import read_base

with read_base():
from .mmmlu_prompt import (get_few_shot_prompts_ar,
get_few_shot_prompts_bn,
get_few_shot_prompts_de,
get_few_shot_prompts_es,
get_few_shot_prompts_fr,
get_few_shot_prompts_hi,
get_few_shot_prompts_id,
get_few_shot_prompts_it,
get_few_shot_prompts_ja,
get_few_shot_prompts_ko,
get_few_shot_prompts_pt,
get_few_shot_prompts_zh,
get_few_shot_prompts_sw,
get_few_shot_prompts_yo)

# Reader settings shared by every locale subset: the columns fed into the
# prompt, the column holding the reference answer, and 'test' named as the
# train split.
mmmlu_reader_cfg = {
    'input_columns': ['input', 'A', 'B', 'C', 'D', 'subject'],
    'output_column': 'target',
    'train_split': 'test',
}

# Subset names, one per locale, each of the form 'mmlu_<LANG>-<REGION>'.
mmmlu_all_sets = [
    f'mmlu_{locale}'
    for locale in (
        'AR-XY',
        'BN-BD',
        'DE-DE',
        'ES-LA',
        'FR-FR',
        'HI-IN',
        'ID-ID',
        'IT-IT',
        'JA-JP',
        'KO-KR',
        'PT-BR',
        'SW-KE',
        'YO-NG',
        'ZH-CN',
    )
]

# Build one generation-style dataset config per locale subset listed in
# mmmlu_all_sets and collect them in mmmlu_datasets.
mmmlu_datasets = []
for _name in mmmlu_all_sets:
    # The locale marker embedded in the subset name selects the instruction
    # (_hint) and the question layout (_prompt). Doubled braces leave literal
    # {subject}/{input}/{A}.. placeholders in the resulting string. The
    # dialogue rounds come from the matching get_few_shot_prompts_* helper
    # (imported from .mmmlu_prompt; presumably a list of role dicts -- confirm
    # there).
    if 'AR' in _name:
        _hint = f'هناك سؤال اختيار واحد. أجب عن السؤال بالرد على A أو B أو C أو D, يرجى استخدام واحدة من الرموز A، B، C، أو D لتمثيل خيارات الإجابة في ردك'
        _prompt = f'يتعلق بـ {{subject}} \nالسؤال: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nالإجابة:'
        _round = get_few_shot_prompts_ar(_hint, _prompt)
    elif 'BN' in _name:
        _hint = f'এটি একটি একক পছন্দের প্রশ্ন। এ, বি, সি বা ডি উত্তর দিয়ে প্রশ্নের উত্তর দিন।, আপনার উত্তরে ইংরেজি বর্ণ A, B, C এবং D এর মধ্যে একটি ব্যবহার করুন'
        _prompt = f'এটি {{subject}} সম্পর্কে \nপ্রশ্ন: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nউত্তর:'
        _round = get_few_shot_prompts_bn(_hint, _prompt)
    elif 'DE' in _name:
        _hint = f'Es gibt eine Einzelwahlfrage. Beantworte die Frage, indem du A, B, C oder D antwortest.'
        _prompt = f'Es geht um {{subject}} \nFrage: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAntwort:'
        _round = get_few_shot_prompts_de(_hint, _prompt)
    elif 'ES' in _name:
        _hint = f'Hay una pregunta de elección única. Responde a la pregunta respondiendo A, B, C o D.'
        _prompt = f'Se trata de {{subject}} \nPregunta: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nRespuesta:'
        _round = get_few_shot_prompts_es(_hint, _prompt)
    elif 'FR' in _name:
        _hint = f'Il y a une question à choix unique. Répondez à la question en répondant A, B, C ou D.'
        _prompt = f'''C'est à propos de {{subject}} \nQuestion : {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nRéponse :'''
        _round = get_few_shot_prompts_fr(_hint, _prompt)
    elif 'HI' in _name:
        _hint = f'यह एक एकल विकल्प प्रश्न है। प्रश्न का उत्तर A, B, C या D में से कोई भी उत्तर देकर दें।'
        _prompt = f'यह {{subject}} के बारे में है \nप्रश्न: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nउत्तर:'
        _round = get_few_shot_prompts_hi(_hint, _prompt)
    elif 'ID' in _name:
        _hint = f'Ada pertanyaan pilihan tunggal. Jawablah pertanyaan dengan menjawab A, B, C, atau D.'
        _prompt = f'Ini tentang {{subject}} \nPertanyaan: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nJawaban:'
        _round = get_few_shot_prompts_id(_hint, _prompt)
    elif 'IT' in _name:
        _hint = f'Ci sono domande a scelta singola. Rispondi alla domanda rispondendo A, B, C o D.'
        _prompt = f'Si tratta di {{subject}} \nDomanda: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nRisposta:'
        _round = get_few_shot_prompts_it(_hint, _prompt)
    elif 'JA' in _name:
        _hint = f'単一選択肢の質問があります。この質問にはA、B、C、またはDで答えてください。'
        _prompt = f'これは {{subject}} に関することです \n質問: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n回答:'
        _round = get_few_shot_prompts_ja(_hint, _prompt)
    elif 'KO' in _name:
        _hint = f'단일 선택 질문이 있습니다. A, B, C 또는 D로 답변해 주세요.'
        _prompt = f'이것은 {{subject}}에 관한 것입니다 \n질문: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n답변:'
        _round = get_few_shot_prompts_ko(_hint, _prompt)
    elif 'PT' in _name:
        _hint = f'Há uma pergunta de escolha única. Responda à pergunta escolhendo A, B, C ou D.'
        _prompt = f'É sobre {{subject}} \nPergunta: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nResposta:'
        _round = get_few_shot_prompts_pt(_hint, _prompt)
    elif 'ZH' in _name:
        _hint = f'这里有一个单项选择题。请通过选择 A、B、C 或 D 来回答该问题。'
        _prompt = f'这是关于 {{subject}} 的内容\n问题:{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案:'
        _round = get_few_shot_prompts_zh(_hint, _prompt)
    elif 'SW' in _name:
        _hint = f'Kuna swali moja la chaguo. Jibu swali kwa kujibu A, B, C au D.'
        _prompt = f'Hii ni kuhusu {{subject}}.\nSwali: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nJibu:'
        _round = get_few_shot_prompts_sw(_hint, _prompt)
    elif 'YO' in _name:
        _hint = f'Ibeere kan wa ti o ni yiyan kan. Fesi si ibeere naa nipa fesi A, B, C tabi D.'
        _prompt = f'Eyi jẹ nipa {{subject}}.\nIbeere: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nFesi:'
        _round = get_few_shot_prompts_yo(_hint, _prompt)
    else:
        # English fallback for a subset name without a known locale marker
        # (not reached with the current mmmlu_all_sets). There are no few-shot
        # rounds here, so wrap the prompt in a single HUMAN turn; the template
        # below expects `round` to be a list of role dicts, not a bare string.
        _hint = f'There is a single choice question. Answer the question by replying A, B, C or D.'
        _prompt = f'it is about {{subject}} \nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer:'
        _round = [
            dict(
                role='HUMAN',
                prompt=f'{_hint}\n{_prompt}\n' + 'Please answer only with option A, B, C or D. \nAnswer:',
            ),
        ]

    # ZeroRetriever is configured here; the FixKRetriever imported by this
    # file is not used in this loop.
    mmmlu_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                begin='</E>',
                round=_round,
            ),
            ice_token='</E>',
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )

    # Predictions go through first_option_postprocess with options 'ABCD'
    # before being scored by AccwithDetailsEvaluator.
    mmmlu_eval_cfg = dict(
        evaluator=dict(type=AccwithDetailsEvaluator),
        pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
    )

    mmmlu_datasets.append(
        dict(
            # 'openai_m' + 'mmlu_*' gives the 'openai_mmmlu_*' abbreviation.
            abbr=f'openai_m{_name}',
            type=MMMLUDataset,
            path='openai/MMMLU',
            name=_name,
            reader_cfg=mmmlu_reader_cfg,
            infer_cfg=mmmlu_infer_cfg,
            eval_cfg=mmmlu_eval_cfg,
        ))

# Drop the loop temporaries so they do not remain as module-level names.
del _name, _hint, _prompt, _round
4 changes: 4 additions & 0 deletions opencompass/configs/datasets/mmmlu/mmmlu_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
    # Re-export the 5-shot MMMLU dataset list. The module suffix must match
    # the file that actually exists in this directory
    # (mmmlu_5_shot_gen_bcbeb3.py); the previous 'b31abe' suffix did not.
    from .mmmlu_5_shot_gen_bcbeb3 import mmmlu_datasets  # noqa: F401, F403
105 changes: 105 additions & 0 deletions opencompass/configs/datasets/mmmlu/mmmlu_gen_c51a84.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
from opencompass.datasets import MMMLUDataset
from opencompass.utils.text_postprocessors import first_option_postprocess


# Reader settings shared by every locale subset: the columns fed into the
# prompt, the column holding the reference answer, and 'test' named as the
# train split.
mmmlu_reader_cfg = {
    'input_columns': ['input', 'A', 'B', 'C', 'D', 'subject'],
    'output_column': 'target',
    'train_split': 'test',
}

# Subset names, one per locale, each of the form 'mmlu_<LANG>-<REGION>'.
mmmlu_all_sets = [
    f'mmlu_{locale}'
    for locale in (
        'AR-XY',
        'BN-BD',
        'DE-DE',
        'ES-LA',
        'FR-FR',
        'HI-IN',
        'ID-ID',
        'IT-IT',
        'JA-JP',
        'KO-KR',
        'PT-BR',
        'SW-KE',
        'YO-NG',
        'ZH-CN',
    )
]

# Build one zero-shot generation config per locale subset listed in
# mmmlu_all_sets and collect them in mmmlu_datasets.
mmmlu_datasets = []
for _name in mmmlu_all_sets:
    # The locale marker embedded in the subset name selects the instruction
    # (_hint) and the question layout (_prompt). Doubled braces leave literal
    # {subject}/{input}/{A}.. placeholders in the resulting string.
    if 'AR' in _name:
        # NOTE(review): the AR and BN layouts label the options with native
        # letters, while the hint and the post-processor below work with
        # A/B/C/D -- confirm this is intended.
        _hint = f'هناك سؤال اختيار واحد. أجب عن السؤال بالرد على A أو B أو C أو D, يرجى استخدام واحدة من الرموز A، B، C، أو D لتمثيل خيارات الإجابة في ردك'
        _prompt = f'يتعلق بـ {{subject}} \nالسؤال: {{input}}\nأ. {{A}}\nب. {{B}}\nج. {{C}}\nد. {{D}}\nالإجابة:'
    elif 'BN' in _name:
        _hint = f'এটি একটি একক পছন্দের প্রশ্ন। এ, বি, সি বা ডি উত্তর দিয়ে প্রশ্নের উত্তর দিন।, আপনার উত্তরে ইংরেজি বর্ণ A, B, C এবং D এর মধ্যে একটি ব্যবহার করুন'
        _prompt = f'এটি {{subject}} এর সম্পর্কে \nপ্রশ্ন: {{input}}\nএ. {{A}}\nবি. {{B}}\nসি. {{C}}\nডি. {{D}}\nউত্তর:'
    elif 'DE' in _name:
        _hint = f'Es gibt eine Einzelwahlfrage. Beantworte die Frage, indem du A, B, C oder D antwortest.'
        _prompt = f'Es geht um {{subject}} \nFrage: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAntwort:'
    elif 'ES' in _name:
        _hint = f'Hay una pregunta de elección única. Responde a la pregunta respondiendo A, B, C o D.'
        _prompt = f'Se trata de {{subject}} \nPregunta: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nRespuesta:'
    elif 'FR' in _name:
        _hint = f'Il y a une question à choix unique. Répondez à la question en répondant A, B, C ou D.'
        _prompt = f'''C'est à propos de {{subject}} \nQuestion : {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nRéponse :'''
    elif 'HI' in _name:
        _hint = f'यह एक एकल विकल्प प्रश्न है। प्रश्न का उत्तर A, B, C या D में से कोई भी उत्तर देकर दें।'
        _prompt = f'यह {{subject}} के बारे में है \nप्रश्न: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nउत्तर:'
    elif 'ID' in _name:
        _hint = f'Ada pertanyaan pilihan tunggal. Jawablah pertanyaan dengan menjawab A, B, C, atau D.'
        _prompt = f'Ini tentang {{subject}} \nPertanyaan: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nJawaban:'
    elif 'IT' in _name:
        _hint = f'Ci sono domande a scelta singola. Rispondi alla domanda rispondendo A, B, C o D.'
        _prompt = f'Si tratta di {{subject}} \nDomanda: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nRisposta:'
    elif 'JA' in _name:
        _hint = f'単一選択肢の質問があります。この質問にはA、B、C、またはDで答えてください。'
        _prompt = f'これは {{subject}} に関することです \n質問: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n回答:'
    elif 'KO' in _name:
        _hint = f'단일 선택 질문이 있습니다. A, B, C 또는 D로 답변해 주세요.'
        _prompt = f'이것은 {{subject}}에 관한 것입니다 \n질문: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n답변:'
    elif 'PT' in _name:
        _hint = f'Há uma pergunta de escolha única. Responda à pergunta escolhendo A, B, C ou D.'
        _prompt = f'É sobre {{subject}} \nPergunta: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nResposta:'
    elif 'ZH' in _name:
        _hint = f'这里有一个单项选择题。请通过选择 A、B、C 或 D 来回答该问题。'
        _prompt = f'这是关于 {{subject}} 的内容\n问题:{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案:'
    elif 'SW' in _name:
        # Swahili and Yoruba previously fell through to the English fallback;
        # they now use the same localized hint and layout as the 5-shot config.
        # NOTE(review): this changes the SW/YO prompt text, so a prompt-hash
        # suffix in the file name (if enforced) may need regenerating.
        _hint = f'Kuna swali moja la chaguo. Jibu swali kwa kujibu A, B, C au D.'
        _prompt = f'Hii ni kuhusu {{subject}}.\nSwali: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nJibu:'
    elif 'YO' in _name:
        _hint = f'Ibeere kan wa ti o ni yiyan kan. Fesi si ibeere naa nipa fesi A, B, C tabi D.'
        _prompt = f'Eyi jẹ nipa {{subject}}.\nIbeere: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nFesi:'
    else:
        # English fallback for a subset name without a known locale marker
        # (not reached with the current mmmlu_all_sets).
        _hint = f'There is a single choice question. Answer the question by replying A, B, C or D.'
        _prompt = f'it is about {{subject}} \nQuestion: {{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer:'

    # A single HUMAN turn carrying the hint followed by the question layout.
    mmmlu_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                begin='</E>',
                round=[
                    dict(
                        role='HUMAN',
                        prompt=f'{_hint}\n {_prompt}'
                    ),
                ],
            ),
            ice_token='</E>',
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )

    # Predictions go through first_option_postprocess with options 'ABCD'
    # before being scored by AccwithDetailsEvaluator.
    mmmlu_eval_cfg = dict(
        evaluator=dict(type=AccwithDetailsEvaluator),
        pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
    )

    mmmlu_datasets.append(
        dict(
            # 'openai_m' + 'mmlu_*' gives the 'openai_mmmlu_*' abbreviation.
            abbr=f'openai_m{_name}',
            type=MMMLUDataset,
            path='openai/MMMLU',
            name=_name,
            reader_cfg=mmmlu_reader_cfg,
            infer_cfg=mmmlu_infer_cfg,
            eval_cfg=mmmlu_eval_cfg,
        ))

# Drop the loop temporaries so they do not remain as module-level names.
del _name, _hint, _prompt
Loading
Loading