33Kyrgyz Evals
44
55dataset:
6- TTimur/kyrgyzMMLU, TTimur/kyrgyzRC, TTimur/hellaswag_kg,
6+ TTimur/kyrgyzMMLU, TTimur/kyrgyzRC, TTimur/hellaswag_kg,
77TTimur/winogrande_kg, TTimur/truthfulqa_kg, TTimur/gsm8k_kg, TTimur/boolq_kg
88
99abstract:
2020https://ieeexplore.ieee.org/document/11206960
2121"""
2222
23+ import string
2324from functools import partial
2425
25- from lighteval .metrics .dynamic_metrics import LogLikelihoodAccMetric
2626from lighteval .metrics .metrics import Metrics
2727from lighteval .metrics .normalizations import LogProbCharNorm , LogProbTokenNorm
2828from lighteval .tasks .lighteval_task import LightevalTaskConfig
2929from lighteval .tasks .requests import Doc
3030
3131
32-
33- import string
3432LETTER_INDICES = string .ascii_uppercase
3533
3634# ============================================
@@ -57,29 +55,23 @@ def kyrgyz_mmlu_prompt(line: dict, task_name: str = None) -> Doc:
5755 """
5856 question = line ["Суроо (KG)" ]
5957 correct_answer = str (line ["Туура жооп" ])
60-
61- choices = [line [' А (KG)' ], line [' Б (KG)' ], line [' В (KG)' ], line [' Г (KG)' ], line [' Д (KG)' ]]
58+
59+ choices = [line [" А (KG)" ], line [" Б (KG)" ], line [" В (KG)" ], line [" Г (KG)" ], line [" Д (KG)" ]]
6260 choices = [c .strip () for c in choices if c ]
63-
64- letter_to_index = {
65- 'а' : 0 ,
66- 'б' : 1 ,
67- 'в' : 2 ,
68- 'г' : 3 ,
69- 'д' : 4
70- }
61+
62+ letter_to_index = {"а" : 0 , "б" : 1 , "в" : 2 , "г" : 3 , "д" : 4 }
7163 gold_index = letter_to_index .get (correct_answer .lower (), 0 )
72-
64+
7365 instruction = "Сиз билимиңизге жана жөндөмүңүзгө жараша суроолорго жооп берген AIсыз. Сизге суроо жана 2-5 жооп варианты берилет, туура жооптун НОМЕРИН (индексин) гана кайтарышыңыз керек.\n \n "
74-
66+
7567 query = f"{ instruction } Суроо: { question } \n \n Сунушталган жооптор:\n "
76-
68+
7769 for i , choice in enumerate (choices ):
7870 if choice :
7971 query += f"{ i } . { choice } \n "
80-
72+
8173 query += "\n \n Туура жоопту тандаңыз:"
82-
74+
8375 return Doc (
8476 task_name = task_name ,
8577 query = query ,
@@ -110,10 +102,7 @@ def __init__(self, name, hf_subset):
110102 )
111103
112104
113- MMLU_TASKS = [
114- CustomKyrgyzMMLUTask (name = f"kyrgyz_evals:{ subset } " , hf_subset = subset )
115- for subset in MMLU_SUBSETS
116- ]
105+ MMLU_TASKS = [CustomKyrgyzMMLUTask (name = f"kyrgyz_evals:{ subset } " , hf_subset = subset ) for subset in MMLU_SUBSETS ]
117106
118107
119108# ============================================
@@ -133,31 +122,31 @@ def kyrgyz_rc_prompt(line: dict, task_name: str = None) -> Doc:
133122 """
134123 Creates a prompt for Reading Comprehension tasks in Kyrgyz.
135124 """
136- text = line [' Текст (KG)' ]
125+ text = line [" Текст (KG)" ]
137126 question = line ["Суроо (KG)" ]
138127 correct_answer = str (line ["Туура жооп" ])
139-
140- choices = [line [' А (KG)' ], line [' Б (KG)' ], line [' В (KG)' ], line [' Г (KG)' ]]
128+
129+ choices = [line [" А (KG)" ], line [" Б (KG)" ], line [" В (KG)" ], line [" Г (KG)" ]]
141130 choices = [c .strip () for c in choices if c ]
142-
131+
143132 letter_to_index = {
144- 'а' : 0 ,
145- 'б' : 1 ,
146- 'в' : 2 ,
147- 'г' : 3 ,
133+ "а" : 0 ,
134+ "б" : 1 ,
135+ "в" : 2 ,
136+ "г" : 3 ,
148137 }
149138 gold_index = letter_to_index .get (correct_answer .lower (), 0 )
150-
139+
151140 instruction = "Сизге бир темага байланыштуу бир нече үзүндү текст берилген. Бардык үзүндүлөрдү кунт коюп окуп, андан кийин төмөндөгү суроолорго жооп бериңиздер. Суроо менен 2-4 жооп варианты берилет, туура жооптун НОМЕРИН (индексин) гана кайтарышыңыз керек.\n \n "
152-
141+
153142 query = f"{ instruction } Текст: { text } \n \n Суроо: { question } \n \n Сунушталган жооптор:\n "
154-
143+
155144 for i , choice in enumerate (choices ):
156145 if choice :
157146 query += f"{ i } . { choice } \n "
158-
147+
159148 query += "\n \n Туура жоопту тандаңыз:"
160-
149+
161150 return Doc (
162151 task_name = task_name ,
163152 query = query ,
@@ -189,31 +178,31 @@ def __init__(self, name, hf_subset):
189178 )
190179
191180
192- RC_TASKS = [
193- CustomKyrgyzRCTask (name = f"kyrgyz_evals:{ subset } " , hf_subset = subset )
194- for subset in RC_SUBSETS
195- ]
181+ RC_TASKS = [CustomKyrgyzRCTask (name = f"kyrgyz_evals:{ subset } " , hf_subset = subset ) for subset in RC_SUBSETS ]
196182
197183
198184# ============================================
199185# ====== HELLASWAG TASK ======================
200186# ============================================
201187
188+
202189def kyrgyz_hellaswag_prompt (line : dict , task_name : str = None ) -> Doc :
203190 """
204191 Creates a prompt for HellaSwag tasks in Kyrgyz.
205192 """
206- ctx_a_kg = line ['ctx_a_kg' ] if line ['ctx_a_kg' ] else '.'
207- ctx_b_kg = line ['ctx_b_kg' ].capitalize () if line ['ctx_b_kg' ] else '.'
208-
209- instruction = "Төмөндө жалпы түшүнүккө (common sense) байланыштуу бир нече тандоо суроолору (жооптору менен) берилген.\n \n "
210-
193+ ctx_a_kg = line ["ctx_a_kg" ] if line ["ctx_a_kg" ] else "."
194+ ctx_b_kg = line ["ctx_b_kg" ].capitalize () if line ["ctx_b_kg" ] else "."
195+
196+ instruction = (
197+ "Төмөндө жалпы түшүнүккө (common sense) байланыштуу бир нече тандоо суроолору (жооптору менен) берилген.\n \n "
198+ )
199+
211200 query = f"{ instruction } Суроо: { line ['activity_label_kg' ]} : { ctx_a_kg } { ctx_b_kg } \n "
212201 query += "" .join ([f"{ key } . { choice } \n " for key , choice in zip (LETTER_INDICES , line ["endings_kg" ])])
213202 query += "Туура жоопту тандаңыз:"
214-
203+
215204 gold_ix = int (line ["label" ]) if line ["label" ] != "" else - 1
216-
205+
217206 return Doc (
218207 task_name = task_name ,
219208 query = query ,
@@ -246,13 +235,14 @@ def kyrgyz_hellaswag_prompt(line: dict, task_name: str = None) -> Doc:
246235# ====== WINOGRANDE TASK =====================
247236# ============================================
248237
238+
249239def kyrgyz_winogrande_prompt (line : dict , task_name : str = None ) -> Doc :
250240 """
251241 Creates a prompt for Winogrande tasks in Kyrgyz.
252242 """
253243 query , end_of_target = line ["sentence_kg" ].split ("_" )
254244 end_of_target = end_of_target .strip ()
255-
245+
256246 return Doc (
257247 task_name = task_name ,
258248 query = query ,
@@ -282,34 +272,35 @@ def kyrgyz_winogrande_prompt(line: dict, task_name: str = None) -> Doc:
282272# ====== TRUTHFULQA TASK =====================
283273# ============================================
284274
275+
285276def kyrgyz_truthful_qa_prompt (line : dict , task_name : str = None ) -> Doc :
286277 """
287278 Creates a prompt for TruthfulQA tasks in Kyrgyz.
288279 """
289280 import ast
290-
281+
291282 mc1 = line .get ("mc1_targets_kg" , "{}" )
292283 mc2 = line .get ("mc2_targets_kg" , "{}" )
293-
284+
294285 if isinstance (mc1 , str ):
295286 try :
296287 mc1 = ast .literal_eval (mc1 )
297288 except (ValueError , SyntaxError ):
298289 mc1 = {"choices" : [], "labels" : []}
299290 else :
300291 mc1 = {"choices" : [], "labels" : []}
301-
292+
302293 if isinstance (mc2 , str ):
303294 try :
304295 mc2 = ast .literal_eval (mc2 )
305296 except (ValueError , SyntaxError ):
306297 mc2 = {"choices" : [], "labels" : []}
307298 else :
308299 mc2 = {"choices" : [], "labels" : []}
309-
300+
310301 choices = [f" { c } " for c in mc1 .get ("choices" , [])] + [f" { c } " for c in mc2 .get ("choices" , [])]
311302 labels = mc1 .get ("labels" , []) + mc2 .get ("labels" , [])
312-
303+
313304 return Doc (
314305 task_name = task_name ,
315306 query = f"Суроо: { line ['Question_kg' ]} \n Жооп:" ,
@@ -339,6 +330,7 @@ def kyrgyz_truthful_qa_prompt(line: dict, task_name: str = None) -> Doc:
339330# ====== GSM8K TASK ==========================
340331# ============================================
341332
333+
342334def kyrgyz_gsm8k_prompt (line : dict , task_name : str = None ) -> Doc :
343335 """
344336 Creates a prompt for GSM8K tasks in Kyrgyz.
@@ -374,12 +366,13 @@ def kyrgyz_gsm8k_prompt(line: dict, task_name: str = None) -> Doc:
374366# ====== BOOLQ TASK ==========================
375367# ============================================
376368
369+
377370def kyrgyz_boolq_prompt (line : dict , task_name : str = None ) -> Doc :
378371 """
379372 Creates a prompt for BoolQ tasks in Kyrgyz.
380373 """
381374 question = line ["question_kg" ][:- 1 ] if line ["question_kg" ][- 2 :] == "??" else line ["question_kg" ]
382-
375+
383376 return Doc (
384377 task_name = task_name ,
385378 query = f"Текст: { line ['passage_kg' ]} \n Суроо: { question } \n Жооп:" ,
@@ -411,13 +404,13 @@ def kyrgyz_boolq_prompt(line: dict, task_name: str = None) -> Doc:
411404# ============================================
412405
413406TASKS_TABLE = (
414- MMLU_TASKS +
415- RC_TASKS +
416- [
407+ MMLU_TASKS
408+ + RC_TASKS
409+ + [
417410 HELLASWAG_TASK ,
418411 WINOGRANDE_TASK ,
419412 TRUTHFULQA_TASK ,
420413 GSM8K_TASK ,
421414 BOOLQ_TASK ,
422415 ]
423- )
416+ )
0 commit comments