|
20 | 20 | https://ieeexplore.ieee.org/document/11206960 |
21 | 21 | """ |
22 | 22 |
|
| 23 | +import json |
23 | 24 | import string |
24 | 25 | from functools import partial |
25 | 26 |
|
@@ -188,27 +189,51 @@ def __init__(self, name, hf_subset): |
188 | 189 |
|
def _coerce_kyrgyz_endings(raw_endings) -> list:
    """Normalize the raw ``endings_kg`` field into a list of at most four endings.

    The field may already be a list, or it may be a string holding a serialized
    list (JSON, or a Python literal such as ``"['a', 'b']"``).

    Args:
        raw_endings: Value read from the dataset row; may be a list, a tuple,
            a string, or anything else (including ``None``).

    Returns:
        A list with at most four entries.
    """
    import ast

    endings = raw_endings

    if isinstance(raw_endings, str):
        try:
            # Try JSON first.
            endings = json.loads(raw_endings)
        except (ValueError, RecursionError):
            try:
                # Fall back to a Python literal (e.g. single-quoted lists).
                endings = ast.literal_eval(raw_endings)
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                endings = [raw_endings]

        # A string that parses to a scalar ("true", "12", ...) is really a
        # single plain-text ending: keep it verbatim instead of re-stringifying
        # the parsed value.
        if not isinstance(endings, (list, tuple)):
            endings = [raw_endings]

    if isinstance(endings, tuple):
        # A Python-literal tuple is a sequence of endings, not one ending.
        endings = list(endings)
    elif not isinstance(endings, list):
        endings = [str(endings)]

    return endings[:4]


def kyrgyz_hellaswag_prompt(line: dict, task_name: str = None) -> Doc:
    """
    Hellaswag-style multiple-choice prompt.

    The Kyrgyz dataset provides:
      - ctx_a_kg, ctx_b_kg: context pieces
      - activity_label_kg: short description
      - endings_kg: list of 4 full candidate endings (strings), possibly
        serialized as a JSON / Python-literal string
      - label: correct ending index in [0, 3]

    Args:
        line: One dataset row with the fields listed above.
        task_name: Name of the task, forwarded to the returned ``Doc``.

    Returns:
        A ``Doc`` whose query lists the lettered endings and whose choices are
        the corresponding letters, each prefixed with a single space.
        ``gold_index`` is ``-1`` when the row has no label (missing, ``None``
        or empty string).
    """
    instruction = (
        "Төмөндө жалпы түшүнүккө (common sense) байланыштуу бир нече тандоо суроолору (жооптору менен) берилген.\n\n"
    )

    ctx_a_kg = line["ctx_a_kg"] if line["ctx_a_kg"] else "."
    ctx_b_kg = line["ctx_b_kg"].capitalize() if line["ctx_b_kg"] else "."

    endings_kg = _coerce_kyrgyz_endings(line.get("endings_kg"))

    query = f"{instruction}Суроо: {line['activity_label_kg']}: {ctx_a_kg} {ctx_b_kg}\n"
    query += "".join(f"{letter}. {choice}\n" for letter, choice in zip(LETTER_INDICES, endings_kg))
    # No trailing space here: every choice already starts with a space, so a
    # trailing space would produce a double space in query + choice.
    query += "Туура жоопту тандаңыз:"

    # Treat a missing, None, or empty label as "no gold answer".
    label = line.get("label")
    gold_ix = int(label) if label not in (None, "") else -1

    return Doc(
        task_name=task_name,
        query=query,
        choices=[f" {letter}" for letter in LETTER_INDICES[: len(endings_kg)]],
        gold_index=gold_ix,
        instruction=instruction,
    )
213 | 238 |
|
214 | 239 |
|
|
0 commit comments