import argparse
import mindspore
from mindspore.nn import AdamWeightDecay
from squad_dataset import get_squad_dataset
from mindnlp.peft import LoraConfig, get_peft_model
from mindnlp.transformers import (
    AutoTokenizer,
    AutoModelForQuestionAnswering,
)

mindspore.set_context(device_target="CPU")


def main(args):
    # Load the tokenizer and the pretrained extractive QA model.
    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
    model = AutoModelForQuestionAnswering.from_pretrained(
        args.model_name_or_path)

    # Build the tokenized SQuAD training dataset.
    ds = get_squad_dataset(tokenizer, args.batch_size)
    # Wrap the base model with LoRA adapters on the configured target modules.
    # The PEFT task type for extractive question answering is "QUESTION_ANS".
    peft_config = LoraConfig(
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        r=args.lora_r,
        bias="none",
        task_type="QUESTION_ANS",
        target_modules=[m.strip() for m in args.lora_target_modules.split(",")],
    )
    model = get_peft_model(model=model, peft_config=peft_config)
    # model.print_trainable_parameters()

    # Only the LoRA parameters are trainable and handed to the optimizer.
    optimizer = AdamWeightDecay(
        params=model.trainable_params(), learning_rate=args.lr)

    def forward_fn(input_ids, token_type_ids, attention_mask, start_positions, end_positions):
        output = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            start_positions=start_positions,
            end_positions=end_positions,
        )
        return output.loss

    # Differentiate the loss with respect to the trainable (LoRA) parameters only.
    grad_fn = mindspore.value_and_grad(
        forward_fn, None, optimizer.parameters, has_aux=False
    )

    total_loss, total_step = 0, 0
    for epoch in range(args.num_epochs):
        for input_ids, token_type_ids, attention_mask, start_positions, end_positions in ds:
            # One training step: forward + backward, then apply the gradients.
            loss, grads = grad_fn(input_ids, token_type_ids,
                                  attention_mask, start_positions, end_positions)
            optimizer(grads)
            total_loss += loss.asnumpy()
            total_step += 1
            curr_loss = total_loss / total_step
            print({"epoch": epoch, "train-loss": f"{curr_loss:.2f}"})

    # Save only the trained LoRA adapter weights.
    model.save_pretrained(save_directory=args.model_save_dir)


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", default=4, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--model_name_or_path", default="YituTech/conv-bert-base",
                        type=str, help="Name or path of the pretrained model, e.g. YituTech/conv-bert-base.")
    parser.add_argument("--num_epochs", default=5, type=int)
    parser.add_argument("--lr", default=1e-4, type=float,
                        help="Set 2e-5 for full fine-tuning.")
    parser.add_argument("--max_seq_len", default=256, type=int)
    parser.add_argument("--lora_r", type=int, default=32)
    parser.add_argument("--lora_alpha", type=int, default=64)
    parser.add_argument("--lora_dropout", type=float, default=0)
    parser.add_argument("--lora_target_modules", type=str,
                        default="query,key,value,conv_out_layer,conv_kernel_layer,dense")
    parser.add_argument("--model_save_dir", type=str,
                        default="convbert_lora_peft")
    args = parser.parse_args()
    main(args)
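
After training, only the LoRA adapter weights are written to --model_save_dir. Below is a minimal sketch of how the adapter could be reloaded for inference, assuming the default YituTech/conv-bert-base checkpoint and the convbert_lora_peft output directory; PeftModel.from_pretrained mirrors the Hugging Face PEFT interface that mindnlp.peft follows, so treat the exact call as an assumption rather than a guarantee.

from mindnlp.peft import PeftModel
from mindnlp.transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Rebuild the frozen base QA model, then attach the saved LoRA adapter weights.
base_model = AutoModelForQuestionAnswering.from_pretrained("YituTech/conv-bert-base")
model = PeftModel.from_pretrained(base_model, "convbert_lora_peft")  # directory from --model_save_dir
tokenizer = AutoTokenizer.from_pretrained("YituTech/conv-bert-base")
model.set_train(False)  # switch to inference mode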