LM Fine-tuning (BERT-based)
# Check the installed transformers version (this script targets 4.5.1)
import transformers
print(transformers.__version__)  # should be 4.5.1

!pip install datasets

# Masked-language-model fine-tuning of roberta-large
import numpy as np
import transformers
from transformers import (
    AutoTokenizer,
    AutoModelForMaskedLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
)
from datasets import load_from_disk

# Load the pretrained checkpoint and its tokenizer
model_checkpoint = "roberta-large"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
model.cuda()

# Dynamic masking: 15% of tokens are masked in each batch
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

# Load the pre-tokenized corpus from disk
my_path = "dataset/batches_cleaned/stance/dataset"
lm_dataset = load_from_disk(my_path)

transformers.logging.set_verbosity_info()

training_args = TrainingArguments(
    output_dir="./models/ROBERTA/",
    evaluation_strategy="epoch",
    save_steps=10000,
    num_train_epochs=2,
    learning_rate=2e-5,
    logging_steps=10000,
    disable_tqdm=True,
    weight_decay=0.1,
    adam_beta2=0.98,
    adam_epsilon=1e-6,
    warmup_ratio=0.1,
    gradient_accumulation_steps=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=lm_dataset["train"],
    eval_dataset=lm_dataset["test"],
    data_collator=data_collator,
)

print("Started training")
# Resume training from an earlier checkpoint
trainer.train(resume_from_checkpoint="./models/ROBERTA/checkpoint-30000")

# Perplexity is exp(cross-entropy loss) on the held-out split
eval_results = trainer.evaluate()
print(f"Perplexity: {np.exp(eval_results['eval_loss']):.2f}")
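
For context: the script above loads a dataset that was already tokenized and saved to disk at my_path. A minimal sketch of how such a dataset could be produced from a plain-text corpus (the file names train.txt and test.txt are hypothetical):

from datasets import load_dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("roberta-large", use_fast=True)

# Hypothetical raw corpus files; one document per line
raw = load_dataset("text", data_files={"train": "train.txt", "test": "test.txt"})

def tokenize(batch):
    # Truncate to the model's maximum input length; padding is applied
    # dynamically by DataCollatorForLanguageModeling at training time
    return tokenizer(batch["text"], truncation=True, max_length=512)

tokenized = raw.map(tokenize, batched=True, remove_columns=["text"])
tokenized.save_to_disk("dataset/batches_cleaned/stance/dataset")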
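
After training, a quick sanity check is to run the fine-tuned model through the fill-mask pipeline. This sketch assumes the final weights were saved with trainer.save_model() to a hypothetical directory ./models/ROBERTA/final:

from transformers import pipeline

fill_mask = pipeline(
    "fill-mask",
    model="./models/ROBERTA/final",
    tokenizer="./models/ROBERTA/final",
)
# RoBERTa's mask token is <mask> (not [MASK] as in BERT)
print(fill_mask("The weather today is <mask>."))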