LM Fine-tuning (BERT-based)
import transformers
print(transformers.__version__)  # should be 4.5.1
!pip install datasets
import numpy as np
from transformers import (
    AutoTokenizer,
    AutoModelForMaskedLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
)
from datasets import load_from_disk
# Base checkpoint for masked-language-model fine-tuning
model_checkpoint = "roberta-large"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
model.cuda()  # move the model to the GPU

# Dynamically mask 15% of the tokens in each batch for the MLM objective
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)
# Load the pre-tokenized dataset (a DatasetDict with 'train' and 'test' splits)
my_path = "dataset/batches_cleaned/stance/dataset"
lm_dataset = load_from_disk(my_path)
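# Not part of the original notebook: a minimal sketch of how a dataset like the
# one loaded above could have been prepared, assuming a plain-text corpus at
# "dataset/corpus.txt" (hypothetical path) with one document per line.
#
#   from datasets import load_dataset
#   raw = load_dataset("text", data_files={"train": "dataset/corpus.txt"})
#   splits = raw["train"].train_test_split(test_size=0.05)
#
#   def tokenize_fn(batch):
#       # Tokenize and truncate to the model's maximum input length
#       return tokenizer(batch["text"], truncation=True, max_length=512)
#
#   tokenized = splits.map(tokenize_fn, batched=True, remove_columns=["text"])
#   tokenized.save_to_disk(my_path)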
transformers.logging.set_verbosity_info()
training_args = TrainingArguments(
    output_dir="./models/ROBERTA/",
    evaluation_strategy='epoch',
    save_steps=10000,
    num_train_epochs=2,
    learning_rate=2e-5,
    logging_steps=10000,
    disable_tqdm=True,
    weight_decay=0.1,
    adam_beta2=0.98,
    adam_epsilon=1e-6,
    warmup_ratio=0.1,
    gradient_accumulation_steps=2,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=lm_dataset['train'],
    eval_dataset=lm_dataset['test'],
    data_collator=data_collator,
)
print(f"Started training")
trainer.train("./models/ROBERTA/checkpoint-30000")
eval_results = trainer.evaluate()
print(f"Perplexity: {np.exp(eval_results['eval_loss']):.2f}")Editor is loading...