Untitled

# Install the third-party packages this notebook needs. The leading "!" is
# the IPython/Jupyter shell escape, so these lines only work inside a
# notebook runtime (e.g. Colab), not when run as a plain Python script.
# NOTE(review): pandas and TensorFlow (required by the TF model class that is
# imported below) are not installed here -- presumably the runtime already
# ships them; confirm before running elsewhere.
!pip install transformers
!pip install tqdm
!pip install gdown


# Download the CSV file from Google Drive with gdown. The first argument
# looks like a Drive file ID (TODO confirm it is still shared), and -O sets
# the local file name that is read back with pandas further down.
!gdown '199uvBPgChEd0Eql5CQ0bf15NKHSJZC6y' -O 'InterbrandTextOnly.csv'


# Import libraries
import pandas as pd
from tqdm.notebook import tqdm
from transformers import pipeline, AutoTokenizer, TFAutoModelForSequenceClassification

# Classify every row of the downloaded CSV with ESG-BERT and write the result
# next to the input.
#
# Input:  'InterbrandTextOnly.csv' with a 'Text' column.
# Output: 'InterbrandTextOnly_with_Predictions.csv', i.e. the input plus
#         'Prediction' (predicted label) and 'Score' (its confidence).
#         Rows whose 'Text' cell is missing keep both new columns empty.

# Load the dataset (decoded as ISO-8859-1 / Latin-1)
dataset = pd.read_csv('InterbrandTextOnly.csv', encoding='ISO-8859-1')

# Initialize the tokenizer and model for ESG-BERT (TensorFlow weights)
tokenizer = AutoTokenizer.from_pretrained('nbroad/ESG-BERT')
model = TFAutoModelForSequenceClassification.from_pretrained('nbroad/ESG-BERT')

# Setup the classification pipeline
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)

# Perform classification on the 'Text' column with a progress bar.
# `predictions` stays aligned with the rows of `dataset`: each entry is the
# pipeline's result list for that row, or None when the row had no text.
predictions = []
for text in tqdm(dataset['Text'], desc="Classifying"):
    # pandas turns empty CSV cells into NaN (a float); the tokenizer only
    # accepts str and would abort the whole run on such a row, so skip it.
    if pd.isna(text):
        predictions.append(None)
        continue
    # str() guards against cells pandas parsed as numbers; truncation keeps
    # long documents within the 512-token limit passed as max_length.
    predictions.append(
        classifier(str(text), truncation=True, padding=True, max_length=512)
    )

# Add predictions to the dataset (None becomes an empty cell in the CSV)
dataset['Prediction'] = [
    prediction[0]['label'] if prediction else None for prediction in predictions
]
dataset['Score'] = [
    prediction[0]['score'] if prediction else None for prediction in predictions
]

# Save the predictions to a new CSV file in the current working directory
output_file_path = 'InterbrandTextOnly_with_Predictions.csv'
dataset.to_csv(output_file_path, index=False)

# Surface skipped rows so silently-empty predictions are not mistaken for a
# model failure.
skipped_rows = sum(prediction is None for prediction in predictions)
if skipped_rows:
    print(f"Skipped {skipped_rows} row(s) with missing 'Text'.")

print(f"Classification complete. Results saved to {output_file_path}")
Editor is loading...