Sentiment Analyzer

mail@pastecode.io avatar
unknown
python
2 months ago
3.8 kB
5
Indexable
Never
import csv
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.font_manager as font_manager
nltk.download('punkt')



def load_lexicon(filename):
    """Load a word -> sentiment score mapping from a CSV lexicon file.

    The file must start with a header row that contains the columns
    ``Words`` and ``Score``. Each word is stripped of surrounding
    whitespace and lower-cased; each score is parsed with ``int``.

    Args:
        filename: Path to the CSV lexicon file.

    Returns:
        A dict mapping each normalised word to its integer score. When a
        word occurs in several rows, the last row wins.

    Raises:
        KeyError: If the ``Words`` or ``Score`` column is missing.
        ValueError: If a score cannot be parsed as an integer.
    """
    lexicon = {}
    # 'utf-8-sig' drops a leading byte-order mark if one is present (plain
    # 'utf-8' would glue it onto the first header name and break the
    # row['Words'] lookup); files without a BOM are read exactly as before.
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, mode='r', encoding='utf-8-sig', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            # Strip padding so entries such as "good " still match tokens.
            lexicon[row['Words'].strip().lower()] = int(row['Score'])
    return lexicon

def preprocess_text(filename):
    """Read a UTF-8 text file and return it as lower-cased, tokenized sentences.

    Args:
        filename: Path to the text file to analyse.

    Returns:
        A list with one entry per sentence found by ``sent_tokenize``; each
        entry is the list of tokens ``word_tokenize`` produced for it.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        lowered = handle.read().lower()
    # NOTE(review): the text is lower-cased before sentence splitting;
    # confirm the sentence tokenizer does not need the original casing.
    return list(map(word_tokenize, sent_tokenize(lowered)))

def calculate_custom_sentiment(tokenized_sentences, lexicon, negation_words):
    """Score each sentence by summing lexicon scores with simple negation.

    A negation word contributes no score itself; it only marks the next
    non-negation token, whose score is then sign-flipped. The mark is
    cleared by that token even when the token is not in the lexicon (its
    score of 0 stays 0), and it never carries over into the next sentence.
    Consecutive negation words do not cancel each other out.

    Args:
        tokenized_sentences: Iterable of sentences, each an iterable of
            token strings.
        lexicon: Mapping from token to numeric score; unknown tokens
            count as 0.
        negation_words: Iterable of token strings that act as negators.

    Returns:
        A list with one summed score per sentence, in input order.
    """
    # Build the lookup once: constant-time membership per token, and a
    # one-shot iterable is consumed here rather than by the first tests.
    negators = frozenset(negation_words)

    sentiment_scores = []
    for sentence in tokenized_sentences:
        sentence_score = 0
        negate_next = False  # True while a negator is waiting for its target
        for word in sentence:
            if word in negators:
                negate_next = True
                continue

            word_score = lexicon.get(word, 0)
            if negate_next:
                # Flip only this token, then drop the pending negation.
                word_score = -word_score
                negate_next = False

            sentence_score += word_score

        sentiment_scores.append(sentence_score)
    return sentiment_scores



def plot_sentiment_over_time(sentiment_scores):
    """Show a scatter plot of per-sentence sentiment scores.

    Each score is drawn at its position in ``sentiment_scores`` and
    coloured dark green when positive, dark red when negative and gray
    when neither. A thin horizontal line marks the zero level.

    Args:
        sentiment_scores: Sequence of numeric scores, one per sentence.
    """
    def _colour_for(score):
        # Dark green for positive, dark red for negative, gray otherwise.
        if score > 0:
            return '#006400'
        if score < 0:
            return '#8B0000'
        return '#808080'

    positions = np.arange(len(sentiment_scores))
    point_colours = [_colour_for(value) for value in sentiment_scores]

    plt.figure(figsize=(10, 6))
    plt.scatter(
        positions,
        sentiment_scores,
        color=point_colours,
        edgecolors='black',
        linewidth=0.5,
        alpha=0.7,
        s=40,
    )
    # Reference line for neutral sentiment.
    plt.axhline(0, color='grey', lw=0.5)

    plt.title('Sentiment Graph')
    plt.xlabel('Sentence Index')
    plt.ylabel('Sentiment Score')
    plt.grid(True, which='both', linestyle='--', linewidth=0.5)
    plt.tight_layout()
    plt.show()


# Tokens treated as negators when scoring: each one flips the sign of the
# score of the token that follows it.
negation_words = [
    "none",
    "no",
    "nah",
    "never",
    "not",
    "nought",
    "naught",
    "ne",
    "nother",
    "neither",
    "nor",
    "nay",
]



def summarize_sentiment(sentiment_scores):
    """Count positive, negative and neutral scores and pick an overall label.

    Args:
        sentiment_scores: Iterable of numeric per-sentence scores.

    Returns:
        A tuple ``(positive_count, negative_count, neutral_count,
        overall_sentiment)``. ``overall_sentiment`` is "Positive" when
        positive sentences outnumber negative ones, "Negative" in the
        opposite case and "Neutral" on a tie (including empty input).
    """
    scores = list(sentiment_scores)  # allow one-shot iterables
    positive_count = sum(1 for score in scores if score > 0)
    negative_count = sum(1 for score in scores if score < 0)
    neutral_count = sum(1 for score in scores if score == 0)

    if positive_count > negative_count:
        overall_sentiment = "Positive"
    elif negative_count > positive_count:
        overall_sentiment = "Negative"
    else:
        overall_sentiment = "Neutral"
    return positive_count, negative_count, neutral_count, overall_sentiment


def main():
    """Run the analysis on the configured files, plot it and print a summary."""
    lexicon_file = 'Old English Sentiment Lexicon - Sheet1.csv'  # Update with the actual path
    text_file = 'hello.txt'  # Update with the actual path

    lexicon = load_lexicon(lexicon_file)
    tokenized_sentences = preprocess_text(text_file)
    # Uses the module-level negation_words list rather than a second copy.
    sentiment_scores = calculate_custom_sentiment(tokenized_sentences, lexicon, negation_words)
    plot_sentiment_over_time(sentiment_scores)

    positive_count, negative_count, neutral_count, overall_sentiment = summarize_sentiment(sentiment_scores)

    # Print sentiment analysis results
    print(f"Positive Sentiments: {positive_count}")
    print(f"Negative Sentiments: {negative_count}")
    print(f"Neutral Sentiments: {neutral_count}")
    print(f"Overall Sentiment: {overall_sentiment}")


# Everything that needs the input files lives behind this guard, so the
# module can be imported without touching sentiment_scores before it exists.
if __name__ == "__main__":
    main()
Leave a Comment