# Untitled
#
# mail@pastecode.io avatar
# unknown
# python
# a year ago
# 2.1 kB
# 1
# Indexable
# Never
import csv
import re
import string

def count_repeated_punctuation(comment):
    """Return how many distinct punctuation characters are repeated in *comment*.

    A character counts as repeated when it occurs at least twice in a row
    (for example ``!!`` or ``...``).  Each such character is counted once,
    no matter how many separate runs of it the comment contains.
    """
    # Character class made of every ASCII punctuation mark, escaped so that
    # regex metacharacters such as "]" or "\" are taken literally.
    punctuation_class = '[' + re.escape(string.punctuation) + ']'

    # One captured mark followed by one or more copies of that same mark.
    run_pattern = re.compile('(' + punctuation_class + r')\1+')

    repeated_marks = {run.group(1) for run in run_pattern.finditer(comment)}
    return len(repeated_marks)

# Tally, per language and per stance, how many comments contain "?" or "!".

# Languages reported on; rows written in any other language are skipped
# instead of aborting the whole run with a KeyError.
languages = ['en', 'fr', 'es']

# One counter per (punctuation mark, stance) pair, keyed by language code.
question_infavor = dict.fromkeys(languages, 0)
question_against = dict.fromkeys(languages, 0)
excl_infavor = dict.fromkeys(languages, 0)
excl_against = dict.fromkeys(languages, 0)

# Maps (mark, alignment label) to the counter that should be incremented.
# Rows whose alignment is neither label match no entry and are not counted.
counters_by_mark_and_alignment = {
    ('?', 'In favor'): question_infavor,
    ('?', 'Against'): question_against,
    ('!', 'In favor'): excl_infavor,
    ('!', 'Against'): excl_against,
}

# Open TSV file and read rows.
# The encoding is explicit so the result does not depend on the platform
# default. NOTE(review): assumes the file is UTF-8 -- confirm with the export.
with open('short_coms_sample.tsv', 'r', newline='', encoding='utf-8') as tsv_file:
    tsv_reader = csv.reader(tsv_file, delimiter='\t')
    next(tsv_reader, None)  # skip header row; the default tolerates an empty file

    for row in tsv_reader:
        # csv.reader yields [] for blank lines; such rows and truncated ones
        # lack the columns read below.
        if len(row) < 9:
            continue

        # Extract relevant columns
        alignment = row[2]
        language = row[6]
        comment = row[8]

        if language not in question_infavor:
            continue

        # A comment containing both marks is counted once for each of them.
        for mark in ('?', '!'):
            if mark not in comment:
                continue
            counter = counters_by_mark_and_alignment.get((mark, alignment))
            if counter is not None:
                counter[language] += 1

# Print results for each language (the file is already closed at this point).
for lang in languages:
    print(f'Nombre de commentaires POUR contentant "?" en {lang}: {question_infavor[lang]}')
    print(f'Nombre de commentaires CONTRE contentant "?" en {lang}: {question_against[lang]}')
    print(f'Nombre de commentaires POUR contentant "!" en {lang}: {excl_infavor[lang]}')
    print(f'Nombre de commentaires CONTRE contentant "!" en {lang}: {excl_against[lang]}')