Untitled
unknown
python
2 years ago
2.1 kB
4
Indexable
"""Count '?' and '!' usage in comments, split by alignment and language.

Reads a tab-separated file of comments and reports, for each tracked
language, how many "In favor" and "Against" comments contain a question
mark or an exclamation mark.
"""

import csv
import re
import string
from collections import Counter

# Zero-based column positions read from each TSV row.
ALIGNMENT_COLUMN = 2
LANGUAGE_COLUMN = 6
COMMENT_COLUMN = 8

# Languages that appear in the printed report, in output order.
LANGUAGES = ('en', 'fr', 'es')
# Alignment values that are tallied; rows with any other value are ignored.
ALIGNMENTS = ('In favor', 'Against')
# Punctuation marks whose presence in a comment is tallied.
MARKS = ('?', '!')

# A punctuation character immediately followed by one or more copies of
# itself. Compiled once here instead of on every call.
REPEATED_PUNCTUATION = re.compile(
    r'([{}])\1+'.format(re.escape(string.punctuation))
)


def count_repeated_punctuation(comment):
    """Return how many distinct punctuation characters occur in a run.

    A "run" is the same ASCII punctuation character two or more times in
    a row. Each character is counted once however many runs it has, so
    ``"Wow!!! Really?? No!!"`` gives 2 (``!`` and ``?``).

    Args:
        comment: The text to scan.

    Returns:
        The number of distinct repeated punctuation characters.
    """
    # findall() returns the captured character of every run; the set
    # collapses several runs of the same character into one entry.
    return len(set(REPEATED_PUNCTUATION.findall(comment)))


def tally_punctuation(rows):
    """Count comments containing '?' or '!' per alignment and language.

    Args:
        rows: An iterable of rows (sequences of strings), e.g. a
            ``csv.reader`` positioned after the header row.

    Returns:
        A ``collections.Counter`` keyed by ``(mark, alignment, language)``.
        Combinations that never occurred read back as 0.
    """
    counts = Counter()
    for row in rows:
        # csv.reader yields [] for blank lines; skip any row too short to
        # hold the comment column instead of failing with IndexError.
        if len(row) <= COMMENT_COLUMN:
            continue
        alignment = row[ALIGNMENT_COLUMN]
        if alignment not in ALIGNMENTS:
            continue
        language = row[LANGUAGE_COLUMN]
        comment = row[COMMENT_COLUMN]
        # A comment holding both marks is counted once under each mark.
        # Keying the Counter by language means an unexpected language
        # code is tallied under its own key rather than raising KeyError.
        for mark in MARKS:
            if mark in comment:
                counts[mark, alignment, language] += 1
    return counts


def format_report(counts, languages=LANGUAGES):
    """Build the report lines for the given languages.

    Args:
        counts: Mapping returned by ``tally_punctuation``.
        languages: Language codes to report on, in output order.

    Returns:
        A list of four lines per language: '?' in favor, '?' against,
        '!' in favor, '!' against.
    """
    lines = []
    for lang in languages:
        question_for = counts['?', 'In favor', lang]
        question_against = counts['?', 'Against', lang]
        exclamation_for = counts['!', 'In favor', lang]
        exclamation_against = counts['!', 'Against', lang]
        lines.append(f'Nombre de commentaires POUR contentant "?" en {lang}: {question_for}')
        lines.append(f'Nombre de commentaires CONTRE contentant "?" en {lang}: {question_against}')
        lines.append(f'Nombre de commentaires POUR contentant "!" en {lang}: {exclamation_for}')
        lines.append(f'Nombre de commentaires CONTRE contentant "!" en {lang}: {exclamation_against}')
    return lines


def main(path='short_coms_sample.tsv', encoding='utf-8'):
    """Read the TSV at *path* and print the punctuation report.

    Args:
        path: Path of the tab-separated input file; the first row is
            treated as a header and skipped.
        encoding: Text encoding of the file. NOTE(review): assumes the
            data is UTF-8 -- pass a different encoding if it is not.
    """
    with open(path, 'r', newline='', encoding=encoding) as tsv_file:
        tsv_reader = csv.reader(tsv_file, delimiter='\t')
        # The default of None keeps an empty file from raising
        # StopIteration when the header row is skipped.
        next(tsv_reader, None)
        counts = tally_punctuation(tsv_reader)

    for line in format_report(counts):
        print(line)


if __name__ == '__main__':
    main()
Editor is loading...