# Paste-site metadata (not part of the script): Untitled | unknown | python | 3 years ago | 2.1 kB | 5 | Indexable
import csv
import re
import string
# Count the distinct punctuation characters that appear repeated in a comment
def count_repeated_punctuation(comment):
    """Return how many distinct punctuation marks occur in a run of two or more.

    A run is the same character from ``string.punctuation`` repeated
    back-to-back (for example ``"!!"`` or ``"???"``).  Each character is
    counted once, however many runs of it the comment contains.  Mixed
    sequences such as ``"?!"`` are not runs and do not count.

    Args:
        comment: Text to scan.

    Returns:
        The number of distinct punctuation characters that have at least one
        run in ``comment`` (0 when there is none).
    """
    # The capture group matches one punctuation character and the
    # backreference requires that same character to follow at least once.
    # With a single group, findall yields the captured character per run.
    pattern = r'([{}])\1+'.format(re.escape(string.punctuation))
    repeated_chars = re.findall(pattern, comment)
    # Collapse to a set so several runs of the same character count once.
    return len(set(repeated_chars))
# Count "?" and "!" comments per alignment and language in the sample TSV
# 0-based positions of the columns read from each TSV row
ALIGNMENT_COL = 2
LANGUAGE_COL = 6
COMMENT_COL = 8

# Language codes that are tallied and reported
LANGUAGES = ('en', 'fr', 'es')


def tally_marked_comments(rows, languages=LANGUAGES):
    """Count comments containing "?" or "!" per alignment and language.

    Only rows whose alignment is exactly ``'In favor'`` or ``'Against'`` and
    whose language is one of ``languages`` are counted.  A comment containing
    both marks is counted once in the "?" tally and once in the "!" tally.

    Args:
        rows: Iterable of already-split rows (sequences of strings), with the
            header row excluded.
        languages: Language codes to tally; rows in any other language are
            skipped instead of raising ``KeyError``.

    Returns:
        A tuple ``(question_infavor, question_against, excl_infavor,
        excl_against)`` of dicts mapping each language code to its count.
    """
    question_infavor = dict.fromkeys(languages, 0)
    question_against = dict.fromkeys(languages, 0)
    excl_infavor = dict.fromkeys(languages, 0)
    excl_against = dict.fromkeys(languages, 0)

    # Map each alignment label to its ("?" counter, "!" counter) pair.
    counters_by_alignment = {
        'In favor': (question_infavor, excl_infavor),
        'Against': (question_against, excl_against),
    }
    min_columns = max(ALIGNMENT_COL, LANGUAGE_COL, COMMENT_COL) + 1

    for row in rows:
        # csv.reader yields [] for blank lines; truncated rows also lack the
        # columns we index, so skip them rather than raise IndexError.
        if len(row) < min_columns:
            continue

        language = row[LANGUAGE_COL]
        counters = counters_by_alignment.get(row[ALIGNMENT_COL])
        if counters is None or language not in question_infavor:
            continue

        comment = row[COMMENT_COL]
        question_counts, excl_counts = counters
        if '?' in comment:
            question_counts[language] += 1
        if '!' in comment:
            excl_counts[language] += 1

    return question_infavor, question_against, excl_infavor, excl_against


def main(tsv_path='short_coms_sample.tsv'):
    """Read the TSV at ``tsv_path`` and print the per-language counts.

    Args:
        tsv_path: Path of the tab-separated file; its first row is treated as
            a header and skipped.
    """
    # newline='' is what the csv module expects for its input files.  The
    # encoding is pinned so decoding does not depend on the platform locale.
    # NOTE(review): UTF-8 is assumed for the data file -- confirm.
    with open(tsv_path, 'r', newline='', encoding='utf-8') as tsv_file:
        tsv_reader = csv.reader(tsv_file, delimiter='\t')
        next(tsv_reader, None)  # skip header row; tolerate an empty file
        (question_infavor, question_against,
         excl_infavor, excl_against) = tally_marked_comments(tsv_reader)

    # Print results for each language
    for lang in LANGUAGES:
        print(f'Nombre de commentaires POUR contentant "?" en {lang}: {question_infavor[lang]}')
        print(f'Nombre de commentaires CONTRE contentant "?" en {lang}: {question_against[lang]}')
        print(f'Nombre de commentaires POUR contentant "!" en {lang}: {excl_infavor[lang]}')
        print(f'Nombre de commentaires CONTRE contentant "!" en {lang}: {excl_against[lang]}')


if __name__ == '__main__':
    main()