Untitled
unknown
python
8 months ago
1.5 kB
5
Indexable
Never
# Search every record of a FASTA file for a list of regex motifs and write a
# plain-text report of the hits, grouped by record.

import re
import sys

from Bio import SeqIO


def _lowercase_flanks(text):
    """Return *text* with its first and last characters lower-cased."""
    if len(text) < 2:
        # A single character is both the first and the last one; slicing it
        # from each end and concatenating would duplicate it.
        return text.lower()
    return text[0].lower() + text[1:-1] + text[-1].lower()


def _new_hits(sequence, regex, seen):
    """Return the hits of *regex* in *sequence* that are not yet in *seen*.

    A hit is a ``(start, matched_text)`` tuple, where ``start`` is the 0-based
    offset reported by ``Match.start()``. Every returned hit is also added to
    *seen*, so a later pattern producing the same hit will not report it again.
    """
    fresh = []
    for found in regex.finditer(sequence):
        hit = (found.start(), found.group())
        if hit not in seen:
            seen.add(hit)
            fresh.append(hit)
    return fresh


def cercapatterns(input_filename, patterns, output_filename):
    """Search each FASTA record for *patterns* and write a report.

    Args:
        input_filename: Path of the FASTA file to read.
        patterns: Iterable of regular expressions to search for, tried in the
            given order on every record.
        output_filename: Path of the text report to write (overwritten).

    For every record, each ``(start, matched_text)`` hit is reported only
    once: if a later pattern matches exactly the same text at the same
    position as an earlier one, the later hit is dropped. Patterns left with
    no new hits are omitted from that record's section. Records are keyed by
    their description, so a repeated description replaces the earlier entry.
    """
    # Compile once instead of once per record; this also lets *patterns* be a
    # one-shot iterable.
    compiled = [(pattern, re.compile(pattern)) for pattern in patterns]

    results = {}
    with open(input_filename, 'r') as file:
        for record in SeqIO.parse(file, 'fasta'):
            sequence = str(record.seq)
            seen = set()
            record_hits = results[record.description] = []
            for pattern, regex in compiled:
                # Build a new list rather than removing items from the list
                # being iterated: removing during iteration skips the element
                # that follows each removal, letting duplicates slip through.
                matches = _new_hits(sequence, regex, seen)
                if matches:
                    record_hits.append((len(sequence), pattern, matches))

    with open(output_filename, 'w') as outfile:
        for gene, values in results.items():
            outfile.write(f"Risultati per il gene: {gene}\n")
            for _seq_len, pattern, matches in values:
                outfile.write(f"Trovato/i {len(matches)} occorrenze del motivo {pattern}\n")
                for position, match_str in matches:
                    # The outermost characters of each match are shown in
                    # lower case in the report.
                    match_str = _lowercase_flanks(match_str)
                    outfile.write(f"Trovato il motivo {match_str} alla posizione {position}\n")
            # Blank line between records.
            outfile.write("\n")


# NOTE(review): the second motif ends in "ATC" while the others end in
# "ACT"/"CT" - confirm this is intentional and not a transposition.
patterns = [
    '.AGT.{3,11}ACT.',
    '.TAGT.{3,11}ATC.',
    '.AG.{3,11}ACT.',
    '.AGT.{3,11}CT.',
]


if __name__ == "__main__":
    # The file names were previously referenced without ever being defined;
    # take them from the command line instead.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} INPUT_FASTA OUTPUT_FILE")
    input_filename, output_filename = sys.argv[1], sys.argv[2]
    cercapatterns(input_filename, patterns, output_filename)
Leave a Comment