Untitled
unknown
python
2 years ago
1.6 kB
8
Indexable
from Bio import SeqIO
import re
def _new_matches(pattern, sequence, seen):
    """Return the hits of *pattern* in *sequence* that are not yet in *seen*.

    A hit is a ``(start, text)`` tuple, where ``start`` is the 0-based offset
    reported by ``re.Match.start()``. Every hit returned is also added to
    *seen*, so the same hit is not returned again for a later pattern.
    """
    fresh = []
    for m in re.finditer(pattern, sequence):
        hit = (m.start(), m.group())
        # Build a new list instead of removing from the list being iterated:
        # removing in place skips the element after each removed duplicate.
        if hit not in seen:
            seen.add(hit)
            fresh.append(hit)
    return fresh


def _lowercase_ends(match_str):
    """Return *match_str* with its first and last characters lower-cased."""
    # With fewer than two characters the first and last character coincide;
    # slicing them separately would emit that character twice.
    if len(match_str) < 2:
        return match_str.lower()
    return match_str[0].lower() + match_str[1:-1] + match_str[-1].lower()


def cercapatterns(input_filename, patterns, output_filename):
    """Search every FASTA record for the given motifs and write a text report.

    Args:
        input_filename: Path of the FASTA file to read.
        patterns: Iterable of regular expressions, tried in order on each
            record's sequence.
        output_filename: Path of the report file; it is overwritten.

    For each record the report contains a header line with the record
    description, then, for every pattern with at least one hit, the number of
    hits followed by one line per hit giving the matched text (first and last
    characters lower-cased) and its 0-based start offset. A hit whose
    ``(start, text)`` pair was already reported for an earlier pattern of the
    same record is not reported again.

    NOTE: results are keyed by ``record.description``, so records sharing a
    description overwrite one another (the last one wins).
    """
    results = {}
    with open(input_filename, 'r') as file:
        for record in SeqIO.parse(file, 'fasta'):
            # Convert the sequence once per record, not once per pattern.
            sequence = str(record.seq)
            seq_length = len(record.seq)
            trovati = set()  # (start, text) hits already reported for this record
            per_pattern = []
            for pattern in patterns:
                matches = _new_matches(pattern, sequence, trovati)
                if matches:
                    per_pattern.append((seq_length, pattern, matches))
            results[record.description] = per_pattern

    with open(output_filename, 'w') as outfile:
        for gene, values in results.items():
            outfile.write(f"Risultati per il gene: {gene}\n")
            # The sequence length is stored with each entry but not reported.
            for _length, pattern, matches in values:
                outfile.write(f"Trovato/i {len(matches)} occorrenze del motivo {pattern}\n")
                for position, match_str in matches:
                    shown = _lowercase_ends(match_str)
                    outfile.write(f"Trovato il motivo {shown} alla posizione {position}\n")
            # Blank line separating one gene's section from the next.
            outfile.write("\n")
# Motifs to search for, written as regular expressions. Each one starts and
# ends with `.`, i.e. it also captures one arbitrary character on either side.
patterns = [
    r'.AGT.{3,11}ACT.',
    r'.TAGT.{3,11}ATC.',
    r'.AG.{3,11}ACT.',
    r'.AGT.{3,11}CT.',
]

if __name__ == "__main__":
    import sys

    # The input and output paths were never defined in this script, so the
    # call below raised NameError; take them from the command line instead.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} INPUT_FASTA OUTPUT_FILE")
    input_filename, output_filename = sys.argv[1:3]
    cercapatterns(input_filename, patterns, output_filename)
Editor is loading...
Leave a Comment