# Untitled
#
# mail@pastecode.io avatar
# unknown
# python
# 8 months ago
# 1.5 kB
# 5
# Indexable
# Never
from Bio import SeqIO
import re

def cercapatterns(input_filename, patterns, output_filename):
    """Search every FASTA record for regex motifs and write a text report.

    Each record of ``input_filename`` is scanned with every regular
    expression in ``patterns`` (in order). A hit is identified by the pair
    ``(start, matched_text)``; if a later pattern produces a hit identical to
    one already found in the same record, it is reported only once, under the
    first pattern that found it.

    Args:
        input_filename: Path of the FASTA file to read.
        patterns: Iterable of regular-expression strings to search for.
        output_filename: Path of the report file; it is overwritten.

    The report contains one section per record (keyed by the record's
    description line), listing for each pattern with at least one new hit
    the number of hits and, for every hit, the matched text (with its first
    and last character lower-cased) and its 0-based start position.
    """
    results = {}
    with open(input_filename, 'r') as file:
        for record in SeqIO.parse(file, 'fasta'):
            # Convert once per record instead of once per pattern.
            sequence = str(record.seq)
            trovati = set()  # (start, text) hits already reported for this record
            hits = []
            for pattern in patterns:
                # Build the de-duplicated list in a single pass. Removing
                # items from a list while iterating over it skips elements,
                # which let duplicates slip through.
                matches = []
                for m in re.finditer(pattern, sequence):
                    match = (m.start(), m.group())
                    if match not in trovati:
                        trovati.add(match)
                        matches.append(match)
                if matches:
                    hits.append((len(sequence), pattern, matches))
            results[record.description] = hits

    with open(output_filename, 'w') as outfile:
        for gene, values in results.items():
            outfile.write(f"Risultati per il gene: {gene}\n")
            # The sequence length is stored in each entry but not reported.
            for _length, pattern, matches in values:
                outfile.write(f"Trovato/i {len(matches)} occorrenze del motivo {pattern}\n")
                for position, match_str in matches:
                    # Lower-case the first and last character of the match,
                    # which correspond to the leading/trailing '.' wildcard
                    # of the patterns used by this script.
                    match_str = match_str[:1].lower() + match_str[1:-1] + match_str[-1:].lower()
                    outfile.write(f"Trovato il motivo {match_str} alla posizione {position}\n")
                outfile.write("\n")

# Motifs to search for; each starts and ends with a '.' wildcard that
# captures one flanking base on either side of the motif.
patterns = [
    r'.AGT.{3,11}ACT.',
    r'.TAGT.{3,11}ATC.',
    r'.AG.{3,11}ACT.',
    r'.AGT.{3,11}CT.',
]

if __name__ == "__main__":
    # The input/output paths were never defined in this file, so the original
    # unconditional call raised NameError; take them from the command line.
    import sys

    if len(sys.argv) != 3:
        sys.exit(f"Usage: {sys.argv[0]} INPUT_FASTA OUTPUT_FILE")
    input_filename, output_filename = sys.argv[1], sys.argv[2]
    cercapatterns(input_filename, patterns, output_filename)
# Leave a Comment