Untitled
unknown
plain_text
2 years ago
12 kB
5
Indexable
Never
# -*- coding: utf-8 -*-
"""
Created on Tue May 24 18:16:48 2022

@author: missc
"""

# The PrintCodon.py implemented using methods/ functions
# this program takes a DNA sequence and prints all the codons (substrings of size 3)
import sys

try:
    import tkinter as tk
    from tkinter import filedialog
except ImportError:
    # tkinter is optional: without it the file dialogs fall back to console
    # prompts, and the pure sequence functions stay importable.
    tk = None
    filedialog = None

# DNA <-> AA translation table: CodonTable
# global declaration: it is a constant table/dictionary
CodonTable = {
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
    'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
    'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
    'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
    'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
    'TAC': 'Y', 'TAT': 'Y', 'TAA': '*', 'TAG': '*',
    'TGC': 'C', 'TGT': 'C', 'TGA': '*', 'TGG': 'W',
}

# Base -> complementary base, used by Compliment().
_COMPLEMENT_BASE = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

# File name used by Write() when the user supplies no name at all.
_DEFAULT_OUTPUT_NAME = "AminoAcid_Sequence.fasta"


def _ask_open_filename():
    """Return the path of the file to read, or '' if none was chosen.

    Uses the tkinter file-open dialog when tkinter is importable; otherwise
    asks for the name on the console.
    """
    if tk is None:
        return input("enter the name of the fasta file: ").strip()
    root = tk.Tk()
    root.wm_withdraw()  # this completely hides the root window
    try:
        # a cancelled dialog may return '' or (); normalise both to ''
        return filedialog.askopenfilename(filetypes=[('All files', '*.*')]) or ""
    finally:
        root.destroy()


def _ask_save_filename():
    """Return a path picked in the tkinter save dialog, or '' if unavailable."""
    if tk is None:
        return ""
    root = tk.Tk()
    root.wm_withdraw()  # this completely hides the root window
    try:
        return filedialog.asksaveasfilename(filetypes=[('All Files', '*.*')]) or ""
    finally:
        root.destroy()


def FileRead(FileName):
    """Read a FASTA-style file and return its descriptor and sequence text.

    Parameters:
        FileName: path of the file to open in text mode.

    Returns:
        A two-element list ``[DesLine, Data]``.  ``DesLine`` is the list
        produced by ``readlines(1)`` (normally one element: the first line
        of the file, newline included; empty for an empty file).  ``Data``
        is the remainder of the file as one string, line breaks included.

    On an I/O error a message is printed, the function waits for Enter and
    the program exits with status 1.
    """
    try:
        # 'with' closes the file on both the success and the failure path
        with open(FileName, 'r') as Fp1:
            DesLine = Fp1.readlines(1)  # read the descriptor line
            Data = Fp1.read()           # read the remainder of the file
    except IOError:
        print("error unable to read file or file does not exist!!!")
        print("Exiting the program")
        input()  # keep the console open until the user acknowledges
        sys.exit(1)
    return [DesLine, Data]
#**************************** end of the File Read Function *****************************************


#DnaSequence = "ACTGAtATAGATAGCGCGCTAGCTACGATCGATCGATGCTAGCTAGCTGCGATCGATTATCGTAGTGTTTACTCCGTGTAGCTAGTCGTATTTAGATGATAGTAGATCGATGCATGCTAGTAGTTATGCGTGCGCATGCTGCATGCTGGCATCGAGTCTCGCATCGGCATCCTG"
def Transcribe(DnaSeq):
    """Print the raw sequence text and return it as one contiguous string.

    Parameters:
        DnaSeq: sequence text as read from the file, possibly spanning
            several lines.

    Returns:
        The sequence with every whitespace character (line breaks, stray
        spaces or tabs) removed.
    """
    # print DNA sequence before the line breaks are removed
    print("DNASequence: ", DnaSeq)
    print(DnaSeq)
    # str.split() with no argument splits on any run of whitespace
    return ''.join(DnaSeq.split())


#************************** Sequence translation ******************************
def Translate(DnaSequence, RFNumber):
    """Translate one reading frame of a DNA strand into amino acids.

    Parameters:
        DnaSequence: the DNA strand; bases may be upper or lower case.
        RFNumber: zero-based reading-frame offset (reading frame 1 is 0).

    Returns:
        The amino-acid string, one letter per codon found in CodonTable
        ('*' marks a stop codon).  Codons that are not in the table and an
        incomplete trailing codon are skipped.
    """
    print("\n************************************* reading frame number {:d} *******************************".format(RFNumber+1))
    DnaSequenceRF = DnaSequence[RFNumber:len(DnaSequence)]
    print("the DNA seq is: \n")
    print(DnaSequenceRF)

    AminoAcids = []
    # stop at len - 2 so only complete three-base codons are examined
    for n in range(RFNumber, len(DnaSequence) - 2, 3):
        # upper-case so lower-case bases are translated instead of dropped
        AminoAcid = CodonTable.get(DnaSequence[n:n + 3].upper())
        if AminoAcid is not None:
            AminoAcids.append(AminoAcid)
    # return the translated sequence for this reading frame
    return ''.join(AminoAcids)


def Write(Des_Seq, AminoAcidSeq):
    """Append a descriptor line and an amino-acid sequence to a file.

    Parameters:
        Des_Seq: the descriptor, either a string or a list of strings
            (the shape FileRead returns).
        AminoAcidSeq: the amino-acid sequence to write.

    The file name is typed on the console.  If it is left blank, the
    tkinter save dialog is offered (when available); if that gives no name
    either, "AminoAcid_Sequence.fasta" is used.  The file is opened in
    append mode.  On an I/O error a message is printed, the function waits
    for Enter and the program exits with status 1.
    """
    # input the name of the file
    FileName = input("\nEnter the name of the file for writing (AminoAcid_Sequence): ").strip()
    if not FileName:
        FileName = _ask_save_filename() or _DEFAULT_OUTPUT_NAME

    # join handles both a plain string and a list of lines; the trailing
    # newline is dropped so no blank line ends up between the two records
    Descriptor = ''.join(Des_Seq).rstrip('\n')
    try:
        with open(FileName, 'a') as Fp1:
            Fp1.write(Descriptor + "\n")
            # end with a newline so a later append starts on its own line
            Fp1.write(AminoAcidSeq + "\n")
    except IOError:
        print("File could not be open: ", file=sys.stderr)
        print("error unable to create or write to file {:s}".format(FileName))
        print("Exiting the program")
        input()  # keep the console open until the user acknowledges
        sys.exit(1)
    print("data successfully written to the file")
    print("closing file...")
#******************************** end of the amino acid write function ******************************************


#************************************************************************
def Compliment(DnaSeq):
    """Return the reverse complement of a DNA strand.

    A is replaced by T, T by A, G by C and C by G, and the result is
    reversed.  Bases may be upper or lower case; the result is upper case.
    Any other character (line breaks, ambiguity codes, ...) is dropped.

    Parameters:
        DnaSeq: the DNA strand.

    Returns:
        The reverse-complement strand.
    """
    return ''.join(
        _COMPLEMENT_BASE[Base]
        for Base in reversed(DnaSeq.upper())
        if Base in _COMPLEMENT_BASE
    )
#*************************************************************************


#******************************finding open reading frames********************
def FindORF(AminoAcidSeq):
    """Find and print every open reading frame in an amino-acid strand.

    An ORF starts at an 'M' and runs to the next '*'.  The scan resumes
    after that '*', so reported ORFs do not overlap.  An 'M' with no '*'
    after it is announced but not reported as an ORF.

    Parameters:
        AminoAcidSeq: translated amino-acid string ('*' marks a stop).

    Returns:
        A list of ``[start, end, length, sequence]`` records, where
        ``start`` and ``end`` are the 1-based positions of the 'M' and of
        the '*', ``length`` is the number of residues before the '*', and
        ``sequence`` includes the trailing '*'.
    """
    print("\n **********************The List of ORF *****************\n")
    ORFList = []
    index = 0
    while index < len(AminoAcidSeq):
        if AminoAcidSeq[index] != 'M':
            index += 1
            continue
        # found start
        start = index
        print("\n found M at position {:d}".format(start + 1))
        end = AminoAcidSeq.find('*', start)
        if end == -1:
            # no stop after this M, so no later M can be terminated either
            break
        ORF = AminoAcidSeq[start:end + 1]
        # ORF data (start, end, length and sequence)
        print("\n The ORF start is: {:d}; the ORF end is: {:d}; the length is {:d}\n".format(start + 1, end + 1, end - start))
        print(ORF)
        ORFList.append([start + 1, end + 1, end - start, ORF])
        index = end + 1
    print(ORFList)
    return ORFList


#******************************* the driver or main function ********************************************************
def main():
    """Read a FASTA file, translate all six reading frames and list ORFs."""
    # prompt the user to enter the name of the file.
    print("this program will read the contents of a DNA fasts file and translate them to amino acids")
    FileName = _ask_open_filename()

    # call the FileRead function
    FileContents = FileRead(FileName)
    # Display the contents of the list
    print("the contents of {:s} are: \n".format(FileName))
    print(FileContents[0])
    DesLine = FileContents[0]
    DnaSeq = FileContents[1]
    # the Descriptor line
    print(" the descriptor line is: \n")
    print(DesLine)

    # pass DNA sequence to a function to remove all carriage returns
    DnaSequence = Transcribe(DnaSeq)
    print("the contiguous DNA sequence is:")
    print(DnaSequence)

    # translate the cleaned sequence: the raw text still contains line
    # breaks, which would corrupt every codon that spans two lines
    # (note RF 1 has number 0)
    print("\n************************* All the amino acids of the PRIMARY STRAND ********************************************\n")
    for RFNumber in range(0, 3):
        AminoAcidSeq_1 = Translate(DnaSequence, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)
        print("the DNA sequence from right to left: ")
        print(DnaSequence[::-1])
        print("\nthe amino acid seq from right to left: ")
        print(AminoAcidSeq_1[::-1])

    # get the compliment of DNA seq
    ComplimentSeq = Compliment(DnaSequence)
    print("\n************************** the Reverse compliment sequence 5' to 3' ***************************\n")
    print(ComplimentSeq)

    print("\n************************* All the amino acids of the Compliments STRAND ********************************************\n")
    for RFNumber in range(0, 3):
        AminoAcidSeq_1 = Translate(ComplimentSeq, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)

    # call the write amino acid sequence to file function
    # Write(DesLine, AASequence_1)

    # NOTE(review): only the last translated frame (RF 3 of the reverse
    # strand) is searched for ORFs, as before -- confirm whether all six
    # frames were intended.
    FindORF(AminoAcidSeq_1)

    input("Press enter to exit program....")


# ****************** test plan ********************************
# run the program and ensure the output is as expected:
# codon 1 is ATG....

#**************** execute program **************************
if __name__ == "__main__":
    main()