# Paste metadata (not part of the program):
# C20301201
# unknown
# plain_text
# 2 years ago
# 10 kB
# 0
# Indexable
# Never
"""Translate a DNA FASTA file into amino acids and list open reading frames.

The program asks for a FASTA file through a file dialog, prints the three
reading-frame translations of the primary strand and of the reverse
complement strand, and lists the open reading frames (ORFs) found in the
reverse-complement translations.
"""
import sys

try:
    import tkinter as tk
    from tkinter import filedialog
except ImportError:
    # Some Python builds ship without Tk. The translation functions do not
    # need it, so only the file dialogs become unavailable.
    tk = None
    filedialog = None

# Standard genetic code: DNA codon -> one-letter amino acid ('*' is a stop).
CodonTable = {
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
    'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
    'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
    'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
    'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
    'TAC': 'Y', 'TAT': 'Y', 'TAA': '*', 'TAG': '*',
    'TGC': 'C', 'TGT': 'C', 'TGA': '*', 'TGG': 'W',
}

# Watson-Crick base pairing used by Compliment().
_BASE_PAIRS = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}


def _ChooseFile(Save=False):
    """Show a file dialog and return the chosen path.

    Save selects a "save as" dialog instead of an "open" dialog.
    Returns "" when the dialog is cancelled, so that a following open()
    raises the IOError the callers already handle.
    Raises RuntimeError when tkinter could not be imported.
    """
    if tk is None:
        raise RuntimeError("tkinter is not available; cannot show a file dialog")
    root = tk.Tk()
    root.wm_withdraw()  # this completely hides the root window
    try:
        if Save:
            Path = filedialog.asksaveasfilename(filetypes=[('All Files', '*.*')])
        else:
            Path = filedialog.askopenfilename(filetypes=[('All files', '*.*')])
    finally:
        root.destroy()
    # A cancelled dialog yields an empty value; normalise it to "".
    return Path or ""


def FileRead(FileName):
    """Read a FASTA file and return [DesLine, Data].

    DesLine is the result of readlines(1) on the file, i.e. a list holding
    the first (descriptor) line; Data is the rest of the file as one string,
    still containing its line breaks.

    If the file cannot be opened or read, an error is printed, the program
    waits for Enter and exits with status 1.
    """
    try:
        # The with-block closes the file on success and on failure alike.
        with open(FileName, 'r') as Fp1:
            DesLine = Fp1.readlines(1)  # read the descriptor line
            Data = Fp1.read()           # read the remainder of the file
    except IOError:
        print("error unable to read file or file does not exist!!!")
        print("Exiting the program")
        input()  # keep the console open until the user presses Enter
        sys.exit(1)
    return [DesLine, Data]
#**************************** end of the File Read Function *****************************************


def Transcribe(DnaSeq):
    """Return DnaSeq as one contiguous string with all whitespace removed."""
    return ''.join(DnaSeq.split())


#************************** Sequence translation ******************************
def Translate(DnaSequence, RFNumber):
    """Translate one reading frame of DnaSequence into amino acids.

    RFNumber is the zero-based offset of the frame (0, 1 or 2). The frame
    header and the frame's DNA are printed. Codons that are not in
    CodonTable (including an incomplete trailing codon) are skipped.
    Returns the amino acid string; stop codons appear as '*'.
    """
    print("\n************************************* reading frame number {:d} *******************************".format(RFNumber+1))
    DnaSequenceRF = DnaSequence[RFNumber:]
    print("the DNA seq is: \n")
    print(DnaSequenceRF)
    Codons = (DnaSequenceRF[n:n + 3] for n in range(0, len(DnaSequenceRF), 3))
    return ''.join(CodonTable[Codon] for Codon in Codons if Codon in CodonTable)


def _FormatORFs(ORFList):
    """Render ORF data as text: one entry per line, record fields tab-separated."""
    if isinstance(ORFList, str):
        return ORFList
    Lines = []
    for Entry in ORFList:
        if isinstance(Entry, (list, tuple)):
            Lines.append("\t".join(str(Field) for Field in Entry))
        else:
            Lines.append(str(Entry))
    return "\n".join(Lines)


def Write(DesLine, AminoAcidSeq_1, ORFList):
    """Append the descriptor line, an amino acid sequence and ORF data to a file.

    The target file is chosen through a "save as" dialog and opened in
    append mode. DesLine may be a string or a list of strings. ORFList may
    be a string or a list such as the one returned by FindORF.

    If the file cannot be opened or written, an error is printed, the
    program waits for Enter and exits with status 1.
    """
    FileName2 = _ChooseFile(Save=True)
    try:
        with open(FileName2, 'a') as Fp2:
            Fp2.writelines(DesLine)
            Fp2.writelines("\n")
            Fp2.write(AminoAcidSeq_1)
            Fp2.write("\n")
            # write() only accepts text, so the ORF records are formatted first.
            Fp2.write(_FormatORFs(ORFList))
            Fp2.write("\n")
            print("data successfully written to the file")
            print("closing file...")
    except IOError:
        print("File could not be open: ", file=sys.stderr)
        print("error unable to create or write to file {:s}".format(FileName2))
        print("Exiting the program")
        input()  # keep the console open until the user presses Enter
        sys.exit(1)
#******************************** end of the amino acid write function ******************************************


def Compliment(DnaSeq):
    """Return the reverse complement of a DNA strand.

    A is replaced by T, T by A, G by C and C by G, and the result is
    reversed. Any other character (line breaks included) is dropped.
    """
    return ''.join(_BASE_PAIRS[Base] for Base in reversed(DnaSeq) if Base in _BASE_PAIRS)
#*************************************************************************


#******************************finding open reading frames********************
def FindORF(AminoAcidSeq_1):
    """Print and return the open reading frames of an amino acid sequence.

    An ORF starts at an 'M' and runs up to the next '*'. Scanning resumes
    after that stop, so an 'M' inside an ORF does not start another one,
    and an 'M' with no stop after it is not reported.

    Returns a list with one [start, end, length, sequence] record per ORF:
    start is the 1-based position of the 'M', end the 1-based position of
    the '*', length the number of amino acids before the stop, and sequence
    those amino acids.
    """
    print("\n************************* The list of ORF ***********************************************\n")
    Found = []
    index = 0
    while index < len(AminoAcidSeq_1):
        if AminoAcidSeq_1[index] == 'M':  # found start
            start = index
            end = AminoAcidSeq_1.find('*', start)
            if end == -1:
                # No stop after this start, so no complete ORF remains.
                break
            Record = [start + 1, end + 1, end - start, AminoAcidSeq_1[start:end]]
            # print ORF data (start, end, length and sequence)
            print("\nthe ORF start is: {:d}; the ORF end is: {:d}; the lenth is: {:d}\n".format(Record[0], Record[1], Record[2]))
            print(Record[3])
            Found.append(Record)
            index = end  # continue scanning after the stop
        index += 1
    return Found


#******************************* the driver or main function ********************************************************
def main():
    """Read a FASTA file chosen by the user and print translations and ORFs."""
    print("this program will read the contents of a DNA fasts file and translate them to amino acids")
    # get the file name using a file dialog
    FileName = _ChooseFile()
    FileContents = FileRead(FileName)
    # Display the contents of the list
    print("the contents of {:s} are: \n".format(FileName))
    print(FileContents[0])
    DesLine = FileContents[0]
    DnaSeq = FileContents[1]
    # the Descriptor line
    print(" the descriptor line is: \n")
    print(DesLine)
    # remove the line breaks so codons are not split across lines
    DnaSequence = Transcribe(DnaSeq)
    print("the contiguous DNA sequence is:")
    print(DnaSequence)
    print("\n************************* All the amino acids of the PRIMARY STRAND ********************************************\n")
    for RFNumber in range(0, 3):
        # Translate the contiguous sequence, not the raw text with line breaks.
        AminoAcidSeq_1 = Translate(DnaSequence, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)
    print("the DNA sequence from right to left: ")
    print(DnaSequence[::-1])
    print("\nthe amino acid seq from right to left: ")
    print(AminoAcidSeq_1[::-1])
    # get the compliment of DNA seq
    ComplimentSeq = Compliment(DnaSequence)
    print("\n************************** the Reverse compliment sequence 5' to 3' ***************************\n")
    print(ComplimentSeq)
    print("\n************************* All the amino acids of the Compliments STRAND ********************************************\n")
    AminoAcidList = []
    for RFNumber in range(0, 3):
        AminoAcidSeq_1 = Translate(ComplimentSeq, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)
        # Adding the amino acid sequences to a list
        AminoAcidList.append(AminoAcidSeq_1)
    # list the ORFs of each complement-strand reading frame once
    for AminoAcids in AminoAcidList:
        FindORF(AminoAcids)
    input("Press enter to exit program....")


#****************** test plan ********************************
# run the program and ensure the output is as expected:
# codon 1 is ATG....

#**************** execute program **************************
if __name__ == "__main__":
    main()