import tkinter as tk
from tkinter import filedialog
import sys
# Codon lookup table: maps every three-letter DNA codon (alphabet A/C/G/T)
# to a one-letter amino-acid code; '*' marks the stop codons TAA, TAG and TGA.
# Each row below pairs four codons with the four residues they encode, in the
# same left-to-right order.
CodonTable = {
    codon: residue
    for codons, residues in (
        ("ATA ATC ATT ATG", "IIIM"),
        ("ACA ACC ACG ACT", "TTTT"),
        ("AAC AAT AAA AAG", "NNKK"),
        ("AGC AGT AGA AGG", "SSRR"),
        ("CTA CTC CTG CTT", "LLLL"),
        ("CCA CCC CCG CCT", "PPPP"),
        ("CAC CAT CAA CAG", "HHQQ"),
        ("CGA CGC CGG CGT", "RRRR"),
        ("GTA GTC GTG GTT", "VVVV"),
        ("GCA GCC GCG GCT", "AAAA"),
        ("GAC GAT GAA GAG", "DDEE"),
        ("GGA GGC GGG GGT", "GGGG"),
        ("TCA TCC TCG TCT", "SSSS"),
        ("TTC TTT TTA TTG", "FFLL"),
        ("TAC TAT TAA TAG", "YY**"),
        ("TGC TGT TGA TGG", "CC*W"),
    )
    for codon, residue in zip(codons.split(), residues)
}
def FileRead(FileName):
    """Read a FASTA-style text file and split it into descriptor and body.

    Args:
        FileName: Path of the file to open in text mode.

    Returns:
        A two-item list ``[DesLine, Data]``. ``DesLine`` is the list produced
        by ``readlines(1)`` (the first line of the file, or ``[]`` when the
        file is empty). ``Data`` is the remainder of the file as one string,
        line breaks included.

    Raises:
        SystemExit: With status 1 when the file cannot be opened or read.
            An error message is printed and the function waits for Enter
            first so the message stays visible in a console window.
    """
    try:
        # The context manager closes the file on every path. The previous
        # version never closed it on success and called close() on an
        # unbound name whenever open() itself failed.
        with open(FileName, 'r') as Fp1:
            DesLine = Fp1.readlines(1)  # descriptor line, kept as a list
            Data = Fp1.read()           # everything after the descriptor
    except IOError:
        print("error unable to read file or file does not exist!!!")
        print("Exiting the program")
        input()  # pause until the user presses Enter
        sys.exit(1)
    return [DesLine, Data]
#**************************** end of the File Read Function *****************************************
def Transcribe(DnaSeq):
    """Collapse a multi-line DNA sequence into one contiguous string.

    Despite its name this performs no DNA-to-RNA transcription; it only
    removes layout characters so codons can be read in fixed steps of three.

    Args:
        DnaSeq: Sequence text as read from the file, possibly spanning
            several lines.

    Returns:
        The sequence with every whitespace character removed (line feeds,
        carriage returns, spaces, tabs). Removing only line feeds would leave
        stray blanks in the sequence and shift the reading frame.
    """
    # str.split() with no argument splits on any run of whitespace.
    return ''.join(DnaSeq.split())
#************************** codon translation ******************************
#************************** Sequence translation ******************************
def Translate(DnaSequence, RFNumber):
    """Translate one reading frame of a DNA sequence into amino acids.

    Prints a banner with the 1-based frame number and the DNA sequence from
    the frame offset onwards, then translates codon by codon using the
    module-level ``CodonTable``.

    Args:
        DnaSequence: Contiguous DNA sequence (no line breaks).
        RFNumber: Zero-based reading-frame offset (0, 1 or 2).

    Returns:
        A string with one character per complete codon: the one-letter amino
        acid code, ``'*'`` for a stop codon, or ``'X'`` for a codon that is
        not in ``CodonTable``. A trailing fragment shorter than three bases
        is ignored.
    """
    print("\n************************************* reading frame number {:d} *******************************".format(RFNumber+1))
    DnaSequenceRF = DnaSequence[RFNumber:]
    print("the DNA seq is: \n")
    print(DnaSequenceRF)

    Residues = []
    # Stop at the last start position that still leaves a full codon.
    for n in range(RFNumber, len(DnaSequence) - 2, 3):
        # Upper-case so lower-case (soft-masked) input is translated too.
        codon = DnaSequence[n:n+3].upper()
        # Unknown codons become 'X' rather than being dropped, so residue
        # positions stay aligned with the DNA coordinates.
        Residues.append(CodonTable.get(codon, 'X'))
    return ''.join(Residues)
def Write(DesLine, AminoAcidSeq_1, ORFList):
    """Append the descriptor, an amino-acid sequence and ORF data to a file.

    The user is first prompted for a file name on the console and then shown
    a save-file dialog. The dialog's choice wins; the typed name is used only
    when the dialog is cancelled. Previously the typed name was always
    discarded.

    Args:
        DesLine: Descriptor text, either a string or a list of strings
            (it is passed to ``writelines``).
        AminoAcidSeq_1: Amino-acid sequence string to write.
        ORFList: ORF information. A string is written unchanged; any other
            iterable is written one item per line, since ``file.write``
            only accepts strings.

    Raises:
        SystemExit: With status 1 when the file cannot be opened or written.
            Error messages are printed and the function waits for Enter
            first.
    """
    FileName2 = input("\nEnter the name of the file for writing (AminoAcid_Sequence): ")

    # A hidden root window is required to show the dialog without an empty
    # Tk window appearing next to it.
    root = tk.Tk()
    root.wm_withdraw()
    Chosen = filedialog.asksaveasfilename(filetypes=[('All Files', '*.*')])
    root.destroy()
    if Chosen:  # empty when the dialog was cancelled
        FileName2 = Chosen

    if isinstance(ORFList, str):
        OrfText = ORFList
    else:
        OrfText = ''.join("\n" + str(Item) for Item in ORFList)

    try:
        # 'a' appends, so repeated runs accumulate in the same file. The
        # context manager closes the file even if a write fails.
        with open(FileName2, 'a') as Fp2:
            Fp2.writelines(DesLine)
            Fp2.writelines("\n")
            Fp2.write(AminoAcidSeq_1)
            Fp2.write(OrfText)
    except IOError:
        print("File could not be open: ", file=sys.stderr)
        print("error unable to create or write to file {:s}".format(FileName2))
        print("Exiting the program")
        input()  # pause until the user presses Enter
        sys.exit(1)

    print("data successfully written to the file")
    print("closing file...")
#******************************** end of the amino acid write function ******************************************
""" The Compliment Function
a function to get the reverse complement of a DNA strand
A is replaced by T, T is replaced by A, G is replaced by C and C is replaced by G
it takes a DNA sequence as a parameter
it returns the reversed complement of this DNA sequence
"""
def Compliment(DnaSeq):
    """Return the reverse complement of a DNA sequence.

    Each base is swapped for its pair (A<->T, C<->G) and the result is
    reversed.

    Args:
        DnaSeq: DNA sequence string.

    Returns:
        The reverse-complement string. Any character other than upper-case
        A, C, G or T (line breaks, lower-case letters, ambiguity codes) is
        left out, so the result can be shorter than the input.
    """
    Pairs = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    # Walk the input backwards so the complement comes out already reversed.
    return ''.join(Pairs[Base] for Base in reversed(DnaSeq) if Base in Pairs)
#*************************************************************************
#******************************finding open reading frames********************
def FindORF(AminoAcidSeq_1):
    """Find and print the open reading frames in an amino-acid sequence.

    An ORF starts at an ``'M'`` and runs up to, but not including, the next
    ``'*'``. Scanning resumes after that stop, so an ``'M'`` inside an ORF
    does not start a second one. An ``'M'`` with no later ``'*'`` is not
    reported.

    Args:
        AminoAcidSeq_1: Amino-acid sequence using one-letter codes, with
            ``'*'`` marking stops.

    Returns:
        A list with one ``[start, end, length, sequence]`` record per ORF, in
        order of appearance. ``start`` is the 1-based position of the ``'M'``,
        ``end`` the 1-based position of the ``'*'``, ``length`` the number of
        residues before the stop, and ``sequence`` those residues. The list
        is empty when no ORF is found. The previous version cleared each
        record after printing it, so callers never received any ORF.
    """
    print("\n************************* The list of ORF ***********************************************\n")
    Found = []
    Position = 0
    while True:
        Start = AminoAcidSeq_1.find('M', Position)
        if Start == -1:
            break  # no further start residue
        Stop = AminoAcidSeq_1.find('*', Start + 1)
        if Stop == -1:
            break  # unterminated: no later start can be terminated either
        Record = [Start + 1, Stop + 1, Stop - Start, AminoAcidSeq_1[Start:Stop]]
        print("\nthe ORF start is: {:d}; the ORF end is: {:d}; the lenth is: {:d}\n".format(Record[0], Record[1], Record[2]))
        print(Record[3])
        Found.append(Record)
        Position = Stop + 1  # continue scanning after the stop
    return Found
#******************************* the driver or main function ********************************************************
def main():
    """Run the interactive DNA-to-amino-acid translation workflow.

    Steps:
        1. Ask for an input file with an open-file dialog and read it.
        2. Print the descriptor line and the contiguous DNA sequence.
        3. Translate and print the three reading frames of the primary
           strand, together with the reversed DNA and reversed amino acids.
        4. Build the reverse complement, then translate and print its three
           reading frames.
        5. Print the open reading frames found in each reverse-complement
           frame.
        6. Wait for Enter before returning.
    """
    print("this program will read the contents of a DNA fasts file and translate them to amino acids")

    # A hidden root window is required to show the dialog without an empty
    # Tk window appearing next to it.
    root = tk.Tk()
    root.wm_withdraw()
    FileName = filedialog.askopenfilename(filetypes = [('All files','*.*')])
    root.destroy()

    FileContents = FileRead(FileName)
    DesLine = FileContents[0]
    DnaSeq = FileContents[1]

    print("the contents of {:s} are: \n".format(FileName))
    print(DesLine)
    print(" the descriptor line is: \n")
    print(DesLine)

    # All later steps use the cleaned sequence. Working on the raw text would
    # let line breaks fall inside codons and corrupt every reading frame.
    DnaSequence = Transcribe(DnaSeq)
    print("the contiguous DNA sequence is:")
    print(DnaSequence)

    print("\n************************* All the amino acids of the PRIMARY STRAND ********************************************\n")
    for RFNumber in range(3):
        AminoAcidSeq_1 = Translate(DnaSequence, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)
        print("the DNA sequence from right to left: ")
        print(DnaSequence[::-1])
        print("\nthe amino acid seq from right to left: ")
        print(AminoAcidSeq_1[::-1])

    ComplimentSeq = Compliment(DnaSequence)
    print("\n************************** the Reverse compliment sequence 5' to 3' ***************************\n")
    print(ComplimentSeq)

    print("\n************************* All the amino acids of the Compliments STRAND ********************************************\n")
    AminoAcidList = []
    for RFNumber in range(3):
        AminoAcidSeq_1 = Translate(ComplimentSeq, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)
        AminoAcidList.append(AminoAcidSeq_1)

    # Search each stored frame exactly once. FindORF prints what it finds.
    for AminoAcidSeq in AminoAcidList:
        FindORF(AminoAcidSeq)

    input("Press enter to exit program....")


# Run only when executed as a script, so importing the module has no side
# effects.
if __name__ == "__main__":
    main()
"""****************** test plan ********************************
run the program and ensure the output is as expected: codon 1 is ATG....
"""
#**************** execute program **************************