# C20301201
#
# avatar
# unknown
# plain_text
# 3 years ago
# 10 kB
# 1
# Indexable
import tkinter as tk
from tkinter import filedialog
import sys

# Lookup table mapping each DNA codon (three upper-case bases) to its
# one-letter amino acid code; '*' marks a stop codon.
CodonTable = {
    # codons starting with A
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
    'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
    # codons starting with C
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
    'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
    # codons starting with G
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
    'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
    # codons starting with T
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
    'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
    'TAC': 'Y', 'TAT': 'Y', 'TAA': '*', 'TAG': '*',
    'TGC': 'C', 'TGT': 'C', 'TGA': '*', 'TGG': 'W',
}

def FileRead(FileName):
    """Read a sequence file and split it into descriptor line and body.

    Parameters:
        FileName: path of the text file to open for reading.

    Returns:
        A two-element list ``[DesLine, Data]``.  ``DesLine`` is the list
        produced by ``readlines(1)`` (the first line of the file, or an
        empty list for an empty file); ``Data`` is the remainder of the
        file as one string, line breaks included.

    On an IOError the function prints an error message, waits for the
    user to press enter and terminates the program with exit status 1.
    """
    try:
        # The with-block closes the file on success and on failure; the
        # handler below must not touch the file object because it does
        # not exist when open() itself is what failed.
        with open(FileName, 'r') as Fp1:
            DesLine = Fp1.readlines(1)        # read the descriptor line
            Data = Fp1.read()                 # read the remainder of the file
    except IOError:
        print("error unable to read file or file does not exist!!!")
        print("Exiting the program")
        input()                               # pause so the message can be read
        sys.exit(1)

    # return the file contents as [descriptor, sequence data]
    return [DesLine, Data]
       

    


#**************************** end of the File Read Function *****************************************

def Transcribe(DnaSeq):
    """Return DnaSeq as one contiguous string with every newline removed.

    Parameters:
        DnaSeq: the sequence text, possibly spread over several lines.

    Returns:
        The same characters in the same order, without any '\\n'.
    """
    # dropping each '\n' is equivalent to splitting on it and re-joining
    # the pieces with nothing in between
    return DnaSeq.replace('\n', '')


#************************** codon translation ******************************

#************************** Sequence translation ******************************





def Translate(DnaSequence, RFNumber):
    """Translate one reading frame of a DNA sequence into amino acids.

    Parameters:
        DnaSequence: the DNA sequence to translate.
        RFNumber: zero-based offset at which the reading frame starts.

    Returns:
        A string of one-letter amino acid codes.  Triplets that are not
        keys of CodonTable (including a short trailing fragment) add
        nothing to the result.

    The function also prints a banner with the one-based frame number
    and the part of the sequence that starts at the frame offset.
    """
    # show which frame is being processed and the sequence it covers
    print("\n*************************************  reading frame number {:d} *******************************".format(RFNumber+1))
    print("the DNA seq is: \n")
    print(DnaSequence[RFNumber:])

    # walk the sequence three bases at a time starting at the frame offset
    triplets = (DnaSequence[pos:pos + 3]
                for pos in range(RFNumber, len(DnaSequence), 3))

    # keep only triplets the codon table knows and glue the results together
    return ''.join(CodonTable[triplet] for triplet in triplets
                   if triplet in CodonTable)


def Write(DesLine, AminoAcidSeq_1, ORFList):
    """Append the descriptor, an amino acid sequence and ORF data to a file.

    Parameters:
        DesLine: descriptor text; a string or a list of strings.
        AminoAcidSeq_1: the amino acid sequence, as a string.
        ORFList: ORF information; either a ready-made string, or an
            iterable whose items are each written on their own line.

    The user is asked for a file name, which is offered as the initial
    name in a save dialog.  If the dialog is cancelled nothing is
    written.  On an IOError the function reports the problem, waits for
    enter and terminates the program with exit status 1.
    """
    FileName2 = input("\nEnter the name of the file for writing (AminoAcid_Sequence): ")

    # hidden root window, needed only so the dialog can be shown
    root = tk.Tk()
    root.wm_withdraw()

    # let the user confirm or change the name and pick the location
    FileName2 = filedialog.asksaveasfilename(initialfile=FileName2,
                                             filetypes=[('All Files', '*.*')])
    root.destroy()

    # a cancelled dialog yields an empty value; there is nothing to open
    if not FileName2:
        print("no file selected, nothing written")
        return

    # file.write() only accepts strings, so flatten list-style ORF data
    if isinstance(ORFList, str):
        orf_text = ORFList
    else:
        orf_text = "".join("\n{}".format(item) for item in ORFList)

    try:
        # append mode keeps earlier results; the with-block closes the
        # file even when one of the writes fails
        with open(FileName2, 'a') as Fp2:
            Fp2.writelines(DesLine)
            Fp2.writelines("\n")
            Fp2.write(AminoAcidSeq_1)
            Fp2.write(orf_text)
            print("data successfully written to the file")
            print("closing file...")
    except IOError:
        print("File could not be open: ", file=sys.stderr)
        print("error unable to create or write to file {:s}".format(FileName2))
        print("Exiting the program")
        input()                               # pause so the message can be read
        sys.exit(1)



#********************************  end of the amino acid write function ******************************************



"""       The Complement Function

    a function to get the reverse complement of a DNA strand
    A is replaced by T, T is replaced by A, G is replaced by C and C is replaced by G
    it takes a DNA sequence as a parameter
    it returns the complement of this DNA sequence, reversed

"""

    

def Compliment(DnaSeq):
    """Return the reverse complement of a DNA sequence.

    Parameters:
        DnaSeq: the DNA sequence to convert.

    Returns:
        A string in which T, A, C and G are swapped for A, T, G and C
        respectively, in reversed order.  Any other character in the
        input contributes nothing to the result.
    """
    pairing = {'T': 'A', 'A': 'T', 'C': 'G', 'G': 'C'}

    # complement every recognised base, silently skipping anything else
    paired = [pairing[base] for base in DnaSeq if base in pairing]

    # reverse the complement strand before handing it back
    paired.reverse()
    return ''.join(paired)

#*************************************************************************

#******************************finding open reading frames********************

def FindORF(AminoAcidSeq_1):
    """Find, print and return the open reading frames of an amino acid string.

    An ORF starts at an 'M' and runs up to the next '*'.  Scanning
    resumes after that '*', so an 'M' inside an ORF does not start a new
    one.  An 'M' with no '*' after it is not reported.

    Parameters:
        AminoAcidSeq_1: amino acid sequence as a string of one-letter codes.

    Returns:
        A list with one ``[start, end, length, sequence]`` entry per ORF:
        ``start`` is the one-based position of the 'M', ``end`` the
        one-based position of the '*', ``length`` the number of amino
        acids before the '*', and ``sequence`` those amino acids.  The
        list is empty when no ORF is found.
    """
    print("\n************************* The list of ORF ***********************************************\n")

    found = []
    index = 0
    while index < len(AminoAcidSeq_1):
        if AminoAcidSeq_1[index] != 'M':
            index += 1
            continue

        start = index
        end = AminoAcidSeq_1.find('*', start)
        if end == -1:
            # no stop after this start: nothing further can be reported
            break

        record = [start + 1, end + 1, end - start, AminoAcidSeq_1[start:end]]
        found.append(record)

        # print ORF data (start, end, length and sequence)
        print("\nthe ORF start is: {:d}; the ORF end is: {:d}; the lenth is: {:d}\n".format(record[0], record[1], record[2]))
        print(record[3])

        # continue scanning after the stop
        index = end + 1

    return found




#*******************************  the driver or main function ********************************************************

def main():
    """Run the interactive translation workflow.

    Steps: ask for an input file with an open-file dialog, read it with
    FileRead, join the sequence lines with Transcribe, print the three
    reading-frame translations of the primary strand, build the reverse
    complement with Compliment, print its three translations, list the
    ORFs of each complement-strand translation with FindORF, then wait
    for the user to press enter.
    """
    print("this program will read the contents of a DNA fasts file and translate them to amino acids")

    # choose the input file through a dialog; the root window stays hidden
    root = tk.Tk()
    root.wm_withdraw()
    FileName = filedialog.askopenfilename(filetypes = [('All files','*.*')])
    root.destroy()

    FileContents = FileRead(FileName)

    # display the descriptor part of what was read
    print("the contents of {:s} are: \n".format(FileName))
    print(FileContents[0])

    DesLine = FileContents[0]
    DnaSeq = FileContents[1]

    print(" the descriptor line is: \n")
    print(DesLine)

    # remove the line breaks so that codons are never split across lines
    DnaSequence = Transcribe(DnaSeq)

    print("the contiguous DNA sequence is:")
    print(DnaSequence)

    print("\n*************************  All the amino acids of the PRIMARY STRAND ********************************************\n")

    # translate the contiguous sequence, not the raw file text: a triplet
    # containing a newline is not in the codon table and would be dropped
    for RFNumber in range(0, 3):
        AminoAcidSeq_1 = Translate(DnaSequence, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)
        print("the DNA sequence from right to left: ")
        print(DnaSequence[::-1])
        print("\nthe amino acid seq from right to left: ")
        print(AminoAcidSeq_1[::-1])

    # get the reverse complement of the DNA sequence
    ComplimentSeq = Compliment(DnaSequence)
    print("\n************************** the Reverse compliment sequence 5' to 3'  ***************************\n")
    print(ComplimentSeq)

    print("\n*************************  All the amino acids of the Compliments STRAND ********************************************\n")

    # keep each complement-strand translation for the ORF search below
    AminoAcidList = []
    for RFNumber in range(0, 3):
        AminoAcidSeq_1 = Translate(ComplimentSeq, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)
        AminoAcidList.append(AminoAcidSeq_1)

    # list the ORFs of every stored translation exactly once
    for AminoAcidSeq in AminoAcidList:
        FindORF(AminoAcidSeq)

    input("Press enter to exit program....")

# Run the workflow only when the file is executed as a script, so that
# importing it for its functions does not open dialogs or block on input.
if __name__ == "__main__":
    main()


   
"""****************** test plan ********************************
  run the program and ensure the output is as expected: codon 1 is ATG....
   

"""

#**************** execute program **************************