# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# 2 years ago
# 12 kB
# 5
# Indexable
# Never
# -*- coding: utf-8 -*-
"""
Created on Tue May 24 18:16:48 2022

@author: missc
"""
# The PrintCodon.py implemented using methods/ functions
# this program takes a DNA sequences  and prints all the codons (substring of size 3)


import tkinter as tk
from tkinter import filedialog
import sys

# DNA <-> AA translation table: CodonTable
# global declaration: a constant lookup table (dictionary)
# keys are upper-case three-letter DNA codons, values are one-letter amino
# acid codes; '*' is the value stored for the codons TAA, TAG and TGA (stop)

CodonTable = {
    'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
    'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
    'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
    'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
    'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
    'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
    'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
    'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
    'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
    'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
    'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
    'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
    'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
    'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
    'TAC':'Y', 'TAT':'Y', 'TAA':'*', 'TAG':'*',
    'TGC':'C', 'TGT':'C', 'TGA':'*', 'TGG':'W',
    }

def FileRead(FileName):
    """
        Read a sequence file and split it into descriptor line and data.

        The start of the file is read as the descriptor line and everything
        after it is returned unchanged (newlines included), so the caller is
        responsible for removing the line breaks from the sequence.

        Parameters:
            FileName - path of the text file to read

        Returns:
            a two element list [DesLine, Data]:
            DesLine - list of lines as returned by readlines(1); for a normal
                      file this holds just the first (descriptor) line and it
                      is empty for an empty file
            Data    - string holding the rest of the file

        If the file cannot be opened or read, an error message is printed,
        the function waits for the user to press enter and the program exits
        with status 1 (SystemExit is raised by sys.exit).
    """

    try:
        # the with-statement closes the file on success AND on failure, and
        # never touches the handle when open() itself failed
        with open(FileName, 'r') as Fp1:
            DesLine = Fp1.readlines(1)    # size hint of 1: read the descriptor line
            Data = Fp1.read()             # read the remainder of the file
    except IOError:
        print("error unable to read file or file does not exist!!!")
        print("Exiting the program")
        input()                           # pause so the message can be read
        sys.exit(1)

    # return the file contents as a list so both parts reach the caller
    return [DesLine, Data]
       

    


#**************************** end of the File Read Function *****************************************

#DnaSequence = "ACTGAtATAGATAGCGCGCTAGCTACGATCGATCGATGCTAGCTAGCTGCGATCGATTATCGTAGTGTTTACTCCGTGTAGCTAGTCGTATTTAGATGATAGTAGATCGATGCATGCTAGTAGTTATGCGTGCGCATGCTGCATGCTGGCATCGAGTCTCGCATCGGCATCCTG"


def Transcribe(DnaSeq):
    """
        Echo the raw sequence text and return it as one contiguous string.

        Parameters:
            DnaSeq - sequence text that may be spread over several lines

        Returns:
            the same text with every newline character removed
    """

    # show the sequence exactly as received: once labelled, once on its own
    print ("DNASequence:  ", DnaSeq)
    print (DnaSeq)

    # deleting each newline gives the same result as splitting on it and
    # joining the pieces back together with nothing in between
    SingleLine = DnaSeq.replace('\n', '')

    return SingleLine









#************************** codon translation ******************************

#************************** Sequence translation ******************************


"""

    this function converts a DNA strand into an amino acid strand using the Codon table
    it extracts 3 characters (one codon) at a time from the sequence and translates each,
    concatenating the results onto the amino acid string:


    it takes a DNA string and a reading frame (RF) number as parameters
    it returns the translated amino acid string

"""





def Translate(DnaSequence, RFNumber):
    """
        Translate one reading frame of a DNA strand into amino acids.

        Starting at offset RFNumber the strand is cut into consecutive
        codons (3 characters each) and every codon found in CodonTable is
        replaced by its one-letter amino acid code.  The lookup ignores
        letter case, so lower-case bases are translated as well.  Codons
        that are not in the table (incomplete codon at the end of the
        strand, or codons containing other characters) are skipped.

        The reading frame number and the DNA of that frame are printed
        before translating.

        Parameters:
            DnaSequence - the DNA strand as a string
            RFNumber    - zero-based reading frame offset (reading frame 1
                          is number 0)

        Returns:
            the translated amino acid string
    """

    # print the DNA seq for the reading frame
    print("\n*************************************  reading frame number {:d} *******************************".format(RFNumber+1))
    DnaSequenceRF = DnaSequence[RFNumber:]
    print("the DNA seq is: \n")
    print(DnaSequenceRF)

    # collect the amino acids in a list and join once at the end
    AminoAcids = []

    for n in range(RFNumber, len(DnaSequence), 3):

        # the table keys are upper case, so normalise the codon first
        codon = DnaSequence[n:n+3].upper()

        AminoAcid = CodonTable.get(codon)
        if AminoAcid is not None:
            AminoAcids.append(AminoAcid)

    # return the translated sequence for a reading frame
    return ''.join(AminoAcids)


def Write(Des_Seq, AminoAcidSeq):
    '''
        Append a descriptor line and an amino acid sequence to a file.

        The user is asked to type the name of the output file.  When nothing
        is typed, a save-file dialog is opened so the file can be picked
        there instead.  The file is opened in append mode; the descriptor,
        a newline and the amino acid sequence are written to it.

        Parameters:
            Des_Seq      - the descriptor; a string or a list of strings
                           (it is passed to writelines)
            AminoAcidSeq - the amino acid sequence as a string

        Returns:
            nothing

        If the file cannot be created or written, an error message is
        printed, the function waits for the user to press enter and the
        program exits with status 1 (SystemExit is raised by sys.exit).
    '''

    # input the name of the file
    FileName = input("\nEnter the name of the file for writing (AminoAcid_Sequence): ")

    if not FileName:
        # nothing typed: fall back to the file dialog
        root = tk.Tk()
        root.wm_withdraw() # this completely hides the root window
        # a cancelled dialog gives an empty result; normalise it to ''
        FileName = filedialog.asksaveasfilename(filetypes = [('All Files','*.*')]) or ""
        root.destroy()

    try:
        # the with-statement closes the file even when a write fails
        with open(FileName, 'a') as Fp1:
            Fp1.writelines(Des_Seq)
            Fp1.writelines("\n")
            Fp1.write(AminoAcidSeq)
    except IOError:
        print("File could not be open: ", file=sys.stderr)
        print("error unable to create or write to file {:s}".format(FileName))
        print("Exiting the program")
        input()                 # pause so the message can be read
        sys.exit(1)
    else:
        print("data successfully written to the file")
        print("closing file...")


#********************************  end of the amino acid write function ******************************************





#************************************************************************

"""       The Compliment Function 

    a function to get the compliment of a DNA strand
    A replaced by T, T replaced by A, G replaced by C and C replaced by G
    it takes a DNA sequence as a parameter
    it returns the compliment of this DNA sequence in reversed order

"""

    

def Compliment(DnaSeq):
    """
        Return the reversed compliment of a DNA strand.

        Every base is replaced by its partner (A <-> T, C <-> G) and the
        resulting strand is reversed.  Letter case is ignored and the result
        is always upper case.  Characters that are not one of the four bases
        (for example line breaks) are left out of the result.

        Parameters:
            DnaSeq - the DNA strand as a string

        Returns:
            the reversed compliment strand as an upper-case string
    """

    Partner = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

    # walk the strand backwards so the result is already reversed, and build
    # it with a single join instead of repeated string concatenation
    return ''.join(Partner[Base] for Base in reversed(DnaSeq.upper()) if Base in Partner)

#*************************************************************************

#******************************finding open reading frames********************

def FindORF(AminoAcidSeq):
    """
        Find and print the open reading frames (ORF) of an amino acid strand.

        An ORF begins at a start amino acid 'M' and runs up to and including
        the next stop marker '*'.  The search resumes after each stop, so a
        second 'M' inside an ORF does not begin a new one.  An 'M' that is
        not followed by any stop marker is announced but not reported as an
        ORF.

        Parameters:
            AminoAcidSeq - the amino acid strand as a string

        Returns:
            a list with one entry [start, end, length, sequence] per ORF:
            start    - 1-based position of the 'M'
            end      - 1-based position of the '*'
            length   - number of amino acids before the '*'
            sequence - the ORF text including the closing '*'
            The list is empty when the strand holds no complete ORF.
    """

    ORFList = []

    print("\n **********************The List of ORF *****************\n")

    index = 0
    while index < len(AminoAcidSeq):

        # find() gives -1 when nothing is found; it never raises and a match
        # at position 0 is not mistaken for "not found"
        start = AminoAcidSeq.find('M', index)
        if start == -1:
            break
        print ("\n found M at position {:d}".format(start+1))

        end = AminoAcidSeq.find('*', start)
        if end == -1:
            break                       # no stop left: the ORF is incomplete

        Record = [start + 1, end + 1, end - start, AminoAcidSeq[start:end+1]]
        ORFList.append(Record)

        #print ORF data (start, end, length and sequence)
        print("\n The ORF start is: {:d}; the ORF end is: {:d}; the length is {:d}\n".format(Record[0], Record[1], Record[2]))
        print(Record[3])

        # continue the search behind the stop marker
        index = end + 1

    print(ORFList)
    return ORFList
    




#*******************************  the driver or main function ********************************************************

def main():
    """
        The driver: read a DNA file, translate it and look for an ORF.

        Steps:
            1. let the user pick the input file in a file dialog
            2. read descriptor line and sequence with FileRead
            3. remove the line breaks from the sequence with Transcribe
            4. translate the three reading frames of the primary strand
            5. build the reversed compliment strand and translate its three
               reading frames
            6. search the last translated amino acid sequence for ORFs
            7. wait for the user to press enter

        The contiguous sequence (without line breaks) is used for every
        translation and for the compliment, so line breaks in the file do
        not disturb the reading frames.
    """

    # prompt the user to enter the name of the file.
    print("this program will read the contents of a DNA fasts file and translate them to amino acids")

    # get the file name using a file dialog
    root = tk.Tk()
    root.wm_withdraw() # this completely hides the root window
    FileName = filedialog.askopenfilename(filetypes = [('All files','*.*')])
    root.destroy()

    # FileRead returns a list: [descriptor line, sequence text]
    FileContents = FileRead(FileName)

    #Display the contents of the list
    print("the contents of {:s} are: \n".format(FileName))
    print(FileContents[0])

    DesLine = FileContents[0]
    DnaSeq = FileContents[1]

    # the Descriptor line
    print(" the descriptor line is: \n")
    print(DesLine)

    # pass DNA sequence to a function to remove all carriage returns
    DnaSequence = Transcribe(DnaSeq)

    print("the contiguous DNA sequence is:")
    print(DnaSequence)

    # translate every reading frame (note RF 1 has number 0)
    print("\n*************************  All the amino acids of the PRIMARY STRAND ********************************************\n")

    for RFNumber in range(0, 3):
        AminoAcidSeq_1 = Translate(DnaSequence, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)
        print("the DNA sequence from right to left: ")
        print(DnaSequence[::-1])
        print("\nthe amino acid seq from right to left: ")
        print(AminoAcidSeq_1[::-1])

    #get the compliment of DNA seq
    ComplimentSeq = Compliment(DnaSequence)
    print("\n************************** the Reverse compliment sequence 5' to 3'  ***************************\n")
    print(ComplimentSeq)

    # translate every reading frame of the compliment strand
    print("\n*************************  All the amino acids of the Compliments STRAND ********************************************\n")

    for RFNumber in range(0, 3):
        AminoAcidSeq_1 = Translate(ComplimentSeq, RFNumber)
        print("\nthe amino acid sequence of RF {:d} is: \n".format(RFNumber+1))
        print(AminoAcidSeq_1)

    # call the write amino acid sequence to file function
    # Write(DesLine, AASequence_1)

    # NOTE(review): only the last translated frame (RF 3 of the compliment
    # strand) is searched for ORFs - confirm whether all frames are wanted
    FindORF(AminoAcidSeq_1)

    input("Press enter to exit program....")

# Script entry point: start the driver.
# NOTE(review): the call is not guarded, so it also runs when this file is
# imported; consider wrapping it in `if __name__ == "__main__":`.
main()


   
"""****************** test plan ********************************
  run the program and ensure the output is as expected: codon 1 is ATG....
   

"""

#**************** execute program **************************