Parallel LA

mail@pastecode.io avatar
unknown
c_cpp
3 years ago
24 kB
2
Indexable
Never
#include<bits/stdc++.h>
using namespace std;

/*
Global Variables
    - keywordTrie : a instance of keyword_Trie
    - symbolTable : a instance of class Symbol_Table and object type data-structure
Utils data-structures
    - keyword_node : a class data-type for keyword_Trie data-structure
    - keyword_Trie : a Trie data-structure to keywords
    - Symbol_Table : a class type data-base to store real-time declared variables and provide them id
    - Token : a class to store string-type tokens
Global Functions
    - lexical_analysis() : Lexically analyses a input-stream and returns a vectors of tokens 
Utils
    - vector_to_string() : Functions to convert vector of character's to a string
Serial lexical Analysis utils
    - getKeyword() : Looks if the Lexemme is a Keyword
    - getVariable() : Looks if the Lexemme is a Variable
    - getParenthesis() : Looks if the lexemme is a Paranthesis
    - getPunctuation() : Looks if the lexemme is a Punctuation
    - getWhitespace() : Looks if the lexemme is a WhiteSpace
    - getRelop() : Looks if the lexemme is a Relop
    - getNumber() : Looks if the lexemme is a Number
*/


class identifier
{
    public :
    int id; // to store the id of the variable
    string name; // to store the name of the variable

    identifier() // a null identifier
    {
        id=-1;
        name="\0";
    }

    identifier(int id, string name)
    {
        this->id = id;
        this->name = name;
    }
};

class Symbol_Table
{
    public :

    int current_size;
    map<int,identifier> identifiers; // map with 'key : id' and 'value : identifier type object'

    Symbol_Table()
    {
        current_size = 0; // no id's present at initialization
    }

    int insert(string var) // returns valid 'integer' if insertions is successful, -1 if not successful
    {
        bool allocate = true;

        // check if the variable is already declared of not, if not allocate position in map, if yes return -1

        for(auto node : identifiers)
        {

            if(var.compare(node.second.name)==0) // using string::str.compare()
            {allocate=false;break;}
            
        }

        if(allocate==false)
        return -1;

        int id = ++current_size;

        identifiers[id] = identifier(id,var);

        return id;
    }

    void del(int id)
    {
        identifiers.erase(id);
    }

    void pop()
    {
        identifiers.erase(current_size);
        current_size--;
    }

}SymbolTable;

string vector_to_string(vector<char> str)
{
    string to_string(str.begin(),str.end());

    return to_string;
}

class Token // class token to issue tokens - in this case RELOP tokens
{
    public :

    string t; // represents the token string for this simple operations

    Token()
    {
       t = "Not an identifiable Token";
    }

    Token(string token_type)
    {
        t = "< " + token_type + " >" ;
    }

    Token(string token_type ,string token_value) // if the type of token and the name of the operator is provided
    {
        t = "< " + token_type + " , " + token_value + " >";
    }

    Token(string token_type, int token_value)
    {
        t = "< " + token_type + " , " + to_string(token_value) + " >";
    }

    Token(string token_type ,char token_value) // if the type of token and the name of the operator is provided
    {
        t = "< " + token_type + " , " + token_value + " >";
    }

};

class keyword_node
{
    public :

        char a;
        map<char,keyword_node*> dict;
        bool isTerminal;

        keyword_node( char a ='\0' )
        {
            this->a = a;
            isTerminal = false; // default the node is not a terminat
        }

};

class keyword_Trie
{
    public :

        keyword_node *root;

        keyword_Trie()
        {
            root = new keyword_node(); // the root is pointing to a null object
        }

        void insert(string str) // inserts a string into the Trie data-structure
        {
            keyword_node *temp = root;
            int i = 0;

            while(str[i]!='\0')
            {
                char data = str[i];

                if(temp->dict.count(data) == 0)
                    temp->dict[data] = new keyword_node(data);

                temp = temp->dict[data];
                i++;
            }

            temp->isTerminal = true;
        }
        
        bool nfa(string file_input, vector<Token> &tokens,int &i)
        {
            int pos = i;
            bool valid = false;
            bool loop = true;
            char data;

            vector<char> word ;

            keyword_node *temp = root;

            while(loop)
            {

                if(pos<=file_input.length())
                data = file_input[pos];
                else
                {loop = false;data='\0';}


                if(temp->isTerminal && !isalnum(data))
                {
                    valid=true;
                    Token T ;
                    T =  Token(vector_to_string(word));
                    tokens.push_back(T);
                    loop = false;
                }

                if(temp->dict.count(data)) // exist in Trie
                {
                    temp = temp->dict[data];
                    pos++;
                    word.push_back(data);
                }
                else
                {
                    loop = false;
                }
            }

            if(valid==true)
            {
                i=pos;// update i;
            }

            return valid;
        }

}keywordTrie; // keywords is a global Trie type data-structure

void initialize_keywords()
{
    // initializing all the keywords

    int number_of_keywords = 2;

    string keyword[number_of_keywords] = {"if",
                        "else"
                        };

    for(int i=0;i<number_of_keywords;i++)
    {
        keywordTrie.insert(keyword[i]);
    }
}

bool getKeyword(string file_input, vector<Token> &tokens,int &i)
{
    int pos = i;

    bool valid = keywordTrie.nfa(file_input,tokens,i);

    return valid;
}

bool getVariable(string file_input, vector<Token> &tokens,int &i)
{

    int pos = i;

    int state = 0;

    bool loop = true;
    bool valid = false;

    vector<char> var;

    char c;

    while(loop)
    {

        if(pos<=file_input.length())
        c = file_input[pos];
        else
        {loop = false;c='\0';}

        // we will use c++ std::'bool isalpha(char c)' , 'bool isalnum(char c)'

        switch(state)
        {

            case 0 : if(isalpha(c))
                    {
                        state=1;
                        var.push_back(c);
                        pos++;
                    }

                    else
                    {
                        loop=false;
                    }

                    break;

            case 1 : if(isalnum(c))
                    {
                        state=1;
                        var.push_back(c);
                        pos++;
                    }

                    else
                    {
                        state=2;
                        pos++;
                    }

                    break;

            case 2 : valid = true;loop=false;pos--;
                    
                    /*
                    Steps : 
                    - Issue the token
                    - Enter this int Symbol Table
                    */

                   int id = SymbolTable.insert(vector_to_string(var));


                   if(id!=-1)
                   {
                       Token T("ID",id);
                       tokens.push_back(T);
                   }
                   else
                   {
                       Token T;
                       tokens.push_back(T); // it will return an un-identifiable Token found mean-while because token is already in symbol-table
                   }

                    break;
        }

    }

    if(valid==true)
    i=pos;

    return valid;
}

bool getArithmetic(string file_input, vector<Token> &tokens,int &i)
{
    bool valid = false;

    char c = file_input[i];

    Token T;

    switch(c)
    {
        case '+' : valid=true;
                    T = Token("ARITH",c);
                    tokens.push_back(T);

                    break;

        case '-' : valid=true;
                    T = Token("ARITH",c);
                    tokens.push_back(T);

                    break;

        case '*' : valid=true;
                    T = Token("ARITH",c);
                    tokens.push_back(T);

                    break;

        case '/' : valid=true;
                    T = Token("ARITH",c);
                    tokens.push_back(T);

                    break;
    }

    if(valid==true)
    i++;

    return valid;
}

bool getParenthesis(string file_input, vector<Token> &tokens,int &i)
{
    bool valid = false;

    char c = file_input[i];

    Token T;

    switch(c)
    {
        case '(' : valid=true;
                    T = Token("PAREN_OPEN",c);
                    tokens.push_back(T);

                    break;

        case ')' : valid=true;
                    T = Token("PAREN_CLOSE",c);
                    tokens.push_back(T);

                    break;

        case '{' : valid=true;
                    T = Token("PAREN_OPEN",c);
                    tokens.push_back(T);

                    break;

        case '}' : valid=true;
                    T = Token("PAREN_CLOSE",c);
                    tokens.push_back(T);

                    break;

        case '[' : valid=true;
                    T = Token("PAREN_OPEN",c);
                    tokens.push_back(T);

                    break;

        case ']' : valid=true;
                    T = Token("PAREN_CLOSE",c);
                    tokens.push_back(T);

                    break;
    }

    if(valid==true)
    i++;

    return valid;
}

bool getPunctuation(string file_input, vector<Token> &tokens,int &i)
{
    bool valid = false;

    char c = file_input[i];

    Token T;

    switch(c)
    {
        case ';' : valid=true;
                    T = Token("PUNC",c);
                    tokens.push_back(T);

                    break;

        case '"' : valid=true;
                    T = Token("PUNC",c);
                    tokens.push_back(T);

                    break;

        case '\'' : valid=true;
                    T = Token("PUNC",c);
                    tokens.push_back(T);

                    break;

    }

    if(valid==true)
    i++;

    return valid;
}

bool getWhitespace(string file_input, vector<Token> &tokens,int &i)
{
    int pos = i;

    int state = 0;

    bool loop = true;
    bool valid = false;

    char c;

    while(loop)
    {
        if(pos<=file_input.length())
        c = file_input[pos];
        else
        {loop = false;c='\0';}

        // we will call the c++ std::isspace(char ch) to check if its a white-space or not
        
        switch(state)
        {
        
        case 0 : if(isspace(c))
                    {
                        state = 1;
                        pos++;
                    }

                    else
                    {
                        loop = valid = false;
                    }

                    break;

        case 1 : if(isspace(c))
                    {
                        pos++;
                    }

                    else
                    {
                        state=2;
                        pos++;
                    }

                    break;

        case 2 : valid = true; loop=false; pos--;
                    break;

        }


    }

    if(valid==true)
    i=pos;

    return valid;
}

bool getRelop(string file_input, vector<Token> &tokens,int &i)
{
    int pos = i ; // saving the current position of the 'i' input stream pointer

    int state = 0; // state is initialized to 0

    bool loop = true; // while we have to loop
    bool valid = false; // valid = True when something is found

    char c;

    while(loop)
    {
        
        if(pos<=file_input.length())
        c = file_input[pos];
        else
        {loop = false;c='\0';}

        switch(state)
        {

        case 0 : if(c=='<')
                {
                    pos++;
                    state = 1;
                }

                else if(c=='=')
                {
                    pos++;
                    state = 5;
                }

                else if(c=='>')
                {
                    pos++;
                    state = 6;
                }

                else
                {
                    valid = false; // the valid turns to False
                    loop = false;
                } 

                break;


        case 1 :if(c=='=')
                {
                    pos++;
                    state = 2;
                }

                else if(c=='>')
                {
                    pos++;
                    state = 3;
                }

                else
                {
                    pos++;
                    state = 4;
                } 

                break;


    
        case 2 :{Token T("RELOP","LE");
                tokens.push_back(T);

                valid = true;
                loop = false;

                break;}


        case 3 : {Token T("RELOP","NE");
                tokens.push_back(T);

                valid = true;
                loop = false;

                break;
                }


        case 4 : {Token T("RELOP","LT");
                tokens.push_back(T);

                valid = true;
                loop = false;

                break;}


        case 5 : {Token T("RELOP","EQ");
                tokens.push_back(T);
 
                valid = true;
                loop = false;

                break;}


        case 6 : if(c=='=')
                {
                    pos++;
                    state = 7;
                }

                else
                {
                    pos++;
                    state = 8;
                } 

                break;


        case 7 : {Token T("RELOP","GE");
                tokens.push_back(T);

                valid = true;
                loop = false;

                break;}


        case 8 :{ Token T("RELOP","GT");
                tokens.push_back(T);

                pos--;
                valid = true;
                loop = false;

                break;}

        // End of the loop and the switch conditional statements
    }
    }

    if(valid==true)
    i = pos; // update pointer to the input in stream

    return valid; // either true or false
}

bool getNumber(string file_input, vector<Token> &tokens,int &i)
{
    int pos = i ; // saving the current position of the 'i' input stream pointer

    int state = 0; // state is initialized to 0

    bool loop = true; // while we have to loop
    bool valid = false; // valid = True when something is found

    vector<char> number_string;

    char c ;

    while(loop)
    {
        if(pos<=file_input.length())
        c = file_input[pos];
        else
        {loop = false;c='\0';}

        switch(state)
        {

        case 0 : if(isdigit(c))
                {
                    pos++;
                    state = 1;
                    number_string.push_back(c);
                }

                else
                {
                    valid = false; // the valid turns to False
                    loop = false;
                } 

                break;


        case 1 :if(isdigit(c))
                {
                    pos++;
                    state = 1;
                    number_string.push_back(c);
                }

                else if(c=='.')
                {
                    pos++;
                    state = 3;
                    number_string.push_back(c);
                }

                else if(c=='e')
                {
                    pos++;
                    state = 6;
                    number_string.push_back(c);
                }

                else
                {
                    pos++;
                    state = 2;
                } 

                break;


        case 2 :{ Token T; // put the number into the token

                T = Token("NUM",vector_to_string(number_string));

                tokens.push_back(T);

                pos--;
                valid = true;
                loop = false;

                break;}


         case 3 :if(isdigit(c))
                {
                    pos++;
                    state = 4;
                    number_string.push_back(c);
                }

                else
                {
                    valid = false; // the valid turns to False
                    loop = false;
                } 

                break;


        case 4 :if(isdigit(c))
                {
                    pos++;
                    state = 4;
                    number_string.push_back(c);
                }

                else if(c=='e')
                {
                    pos++;
                    state = 6;
                    number_string.push_back(c);
                }

                else
                {
                    pos++;
                    state = 5;
                } 

                break;


        case 5 : { Token T; // put the number into the token

                T = Token("NUM",vector_to_string(number_string));

                tokens.push_back(T);

                pos--;
                valid = true;
                loop = false;

                break;}


        case 6 :if(isdigit(c))
                {
                    pos++;
                    state = 8;
                    number_string.push_back(c);
                }

                else if(c=='+' || c=='-')
                {
                    pos++;
                    state = 7;
                    number_string.push_back(c);
                }

                else
                {
                    valid = false; // the valid turns to False
                    loop = false;
                } 

                break;


        case 7 :if(isdigit(c))
                {
                    pos++;
                    state = 8;
                    number_string.push_back(c);
                }

                else
                {
                    valid = false; // the valid turns to False
                    loop = false;
                } 

                break;


        case 8 :if(isdigit(c))
                {
                    pos++;
                    state = 8;
                    number_string.push_back(c);
                }

                else
                {
                    pos++;
                    state = 9;
                } 

                break;


        case 9 : { Token T; // put the number into the token

                T = Token("NUM",vector_to_string(number_string));

                tokens.push_back(T);

                pos--;
                valid = true;
                loop = false;

                break;}

        // End of the loop and the switch conditional statements
    }
    }

    if(valid==true)
    i = pos; // update pointer to the input in stream

    return valid; // either true or false
}

void serial_lexical_analysis(string file_input, vector<Token> &tokens)
{
    int i = 0; // index where the file pointer is at the start - acts like string pointer

    while(file_input[i]!='\0' && i<=file_input.length()) // till the input stream does not end
    {

        if(getKeyword(file_input,tokens,i));

        else if(getVariable(file_input,tokens,i));

        else if(getNumber(file_input,tokens,i)); // i is the stream pointer - which will be updated if return is True

        else if(getRelop(file_input,tokens,i)); // i is the stream pointer - which will be updated if return is True

        else if(getArithmetic(file_input,tokens,i));

        else if(getParenthesis(file_input,tokens,i));
        
        else if(getPunctuation(file_input,tokens,i));

        else if(getWhitespace(file_input,tokens,i));
        
        else
        {
            Token T; // not an identifiabel token
            tokens.push_back(T);
            i++; // increment i
        }
    }

}

void parallel_lexical_analysis(string file_input, vector<Token> &tokens)
{
    int i = 0; // index where the file pointer is at the start - acts like string pointer

    while(file_input[i]!='\0' && i<=file_input.length()) // till the input stream does not end
    {
        int final_pos[8] = {0,0,0,0,0,0,0,0}; // Order : keyword, Variable, Number, Relop, Arithmetic, Parenthesis, Punctuation, Whitespace

        vector<Token> temp_tokens; // will be deleted as the loop iterates

        for(int k=0;k<8;k++)
        {
            final_pos[k] = i;

            switch(k)
            {
                case 0 : getKeyword(file_input,temp_tokens,final_pos[0]); break;

                case 1 : getVariable(file_input,temp_tokens,final_pos[1]); break;

                case 2 : getNumber(file_input,temp_tokens,final_pos[2]); break;

                case 3 : getRelop(file_input,temp_tokens,final_pos[3]); break;

                case 4 : getArithmetic(file_input,temp_tokens,final_pos[4]); break;

                case 5 : getParenthesis(file_input,temp_tokens,final_pos[5]); break;

                case 6 : getPunctuation(file_input,temp_tokens,final_pos[6]); break;

                case 7 : getWhitespace(file_input,temp_tokens,final_pos[7]); break;
            }
        }


        // see what to call finally

        int max_index = -1;
        int max_pos = i;


        for(int k=0;k<8;k++)
        {
            if(final_pos[k]>max_pos)
            {
                max_index = k;
                max_pos = final_pos[k];
            }
        }


        if(max_index == -1) // i.e no token is issued
        {
            Token T; // not an identifiabel token
            tokens.push_back(T);
            i++; // increment i
        }
        else
        {

            // A Token will surely be issued

            // correcting the Symbol Table

            if(final_pos[1]>i)
            SymbolTable.pop(); // popping the identifier if issued from the symbol table

            // Now issuing the original token by using the max_index

            switch(max_index)
            {
                case 0 : getKeyword(file_input,tokens,i); break;

                case 1 : getVariable(file_input,tokens,i);break;

                case 2 : getNumber(file_input,tokens,i); break;

                case 3 : getRelop(file_input,tokens,i); break;

                case 4 : getArithmetic(file_input,tokens,i); break;

                case 5 : getParenthesis(file_input,tokens,i); break;

                case 6 : getPunctuation(file_input,tokens,i); break;

                case 7 : getWhitespace(file_input,tokens,i); break;
            }

        }
        
    }

}

int main()
{

    // Initializing start

    initialize_keywords();

    // Initialization done

    cout<<"Enter number of test cases : ";
    int t; // test cases
    cin>>t;
    while(t--) // starts from t-1 to 0 ( t times )
    {
        cout<<"Enter the Input Stream : ";
        string input_stream;
        fflush(stdin);
        getline(cin,input_stream);
        cout<<"\n"<<"Lexical Analysis for Input Stream : "<<input_stream;

        vector<Token> tokens;

        // for serial navigation
        // serial_lexical_analysis(input_stream,tokens);

        // for paralle navigation
        parallel_lexical_analysis(input_stream,tokens);

        cout<<endl;
        for(int i=0;i<tokens.size();i++)
        cout<<tokens[i].t<<endl;

        cout<<"\n";
        cout<<endl;
    }
    return 0;
}