Untitled

mail@pastecode.io avatar
unknown
csharp
a year ago
9.8 kB
2
Indexable
Never
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <strings.h>
#include <math.h>

#define MAX_WORD_SIZE 35
#define MAX_NUM_WORDS 1001
#define MAX_VEC_SIZE 200
#define MAX_LINE_LENGTH 100000

static char words[300000][MAX_WORD_SIZE];
struct inputWord{
    int exactMatch;
    int similarCount;
    char *wordText;
    double *vectorValues;
    int dictionaryPosition;
    
};

typedef struct inputWord inputW;

int strLenght(char  *str){
    int l=0;
    while(str[l] != '\0'){
        l++;
    }

    return l;
}


void strcopy(char *dest, const char *src) {
    int i;
    while((*dest++ = *src++)!='\0');
    *dest = '\0';


}


int isStrignored(char *str,char ignoreStrs[][MAX_WORD_SIZE],int numToigno){
    for(int i=0;i<numToigno;i++){
        if(strcmp(str,ignoreStrs[i]) == 0){
            //STR IS INSIDE THE IGNORE TXT SO RETURN 0;
            return 0;
        }
    }

    return 1;
}

char* getStrCleaned(char* str){
    int count =0;
    char* cleanStr = (char*)malloc((120) * sizeof(char));
    while(*str != '\0'){
        char comingChar = *str;
        if(isalnum(comingChar)){

            cleanStr[count++] = tolower(comingChar);
        }
        str++;
    }
    cleanStr[count] = '\0';
    int i=0;
    return cleanStr;
}

int closestInText1(inputW word,char fwords[][MAX_WORD_SIZE],char dict[][MAX_WORD_SIZE]){
    int walker=0;
    int walkerDict= 0;
    int positionOfWd = 0;
    while(strcmp(fwords[walker],"-")!=0){
        while(strcmp(dict[walkerDict],"-") != 0){
            
        }
    }
}


double doubledissimilarity(char * w1, char * w2, char dict[][MAX_WORD_SIZE], float threshold, int *position_of_orginalword ){
    puts("gets called dd");
    int i=0;
    double vectorsoforiginal[200];
    double vectors2[200];
    char *matchIndict;
    //w1 is input word, w2 is dict[i];
    
    
    //first take the original words vector values
    int linecount=0;
    FILE *fp = fopen("dictionary.txt","r");
    char templine[MAX_LINE_LENGTH];
    fgets(templine,MAX_LINE_LENGTH,fp);//jumping over the first line
    while(fgets(templine,MAX_LINE_LENGTH,fp)){
        char *token = strtok(templine," ");

        if(strcasecmp(token,w1)==0){
            token = strtok(NULL," ");
            int vectorindex = 0;
            while (token != NULL)
            {
                double tempvalue = 0.0;
                sscanf(token,"%lf",&tempvalue);
                vectorsoforiginal[vectorindex++] = tempvalue;
                token = strtok(NULL," ");
            }
            
        }   
        linecount++;
    }

    
    //WE TAKE THE ORIGINAL WORDS VECTORS.
    //NOW WE HAVE TO TAKE COMING WORD'S VECTORS
    int vInd=0;
    rewind(fp);
    fgets(templine,MAX_LINE_LENGTH,fp);//jumpoverthe firstline
    while(fgets(templine,MAX_LINE_LENGTH,fp)){
        char *token = strtok(templine," ");
        if(strcasecmp(token,w2)==0){
            int vectorindex = 0;
            while(token != NULL){
                double tempvalue=0.0;
                sscanf(token,"%lf",&tempvalue);
                vectors2[vectorindex++] = tempvalue;
                if(vectorindex==98){
                    token = strtok(NULL,"\n");
                }
                else{
                    token = strtok(NULL," ");

                }
            }

            
            break;
        }


    }

    //now we have to calculate the dissimilarity between 2 vectors
    double sum=0.0;
    for(int i=0;i<100;i++){
       //printf("vectororginal = %lf,vectors2 = %lf\n",vectorsoforiginal[i],vectors2[i]);
       sum += fabs((vectorsoforiginal[i] - vectors2[i]) * (vectorsoforiginal[i] - vectors2[i]));

    }

    //double distance = sqrt(sum);
    sum = sqrt(sum);
    //printf("Distance between = %s, %s is = %lf\n",w1,w2,sum);
    
    return sum;

}


int  read_text(const char  * text_file, const char  * ignore_file, char words[][MAX_WORD_SIZE]){
    int numOflinesIgnore =0;
    FILE *ignoreF = NULL;
    words[MAX_LINE_LENGTH][MAX_WORD_SIZE];
    ignoreF=fopen(ignore_file,"r");
    FILE *textF = fopen(text_file,"r");
    if(ignoreF!=NULL){
        char temp[MAX_LINE_LENGTH];
        while(fgets(temp,MAX_LINE_LENGTH,ignoreF)){
            numOflinesIgnore++;
        }
        rewind(ignoreF);
        char ignoreStrs[numOflinesIgnore][MAX_WORD_SIZE];
        int i=0;
        char line[MAX_LINE_LENGTH];
        while(fgets(line,MAX_LINE_LENGTH,ignoreF)){
            sscanf(line,"%s",ignoreStrs[i]);
            i++;
        }

        char comingChar;
        char comingStr[50];
        char cleanStr[50];
        int count = 0;
        int j=0;
        fclose(ignoreF);
        while(feof(textF) == 0){
           
            comingChar = fgetc(textF);
            
            if(isalnum(comingChar)){
                cleanStr[count++] = comingChar;
            }

            if(comingChar == 32 || comingChar == '\n'){
                cleanStr[count] = '\0';
                count = 0;
                if(strlen(cleanStr) >0){
                    
                    if(isStrignored(cleanStr,ignoreStrs,i)){
                        
                        strcopy(words[j],cleanStr);
                        
                        j++;
                    }

                }
            }

            
            

        }
        strcpy(words[j],"-");
        //words[j][1] = '\0';
        
        return j;
        


    }else{
        printf("Error oppening file!");
    }

}



/*READ DICTIONARY*/
int read_dict(const char * file_name, char dict[][MAX_WORD_SIZE]){
    FILE *fp = fopen(file_name,"r");
    int numWords=0;
    int vecTorSize=0;
    if(fp!=NULL){
        int numofwords =0;
        char line[MAX_LINE_LENGTH];
        fgets(line,MAX_LINE_LENGTH,fp);
        sscanf(line,"%*[^0-9]%d%*[^0-9]%d",&numWords,&vecTorSize);
        //printf("numofwords = %d, vectorsize= %d",numWords,vecTorSize);
        while(fgets(line,MAX_LINE_LENGTH,fp)){
            sscanf(line,"%s",dict[numofwords]);
            
            numofwords++;
        }

        dict[numofwords][0] = '-';
        dict[numofwords][1] = '\0';

        int m =0;

        return numofwords;
        }else{
            return -1;
        }
}




int checkIfInDict(char *word,char dict[][MAX_WORD_SIZE],int *dictPosition){
    int walker = 0;
    int found=-1;
    while(dict[walker][0] != '-'){
        if(strcasecmp(word,dict[walker])==0){
            puts("Found.");
            found = walker;
            break;
        }
        walker++;
    }
    
    *dictPosition = found;

}

int checkTheBook(char *word,char book[][MAX_WORD_SIZE],int *matchCount){
    
    int exactMatch =0;

    int walker = 0;
    while(strcmp(book[walker],"-") != 0){
        if(strcmp(word,book[walker]) == 0){
            
           (*matchCount) += 1;

        }
        walker++;

    }

    
    
}

int main(int argc,char *argv[]){
    char dict[1002][MAX_WORD_SIZE];
    int vectorSize,wordsLen;
    char *dictionaryfile = "dictionary.txt";
    char *inputfile = "input.txt";
    char *ignorefile = "ignore.txt";
    
    
    char input[100];
    char inputCopy[100];
    int numofwords = read_dict(dictionaryfile,dict);
    read_text(inputfile,ignorefile,words);
    printf("Enter word(s): ");
    fgets(input,100,stdin);
    strcopy(inputCopy,input);
    int i=0;
    int wordCount=0;
    int indexCounter = 0;
   
    //COUNTING INPUTTED WORDS
    char *token = strtok(input," ");
    while(token !=NULL){
        wordCount++;
       // puts(token);
        token = strtok(NULL," ");
    }
    
    //PUTTING INPUTS IN ENTEREDWORDS ARRAY
    inputW *inputsa = (inputW*)malloc(wordCount*sizeof(inputW));

    char enteredWords[wordCount][35];

    token = strtok(inputCopy," ");
    
    //WE PUT THE COMING INPUTS INSIDE EVERY INPUTW STRUCT
    while(token !=NULL){
        char *temp;
        inputsa[indexCounter].wordText = (char*)malloc(35 * sizeof(char));
        temp = getStrCleaned(token);
        strcopy(inputsa[indexCounter].wordText,temp);
        strcopy(enteredWords[indexCounter],temp);
        indexCounter++;
        token = strtok(NULL," ");

    }

    


//FIRST WE CHECK IF THE GIVEN WORD IS IN THE INPUT FILE ITSELF, IF IT IS WE INCREMENT THE EXACTHMATCH INTEGER ON THE WORD OBJECT ITSELF 
    for(int a=0;a<wordCount;a++){
        
        inputsa[a].exactMatch = 0;
        int pTocount = inputsa[a].exactMatch;
        int *posP = &inputsa[a].dictionaryPosition;
        char *ptoWord = inputsa[a].wordText;
        checkTheBook(ptoWord,words,&pTocount);
        printf("exact match = %d",pTocount);
    }

// NOW WE HAVE TO CHECK IF ANYWORD'S EXACT MATCH INT IS LESS THAN 1 IF THAT WE GONNA USE DICTIONARY

    int posofOriginal = 0;
    checkIfInDict(enteredWords[0],dict,&posofOriginal);
        if(posofOriginal > 0){
            puts("dictionaryde var");
            for(int m=0;m<1001;m++){
                double sum;
                sum = doubledissimilarity("pigling",dict[m],dict,2.0,&posofOriginal);

                printf("distance between = %s,%s is = %lf\n",enteredWords[0],dict[m],sum);
            }
            
        }
    
    /*
    for(int b=0;b<wordCount;b++){
        checkIfInDict(inputsa[b].wordText,dict,&inputsa[b].dictionaryPosition);
        if(inputsa[b].dictionaryPosition > 0){
            puts("dictionaryde var");
            posofOriginal = inputsa[b].dictionaryPosition;
            for(int m=0;m<1001;m++){
                double sum;
                sum = doubledissimilarity(inputsa[b].wordText,dict[m],dict,2.0,&posofOriginal);

                printf("distance between = %s,%s is = %lf\n",inputsa[b].wordText,dict[m],sum);
            }
            
        }
    }
    
    */
    //NOW WE GONNA SEND THE MATCHED STRING IN DIRECTORY TO DISSIMILARITY FUNCTION,


    

    
}