/*
 * Paste-site metadata (not part of the program):
 * Untitled / unknown / csharp / 3 years ago / 9.8 kB / 12 / Indexable
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <strings.h>
#include <math.h>
/* Width in bytes of one row of every word table in this file
 * (the global `words` table and the `[][MAX_WORD_SIZE]` parameters). */
#define MAX_WORD_SIZE 35
/* NOTE(review): presumably the maximum number of dictionary words -- confirm against its users. */
#define MAX_NUM_WORDS 1001
/* NOTE(review): presumably the maximum number of components of a word vector -- confirm against its users. */
#define MAX_VEC_SIZE 200
/* Size of the buffers that hold one line read from a file with fgets. */
#define MAX_LINE_LENGTH 100000
/* Global word table; main passes it to read_text to be filled and to checkTheBook to be searched. */
static char words[300000][MAX_WORD_SIZE];
/*
 * One word typed by the user, together with the bookkeeping kept for it.
 * NOTE(review): the meaning of similarCount and vectorValues is inferred
 * from their names only -- confirm before relying on them.
 */
typedef struct inputWord {
    int exactMatch;          /* set to 0 by main before the text is searched */
    int similarCount;        /* presumably a count of similar words -- TODO confirm */
    char *wordText;          /* heap-allocated, NUL-terminated text of the word */
    double *vectorValues;    /* presumably the word's vector components -- TODO confirm */
    int dictionaryPosition;  /* index of the word in the dictionary table */
} inputW;
/*
 * Returns the number of characters in `str` that precede its terminating
 * NUL byte. `str` must point to a NUL-terminated string.
 */
int strLenght(char *str){
    const char *cursor = str;

    /* Walk forward until the terminator is reached. */
    for (; *cursor != '\0'; ++cursor) {
    }
    return (int)(cursor - str);
}
/*
 * Copies the NUL-terminated string `src`, including its terminator, into
 * `dest`.
 *
 * dest: destination buffer; must have room for strlen(src) + 1 bytes and
 *       must not overlap src.
 * src:  NUL-terminated source string.
 *
 * Exactly strlen(src) + 1 bytes are written. The loop below already copies
 * the terminator, so no further write is made past it.
 */
void strcopy(char *dest, const char *src) {
    while ((*dest++ = *src++) != '\0') {
        /* the assignment in the condition does all the work */
    }
}
/*
 * Checks `str` against the first `numToigno` rows of `ignoreStrs`.
 *
 * Returns 0 when `str` is equal (strcmp) to one of those rows, i.e. the
 * word is on the ignore list, and 1 when no row matches.
 * Note the polarity: a non-zero result means "keep this word".
 */
int isStrignored(char *str,char ignoreStrs[][MAX_WORD_SIZE],int numToigno){
    int row = 0;

    while (row < numToigno) {
        if (!strcmp(ignoreStrs[row], str)) {
            /* listed in the ignore table */
            return 0;
        }
        row++;
    }
    return 1;
}
/*
 * Builds a cleaned copy of `str`: every alphanumeric character is kept and
 * converted to lower case, every other character is dropped.
 *
 * str: NUL-terminated input string (not modified).
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL when `str` is NULL or the allocation fails.
 */
char* getStrCleaned(char* str){
    if (str == NULL) {
        return NULL;
    }

    /* The result can never be longer than the input, so size it from the input. */
    char *cleanStr = malloc(strlen(str) + 1);
    if (cleanStr == NULL) {
        return NULL;
    }

    size_t count = 0;
    for (const char *p = str; *p != '\0'; p++) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        unsigned char ch = (unsigned char)*p;
        if (isalnum(ch)) {
            cleanStr[count++] = (char)tolower(ch);
        }
    }
    cleanStr[count] = '\0';
    return cleanStr;
}
/*
 * Unfinished helper: walks every row of `fwords` and, for each of them,
 * every row of `dict`. Both tables must be terminated by a row equal to "-".
 *
 * word:   the input word (currently unused).
 * fwords: "-"-terminated table of text words.
 * dict:   "-"-terminated table of dictionary words.
 *
 * Returns positionOfWd, which is always 0 because no comparison logic has
 * been written yet.
 * TODO: implement the comparison inside the inner loop.
 */
int closestInText1(inputW word,char fwords[][MAX_WORD_SIZE],char dict[][MAX_WORD_SIZE]){
    int positionOfWd = 0;

    (void)word;

    /* Each loop advances its own index so that the scan terminates at the sentinel. */
    for (int walker = 0; strcmp(fwords[walker], "-") != 0; walker++) {
        for (int walkerDict = 0; strcmp(dict[walkerDict], "-") != 0; walkerDict++) {
            /* comparison not implemented */
        }
    }
    return positionOfWd;
}
double doubledissimilarity(char * w1, char * w2, char dict[][MAX_WORD_SIZE], float threshold, int *position_of_orginalword ){
puts("gets called dd");
int i=0;
double vectorsoforiginal[200];
double vectors2[200];
char *matchIndict;
//w1 is input word, w2 is dict[i];
//first take the original words vector values
int linecount=0;
FILE *fp = fopen("dictionary.txt","r");
char templine[MAX_LINE_LENGTH];
fgets(templine,MAX_LINE_LENGTH,fp);//jumping over the first line
while(fgets(templine,MAX_LINE_LENGTH,fp)){
char *token = strtok(templine," ");
if(strcasecmp(token,w1)==0){
token = strtok(NULL," ");
int vectorindex = 0;
while (token != NULL)
{
double tempvalue = 0.0;
sscanf(token,"%lf",&tempvalue);
vectorsoforiginal[vectorindex++] = tempvalue;
token = strtok(NULL," ");
}
}
linecount++;
}
//WE TAKE THE ORIGINAL WORDS VECTORS.
//NOW WE HAVE TO TAKE COMING WORD'S VECTORS
int vInd=0;
rewind(fp);
fgets(templine,MAX_LINE_LENGTH,fp);//jumpoverthe firstline
while(fgets(templine,MAX_LINE_LENGTH,fp)){
char *token = strtok(templine," ");
if(strcasecmp(token,w2)==0){
int vectorindex = 0;
while(token != NULL){
double tempvalue=0.0;
sscanf(token,"%lf",&tempvalue);
vectors2[vectorindex++] = tempvalue;
if(vectorindex==98){
token = strtok(NULL,"\n");
}
else{
token = strtok(NULL," ");
}
}
break;
}
}
//now we have to calculate the dissimilarity between 2 vectors
double sum=0.0;
for(int i=0;i<100;i++){
//printf("vectororginal = %lf,vectors2 = %lf\n",vectorsoforiginal[i],vectors2[i]);
sum += fabs((vectorsoforiginal[i] - vectors2[i]) * (vectorsoforiginal[i] - vectors2[i]));
}
//double distance = sqrt(sum);
sum = sqrt(sum);
//printf("Distance between = %s, %s is = %lf\n",w1,w2,sum);
return sum;
}
/*
 * Reads the words of `text_file` into `words`, skipping every word listed
 * in `ignore_file`.
 *
 * text_file:   path of the text to read.
 * ignore_file: path of the ignore list; the first whitespace-delimited
 *              token of each non-blank line is one ignored word.
 * words:       output table. One word per row, followed by a sentinel row
 *              "-". The caller must supply enough rows for every word plus
 *              the sentinel; this function cannot check the table size.
 *
 * A word is a run of alphanumeric characters ended by a space, a newline or
 * the end of the file; any other character is dropped without ending the
 * word. Words and ignore entries longer than MAX_WORD_SIZE - 1 characters
 * are truncated to that length. Case is preserved.
 *
 * Returns the number of words stored (sentinel not counted), or -1 when a
 * file cannot be opened or memory cannot be allocated. In that case `words`
 * is left untouched.
 */
int read_text(const char * text_file, const char * ignore_file, char words[][MAX_WORD_SIZE]){
    int result = -1;
    int numOflinesIgnore = 0;
    int numIgnored = 0;
    int count = 0;
    int j = 0;
    int comingChar;
    char cleanStr[MAX_WORD_SIZE];
    char *line = NULL;
    char (*ignoreStrs)[MAX_WORD_SIZE] = NULL;
    FILE *ignoreF = fopen(ignore_file,"r");
    FILE *textF = fopen(text_file,"r");

    if (ignoreF == NULL || textF == NULL) {
        printf("Error oppening file!");
        goto cleanup;
    }

    line = malloc(MAX_LINE_LENGTH);
    if (line == NULL) {
        goto cleanup;
    }

    /* First pass: count the lines so the ignore table can be sized. */
    while (fgets(line, MAX_LINE_LENGTH, ignoreF) != NULL) {
        numOflinesIgnore++;
    }
    rewind(ignoreF);

    /* +1 keeps the request non-zero when the ignore file is empty. */
    ignoreStrs = malloc(((size_t)numOflinesIgnore + 1) * sizeof *ignoreStrs);
    if (ignoreStrs == NULL) {
        goto cleanup;
    }

    /* Second pass: store the first token of every non-blank line. */
    while (numIgnored < numOflinesIgnore && fgets(line, MAX_LINE_LENGTH, ignoreF) != NULL) {
        char *token = strtok(line, " \t\r\n");
        if (token != NULL) {
            snprintf(ignoreStrs[numIgnored], MAX_WORD_SIZE, "%s", token);
            numIgnored++;
        }
    }

    /* fgetc returns an int so that EOF can be told apart from every byte value. */
    do {
        comingChar = fgetc(textF);
        if (comingChar != EOF && isalnum(comingChar)) {
            if (count < MAX_WORD_SIZE - 1) {
                cleanStr[count++] = (char)comingChar;
            }
        }
        /* End of file also ends the current word, so the last word is not lost. */
        if (comingChar == ' ' || comingChar == '\n' || comingChar == EOF) {
            cleanStr[count] = '\0';
            /* isStrignored returns non-zero when the word is NOT on the ignore list. */
            if (count > 0 && isStrignored(cleanStr, ignoreStrs, numIgnored)) {
                memcpy(words[j], cleanStr, (size_t)count + 1);
                j++;
            }
            count = 0;
        }
    } while (comingChar != EOF);

    strcpy(words[j],"-");
    result = j;

cleanup:
    free(ignoreStrs);
    free(line);
    if (ignoreF != NULL) {
        fclose(ignoreF);
    }
    if (textF != NULL) {
        fclose(textF);
    }
    return result;
}
/*READ DICTIONARY*/
int read_dict(const char * file_name, char dict[][MAX_WORD_SIZE]){
FILE *fp = fopen(file_name,"r");
int numWords=0;
int vecTorSize=0;
if(fp!=NULL){
int numofwords =0;
char line[MAX_LINE_LENGTH];
fgets(line,MAX_LINE_LENGTH,fp);
sscanf(line,"%*[^0-9]%d%*[^0-9]%d",&numWords,&vecTorSize);
//printf("numofwords = %d, vectorsize= %d",numWords,vecTorSize);
while(fgets(line,MAX_LINE_LENGTH,fp)){
sscanf(line,"%s",dict[numofwords]);
numofwords++;
}
dict[numofwords][0] = '-';
dict[numofwords][1] = '\0';
int m =0;
return numofwords;
}else{
return -1;
}
}
/*
 * Looks `word` up in `dict`, ignoring case.
 *
 * word:         NUL-terminated word to search for.
 * dict:         table of words terminated by a row equal to "-".
 * dictPosition: receives the index of the first matching row, or -1 when
 *               the word is not in the table.
 *
 * Prints "Found." when a match is found.
 * Returns 1 when the word was found and 0 otherwise.
 */
int checkIfInDict(char *word,char dict[][MAX_WORD_SIZE],int *dictPosition){
    int found = -1;

    /* Compare the whole row with the sentinel so that ordinary words that
     * merely begin with '-' do not end the search early. */
    for (int walker = 0; strcmp(dict[walker], "-") != 0; walker++) {
        if (strcasecmp(word, dict[walker]) == 0) {
            puts("Found.");
            found = walker;
            break;
        }
    }

    *dictPosition = found;
    return found >= 0;
}
/*
 * Counts how many rows of `book` are exactly equal (strcmp) to `word`.
 *
 * word:       NUL-terminated word to search for.
 * book:       table of words terminated by a row equal to "-".
 * matchCount: incremented by the number of matching rows; the caller is
 *             responsible for its initial value.
 *
 * Returns the number of matching rows found by this call.
 */
int checkTheBook(char *word,char book[][MAX_WORD_SIZE],int *matchCount){
    int found = 0;

    for (int walker = 0; strcmp(book[walker], "-") != 0; walker++) {
        if (strcmp(word, book[walker]) == 0) {
            found++;
        }
    }

    *matchCount += found;
    return found;
}
int main(int argc,char *argv[]){
char dict[1002][MAX_WORD_SIZE];
int vectorSize,wordsLen;
char *dictionaryfile = "dictionary.txt";
char *inputfile = "input.txt";
char *ignorefile = "ignore.txt";
char input[100];
char inputCopy[100];
int numofwords = read_dict(dictionaryfile,dict);
read_text(inputfile,ignorefile,words);
printf("Enter word(s): ");
fgets(input,100,stdin);
strcopy(inputCopy,input);
int i=0;
int wordCount=0;
int indexCounter = 0;
//COUNTING INPUTTED WORDS
char *token = strtok(input," ");
while(token !=NULL){
wordCount++;
// puts(token);
token = strtok(NULL," ");
}
//PUTTING INPUTS IN ENTEREDWORDS ARRAY
inputW *inputsa = (inputW*)malloc(wordCount*sizeof(inputW));
char enteredWords[wordCount][35];
token = strtok(inputCopy," ");
//WE PUT THE COMING INPUTS INSIDE EVERY INPUTW STRUCT
while(token !=NULL){
char *temp;
inputsa[indexCounter].wordText = (char*)malloc(35 * sizeof(char));
temp = getStrCleaned(token);
strcopy(inputsa[indexCounter].wordText,temp);
strcopy(enteredWords[indexCounter],temp);
indexCounter++;
token = strtok(NULL," ");
}
//FIRST WE CHECK IF THE GIVEN WORD IS IN THE INPUT FILE ITSELF, IF IT IS WE INCREMENT THE EXACTHMATCH INTEGER ON THE WORD OBJECT ITSELF
for(int a=0;a<wordCount;a++){
inputsa[a].exactMatch = 0;
int pTocount = inputsa[a].exactMatch;
int *posP = &inputsa[a].dictionaryPosition;
char *ptoWord = inputsa[a].wordText;
checkTheBook(ptoWord,words,&pTocount);
printf("exact match = %d",pTocount);
}
// NOW WE HAVE TO CHECK IF ANYWORD'S EXACT MATCH INT IS LESS THAN 1 IF THAT WE GONNA USE DICTIONARY
int posofOriginal = 0;
checkIfInDict(enteredWords[0],dict,&posofOriginal);
if(posofOriginal > 0){
puts("dictionaryde var");
for(int m=0;m<1001;m++){
double sum;
sum = doubledissimilarity("pigling",dict[m],dict,2.0,&posofOriginal);
printf("distance between = %s,%s is = %lf\n",enteredWords[0],dict[m],sum);
}
}
/*
for(int b=0;b<wordCount;b++){
checkIfInDict(inputsa[b].wordText,dict,&inputsa[b].dictionaryPosition);
if(inputsa[b].dictionaryPosition > 0){
puts("dictionaryde var");
posofOriginal = inputsa[b].dictionaryPosition;
for(int m=0;m<1001;m++){
double sum;
sum = doubledissimilarity(inputsa[b].wordText,dict[m],dict,2.0,&posofOriginal);
printf("distance between = %s,%s is = %lf\n",inputsa[b].wordText,dict[m],sum);
}
}
}
*/
//NOW WE GONNA SEND THE MATCHED STRING IN DIRECTORY TO DISSIMILARITY FUNCTION,
}Editor is loading...