Untitled

mail@pastecode.io avatar
unknown
plain_text
2 years ago
5.9 kB
1
Indexable
Never

////////////////慢版本

#define FILE_EXTENSION ".txt"
#include<fstream>
#include<string>
#include<cstring>
#include<vector>
#include<iostream>
#include<algorithm>
#include<queue>
using namespace std;
const int ALPHABET_SIZE = 26;
 
// trie node
struct TrieNode
{
    struct TrieNode *children[ALPHABET_SIZE];
 
    // isEndOfWord is true if the node represents
    // end of a word
    bool isEndOfWord;
};
 
// Returns new trie node (initialized to NULLs)
struct TrieNode *getNode(void)
{
    struct TrieNode *pNode =  new TrieNode;
 
    pNode->isEndOfWord = false;
 
    for (int i = 0; i < ALPHABET_SIZE; i++)
        pNode->children[i] = NULL;
 
    return pNode;
}
 
// If not present, inserts key into trie
// If the key is prefix of trie node, just
// marks leaf node
void insert(struct TrieNode *root, string key)
{
    struct TrieNode *pCrawl = root;
 
    for (int i = 0; i < key.length(); i++)
    {
        
		int index = tolower(key[i]) - 'a';
        if (!pCrawl->children[index])
            pCrawl->children[index] = getNode();
 
        pCrawl = pCrawl->children[index];
    }
 
    // mark last node as leaf
    pCrawl->isEndOfWord = true;
}
 
// Returns true if key presents in trie, else
// false
bool search_exact(struct TrieNode *root, string key)
{
    struct TrieNode *pCrawl = root;
 
    for (int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if (!pCrawl->children[index])
            return false;
 
        pCrawl = pCrawl->children[index];
    }
 
    return (pCrawl->isEndOfWord);
}

bool search_prefix(struct TrieNode *root, string key)
{
    struct TrieNode *pCrawl = root;
 
    for (int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if (!pCrawl->children[index])
            return false;
 
        pCrawl = pCrawl->children[index];
    }
 
    return true;
}
// Utility Func

// string parser : output vector of strings (words) after parsing
vector<string> word_parse(vector<string> tmp_string){
	vector<string> parse_string;
	for(auto& word : tmp_string){
		string new_str;
    	for(auto &ch : word){
			if(isalpha(ch))
				new_str.push_back(ch);
		}
		parse_string.emplace_back(new_str);
	}
	return parse_string;
}

vector<string> split(const string& str, const string& delim) {
	vector<string> res;
	if("" == str) return res;
	//先將要切割的字串從string型別轉換為char*型別
	char * strs = new char[str.length() + 1] ; //不要忘了
	strcpy(strs, str.c_str());

	char * d = new char[delim.length() + 1];
	strcpy(d, delim.c_str());

	char *p = strtok(strs, d);
	while(p) {
		string s = p; //分割得到的字串轉換為string型別
		res.push_back(s); //存入結果陣列
		p = strtok(NULL, d);
	}

	return res;
}

int main(int argc, char *argv[])
{
    string data_dir = argv[1] + string("/");
	string query = string(argv[2]);
	string output = string(argv[3]);
	
	// Read File & Parser Example

	string file, title_name, tmp, q_line;
	fstream fi,fo,fq;
	vector<string> tmp_string, q_string;
	string data_num=data_dir + string("0.txt");
	// priority_queue<pair<int,int>,vector<pair<int, int>>,greater<pair<int,int>>> pri_queue;
	int curnum;
	fq.open("query.txt");
	fo.open("output.txt");
	while(getline(fq,q_line)){
		curnum=0;
		data_num=data_dir + string("0.txt");
		int flag=0;
		q_string=split(q_line," ");
		// vector<string> _query = word_parse(q_string);
		fi.open(data_num, ios::in);
		while(fi){
			struct TrieNode *root = getNode();
			struct TrieNode *suffix_root = getNode();
			getline(fi, title_name);

			tmp_string = split(title_name, " ");

			vector<string> title = word_parse(tmp_string);

			for(auto &word : title){
				// fo << word << endl;
				insert(root,word);
				reverse(word.begin(), word.end());
				insert(suffix_root,word);
			}

			// GET CONTENT LINE BY LINE
			while(getline(fi, tmp)){

				// GET CONTENT WORD VECTOR
				tmp_string = split(tmp, " ");

				// PARSE CONTENT
				vector<string> content = word_parse(tmp_string);

				for(auto word : content){
					insert(root,word);
					reverse(word.begin(), word.end());
					insert(suffix_root,word);
				}
			}
			bool a,b;
			int and_=0;
			int or_=0;
			for(auto &i : q_string){
				if(i[0]!='+' && i[0]!='/'){
					if(i[0]=='\"') {
						string exa=i;
						exa.erase(exa.end()-1);
						exa.erase(0,1);
						b=search_exact(root, exa);
					}  
					else if(i[0]=='*')  {
						string rev=i;
						reverse(rev.begin(), rev.end());
						rev.erase(rev.end()-1);
						rev.erase(0,1);
						 b=search_prefix(suffix_root, rev);
					}
					else {
						b=search_prefix(root, i);
					} 
				}
				if(and_==0 && or_==0 && i[0]!='+' && i[0]!='/') a=b;
				if(and_==1){
					a=a&b;
					and_=0;
				}
				if(or_==1){
					a=a|b;
					or_=0;
				}
				if(i[0]=='+'){
					and_=1;
				}
				if(i[0]=='/'){
					or_=1;
				}
				
			}
			if(a) {
				// for(auto &word : title){
				// 	fo << word << endl;
				// }
				fo << title_name << endl;
				flag=1;
			}
			// fo << curnum << endl;
			fi.close();
			data_num= data_dir + to_string(curnum++)+ string(".txt");
			
			fi.open(data_num, ios::in);
		}
			if(flag==0) fo << "Not Found!" << endl;
			
	}
	fq.close();
	fo.close();
	fi.close();
    
}


// 1. UPPERCASE CHARACTER & LOWERCASE CHARACTER ARE SEEN AS SAME.
// 2. FOR SPECIAL CHARACTER OR DIGITS IN CONTENT OR TITLE -> PLEASE JUST IGNORE, YOU WONT NEED TO CONSIDER IT.
//    EG : "AB?AB" WILL BE SEEN AS "ABAB", "I AM SO SURPRISE!" WILL BE SEEN AS WORD ARRAY AS ["I", "AM", "SO", "SURPRISE"].
// 3. THE OPERATOR IN "QUERY.TXT" IS LEFT ASSOCIATIVE
//    EG : A + B / C == (A + B) / C

//

//////////////////////////////////////////////////////////