Untitled
unknown
plain_text
4 years ago
5.9 kB
8
Indexable
////////////////慢版本
#define FILE_EXTENSION ".txt"
#include<fstream>
#include<string>
#include<cstring>
#include<vector>
#include<iostream>
#include<algorithm>
#include<queue>
using namespace std;
const int ALPHABET_SIZE = 26;
// trie node
struct TrieNode
{
struct TrieNode *children[ALPHABET_SIZE];
// isEndOfWord is true if the node represents
// end of a word
bool isEndOfWord;
};
// Returns new trie node (initialized to NULLs)
struct TrieNode *getNode(void)
{
struct TrieNode *pNode = new TrieNode;
pNode->isEndOfWord = false;
for (int i = 0; i < ALPHABET_SIZE; i++)
pNode->children[i] = NULL;
return pNode;
}
// If not present, inserts key into trie
// If the key is prefix of trie node, just
// marks leaf node
void insert(struct TrieNode *root, string key)
{
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++)
{
int index = tolower(key[i]) - 'a';
if (!pCrawl->children[index])
pCrawl->children[index] = getNode();
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isEndOfWord = true;
}
// Returns true if key presents in trie, else
// false
bool search_exact(struct TrieNode *root, string key)
{
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++)
{
int index = key[i] - 'a';
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return (pCrawl->isEndOfWord);
}
bool search_prefix(struct TrieNode *root, string key)
{
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++)
{
int index = key[i] - 'a';
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return true;
}
// Utility Func
// string parser : output vector of strings (words) after parsing
vector<string> word_parse(vector<string> tmp_string){
vector<string> parse_string;
for(auto& word : tmp_string){
string new_str;
for(auto &ch : word){
if(isalpha(ch))
new_str.push_back(ch);
}
parse_string.emplace_back(new_str);
}
return parse_string;
}
vector<string> split(const string& str, const string& delim) {
vector<string> res;
if("" == str) return res;
//先將要切割的字串從string型別轉換為char*型別
char * strs = new char[str.length() + 1] ; //不要忘了
strcpy(strs, str.c_str());
char * d = new char[delim.length() + 1];
strcpy(d, delim.c_str());
char *p = strtok(strs, d);
while(p) {
string s = p; //分割得到的字串轉換為string型別
res.push_back(s); //存入結果陣列
p = strtok(NULL, d);
}
return res;
}
int main(int argc, char *argv[])
{
string data_dir = argv[1] + string("/");
string query = string(argv[2]);
string output = string(argv[3]);
// Read File & Parser Example
string file, title_name, tmp, q_line;
fstream fi,fo,fq;
vector<string> tmp_string, q_string;
string data_num=data_dir + string("0.txt");
// priority_queue<pair<int,int>,vector<pair<int, int>>,greater<pair<int,int>>> pri_queue;
int curnum;
fq.open("query.txt");
fo.open("output.txt");
while(getline(fq,q_line)){
curnum=0;
data_num=data_dir + string("0.txt");
int flag=0;
q_string=split(q_line," ");
// vector<string> _query = word_parse(q_string);
fi.open(data_num, ios::in);
while(fi){
struct TrieNode *root = getNode();
struct TrieNode *suffix_root = getNode();
getline(fi, title_name);
tmp_string = split(title_name, " ");
vector<string> title = word_parse(tmp_string);
for(auto &word : title){
// fo << word << endl;
insert(root,word);
reverse(word.begin(), word.end());
insert(suffix_root,word);
}
// GET CONTENT LINE BY LINE
while(getline(fi, tmp)){
// GET CONTENT WORD VECTOR
tmp_string = split(tmp, " ");
// PARSE CONTENT
vector<string> content = word_parse(tmp_string);
for(auto word : content){
insert(root,word);
reverse(word.begin(), word.end());
insert(suffix_root,word);
}
}
bool a,b;
int and_=0;
int or_=0;
for(auto &i : q_string){
if(i[0]!='+' && i[0]!='/'){
if(i[0]=='\"') {
string exa=i;
exa.erase(exa.end()-1);
exa.erase(0,1);
b=search_exact(root, exa);
}
else if(i[0]=='*') {
string rev=i;
reverse(rev.begin(), rev.end());
rev.erase(rev.end()-1);
rev.erase(0,1);
b=search_prefix(suffix_root, rev);
}
else {
b=search_prefix(root, i);
}
}
if(and_==0 && or_==0 && i[0]!='+' && i[0]!='/') a=b;
if(and_==1){
a=a&b;
and_=0;
}
if(or_==1){
a=a|b;
or_=0;
}
if(i[0]=='+'){
and_=1;
}
if(i[0]=='/'){
or_=1;
}
}
if(a) {
// for(auto &word : title){
// fo << word << endl;
// }
fo << title_name << endl;
flag=1;
}
// fo << curnum << endl;
fi.close();
data_num= data_dir + to_string(curnum++)+ string(".txt");
fi.open(data_num, ios::in);
}
if(flag==0) fo << "Not Found!" << endl;
}
fq.close();
fo.close();
fi.close();
}
// 1. UPPERCASE CHARACTER & LOWERCASE CHARACTER ARE SEEN AS SAME.
// 2. FOR SPECIAL CHARACTER OR DIGITS IN CONTENT OR TITLE -> PLEASE JUST IGNORE, YOU WONT NEED TO CONSIDER IT.
// EG : "AB?AB" WILL BE SEEN AS "ABAB", "I AM SO SURPRISE!" WILL BE SEEN AS WORD ARRAY AS ["I", "AM", "SO", "SURPRISE"].
// 3. THE OPERATOR IN "QUERY.TXT" IS LEFT ASSOCIATIVE
// EG : A + B / C == (A + B) / C
//
//////////////////////////////////////////////////////////
Editor is loading...