不要segmentation fault
unknown
c_cpp
2 years ago
7.3 kB
6
Indexable
#define FILE_EXTENSION ".txt"
#include<fstream>
#include<string>
#include<cstring>
#include<vector>
#include<iostream>
#include<unordered_map>
#include<bitset>
#include <filesystem>
#include<stdio.h>
#pragma GCC optimize("Ofast")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-ftree-tail-merge")
#include <chrono>
#define MAX 100005
#define DATA_MAX 11000
using namespace std;
namespace fs = std::filesystem;
int num = 0;
int charToIndex(char& c) {
if (c >= 'A' && c <= 'Z')
return c - 'A';
else if (c >= 'a' && c <= 'z')
return c - 'a';
return -1;
}
// class Trie {
// public:
int trie[MAX][26];
bitset<DATA_MAX> ed[MAX], suffix[MAX], prefix[MAX];
bitset<DATA_MAX> NOT_FOUND;
// Trie() {}
void insert(string& word, int &index) {
int root = 0;
int len = word.size();
for(char &c : word){
int p = charToIndex(c);
if(trie[root][p] == 0)
trie[root][p]= ++num;
root = trie[root][p];
prefix[root][index]=1;
}
ed[root][index] = 1;
root = 0;
for(int i = len-1; i >= 0; --i){
int p = charToIndex(word[i]);
if(trie[root][p] == 0)
trie[root][p] = ++num;
root = trie[root][p];
suffix[root][index] = 1;
}
}
bitset<DATA_MAX> WildCardSearch(string word, int index, int root) {
int len = word.size();
if (word[index] == '*') {
while (index+1 < len && word[index+1] == '*') {
index++; // avoid multiple '*'
}
if (index == len-2) {
return prefix[root];
}
bitset<DATA_MAX> result;
for (int i = 0; i < 26; ++i) {
if (trie[root][i] != 0 && result == NOT_FOUND) {
result = (WildCardSearch(word, index, trie[root][i]) | WildCardSearch(word, index + 1, root));
}
}
return result;
}
else {
int charIndex = charToIndex(word[index]);
if (trie[root][charIndex] == 0) {
return NOT_FOUND;
}
if(index == len-2) {
return ed[root];
}
return WildCardSearch(word, index + 1, trie[root][charIndex]);
}
}
bitset<DATA_MAX> SuffixSearch(string& word) {
int root = 0;
int len = word.size();
for(int i = len-1; i >= 0; --i) {
if(isalpha(word[i])) {
int index = charToIndex(word[i]);
if(trie[root][index] == 0)
return NOT_FOUND;
root = trie[root][index];
}
}
return suffix[root];
}
bitset<DATA_MAX> search(string& word, bool pre = false) {
int root = 0;
for(auto &c: word) {
if(isalpha(c)) {
int index = charToIndex(c);
if(trie[root][index] == 0)
return NOT_FOUND;
root = trie[root][index];
}
}
if(pre == false) return ed[root];
return prefix[root];
}
bitset<DATA_MAX> startsWith(string& pre) {
return search(pre, true);
}
// };
// Utility Func
// string parser : output vector of strings (words) after parsing
vector<string> word_parse(vector<string> tmp_string){
vector<string> parse_string;
for(auto& word : tmp_string){
string new_str;
for(auto &ch : word){
if(isalpha(ch))
new_str += ch;
}
parse_string.emplace_back(new_str);
}
return parse_string;
}
vector<string> split(const string& str, const string& delim) {
vector<string> res;
if("" == str) return res;
char * strs = new char[str.length() + 1] ;
strcpy(strs, str.c_str());
char * d = new char[delim.length() + 1];
strcpy(d, delim.c_str());
char *p = strtok(strs, d);
while(p) {
string s = p;
res.push_back(s);
p = strtok(nullptr, d);
}
return res;
}
int checkType(string& s) {
if(s == "/" || s == "-" || s == "+") // operator
return 4;
else if(s[0] == '"') {
return 0; // exact word
}
else if(s[0] == '*' && s[s.size()-1] == '*') {
return 2; // suffix word
}
else if(s[0] == '<' && s[s.size()-1] == '>') {
return 3; // wildcard search
}
else {
return 1; // prefix
}
}
int main(int argc, char *argv[]) {
// INPUT :
// 1. data directory in data folder
// 2. number of txt files
// 3. output route
auto start_time = std::chrono::high_resolution_clock::now();
string data_dir = argv[1] + string("/");
string query = string(argv[2]);
ifstream fi(query, ios::in);
string output = string(argv[3]);
string file, title_name, tmp, q_tmp;
FILE *outputFile = freopen(output.c_str(), "w", stdout);
vector<string> tmp_string;
int cnt = 0; // count of data
const int EXACT = 0;
const int PREFIX = 1;
const int SUFFIX = 2;
const int WILDCARD = 3;
// GET TITLENAME WORD ARRAY
vector<string> title;
char str[MAX];
// Trie* tr = new Trie();
while (1) {
string current_file_path = data_dir + to_string(cnt) + ".txt";
FILE *inputFile = freopen(current_file_path.c_str(), "r", stdin);
if (inputFile == nullptr) {
break;
}
fgets(str, MAX, stdin);
title.emplace_back(str);
vector<string> tmp_title = word_parse(split(str, " "));
for(auto &word : tmp_title) {
insert(word, cnt);
}
while (fgets(str, MAX, stdin)) {
// GET CONTENT WORD VECTOR
vector<string> tmp_string = split(str, " ");
// PARSE CONTENT
vector<string> content = word_parse(tmp_string);
for (auto &word : content) {
insert(word, cnt);
}
}
++cnt;
fclose(inputFile);
}
bitset<DATA_MAX> ans(0);
bitset<DATA_MAX> title_tmp(0);
// int n = 1;
while(getline(fi, q_tmp)) { // get seperate query
int merge_type = -1;
vector<string> q = split(q_tmp, " ");
// cout << n++<< endl;
for(auto &it_query : q) { // iterate all the query
int type = checkType(it_query);
if(type == 4) {
if(it_query == "/") {
merge_type = 0;
}
else if(it_query == "-") {
merge_type = 1;
}
else if(it_query == "+") {
merge_type = 2;
}
continue;
}
if(type == EXACT) {
title_tmp = search(it_query);
}
else if(type == PREFIX) {
title_tmp = startsWith(it_query);
}
else if(type == SUFFIX) {
title_tmp = SuffixSearch(it_query);
}
else if(type == WILDCARD) {
// if(data->WildCardSearch(it_query, 1, data->root))
// title_tmp[i] = 1;
title_tmp = WildCardSearch(it_query, 1, 0);
}
if(merge_type == 0) { // OR
ans |= title_tmp;
}
else if(merge_type == 1) { // NOT
ans &= ~title_tmp;
}
else if(merge_type == 2) { // AND
ans &= title_tmp;
}
else { // -1, don't need to merge
ans = title_tmp;
}
title_tmp.reset();
merge_type = -1;
}
bool found = false;
for(int i = 0; i < cnt; ++i) {
if(ans[i] == 1) {
for(auto &c: title[i]) {
putchar(c);
found = true;
}
}
}
if(found == false)
fputs("Not Found!\n", stdout);
ans.reset();
}
fi.close();
fclose(outputFile);
}
// 1. UPPERCASE CHARACTER & LOWERCASE CHARACTER ARE SEEN AS SAME.
// 2. FOR SPECIAL CHARACTER OR DIGITS IN CONTENT OR TITLE -> PLEASE JUST IGNORE, YOU WONT NEED TO CONSIDER IT.
// EG : "AB?AB" WILL BE SEEN AS "ABAB", "I AM SO SURPRISE!" WILL BE SEEN AS WORD ARRAY AS ["I", "AM", "SO", "SURPRISE"].
// 3. THE OPERATOR IN "QUERY.TXT" IS LEFT ASSOCIATIVE
// EG : A + B / C == (A + B) / C
//
//////////////////////////////////////////////////////////Editor is loading...
Leave a Comment