AhoCorasick

mail@pastecode.io avatar
unknown
c_cpp
3 years ago
2.2 kB
1
Indexable
Never
const int ALPHABETSIZE = 26;
const char ALPHABEGIN = 'a';

struct Vertex {
  int children[ALPHABETSIZE];
  int parentId = -1;
  char parentChar;
  int suffixLink;
  int outputLink;
  bool isWord = false;
  int wordId = -1;

  Vertex() {
    memset(children, -1, ALPHABETSIZE*sizeof(int));
  }
};

class Aho {
public:
  vector<string> words;
  vector<Vertex> trie;

  void buildTrie() {
    for(int i = 0; i < words.size(); ++i) {
      int curr = 0;
      for(auto c : words[i]) {
	Vertex& currVertex = trie[curr];
	int charIndex = c - ALPHABEGIN;
	  
	if(currVertex.children[charIndex] == -1) {
	  Vertex v = Vertex();
	  v.parentChar = c;
	  v.parentId = curr;
	  currVertex.children[charIndex] = trie.size();
	  trie.push_back(v);
	}

	curr = trie[curr].children[charIndex];
      }

      trie[curr].isWord = true;
      trie[curr].wordId = i;
    }
  }

  void buildLinks() {
    deque<int> queue;
    queue.push_back(0);

    while(!queue.empty()) {
      int curr = queue.front();
      Vertex& currVertex = trie[curr];
      queue.pop_front();

      if(curr == 0 || currVertex.parentId == 0) {
	currVertex.suffixLink = 0;
	currVertex.outputLink = 0;
      }
      else {
	char pch = currVertex.parentChar;
	int pCharIndex = pch - ALPHABEGIN;
	int next = trie[currVertex.parentId].suffixLink;
	
	while(true) {
	  Vertex& nextVertex = trie[next];
	  if(nextVertex.parentId == -1) {
	    currVertex.suffixLink = 0;
	    break;
	  }
	  else if(nextVertex.children[pCharIndex] != -1) {
	    currVertex.suffixLink = nextVertex.children[pCharIndex];
	    break;
	  }

	  next = nextVertex.suffixLink;
	}
      }

      if(trie[currVertex.suffixLink].isWord)
	currVertex.outputLink = currVertex.suffixLink;
      else
	currVertex.outputLink = trie[currVertex.suffixLink].outputLink;

      for(int i = 0; i < ALPHABETSIZE; ++i) {
	if(currVertex.children[i] != -1)
	  queue.push_back(currVertex.children[i]);
      }
    }
  }

  Aho(vector<string> pat) {
    words = pat;
    trie.push_back(Vertex());  // root
    
    buildTrie();
    buildLinks();
  }
};