package prefuse.data.search;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import prefuse.data.Tuple;
/**
* A trie data structure for fast-lookup of words based on their
* prefixes. The name "Trie" is a play on the words "tree" and
* "retrieval". This class builds a tree structure representing a set of
* words by their prefixes. It is useful for performing prefix-based
* searches over large amounts of text in an efficient manner.
*
* @version 1.0
* @author jeffrey heer
* @see PrefixSearchTupleSet
*/
public class Trie {
/**
* Base class for nodes in the trie structure.
*/
public class TrieNode {
boolean isLeaf;
int leafCount = 0;
}
/**
* A TrieNode implementation representing a branch in the tree. The
* class maintains a list of characters (the next character in the
* prefix) and associated children TrieNodes for each.
*/
public class TrieBranch extends TrieNode {
char[] chars = new char[] {0};
TrieNode[] children = new TrieNode[1];
}
/**
* A TrieNode implementation representing a leaf in the tree. The class
* stores the word and tuple for the leaf, as well as a reference to the
* successor leaf node in the trie.
*/
public class TrieLeaf extends TrieNode {
public TrieLeaf(String word, Tuple t) {
this.word = word;
tuple = t;
next = null;
leafCount = 1;
}
String word;
Tuple tuple;
TrieLeaf next;
}
/**
* An iterator for traversing a subtree of the Trie.
*/
public class TrieIterator implements Iterator {
private LinkedList queue;
public TrieIterator(TrieNode node) {
queue = new LinkedList();
queue.add(node);
}
public boolean hasNext() {
return !queue.isEmpty();
}
public Object next() {
if ( queue.isEmpty() )
throw new NoSuchElementException();
TrieNode n = (TrieNode)queue.removeFirst();
Object o;
if ( n instanceof TrieLeaf ) {
TrieLeaf l = (TrieLeaf)n;
o = l.tuple;
if ( l.next != null )
queue.addFirst(l.next);
return o;
} else {
TrieBranch b = (TrieBranch)n;
for ( int i = b.chars.length-1; i > 0; i-- ) {
queue.addFirst(b.children[i]);
}
if ( b.children[0] != null )
queue.addFirst(b.children[0]);
return next();
}
}
public void remove() {
throw new UnsupportedOperationException();
}
} // end of inner clas TrieIterator
private TrieBranch root = new TrieBranch();
private boolean caseSensitive = false;
/**
* Create a new Trie with the specified case-sensitivity.
* @param caseSensitive true if the index should be case sensitive for
* indexed words, false otherwise.
*/
public Trie(boolean caseSensitive) {
this.caseSensitive = caseSensitive;
}
/**
* Indicates if this Trie's index takes the case of letters
* into account.
* @return true if the index is case-sensitive, false otherwise
*/
public boolean isCaseSensitive() {
return caseSensitive;
}
/**
* Add a new word to the trie, associated with the given Tuple.
* @param word the word to add to the Trie
* @param t the Tuple associated with the word
*/
public void addString(String word, Tuple t) {
TrieLeaf leaf = new TrieLeaf(word,t);
addLeaf(root, leaf, 0);
}
/**
* Remove a word/Tuple pair from the trie.
* @param word the word to remove
* @param t the associate Tuple to remove
*/
public void removeString(String word, Tuple t) {
removeLeaf(root, word, t, 0);
}
private final int getIndex(char[] chars, char c) {
for ( int i=0; i= s.length() ? 0 : s.charAt(i) );
return ( caseSensitive ? c : Character.toLowerCase(c) );
}
private final TrieNode equalityCheck(String word, TrieLeaf l) {
if ( caseSensitive ) {
return l.word.startsWith(word) ? l : null;
} else {
// do our own looping to avoid string allocation for case change
int len = word.length();
if ( len > l.word.length() ) return null;
for ( int i=0; i