package prefuse.data.search; import java.util.Iterator; import java.util.LinkedList; import java.util.NoSuchElementException; import prefuse.data.Tuple; /** * A trie data structure for fast-lookup of words based on their * prefixes. The name "Trie" is a play on the words "tree" and * "retrieval". This class builds a tree structure representing a set of * words by their prefixes. It is useful for performing prefix-based * searches over large amounts of text in an efficient manner. * * @version 1.0 * @author jeffrey heer * @see PrefixSearchTupleSet */ public class Trie { /** * Base class for nodes in the trie structure. */ public class TrieNode { boolean isLeaf; int leafCount = 0; } /** * A TrieNode implementation representing a branch in the tree. The * class maintains a list of characters (the next character in the * prefix) and associated children TrieNodes for each. */ public class TrieBranch extends TrieNode { char[] chars = new char[] {0}; TrieNode[] children = new TrieNode[1]; } /** * A TrieNode implementation representing a leaf in the tree. The class * stores the word and tuple for the leaf, as well as a reference to the * successor leaf node in the trie. */ public class TrieLeaf extends TrieNode { public TrieLeaf(String word, Tuple t) { this.word = word; tuple = t; next = null; leafCount = 1; } String word; Tuple tuple; TrieLeaf next; } /** * An iterator for traversing a subtree of the Trie. */ public class TrieIterator implements Iterator { private LinkedList queue; public TrieIterator(TrieNode node) { queue = new LinkedList(); queue.add(node); } public boolean hasNext() { return !queue.isEmpty(); } public Object next() { if ( queue.isEmpty() ) throw new NoSuchElementException(); TrieNode n = (TrieNode)queue.removeFirst(); Object o; if ( n instanceof TrieLeaf ) { TrieLeaf l = (TrieLeaf)n; o = l.tuple; if ( l.next != null ) queue.addFirst(l.next); return o; } else { TrieBranch b = (TrieBranch)n; for ( int i = b.chars.length-1; i > 0; i-- ) { queue.addFirst(b.children[i]); } if ( b.children[0] != null ) queue.addFirst(b.children[0]); return next(); } } public void remove() { throw new UnsupportedOperationException(); } } // end of inner clas TrieIterator private TrieBranch root = new TrieBranch(); private boolean caseSensitive = false; /** * Create a new Trie with the specified case-sensitivity. * @param caseSensitive true if the index should be case sensitive for * indexed words, false otherwise. */ public Trie(boolean caseSensitive) { this.caseSensitive = caseSensitive; } /** * Indicates if this Trie's index takes the case of letters * into account. * @return true if the index is case-sensitive, false otherwise */ public boolean isCaseSensitive() { return caseSensitive; } /** * Add a new word to the trie, associated with the given Tuple. * @param word the word to add to the Trie * @param t the Tuple associated with the word */ public void addString(String word, Tuple t) { TrieLeaf leaf = new TrieLeaf(word,t); addLeaf(root, leaf, 0); } /** * Remove a word/Tuple pair from the trie. * @param word the word to remove * @param t the associate Tuple to remove */ public void removeString(String word, Tuple t) { removeLeaf(root, word, t, 0); } private final int getIndex(char[] chars, char c) { for ( int i=0; i= s.length() ? 0 : s.charAt(i) ); return ( caseSensitive ? c : Character.toLowerCase(c) ); } private final TrieNode equalityCheck(String word, TrieLeaf l) { if ( caseSensitive ) { return l.word.startsWith(word) ? l : null; } else { // do our own looping to avoid string allocation for case change int len = word.length(); if ( len > l.word.length() ) return null; for ( int i=0; i