关于Trie树请参见另一篇博客
Jetty-util包里提供了Trie树的实现。 先来看Trie接口
/* ------------------------------------------------------------ *//** A Trie String lookup data structure. * @paramthe Trie entry type */public interface Trie { /* ------------------------------------------------------------ */ /** Put and entry into the Trie * @param s The key for the entry * @param v The value of the entry * @return True if the Trie had capacity to add the field. */ public boolean put(String s, V v); /* ------------------------------------------------------------ */ /** Put a value as both a key and a value. * @param v The value and key * @return True if the Trie had capacity to add the field. */ public boolean put(V v); /* ------------------------------------------------------------ */ public V remove(String s); /* ------------------------------------------------------------ */ /** Get and exact match from a String key * @param s The key * @return the value for the string key */ public V get(String s); /* ------------------------------------------------------------ */ /** Get and exact match from a String key * @param s The key * @param offset The offset within the string of the key * @param len the length of the key * @return the value for the string / offset / length */ public V get(String s,int offset,int len); /* ------------------------------------------------------------ */ /** Get and exact match from a segment of a ByteBuufer as key * @param b The buffer * @return The value or null if not found */ public V get(ByteBuffer b); /* ------------------------------------------------------------ */ /** Get and exact match from a segment of a ByteBuufer as key * @param b The buffer * @param offset The offset within the buffer of the key * @param len the length of the key * @return The value or null if not found */ public V get(ByteBuffer b,int offset,int len); /* ------------------------------------------------------------ */ /** Get the best match from key in a String. * @param s The string * @return The value or null if not found */ public V getBest(String s); /* ------------------------------------------------------------ */ /** Get the best match from key in a String. * @param s The string * @param offset The offset within the string of the key * @param len the length of the key * @return The value or null if not found */ public V getBest(String s,int offset,int len); /* ------------------------------------------------------------ */ /** Get the best match from key in a byte array. * The key is assumed to by ISO_8859_1 characters. * @param b The buffer * @param offset The offset within the array of the key * @param len the length of the key * @return The value or null if not found */ public V getBest(byte[] b,int offset,int len); /* ------------------------------------------------------------ */ /** Get the best match from key in a byte buffer. * The key is assumed to by ISO_8859_1 characters. * @param b The buffer * @param offset The offset within the buffer of the key * @param len the length of the key * @return The value or null if not found */ public V getBest(ByteBuffer b,int offset,int len); /* ------------------------------------------------------------ */ public Set keySet(); /* ------------------------------------------------------------ */ public boolean isFull(); /* ------------------------------------------------------------ */ public boolean isCaseInsensitive(); /* ------------------------------------------------------------ */ public void clear();}
Trie树的方法定义基本可以类比为Map。 接着来看关于Trie的抽象类,AbstractTrie类。
/** * Abstract Trie implementation. *Provides some common implementations, which may not be the most * efficient. For byte operations, the assumption is made that the charset * is ISO-8859-1
* * @paramthe type of object that the Trie holds */public abstract class AbstractTrie implements Trie { final boolean _caseInsensitive; protected AbstractTrie(boolean insensitive) { _caseInsensitive=insensitive; } @Override public boolean put(V v) { return put(v.toString(),v); } @Override public V remove(String s) { V o=get(s); put(s,null); return o; } @Override public V get(String s) { return get(s,0,s.length()); } @Override public V get(ByteBuffer b) { return get(b,0,b.remaining()); } @Override public V getBest(String s) { return getBest(s,0,s.length()); } @Override public V getBest(byte[] b, int offset, int len) { return getBest(new String(b,offset,len,StandardCharsets.ISO_8859_1)); } @Override public boolean isCaseInsensitive() { return _caseInsensitive; }}
AbstractTrie基本定义了几个重写方法的指向。
再接着看具体的实现类,最常见的ArrayTrie。public class ArrayTrieextends AbstractTrie { /** * The Size of a Trie row is how many characters can be looked * up directly without going to a big index. This is set at * 32 to cover case insensitive alphabet and a few other common * characters. */ private static final int ROW_SIZE = 32; /** * The index lookup table, this maps a character as a byte * (ISO-8859-1 or UTF8) to an index within a Trie row */ private static final int[] __lookup = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F /*0*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*1*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*2*/31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, 27, 30, -1, /*3*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28, 29, -1, -1, -1, -1, /*4*/-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /*5*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /*6*/-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /*7*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, }; /** * The Trie rows in a single array which allows a lookup of row,character * to the next row in the Trie. This is actually a 2 dimensional * array that has been flattened to achieve locality of reference. * The first ROW_SIZE entries are for row 0, then next ROW_SIZE * entries are for row 1 etc. So in general instead of using * _rows[row][index], we use _rows[row*ROW_SIZE+index] to look up * the next row for a given character. * * The array is of characters rather than integers to save space. */ private final char[] _rowIndex; /** * The key (if any) for a Trie row. * A row may be a leaf, a node or both in the Trie tree. */ private final String[] _key; /** * The value (if any) for a Trie row. * A row may be a leaf, a node or both in the Trie tree. */ private final V[] _value; /** * A big index for each row. * If a character outside of the lookup map is needed, * then a big index will be created for the row, with * 256 entries, one for each possible byte. */ private char[][] _bigIndex; /** * The number of rows allocated */ private char _rows; public ArrayTrie() { this(128); } /* ------------------------------------------------------------ */ /** * @param capacity The capacity of the trie, which at the worst case * is the total number of characters of all keys stored in the Trie. * The capacity needed is dependent of the shared prefixes of the keys. * For example, a capacity of 6 nodes is required to store keys "foo" * and "bar", but a capacity of only 4 is required to * store "bar" and "bat". */ @SuppressWarnings("unchecked") public ArrayTrie(int capacity) { super(true); _value=(V[])new Object[capacity]; _rowIndex=new char[capacity*32]; _key=new String[capacity]; } /* ------------------------------------------------------------ */ @Override public void clear() { _rows=0; Arrays.fill(_value,null); Arrays.fill(_rowIndex,(char)0); Arrays.fill(_key,null); } /* ------------------------------------------------------------ */ @Override public boolean put(String s, V v) { int t=0; int k; int limit = s.length(); for(k=0; k < limit; k++) { char c=s.charAt(k); int index=__lookup[c&0x7f]; if (index>=0) { int idx=t*ROW_SIZE+index; t=_rowIndex[idx]; if (t==0) { if (++_rows>=_value.length) return false; t=_rowIndex[idx]=_rows; } } else if (c>127) throw new IllegalArgumentException("non ascii character"); else { if (_bigIndex==null) _bigIndex=new char[_value.length][]; if (t>=_bigIndex.length) return false; char[] big=_bigIndex[t]; if (big==null) big=_bigIndex[t]=new char[128]; t=big[c]; if (t==0) { if (_rows==_value.length) return false; t=big[c]=++_rows; } } } if (t>=_key.length) { _rows=(char)_key.length; return false; } _key[t]=v==null?null:s; _value[t] = v; return true; } /* ------------------------------------------------------------ */ @Override public V get(String s, int offset, int len) { int t = 0; for(int i=0; i < len; i++) { char c=s.charAt(offset+i); int index=__lookup[c&0x7f]; if (index>=0) { int idx=t*ROW_SIZE+index; t=_rowIndex[idx]; if (t==0) return null; } else { char[] big = _bigIndex==null?null:_bigIndex[t]; if (big==null) return null; t=big[c]; if (t==0) return null; } } return _value[t]; } /* ------------------------------------------------------------ */ @Override public V get(ByteBuffer b,int offset,int len) { int t = 0; for(int i=0; i < len; i++) { byte c=b.get(offset+i); int index=__lookup[c&0x7f]; if (index>=0) { int idx=t*ROW_SIZE+index; t=_rowIndex[idx]; if (t==0) return null; } else { char[] big = _bigIndex==null?null:_bigIndex[t]; if (big==null) return null; t=big[c]; if (t==0) return null; } } return (V)_value[t]; } /* ------------------------------------------------------------ */ @Override public V getBest(byte[] b,int offset,int len) { return getBest(0,b,offset,len); } /* ------------------------------------------------------------ */ @Override public V getBest(ByteBuffer b,int offset,int len) { if (b.hasArray()) return getBest(0,b.array(),b.arrayOffset()+b.position()+offset,len); return getBest(0,b,offset,len); } /* ------------------------------------------------------------ */ @Override public V getBest(String s, int offset, int len) { return getBest(0,s,offset,len); } /* ------------------------------------------------------------ */ private V getBest(int t, String s, int offset, int len) { int pos=offset; for(int i=0; i < len; i++) { char c=s.charAt(pos++); int index=__lookup[c&0x7f]; if (index>=0) { int idx=t*ROW_SIZE+index; int nt=_rowIndex[idx]; if (nt==0) break; t=nt; } else { char[] big = _bigIndex==null?null:_bigIndex[t]; if (big==null) return null; int nt=big[c]; if (nt==0) break; t=nt; } // Is the next Trie is a match if (_key[t]!=null) { // Recurse so we can remember this possibility V best=getBest(t,s,offset+i+1,len-i-1); if (best!=null) return best; return (V)_value[t]; } } return (V)_value[t]; } /* ------------------------------------------------------------ */ private V getBest(int t,byte[] b,int offset,int len) { for(int i=0; i < len; i++) { byte c=b[offset+i]; int index=__lookup[c&0x7f]; if (index>=0) { int idx=t*ROW_SIZE+index; int nt=_rowIndex[idx]; if (nt==0) break; t=nt; } else { char[] big = _bigIndex==null?null:_bigIndex[t]; if (big==null) return null; int nt=big[c]; if (nt==0) break; t=nt; } // Is the next Trie is a match if (_key[t]!=null) { // Recurse so we can remember this possibility V best=getBest(t,b,offset+i+1,len-i-1); if (best!=null) return best; break; } } return (V)_value[t]; } private V getBest(int t,ByteBuffer b,int offset,int len) { int pos=b.position()+offset; for(int i=0; i < len; i++) { byte c=b.get(pos++); int index=__lookup[c&0x7f]; if (index>=0) { int idx=t*ROW_SIZE+index; int nt=_rowIndex[idx]; if (nt==0) break; t=nt; } else { char[] big = _bigIndex==null?null:_bigIndex[t]; if (big==null) return null; int nt=big[c]; if (nt==0) break; t=nt; } // Is the next Trie is a match if (_key[t]!=null) { // Recurse so we can remember this possibility V best=getBest(t,b,offset+i+1,len-i-1); if (best!=null) return best; break; } } return (V)_value[t]; } @Override public String toString() { StringBuilder buf = new StringBuilder(); toString(buf,0); if (buf.length()==0) return "{}"; buf.setCharAt(0,'{'); buf.append('}'); return buf.toString(); } private void toString(Appendable out, int t) { if (_value[t]!=null) { try { out.append(','); out.append(_key[t]); out.append('='); out.append(_value[t].toString()); } catch (IOException e) { throw new RuntimeException(e); } } for(int i=0; i < ROW_SIZE; i++) { int idx=t*ROW_SIZE+i; if (_rowIndex[idx] != 0) toString(out,_rowIndex[idx]); } char[] big = _bigIndex==null?null:_bigIndex[t]; if (big!=null) { for (int i:big) if (i!=0) toString(out,i); } } @Override public Set keySet() { Set keys = new HashSet<>(); keySet(keys,0); return keys; } private void keySet(Set set, int t) { if (t<_value.length&&_value[t]!=null) set.add(_key[t]); for(int i=0; i < ROW_SIZE; i++) { int idx=t*ROW_SIZE+i; if (idx<_rowIndex.length && _rowIndex[idx] != 0) keySet(set,_rowIndex[idx]); } char[] big = _bigIndex==null||t>=_bigIndex.length?null:_bigIndex[t]; if (big!=null) { for (int i:big) if (i!=0) keySet(set,i); } } @Override public boolean isFull() { return _rows+1>=_key.length; }}
先看几个参数: 1 int ROW_SIZE=32,代表了26个字母表和其他6个常用字符,依次为字母a-z(不区分大小写)以及符号(+-:;.space)。不需要大索引即可在Trie树中查找的字符数。 2 int[] __lookup,索引查找表,字符映射为Trie行的一个索引byte。看索引表里>=0的位置,位于20、2B、2D、2E、3A、3B以及41至5A和61至7A的位置,查找ASCII编码表,可知分别为空格、加号、减号、句号、冒号、分号以及A-Z和a-z这32个字符。 3 char[] _rowIndex,一个允许查找的array,第一行的ROW_SIZE为0,下一行ROW_SIZE则为1。所以我们一般不使用_rows[row][index],而用_rows[row*ROW_SIZE+index]来找到给定字符的行。 4 String[] _key,Trie行的标记,可以是leaf、node或同时存在。 V[] _value,key对应的值 5 char[][] _bigIndex,当一个字符在索引查找表外时,一个大索引就建立了,标示了256项可能的byte。 6 char _rows,分配的行数,指针。
然后看构造方法,默认为ArrayTrie(128)。@SuppressWarnings("unchecked") public ArrayTrie(int capacity) { super(true); //不区分大小写 _value=(V[])new Object[capacity]; //128个V的数组,用于存放值 _rowIndex=new char[capacity*32]; //128*32个字符,用于存放行索引 _key=new String[capacity]; //128个字符串数组,用于存放key }
然后再看主要方法put、get和getBest。
@Override public boolean put(String s, V v) { int t=0; //行数 int k; int limit = s.length(); for(k=0; k < limit; k++) { char c=s.charAt(k); //遍历字符串s的字符 int index=__lookup[c&0x7f]; //0x7f=127,8位相当于首位清零 if (index>=0) //是32个字符之一 { int idx=t*ROW_SIZE+index; t=_rowIndex[idx]; //第一行时t=0 if (t==0) { if (++_rows>=_value.length)//行数指针下移,_value.length默认128 return false; t=_rowIndex[idx]=_rows;//将行数指针放入行索引 } } else if (c>127) throw new IllegalArgumentException("non ascii character"); else { if (_bigIndex==null) _bigIndex=new char[_value.length][]; if (t>=_bigIndex.length) return false; char[] big=_bigIndex[t]; if (big==null) big=_bigIndex[t]=new char[128]; t=big[c]; if (t==0) { if (_rows==_value.length) return false; t=big[c]=++_rows; } } } if (t>=_key.length) { _rows=(char)_key.length; return false; } _key[t]=v==null?null:s; _value[t] = v; return true; }