critbit.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. package critbitgo
  2. import (
  3. "bytes"
  4. "encoding/hex"
  5. "fmt"
  6. "io"
  7. "os"
  8. "strconv"
  9. )
  10. // The matrix of most significant bit
  11. var msbMatrix [256]byte
  12. func buildMsbMatrix() {
  13. for i := 0; i < len(msbMatrix); i++ {
  14. b := byte(i)
  15. b |= b >> 1
  16. b |= b >> 2
  17. b |= b >> 4
  18. msbMatrix[i] = b &^ (b >> 1)
  19. }
  20. }
  21. type node struct {
  22. internal *internal
  23. external *external
  24. }
  25. type internal struct {
  26. child [2]node
  27. offset int
  28. bit byte
  29. cont bool // if true, key of child[1] contains key of child[0]
  30. }
  31. type external struct {
  32. key []byte
  33. value interface{}
  34. }
  35. // finding the critical bit.
  36. func (n *external) criticalBit(key []byte) (offset int, bit byte, cont bool) {
  37. nlen := len(n.key)
  38. klen := len(key)
  39. mlen := nlen
  40. if nlen > klen {
  41. mlen = klen
  42. }
  43. // find first differing byte and bit
  44. for offset = 0; offset < mlen; offset++ {
  45. if a, b := key[offset], n.key[offset]; a != b {
  46. bit = msbMatrix[a^b]
  47. return
  48. }
  49. }
  50. if nlen < klen {
  51. bit = msbMatrix[key[offset]]
  52. } else if nlen > klen {
  53. bit = msbMatrix[n.key[offset]]
  54. } else {
  55. // two keys are equal
  56. offset = -1
  57. }
  58. return offset, bit, true
  59. }
  60. // calculate direction.
  61. func (n *internal) direction(key []byte) int {
  62. if n.offset < len(key) && (key[n.offset]&n.bit != 0 || n.cont) {
  63. return 1
  64. }
  65. return 0
  66. }
  67. // Crit-bit Tree
  68. type Trie struct {
  69. root node
  70. size int
  71. }
  72. // searching the tree.
  73. func (t *Trie) search(key []byte) *node {
  74. n := &t.root
  75. for n.internal != nil {
  76. n = &n.internal.child[n.internal.direction(key)]
  77. }
  78. return n
  79. }
  80. // membership testing.
  81. func (t *Trie) Contains(key []byte) bool {
  82. if n := t.search(key); n.external != nil && bytes.Equal(n.external.key, key) {
  83. return true
  84. }
  85. return false
  86. }
  87. // get member.
  88. // if `key` is in Trie, `ok` is true.
  89. func (t *Trie) Get(key []byte) (value interface{}, ok bool) {
  90. if n := t.search(key); n.external != nil && bytes.Equal(n.external.key, key) {
  91. return n.external.value, true
  92. }
  93. return
  94. }
  95. // insert into the tree (replaceable).
  96. func (t *Trie) insert(key []byte, value interface{}, replace bool) bool {
  97. // an empty tree
  98. if t.size == 0 {
  99. t.root.external = &external{
  100. key: key,
  101. value: value,
  102. }
  103. t.size = 1
  104. return true
  105. }
  106. n := t.search(key)
  107. newOffset, newBit, newCont := n.external.criticalBit(key)
  108. // already exists in the tree
  109. if newOffset == -1 {
  110. if replace {
  111. n.external.value = value
  112. return true
  113. }
  114. return false
  115. }
  116. // allocate new node
  117. newNode := &internal{
  118. offset: newOffset,
  119. bit: newBit,
  120. cont: newCont,
  121. }
  122. direction := newNode.direction(key)
  123. newNode.child[direction].external = &external{
  124. key: key,
  125. value: value,
  126. }
  127. // insert new node
  128. wherep := &t.root
  129. for in := wherep.internal; in != nil; in = wherep.internal {
  130. if in.offset > newOffset || (in.offset == newOffset && in.bit < newBit) {
  131. break
  132. }
  133. wherep = &in.child[in.direction(key)]
  134. }
  135. if wherep.internal != nil {
  136. newNode.child[1-direction].internal = wherep.internal
  137. } else {
  138. newNode.child[1-direction].external = wherep.external
  139. wherep.external = nil
  140. }
  141. wherep.internal = newNode
  142. t.size += 1
  143. return true
  144. }
  145. // insert into the tree.
  146. // if `key` is alredy in Trie, return false.
  147. func (t *Trie) Insert(key []byte, value interface{}) bool {
  148. return t.insert(key, value, false)
  149. }
  150. // set into the tree.
  151. func (t *Trie) Set(key []byte, value interface{}) {
  152. t.insert(key, value, true)
  153. }
  154. // deleting elements.
  155. // if `key` is in Trie, `ok` is true.
  156. func (t *Trie) Delete(key []byte) (value interface{}, ok bool) {
  157. // an empty tree
  158. if t.size == 0 {
  159. return
  160. }
  161. var direction int
  162. var whereq *node // pointer to the grandparent
  163. var wherep *node = &t.root
  164. // finding the best candidate to delete
  165. for in := wherep.internal; in != nil; in = wherep.internal {
  166. direction = in.direction(key)
  167. whereq = wherep
  168. wherep = &in.child[direction]
  169. }
  170. // checking that we have the right element
  171. if !bytes.Equal(wherep.external.key, key) {
  172. return
  173. }
  174. value = wherep.external.value
  175. ok = true
  176. // removing the node
  177. if whereq == nil {
  178. wherep.external = nil
  179. } else {
  180. othern := whereq.internal.child[1-direction]
  181. whereq.internal = othern.internal
  182. whereq.external = othern.external
  183. }
  184. t.size -= 1
  185. return
  186. }
  187. // clearing a tree.
  188. func (t *Trie) Clear() {
  189. t.root.internal = nil
  190. t.root.external = nil
  191. t.size = 0
  192. }
  193. // return the number of key in a tree.
  194. func (t *Trie) Size() int {
  195. return t.size
  196. }
  197. // fetching elements with a given prefix.
  198. // handle is called with arguments key and value (if handle returns `false`, the iteration is aborted)
  199. func (t *Trie) Allprefixed(prefix []byte, handle func(key []byte, value interface{}) bool) bool {
  200. // an empty tree
  201. if t.size == 0 {
  202. return true
  203. }
  204. // walk tree, maintaining top pointer
  205. p := &t.root
  206. top := p
  207. if len(prefix) > 0 {
  208. for q := p.internal; q != nil; q = p.internal {
  209. p = &q.child[q.direction(prefix)]
  210. if q.offset < len(prefix) {
  211. top = p
  212. }
  213. }
  214. // check prefix
  215. if !bytes.Contains(p.external.key, prefix) {
  216. return true
  217. }
  218. }
  219. return allprefixed(top, handle)
  220. }
  221. func allprefixed(n *node, handle func([]byte, interface{}) bool) bool {
  222. if n.internal != nil {
  223. // dealing with an internal node while recursing
  224. for i := 0; i < 2; i++ {
  225. if !allprefixed(&n.internal.child[i], handle) {
  226. return false
  227. }
  228. }
  229. } else {
  230. // dealing with an external node while recursing
  231. return handle(n.external.key, n.external.value)
  232. }
  233. return true
  234. }
  235. // Search for the longest matching key from the beginning of the given key.
  236. // if `key` is in Trie, `ok` is true.
  237. func (t *Trie) LongestPrefix(given []byte) (key []byte, value interface{}, ok bool) {
  238. // an empty tree
  239. if t.size == 0 {
  240. return
  241. }
  242. return longestPrefix(&t.root, given)
  243. }
  244. func longestPrefix(n *node, key []byte) ([]byte, interface{}, bool) {
  245. if n.internal != nil {
  246. direction := n.internal.direction(key)
  247. if k, v, ok := longestPrefix(&n.internal.child[direction], key); ok {
  248. return k, v, ok
  249. }
  250. if direction == 1 {
  251. return longestPrefix(&n.internal.child[0], key)
  252. }
  253. } else {
  254. if bytes.HasPrefix(key, n.external.key) {
  255. return n.external.key, n.external.value, true
  256. }
  257. }
  258. return nil, nil, false
  259. }
  260. // Iterating elements from a given start key.
  261. // handle is called with arguments key and value (if handle returns `false`, the iteration is aborted)
  262. func (t *Trie) Walk(start []byte, handle func(key []byte, value interface{}) bool) bool {
  263. var seek bool
  264. if start != nil {
  265. seek = true
  266. }
  267. return walk(&t.root, start, &seek, handle)
  268. }
  269. func walk(n *node, key []byte, seek *bool, handle func([]byte, interface{}) bool) bool {
  270. if n.internal != nil {
  271. var direction int
  272. if *seek {
  273. direction = n.internal.direction(key)
  274. }
  275. if !walk(&n.internal.child[direction], key, seek, handle) {
  276. return false
  277. }
  278. if !(*seek) && direction == 0 {
  279. // iteration another side
  280. return walk(&n.internal.child[1], key, seek, handle)
  281. }
  282. return true
  283. } else {
  284. if *seek {
  285. if bytes.Equal(n.external.key, key) {
  286. // seek completed
  287. *seek = false
  288. } else {
  289. // key is not in Trie
  290. return false
  291. }
  292. }
  293. return handle(n.external.key, n.external.value)
  294. }
  295. }
  296. // dump tree. (for debugging)
  297. func (t *Trie) Dump(w io.Writer) {
  298. if t.root.internal == nil && t.root.external == nil {
  299. return
  300. }
  301. if w == nil {
  302. w = os.Stdout
  303. }
  304. dump(w, &t.root, true, "")
  305. }
  306. func dump(w io.Writer, n *node, right bool, prefix string) {
  307. var ownprefix string
  308. if right {
  309. ownprefix = prefix
  310. } else {
  311. ownprefix = prefix[:len(prefix)-1] + "`"
  312. }
  313. if in := n.internal; in != nil {
  314. fmt.Fprintf(w, "%s-- off=%d, bit=%08b(%02x), cont=%v\n", ownprefix, in.offset, in.bit, in.bit, in.cont)
  315. for i := 0; i < 2; i++ {
  316. var nextprefix string
  317. switch i {
  318. case 0:
  319. nextprefix = prefix + " |"
  320. right = true
  321. case 1:
  322. nextprefix = prefix + " "
  323. right = false
  324. }
  325. dump(w, &in.child[i], right, nextprefix)
  326. }
  327. } else {
  328. fmt.Fprintf(w, "%s-- key=%d (%s)\n", ownprefix, n.external.key, key2str(n.external.key))
  329. }
  330. return
  331. }
  332. func key2str(key []byte) string {
  333. for _, c := range key {
  334. if !strconv.IsPrint(rune(c)) {
  335. return hex.EncodeToString(key)
  336. }
  337. }
  338. return string(key)
  339. }
  340. // create a tree.
  341. func NewTrie() *Trie {
  342. return &Trie{}
  343. }
  344. func init() {
  345. buildMsbMatrix()
  346. }