map.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. // Fully persistent data structures. A persistent data structure is a data
  2. // structure that always preserves the previous version of itself when
  3. // it is modified. Such data structures are effectively immutable,
  4. // as their operations do not update the structure in-place, but instead
  5. // always yield a new structure.
  6. //
  7. // Persistent
  8. // data structures typically share structure among themselves. This allows
  9. // operations to avoid copying the entire data structure.
  10. package ps
  11. import (
  12. "bytes"
  13. "fmt"
  14. "unsafe"
  15. )
  16. // A Map associates unique keys (type string) with values (type Any).
  17. type Map interface {
  18. // IsNil returns true if the Map is empty
  19. IsNil() bool
  20. // Set returns a new map in which key and value are associated.
  21. // If the key didn't exist before, it's created; otherwise, the
  22. // associated value is changed.
  23. // This operation is O(log N) in the number of keys.
  24. Set(key string, value interface{}) Map
  25. // UnsafeMutableSet returns the same map in which key and value are associated in-place.
  26. // If the key didn't exist before, it's created; otherwise, the
  27. // associated value is changed.
  28. // This operation is O(log N) in the number of keys.
  29. // Only use UnsafeMutableSet if you are the only reference-holder of the Map.
  30. UnsafeMutableSet(key string, value interface{}) Map
  31. // Delete returns a new map with the association for key, if any, removed.
  32. // This operation is O(log N) in the number of keys.
  33. Delete(key string) Map
  34. // Lookup returns the value associated with a key, if any. If the key
  35. // exists, the second return value is true; otherwise, false.
  36. // This operation is O(log N) in the number of keys.
  37. Lookup(key string) (interface{}, bool)
  38. // Size returns the number of key value pairs in the map.
  39. // This takes O(1) time.
  40. Size() int
  41. // ForEach executes a callback on each key value pair in the map.
  42. ForEach(f func(key string, val interface{}))
  43. // Keys returns a slice with all keys in this map.
  44. // This operation is O(N) in the number of keys.
  45. Keys() []string
  46. String() string
  47. }
  48. // Immutable (i.e. persistent) associative array
  49. const childCount = 8
  50. const shiftSize = 3
  51. type tree struct {
  52. count int
  53. hash uint64 // hash of the key (used for tree balancing)
  54. key string
  55. value interface{}
  56. children [childCount]*tree
  57. }
  58. var nilMap = &tree{}
  59. // Recursively set nilMap's subtrees to point at itself.
  60. // This eliminates all nil pointers in the map structure.
  61. // All map nodes are created by cloning this structure so
  62. // they avoid the problem too.
  63. func init() {
  64. for i := range nilMap.children {
  65. nilMap.children[i] = nilMap
  66. }
  67. }
  68. // NewMap allocates a new, persistent map from strings to values of
  69. // any type.
  70. // This is currently implemented as a path-copying binary tree.
  71. func NewMap() Map {
  72. return nilMap
  73. }
  74. func (self *tree) IsNil() bool {
  75. return self == nilMap
  76. }
  77. // clone returns an exact duplicate of a tree node
  78. func (self *tree) clone() *tree {
  79. var m tree
  80. m = *self
  81. return &m
  82. }
  83. // constants for FNV-1a hash algorithm
  84. const (
  85. offset64 uint64 = 14695981039346656037
  86. prime64 uint64 = 1099511628211
  87. )
  88. type unsafeString struct {
  89. Data uintptr
  90. Len int
  91. }
  92. type unsafeSlice struct {
  93. Data uintptr
  94. Len int
  95. Cap int
  96. }
  97. var zeroByteSlice = []byte{}
  98. // bytesView returns a view of the string as a []byte.
  99. // It doesn't incur allocation and copying caused by conversion but it's
  100. // unsafe, use with care.
  101. func bytesView(v string) []byte {
  102. if len(v) == 0 {
  103. return zeroByteSlice
  104. }
  105. sx := (*unsafeString)(unsafe.Pointer(&v))
  106. bx := unsafeSlice{sx.Data, sx.Len, sx.Len}
  107. return *(*[]byte)(unsafe.Pointer(&bx))
  108. }
  109. // hashKey returns a hash code for a given string
  110. func hashKey(key string) uint64 {
  111. hash := offset64
  112. for _, b := range bytesView(key) {
  113. hash ^= uint64(b)
  114. hash *= prime64
  115. }
  116. return hash
  117. }
  118. // Set returns a new map similar to this one but with key and value
  119. // associated. If the key didn't exist, it's created; otherwise, the
  120. // associated value is changed.
  121. func (self *tree) Set(key string, value interface{}) Map {
  122. hash := hashKey(key)
  123. return setLowLevel(self, hash, hash, key, value)
  124. }
  125. func setLowLevel(self *tree, partialHash, hash uint64, key string, value interface{}) *tree {
  126. if self.IsNil() { // an empty tree is easy
  127. m := self.clone()
  128. m.count = 1
  129. m.hash = hash
  130. m.key = key
  131. m.value = value
  132. return m
  133. }
  134. if hash != self.hash {
  135. m := self.clone()
  136. i := partialHash % childCount
  137. m.children[i] = setLowLevel(self.children[i], partialHash>>shiftSize, hash, key, value)
  138. // update count if we added a new object
  139. if m.children[i].count > self.children[i].count {
  140. m.count++
  141. }
  142. return m
  143. }
  144. // did we find a hash collision?
  145. if key != self.key {
  146. oops := fmt.Sprintf("Hash collision between: '%s' and '%s'. Please report to https://github.com/mndrix/ps/issues/new", self.key, key)
  147. panic(oops)
  148. }
  149. // replacing a key's previous value
  150. m := self.clone()
  151. m.value = value
  152. return m
  153. }
  154. // UnsafeMutableSet is the in-place mutable version of Set. Only use if
  155. // you are the only reference-holder of the Map.
  156. func (self *tree) UnsafeMutableSet(key string, value interface{}) Map {
  157. hash := hashKey(key)
  158. return mutableSetLowLevel(self, hash, hash, key, value)
  159. }
  160. func mutableSetLowLevel(self *tree, partialHash, hash uint64, key string, value interface{}) *tree {
  161. if self.IsNil() { // an empty tree is easy
  162. m := self.clone()
  163. m.count = 1
  164. m.hash = hash
  165. m.key = key
  166. m.value = value
  167. return m
  168. }
  169. if hash != self.hash {
  170. i := partialHash % childCount
  171. oldChildCount := self.children[i].count
  172. self.children[i] = mutableSetLowLevel(self.children[i], partialHash>>shiftSize, hash, key, value)
  173. // update count if we added a new object
  174. if oldChildCount < self.children[i].count {
  175. self.count++
  176. }
  177. return self
  178. }
  179. // did we find a hash collision?
  180. if key != self.key {
  181. oops := fmt.Sprintf("Hash collision between: '%s' and '%s'. Please report to https://github.com/mndrix/ps/issues/new", self.key, key)
  182. panic(oops)
  183. }
  184. // replacing a key's previous value
  185. self.value = value
  186. return self
  187. }
  188. // modifies a map by recalculating its key count based on the counts
  189. // of its subtrees
  190. func recalculateCount(m *tree) {
  191. count := 0
  192. for _, t := range m.children {
  193. count += t.Size()
  194. }
  195. m.count = count + 1 // add one to count ourself
  196. }
  197. func (m *tree) Delete(key string) Map {
  198. hash := hashKey(key)
  199. newMap, _ := deleteLowLevel(m, hash, hash)
  200. return newMap
  201. }
  202. func deleteLowLevel(self *tree, partialHash, hash uint64) (*tree, bool) {
  203. // empty trees are easy
  204. if self.IsNil() {
  205. return self, false
  206. }
  207. if hash != self.hash {
  208. i := partialHash % childCount
  209. child, found := deleteLowLevel(self.children[i], partialHash>>shiftSize, hash)
  210. if !found {
  211. return self, false
  212. }
  213. newMap := self.clone()
  214. newMap.children[i] = child
  215. recalculateCount(newMap)
  216. return newMap, true // ? this wasn't in the original code
  217. }
  218. // we must delete our own node
  219. if self.isLeaf() { // we have no children
  220. return nilMap, true
  221. }
  222. /*
  223. if self.subtreeCount() == 1 { // only one subtree
  224. for _, t := range self.children {
  225. if t != nilMap {
  226. return t, true
  227. }
  228. }
  229. panic("Tree with 1 subtree actually had no subtrees")
  230. }
  231. */
  232. // find a node to replace us
  233. i := -1
  234. size := -1
  235. for j, t := range self.children {
  236. if t.Size() > size {
  237. i = j
  238. size = t.Size()
  239. }
  240. }
  241. // make chosen leaf smaller
  242. replacement, child := self.children[i].deleteLeftmost()
  243. newMap := replacement.clone()
  244. for j := range self.children {
  245. if j == i {
  246. newMap.children[j] = child
  247. } else {
  248. newMap.children[j] = self.children[j]
  249. }
  250. }
  251. recalculateCount(newMap)
  252. return newMap, true
  253. }
  254. // delete the leftmost node in a tree returning the node that
  255. // was deleted and the tree left over after its deletion
  256. func (m *tree) deleteLeftmost() (*tree, *tree) {
  257. if m.isLeaf() {
  258. return m, nilMap
  259. }
  260. for i, t := range m.children {
  261. if t != nilMap {
  262. deleted, child := t.deleteLeftmost()
  263. newMap := m.clone()
  264. newMap.children[i] = child
  265. recalculateCount(newMap)
  266. return deleted, newMap
  267. }
  268. }
  269. panic("Tree isn't a leaf but also had no children. How does that happen?")
  270. }
  271. // isLeaf returns true if this is a leaf node
  272. func (m *tree) isLeaf() bool {
  273. return m.Size() == 1
  274. }
  275. // returns the number of child subtrees we have
  276. func (m *tree) subtreeCount() int {
  277. count := 0
  278. for _, t := range m.children {
  279. if t != nilMap {
  280. count++
  281. }
  282. }
  283. return count
  284. }
  285. func (m *tree) Lookup(key string) (interface{}, bool) {
  286. hash := hashKey(key)
  287. return lookupLowLevel(m, hash, hash)
  288. }
  289. func lookupLowLevel(self *tree, partialHash, hash uint64) (interface{}, bool) {
  290. if self.IsNil() { // an empty tree is easy
  291. return nil, false
  292. }
  293. if hash != self.hash {
  294. i := partialHash % childCount
  295. return lookupLowLevel(self.children[i], partialHash>>shiftSize, hash)
  296. }
  297. // we found it
  298. return self.value, true
  299. }
  300. func (m *tree) Size() int {
  301. return m.count
  302. }
  303. func (m *tree) ForEach(f func(key string, val interface{})) {
  304. if m.IsNil() {
  305. return
  306. }
  307. // ourself
  308. f(m.key, m.value)
  309. // children
  310. for _, t := range m.children {
  311. if t != nilMap {
  312. t.ForEach(f)
  313. }
  314. }
  315. }
  316. func (m *tree) Keys() []string {
  317. keys := make([]string, m.Size())
  318. i := 0
  319. m.ForEach(func(k string, v interface{}) {
  320. keys[i] = k
  321. i++
  322. })
  323. return keys
  324. }
  325. // make it easier to display maps for debugging
  326. func (m *tree) String() string {
  327. keys := m.Keys()
  328. buf := bytes.NewBufferString("{")
  329. for _, key := range keys {
  330. val, _ := m.Lookup(key)
  331. fmt.Fprintf(buf, "%s: %s, ", key, val)
  332. }
  333. fmt.Fprintf(buf, "}\n")
  334. return buf.String()
  335. }