123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391 |
- // Fully persistent data structures. A persistent data structure is a data
- // structure that always preserves the previous version of itself when
- // it is modified. Such data structures are effectively immutable,
- // as their operations do not update the structure in-place, but instead
- // always yield a new structure.
- //
- // Persistent
- // data structures typically share structure among themselves. This allows
- // operations to avoid copying the entire data structure.
- package ps
- import (
- "bytes"
- "fmt"
- "unsafe"
- )
- // A Map associates unique keys (type string) with values (type Any).
- type Map interface {
- // IsNil returns true if the Map is empty
- IsNil() bool
- // Set returns a new map in which key and value are associated.
- // If the key didn't exist before, it's created; otherwise, the
- // associated value is changed.
- // This operation is O(log N) in the number of keys.
- Set(key string, value interface{}) Map
- // UnsafeMutableSet returns the same map in which key and value are associated in-place.
- // If the key didn't exist before, it's created; otherwise, the
- // associated value is changed.
- // This operation is O(log N) in the number of keys.
- // Only use UnsafeMutableSet if you are the only reference-holder of the Map.
- UnsafeMutableSet(key string, value interface{}) Map
- // Delete returns a new map with the association for key, if any, removed.
- // This operation is O(log N) in the number of keys.
- Delete(key string) Map
- // Lookup returns the value associated with a key, if any. If the key
- // exists, the second return value is true; otherwise, false.
- // This operation is O(log N) in the number of keys.
- Lookup(key string) (interface{}, bool)
- // Size returns the number of key value pairs in the map.
- // This takes O(1) time.
- Size() int
- // ForEach executes a callback on each key value pair in the map.
- ForEach(f func(key string, val interface{}))
- // Keys returns a slice with all keys in this map.
- // This operation is O(N) in the number of keys.
- Keys() []string
- String() string
- }
- // Immutable (i.e. persistent) associative array
- const childCount = 8
- const shiftSize = 3
- type tree struct {
- count int
- hash uint64 // hash of the key (used for tree balancing)
- key string
- value interface{}
- children [childCount]*tree
- }
- var nilMap = &tree{}
- // Recursively set nilMap's subtrees to point at itself.
- // This eliminates all nil pointers in the map structure.
- // All map nodes are created by cloning this structure so
- // they avoid the problem too.
- func init() {
- for i := range nilMap.children {
- nilMap.children[i] = nilMap
- }
- }
- // NewMap allocates a new, persistent map from strings to values of
- // any type.
- // This is currently implemented as a path-copying binary tree.
- func NewMap() Map {
- return nilMap
- }
- func (self *tree) IsNil() bool {
- return self == nilMap
- }
- // clone returns an exact duplicate of a tree node
- func (self *tree) clone() *tree {
- var m tree
- m = *self
- return &m
- }
- // constants for FNV-1a hash algorithm
- const (
- offset64 uint64 = 14695981039346656037
- prime64 uint64 = 1099511628211
- )
- type unsafeString struct {
- Data uintptr
- Len int
- }
- type unsafeSlice struct {
- Data uintptr
- Len int
- Cap int
- }
- var zeroByteSlice = []byte{}
- // bytesView returns a view of the string as a []byte.
- // It doesn't incur allocation and copying caused by conversion but it's
- // unsafe, use with care.
- func bytesView(v string) []byte {
- if len(v) == 0 {
- return zeroByteSlice
- }
- sx := (*unsafeString)(unsafe.Pointer(&v))
- bx := unsafeSlice{sx.Data, sx.Len, sx.Len}
- return *(*[]byte)(unsafe.Pointer(&bx))
- }
- // hashKey returns a hash code for a given string
- func hashKey(key string) uint64 {
- hash := offset64
- for _, b := range bytesView(key) {
- hash ^= uint64(b)
- hash *= prime64
- }
- return hash
- }
- // Set returns a new map similar to this one but with key and value
- // associated. If the key didn't exist, it's created; otherwise, the
- // associated value is changed.
- func (self *tree) Set(key string, value interface{}) Map {
- hash := hashKey(key)
- return setLowLevel(self, hash, hash, key, value)
- }
- func setLowLevel(self *tree, partialHash, hash uint64, key string, value interface{}) *tree {
- if self.IsNil() { // an empty tree is easy
- m := self.clone()
- m.count = 1
- m.hash = hash
- m.key = key
- m.value = value
- return m
- }
- if hash != self.hash {
- m := self.clone()
- i := partialHash % childCount
- m.children[i] = setLowLevel(self.children[i], partialHash>>shiftSize, hash, key, value)
- // update count if we added a new object
- if m.children[i].count > self.children[i].count {
- m.count++
- }
- return m
- }
- // did we find a hash collision?
- if key != self.key {
- oops := fmt.Sprintf("Hash collision between: '%s' and '%s'. Please report to https://github.com/mndrix/ps/issues/new", self.key, key)
- panic(oops)
- }
- // replacing a key's previous value
- m := self.clone()
- m.value = value
- return m
- }
- // UnsafeMutableSet is the in-place mutable version of Set. Only use if
- // you are the only reference-holder of the Map.
- func (self *tree) UnsafeMutableSet(key string, value interface{}) Map {
- hash := hashKey(key)
- return mutableSetLowLevel(self, hash, hash, key, value)
- }
- func mutableSetLowLevel(self *tree, partialHash, hash uint64, key string, value interface{}) *tree {
- if self.IsNil() { // an empty tree is easy
- m := self.clone()
- m.count = 1
- m.hash = hash
- m.key = key
- m.value = value
- return m
- }
- if hash != self.hash {
- i := partialHash % childCount
- oldChildCount := self.children[i].count
- self.children[i] = mutableSetLowLevel(self.children[i], partialHash>>shiftSize, hash, key, value)
- // update count if we added a new object
- if oldChildCount < self.children[i].count {
- self.count++
- }
- return self
- }
- // did we find a hash collision?
- if key != self.key {
- oops := fmt.Sprintf("Hash collision between: '%s' and '%s'. Please report to https://github.com/mndrix/ps/issues/new", self.key, key)
- panic(oops)
- }
- // replacing a key's previous value
- self.value = value
- return self
- }
- // modifies a map by recalculating its key count based on the counts
- // of its subtrees
- func recalculateCount(m *tree) {
- count := 0
- for _, t := range m.children {
- count += t.Size()
- }
- m.count = count + 1 // add one to count ourself
- }
- func (m *tree) Delete(key string) Map {
- hash := hashKey(key)
- newMap, _ := deleteLowLevel(m, hash, hash)
- return newMap
- }
- func deleteLowLevel(self *tree, partialHash, hash uint64) (*tree, bool) {
- // empty trees are easy
- if self.IsNil() {
- return self, false
- }
- if hash != self.hash {
- i := partialHash % childCount
- child, found := deleteLowLevel(self.children[i], partialHash>>shiftSize, hash)
- if !found {
- return self, false
- }
- newMap := self.clone()
- newMap.children[i] = child
- recalculateCount(newMap)
- return newMap, true // ? this wasn't in the original code
- }
- // we must delete our own node
- if self.isLeaf() { // we have no children
- return nilMap, true
- }
- /*
- if self.subtreeCount() == 1 { // only one subtree
- for _, t := range self.children {
- if t != nilMap {
- return t, true
- }
- }
- panic("Tree with 1 subtree actually had no subtrees")
- }
- */
- // find a node to replace us
- i := -1
- size := -1
- for j, t := range self.children {
- if t.Size() > size {
- i = j
- size = t.Size()
- }
- }
- // make chosen leaf smaller
- replacement, child := self.children[i].deleteLeftmost()
- newMap := replacement.clone()
- for j := range self.children {
- if j == i {
- newMap.children[j] = child
- } else {
- newMap.children[j] = self.children[j]
- }
- }
- recalculateCount(newMap)
- return newMap, true
- }
- // delete the leftmost node in a tree returning the node that
- // was deleted and the tree left over after its deletion
- func (m *tree) deleteLeftmost() (*tree, *tree) {
- if m.isLeaf() {
- return m, nilMap
- }
- for i, t := range m.children {
- if t != nilMap {
- deleted, child := t.deleteLeftmost()
- newMap := m.clone()
- newMap.children[i] = child
- recalculateCount(newMap)
- return deleted, newMap
- }
- }
- panic("Tree isn't a leaf but also had no children. How does that happen?")
- }
- // isLeaf returns true if this is a leaf node
- func (m *tree) isLeaf() bool {
- return m.Size() == 1
- }
- // returns the number of child subtrees we have
- func (m *tree) subtreeCount() int {
- count := 0
- for _, t := range m.children {
- if t != nilMap {
- count++
- }
- }
- return count
- }
- func (m *tree) Lookup(key string) (interface{}, bool) {
- hash := hashKey(key)
- return lookupLowLevel(m, hash, hash)
- }
- func lookupLowLevel(self *tree, partialHash, hash uint64) (interface{}, bool) {
- if self.IsNil() { // an empty tree is easy
- return nil, false
- }
- if hash != self.hash {
- i := partialHash % childCount
- return lookupLowLevel(self.children[i], partialHash>>shiftSize, hash)
- }
- // we found it
- return self.value, true
- }
- func (m *tree) Size() int {
- return m.count
- }
- func (m *tree) ForEach(f func(key string, val interface{})) {
- if m.IsNil() {
- return
- }
- // ourself
- f(m.key, m.value)
- // children
- for _, t := range m.children {
- if t != nilMap {
- t.ForEach(f)
- }
- }
- }
- func (m *tree) Keys() []string {
- keys := make([]string, m.Size())
- i := 0
- m.ForEach(func(k string, v interface{}) {
- keys[i] = k
- i++
- })
- return keys
- }
- // make it easier to display maps for debugging
- func (m *tree) String() string {
- keys := m.Keys()
- buf := bytes.NewBufferString("{")
- for _, key := range keys {
- val, _ := m.Lookup(key)
- fmt.Fprintf(buf, "%s: %s, ", key, val)
- }
- fmt.Fprintf(buf, "}\n")
- return buf.String()
- }
|