hash.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. // Copyright The OpenTelemetry Authors
  2. // SPDX-License-Identifier: Apache-2.0
  3. package pdatautil // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil"
  4. import (
  5. "encoding/binary"
  6. "math"
  7. "sort"
  8. "sync"
  9. "github.com/cespare/xxhash/v2"
  10. "go.opentelemetry.io/collector/pdata/pcommon"
  11. )
  12. var (
  13. extraByte = []byte{'\xf3'}
  14. keyPrefix = []byte{'\xf4'}
  15. valEmpty = []byte{'\xf5'}
  16. valBytesPrefix = []byte{'\xf6'}
  17. valStrPrefix = []byte{'\xf7'}
  18. valBoolTrue = []byte{'\xf8'}
  19. valBoolFalse = []byte{'\xf9'}
  20. valIntPrefix = []byte{'\xfa'}
  21. valDoublePrefix = []byte{'\xfb'}
  22. valMapPrefix = []byte{'\xfc'}
  23. valMapSuffix = []byte{'\xfd'}
  24. valSlicePrefix = []byte{'\xfe'}
  25. valSliceSuffix = []byte{'\xff'}
  26. emptyHash = [16]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
  27. )
  28. type hashWriter struct {
  29. byteBuf []byte
  30. keysBuf []string
  31. }
  32. func newHashWriter() *hashWriter {
  33. return &hashWriter{
  34. byteBuf: make([]byte, 0, 512),
  35. keysBuf: make([]string, 0, 16),
  36. }
  37. }
  38. var hashWriterPool = &sync.Pool{
  39. New: func() any { return newHashWriter() },
  40. }
  41. // MapHash return a hash for the provided map.
  42. // Maps with the same underlying key/value pairs in different order produce the same deterministic hash value.
  43. func MapHash(m pcommon.Map) [16]byte {
  44. if m.Len() == 0 {
  45. return emptyHash
  46. }
  47. hw := hashWriterPool.Get().(*hashWriter)
  48. defer hashWriterPool.Put(hw)
  49. hw.byteBuf = hw.byteBuf[:0]
  50. hw.writeMapHash(m)
  51. return hw.hashSum128()
  52. }
  53. // ValueHash return a hash for the provided pcommon.Value.
  54. func ValueHash(v pcommon.Value) [16]byte {
  55. hw := hashWriterPool.Get().(*hashWriter)
  56. defer hashWriterPool.Put(hw)
  57. hw.byteBuf = hw.byteBuf[:0]
  58. hw.writeValueHash(v)
  59. return hw.hashSum128()
  60. }
  61. func (hw *hashWriter) writeMapHash(m pcommon.Map) {
  62. // For each recursive call into this function we want to preserve the previous buffer state
  63. // while also adding new keys to the buffer. nextIndex is the index of the first new key
  64. // added to the buffer for this call of the function.
  65. // This also works for the first non-recursive call of this function because the buffer is always empty
  66. // on the first call due to it being cleared of any added keys at then end of the function.
  67. nextIndex := len(hw.keysBuf)
  68. m.Range(func(k string, v pcommon.Value) bool {
  69. hw.keysBuf = append(hw.keysBuf, k)
  70. return true
  71. })
  72. // Get only the newly added keys from the buffer by slicing the buffer from nextIndex to the end
  73. workingKeySet := hw.keysBuf[nextIndex:]
  74. sort.Strings(workingKeySet)
  75. for _, k := range workingKeySet {
  76. v, _ := m.Get(k)
  77. hw.byteBuf = append(hw.byteBuf, keyPrefix...)
  78. hw.byteBuf = append(hw.byteBuf, k...)
  79. hw.writeValueHash(v)
  80. }
  81. // Remove all keys that were added to the buffer during this call of the function
  82. hw.keysBuf = hw.keysBuf[:nextIndex]
  83. }
  84. func (hw *hashWriter) writeValueHash(v pcommon.Value) {
  85. switch v.Type() {
  86. case pcommon.ValueTypeStr:
  87. hw.byteBuf = append(hw.byteBuf, valStrPrefix...)
  88. hw.byteBuf = append(hw.byteBuf, v.Str()...)
  89. case pcommon.ValueTypeBool:
  90. if v.Bool() {
  91. hw.byteBuf = append(hw.byteBuf, valBoolTrue...)
  92. } else {
  93. hw.byteBuf = append(hw.byteBuf, valBoolFalse...)
  94. }
  95. case pcommon.ValueTypeInt:
  96. hw.byteBuf = append(hw.byteBuf, valIntPrefix...)
  97. hw.byteBuf = binary.LittleEndian.AppendUint64(hw.byteBuf, uint64(v.Int()))
  98. case pcommon.ValueTypeDouble:
  99. hw.byteBuf = append(hw.byteBuf, valDoublePrefix...)
  100. hw.byteBuf = binary.LittleEndian.AppendUint64(hw.byteBuf, math.Float64bits(v.Double()))
  101. case pcommon.ValueTypeMap:
  102. hw.byteBuf = append(hw.byteBuf, valMapPrefix...)
  103. hw.writeMapHash(v.Map())
  104. hw.byteBuf = append(hw.byteBuf, valMapSuffix...)
  105. case pcommon.ValueTypeSlice:
  106. sl := v.Slice()
  107. hw.byteBuf = append(hw.byteBuf, valSlicePrefix...)
  108. for i := 0; i < sl.Len(); i++ {
  109. hw.writeValueHash(sl.At(i))
  110. }
  111. hw.byteBuf = append(hw.byteBuf, valSliceSuffix...)
  112. case pcommon.ValueTypeBytes:
  113. hw.byteBuf = append(hw.byteBuf, valBytesPrefix...)
  114. hw.byteBuf = append(hw.byteBuf, v.Bytes().AsRaw()...)
  115. case pcommon.ValueTypeEmpty:
  116. hw.byteBuf = append(hw.byteBuf, valEmpty...)
  117. }
  118. }
  119. // hashSum128 returns a [16]byte hash sum.
  120. func (hw *hashWriter) hashSum128() [16]byte {
  121. r := [16]byte{}
  122. res := r[:]
  123. h := xxhash.Sum64(hw.byteBuf)
  124. res = binary.LittleEndian.AppendUint64(res[:0], h)
  125. // Append an extra byte to generate another part of the hash sum
  126. hw.byteBuf = append(hw.byteBuf, extraByte...)
  127. h = xxhash.Sum64(hw.byteBuf)
  128. _ = binary.LittleEndian.AppendUint64(res[8:], h)
  129. return r
  130. }