metrics.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. // Copyright The OpenTelemetry Authors
  2. // SPDX-License-Identifier: Apache-2.0
  3. package vcenterreceiver // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/vcenterreceiver"
  4. import (
  5. "context"
  6. "github.com/vmware/govmomi/performance"
  7. "github.com/vmware/govmomi/vim25/mo"
  8. "github.com/vmware/govmomi/vim25/types"
  9. "go.opentelemetry.io/collector/pdata/pcommon"
  10. "go.opentelemetry.io/collector/receiver/scrapererror"
  11. "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/vcenterreceiver/internal/metadata"
  12. )
  13. func (v *vcenterMetricScraper) recordHostSystemMemoryUsage(
  14. now pcommon.Timestamp,
  15. hs mo.HostSystem,
  16. ) {
  17. s := hs.Summary
  18. h := s.Hardware
  19. z := s.QuickStats
  20. v.mb.RecordVcenterHostMemoryUsageDataPoint(now, int64(z.OverallMemoryUsage))
  21. memUtilization := 100 * float64(z.OverallMemoryUsage) / float64(h.MemorySize>>20)
  22. v.mb.RecordVcenterHostMemoryUtilizationDataPoint(now, memUtilization)
  23. v.mb.RecordVcenterHostCPUUsageDataPoint(now, int64(z.OverallCpuUsage))
  24. cpuUtilization := 100 * float64(z.OverallCpuUsage) / float64(int32(h.NumCpuCores)*h.CpuMhz)
  25. v.mb.RecordVcenterHostCPUUtilizationDataPoint(now, cpuUtilization)
  26. }
  27. func (v *vcenterMetricScraper) recordVMUsages(
  28. now pcommon.Timestamp,
  29. vm mo.VirtualMachine,
  30. hs mo.HostSystem,
  31. ) {
  32. memUsage := vm.Summary.QuickStats.GuestMemoryUsage
  33. balloonedMem := vm.Summary.QuickStats.BalloonedMemory
  34. swappedMem := vm.Summary.QuickStats.SwappedMemory
  35. swappedSSDMem := vm.Summary.QuickStats.SsdSwappedMemory
  36. if totalMemory := vm.Summary.Config.MemorySizeMB; totalMemory > 0 && memUsage > 0 {
  37. memoryUtilization := float64(memUsage) / float64(totalMemory) * 100
  38. v.mb.RecordVcenterVMMemoryUtilizationDataPoint(now, memoryUtilization)
  39. }
  40. v.mb.RecordVcenterVMMemoryUsageDataPoint(now, int64(memUsage))
  41. v.mb.RecordVcenterVMMemoryBalloonedDataPoint(now, int64(balloonedMem))
  42. v.mb.RecordVcenterVMMemorySwappedDataPoint(now, int64(swappedMem))
  43. v.mb.RecordVcenterVMMemorySwappedSsdDataPoint(now, swappedSSDMem)
  44. diskUsed := vm.Summary.Storage.Committed
  45. diskFree := vm.Summary.Storage.Uncommitted
  46. v.mb.RecordVcenterVMDiskUsageDataPoint(now, diskUsed, metadata.AttributeDiskStateUsed)
  47. v.mb.RecordVcenterVMDiskUsageDataPoint(now, diskFree, metadata.AttributeDiskStateAvailable)
  48. if diskFree != 0 {
  49. diskUtilization := float64(diskUsed) / float64(diskFree+diskUsed) * 100
  50. v.mb.RecordVcenterVMDiskUtilizationDataPoint(now, diskUtilization)
  51. }
  52. cpuUsage := vm.Summary.QuickStats.OverallCpuUsage
  53. if cpuUsage == 0 {
  54. // Most likely the VM is unavailable or is unreachable.
  55. return
  56. }
  57. v.mb.RecordVcenterVMCPUUsageDataPoint(now, int64(cpuUsage))
  58. // https://communities.vmware.com/t5/VMware-code-Documents/Resource-Management/ta-p/2783456
  59. // VirtualMachine.runtime.maxCpuUsage is a property of the virtual machine, indicating the limit value.
  60. // This value is always equal to the limit value set for that virtual machine.
  61. // If no limit, it has full host mhz * vm.Config.Hardware.NumCPU.
  62. cpuLimit := vm.Config.Hardware.NumCPU * hs.Summary.Hardware.CpuMhz
  63. if vm.Runtime.MaxCpuUsage != 0 {
  64. cpuLimit = vm.Runtime.MaxCpuUsage
  65. }
  66. if cpuLimit == 0 {
  67. // This shouldn't happen, but protect against division by zero.
  68. return
  69. }
  70. v.mb.RecordVcenterVMCPUUtilizationDataPoint(now, 100*float64(cpuUsage)/float64(cpuLimit))
  71. }
  72. func (v *vcenterMetricScraper) recordDatastoreProperties(
  73. now pcommon.Timestamp,
  74. ds mo.Datastore,
  75. ) {
  76. s := ds.Summary
  77. diskUsage := s.Capacity - s.FreeSpace
  78. diskUtilization := float64(diskUsage) / float64(s.Capacity) * 100
  79. v.mb.RecordVcenterDatastoreDiskUsageDataPoint(now, diskUsage, metadata.AttributeDiskStateUsed)
  80. v.mb.RecordVcenterDatastoreDiskUsageDataPoint(now, s.FreeSpace, metadata.AttributeDiskStateAvailable)
  81. v.mb.RecordVcenterDatastoreDiskUtilizationDataPoint(now, diskUtilization)
  82. }
  83. func (v *vcenterMetricScraper) recordResourcePool(
  84. now pcommon.Timestamp,
  85. rp mo.ResourcePool,
  86. ) {
  87. s := rp.Summary.GetResourcePoolSummary()
  88. if s.QuickStats != nil {
  89. v.mb.RecordVcenterResourcePoolCPUUsageDataPoint(now, s.QuickStats.OverallCpuUsage)
  90. v.mb.RecordVcenterResourcePoolMemoryUsageDataPoint(now, s.QuickStats.GuestMemoryUsage)
  91. }
  92. v.mb.RecordVcenterResourcePoolCPUSharesDataPoint(now, int64(s.Config.CpuAllocation.Shares.Shares))
  93. v.mb.RecordVcenterResourcePoolMemorySharesDataPoint(now, int64(s.Config.MemoryAllocation.Shares.Shares))
  94. }
  95. var hostPerfMetricList = []string{
  96. // network metrics
  97. "net.bytesTx.average",
  98. "net.bytesRx.average",
  99. "net.packetsTx.summation",
  100. "net.packetsRx.summation",
  101. "net.usage.average",
  102. "net.errorsRx.summation",
  103. "net.errorsTx.summation",
  104. // disk metrics
  105. "virtualDisk.totalWriteLatency.average",
  106. "disk.deviceReadLatency.average",
  107. "disk.deviceWriteLatency.average",
  108. "disk.kernelReadLatency.average",
  109. "disk.kernelWriteLatency.average",
  110. "disk.maxTotalLatency.latest",
  111. "disk.read.average",
  112. "disk.write.average",
  113. }
  114. func (v *vcenterMetricScraper) recordHostPerformanceMetrics(
  115. ctx context.Context,
  116. host mo.HostSystem,
  117. errs *scrapererror.ScrapeErrors,
  118. ) {
  119. spec := types.PerfQuerySpec{
  120. Entity: host.Reference(),
  121. MaxSample: 5,
  122. Format: string(types.PerfFormatNormal),
  123. MetricId: []types.PerfMetricId{{Instance: "*"}},
  124. // right now we are only grabbing real time metrics from the performance
  125. // manager
  126. IntervalId: int32(20),
  127. }
  128. info, err := v.client.performanceQuery(ctx, spec, hostPerfMetricList, []types.ManagedObjectReference{host.Reference()})
  129. if err != nil {
  130. errs.AddPartial(1, err)
  131. return
  132. }
  133. if v.emitPerfWithObject {
  134. v.processHostPerformance(info.results)
  135. } else {
  136. v.processHostPerformanceWithoutObject(info.results)
  137. }
  138. }
  139. // vmPerfMetricList may be customizable in the future but here is the full list of Virtual Machine Performance Counters
  140. // https://docs.vmware.com/en/vRealize-Operations/8.6/com.vmware.vcom.metrics.doc/GUID-1322F5A4-DA1D-481F-BBEA-99B228E96AF2.html
  141. var vmPerfMetricList = []string{
  142. // network metrics
  143. "net.packetsTx.summation",
  144. "net.packetsRx.summation",
  145. "net.bytesRx.average",
  146. "net.bytesTx.average",
  147. "net.usage.average",
  148. // disk metrics
  149. "disk.totalWriteLatency.average",
  150. "disk.totalReadLatency.average",
  151. "disk.maxTotalLatency.latest",
  152. "virtualDisk.totalWriteLatency.average",
  153. "virtualDisk.totalReadLatency.average",
  154. }
  155. func (v *vcenterMetricScraper) recordVMPerformance(
  156. ctx context.Context,
  157. vm mo.VirtualMachine,
  158. errs *scrapererror.ScrapeErrors,
  159. ) {
  160. spec := types.PerfQuerySpec{
  161. Entity: vm.Reference(),
  162. Format: string(types.PerfFormatNormal),
  163. // Just grabbing real time performance metrics of the current
  164. // supported metrics by this receiver. If more are added we may need
  165. // a system of making this user customizable or adapt to use a 5 minute interval per metric
  166. IntervalId: int32(20),
  167. }
  168. info, err := v.client.performanceQuery(ctx, spec, vmPerfMetricList, []types.ManagedObjectReference{vm.Reference()})
  169. if err != nil {
  170. errs.AddPartial(1, err)
  171. return
  172. }
  173. if v.emitPerfWithObject {
  174. v.processVMPerformanceMetrics(info)
  175. } else {
  176. v.processVMPerformanceMetricsWithoutObject(info)
  177. }
  178. }
  179. func (v *vcenterMetricScraper) processVMPerformanceMetrics(info *perfSampleResult) {
  180. for _, m := range info.results {
  181. for _, val := range m.Value {
  182. for j, nestedValue := range val.Value {
  183. si := m.SampleInfo[j]
  184. switch val.Name {
  185. // Performance monitoring level 1 metrics
  186. case "net.bytesTx.average":
  187. v.mb.RecordVcenterVMNetworkThroughputDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted, val.Instance)
  188. case "net.bytesRx.average":
  189. v.mb.RecordVcenterVMNetworkThroughputDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived, val.Instance)
  190. case "net.usage.average":
  191. v.mb.RecordVcenterVMNetworkUsageDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, val.Instance)
  192. case "net.packetsTx.summation":
  193. v.mb.RecordVcenterVMNetworkPacketCountDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted, val.Instance)
  194. case "net.packetsRx.summation":
  195. v.mb.RecordVcenterVMNetworkPacketCountDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived, val.Instance)
  196. // Performance monitoring level 2 metrics required
  197. case "disk.totalReadLatency.average":
  198. v.mb.RecordVcenterVMDiskLatencyAvgDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionRead, metadata.AttributeDiskTypePhysical, val.Instance)
  199. case "virtualDisk.totalReadLatency.average":
  200. v.mb.RecordVcenterVMDiskLatencyAvgDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionRead, metadata.AttributeDiskTypeVirtual, val.Instance)
  201. case "disk.totalWriteLatency.average":
  202. v.mb.RecordVcenterVMDiskLatencyAvgDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionWrite, metadata.AttributeDiskTypePhysical, val.Instance)
  203. case "virtualDisk.totalWriteLatency.average":
  204. v.mb.RecordVcenterVMDiskLatencyAvgDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionWrite, metadata.AttributeDiskTypeVirtual, val.Instance)
  205. case "disk.maxTotalLatency.latest":
  206. v.mb.RecordVcenterVMDiskLatencyMaxDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, val.Instance)
  207. }
  208. }
  209. }
  210. }
  211. }
  212. func (v *vcenterMetricScraper) processVMPerformanceMetricsWithoutObject(info *perfSampleResult) {
  213. for _, m := range info.results {
  214. for _, val := range m.Value {
  215. for j, nestedValue := range val.Value {
  216. si := m.SampleInfo[j]
  217. switch val.Name {
  218. // Performance monitoring level 1 metrics
  219. case "net.bytesTx.average":
  220. v.mb.RecordVcenterVMNetworkThroughputDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted)
  221. case "net.bytesRx.average":
  222. v.mb.RecordVcenterVMNetworkThroughputDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived)
  223. case "net.usage.average":
  224. v.mb.RecordVcenterVMNetworkUsageDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue)
  225. case "net.packetsTx.summation":
  226. v.mb.RecordVcenterVMNetworkPacketCountDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted)
  227. case "net.packetsRx.summation":
  228. v.mb.RecordVcenterVMNetworkPacketCountDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived)
  229. // Performance monitoring level 2 metrics required
  230. case "disk.totalReadLatency.average":
  231. v.mb.RecordVcenterVMDiskLatencyAvgDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionRead, metadata.AttributeDiskTypePhysical)
  232. case "virtualDisk.totalReadLatency.average":
  233. v.mb.RecordVcenterVMDiskLatencyAvgDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionRead, metadata.AttributeDiskTypeVirtual)
  234. case "disk.totalWriteLatency.average":
  235. v.mb.RecordVcenterVMDiskLatencyAvgDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionWrite, metadata.AttributeDiskTypePhysical)
  236. case "virtualDisk.totalWriteLatency.average":
  237. v.mb.RecordVcenterVMDiskLatencyAvgDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionWrite, metadata.AttributeDiskTypeVirtual)
  238. case "disk.maxTotalLatency.latest":
  239. v.mb.RecordVcenterVMDiskLatencyMaxDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue)
  240. }
  241. }
  242. }
  243. }
  244. }
  245. func (v *vcenterMetricScraper) processHostPerformance(metrics []performance.EntityMetric) {
  246. for _, m := range metrics {
  247. for _, val := range m.Value {
  248. for j, nestedValue := range val.Value {
  249. si := m.SampleInfo[j]
  250. switch val.Name {
  251. // Performance monitoring level 1 metrics
  252. case "net.usage.average":
  253. v.mb.RecordVcenterHostNetworkUsageDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, val.Instance)
  254. case "net.bytesTx.average":
  255. v.mb.RecordVcenterHostNetworkThroughputDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted, val.Instance)
  256. case "net.bytesRx.average":
  257. v.mb.RecordVcenterHostNetworkThroughputDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived, val.Instance)
  258. case "net.packetsTx.summation":
  259. v.mb.RecordVcenterHostNetworkPacketCountDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted, val.Instance)
  260. case "net.packetsRx.summation":
  261. v.mb.RecordVcenterHostNetworkPacketCountDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived, val.Instance)
  262. // Following requires performance level 2
  263. case "net.errorsRx.summation":
  264. v.mb.RecordVcenterHostNetworkPacketErrorsDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived, val.Instance)
  265. case "net.errorsTx.summation":
  266. v.mb.RecordVcenterHostNetworkPacketErrorsDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted, val.Instance)
  267. case "disk.totalWriteLatency.average":
  268. v.mb.RecordVcenterHostDiskLatencyAvgDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionWrite, val.Instance)
  269. case "disk.totalReadLatency.average":
  270. v.mb.RecordVcenterHostDiskLatencyAvgDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionRead, val.Instance)
  271. case "disk.maxTotalLatency.latest":
  272. v.mb.RecordVcenterHostDiskLatencyMaxDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, val.Instance)
  273. // Following requires performance level 4
  274. case "disk.read.average":
  275. v.mb.RecordVcenterHostDiskThroughputDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionRead, val.Instance)
  276. case "disk.write.average":
  277. v.mb.RecordVcenterHostDiskThroughputDataPoint(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionWrite, val.Instance)
  278. }
  279. }
  280. }
  281. }
  282. }
  283. func (v *vcenterMetricScraper) processHostPerformanceWithoutObject(metrics []performance.EntityMetric) {
  284. for _, m := range metrics {
  285. for _, val := range m.Value {
  286. for j, nestedValue := range val.Value {
  287. si := m.SampleInfo[j]
  288. switch val.Name {
  289. // Performance monitoring level 1 metrics
  290. case "net.usage.average":
  291. v.mb.RecordVcenterHostNetworkUsageDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue)
  292. case "net.bytesTx.average":
  293. v.mb.RecordVcenterHostNetworkThroughputDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted)
  294. case "net.bytesRx.average":
  295. v.mb.RecordVcenterHostNetworkThroughputDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived)
  296. case "net.packetsTx.summation":
  297. v.mb.RecordVcenterHostNetworkPacketCountDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted)
  298. case "net.packetsRx.summation":
  299. v.mb.RecordVcenterHostNetworkPacketCountDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived)
  300. // Following requires performance level 2
  301. case "net.errorsRx.summation":
  302. v.mb.RecordVcenterHostNetworkPacketErrorsDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionReceived)
  303. case "net.errorsTx.summation":
  304. v.mb.RecordVcenterHostNetworkPacketErrorsDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeThroughputDirectionTransmitted)
  305. case "disk.totalWriteLatency.average":
  306. v.mb.RecordVcenterHostDiskLatencyAvgDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionWrite)
  307. case "disk.totalReadLatency.average":
  308. v.mb.RecordVcenterHostDiskLatencyAvgDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionRead)
  309. case "disk.maxTotalLatency.latest":
  310. v.mb.RecordVcenterHostDiskLatencyMaxDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue)
  311. // Following requires performance level 4
  312. case "disk.read.average":
  313. v.mb.RecordVcenterHostDiskThroughputDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionRead)
  314. case "disk.write.average":
  315. v.mb.RecordVcenterHostDiskThroughputDataPointWithoutObject(pcommon.NewTimestampFromTime(si.Timestamp), nestedValue, metadata.AttributeDiskDirectionWrite)
  316. }
  317. }
  318. }
  319. }
  320. }