// report.go — topology names and shape constants for reports.

package report

// "github.com/weaveworks/scope/common/xfer"

// Names of the various topologies.
  4. const (
  5. Endpoint = "endpoint"
  6. Process = "process"
  7. Container = "container"
  8. Pod = "pod"
  9. Service = "service"
  10. Deployment = "deployment"
  11. ReplicaSet = "replica_set"
  12. DaemonSet = "daemon_set"
  13. StatefulSet = "stateful_set"
  14. CronJob = "cron_job"
  15. Namespace = "namespace"
  16. ContainerImage = "container_image"
  17. Host = "host"
  18. Overlay = "overlay"
  19. ECSService = "ecs_service"
  20. ECSTask = "ecs_task"
  21. SwarmService = "swarm_service"
  22. PersistentVolume = "persistent_volume"
  23. PersistentVolumeClaim = "persistent_volume_claim"
  24. StorageClass = "storage_class"
  25. VolumeSnapshot = "volume_snapshot"
  26. VolumeSnapshotData = "volume_snapshot_data"
  27. Job = "job"
  28. // Shapes used for different nodes
  29. Circle = "circle"
  30. Triangle = "triangle"
  31. Square = "square"
  32. Pentagon = "pentagon"
  33. Hexagon = "hexagon"
  34. Heptagon = "heptagon"
  35. Octagon = "octagon"
  36. Cloud = "cloud"
  37. Cylinder = "cylinder"
  38. DottedCylinder = "dottedcylinder"
  39. StorageSheet = "sheet"
  40. Camera = "camera"
  41. DottedTriangle = "dottedtriangle"
  42. // Used when counting the number of containers
  43. ContainersKey = "containers"
  44. )
  45. // topologyNames are the names of all report topologies.
  46. var topologyNames = []string{
  47. Endpoint,
  48. Process,
  49. Container,
  50. ContainerImage,
  51. Pod,
  52. Service,
  53. Deployment,
  54. ReplicaSet,
  55. DaemonSet,
  56. StatefulSet,
  57. CronJob,
  58. Namespace,
  59. Host,
  60. Overlay,
  61. ECSTask,
  62. ECSService,
  63. SwarmService,
  64. PersistentVolume,
  65. PersistentVolumeClaim,
  66. StorageClass,
  67. VolumeSnapshot,
  68. VolumeSnapshotData,
  69. Job,
  70. }
  71. // Report is the core data type. It's produced by probes, and consumed and
  72. // stored by apps. It's composed of multiple topologies, each representing
  73. // a different (related, but not equivalent) view of the network.
  74. // type Report struct {
  75. // // TS is the time this report was generated
  76. // TS time.Time
  77. // // Endpoint nodes are individual (address, port) tuples on each host.
  78. // // They come from inspecting active connections and can (theoretically)
  79. // // be traced back to a process. Edges are present.
  80. // Endpoint Topology
  81. // // Process nodes are processes on each host. Edges are not present.
  82. // Process Topology
  83. // // Container nodes represent all Docker containers on hosts running probes.
  84. // // Metadata includes things like container id, name, image id etc.
  85. // // Edges are not present.
  86. // Container Topology
  87. // // Pod nodes represent all Kubernetes pods running on hosts running probes.
  88. // // Metadata includes things like pod id, name etc. Edges are not
  89. // // present.
  90. // Pod Topology
  91. // // Service nodes represent all Kubernetes services running on hosts running probes.
  92. // // Metadata includes things like service id, name etc. Edges are not
  93. // // present.
  94. // Service Topology
  95. // // Deployment nodes represent all Kubernetes deployments running on hosts running probes.
  96. // // Metadata includes things like deployment id, name etc. Edges are not
  97. // // present.
  98. // Deployment Topology
  99. // // ReplicaSet nodes represent all Kubernetes ReplicaSets running on hosts running probes.
  100. // // Metadata includes things like ReplicaSet id, name etc. Edges are not
  101. // // present.
  102. // ReplicaSet Topology
  103. // // DaemonSet nodes represent all Kubernetes DaemonSets running on hosts running probes.
  104. // // Metadata includes things like DaemonSet id, name etc. Edges are not
  105. // // present.
  106. // DaemonSet Topology
  107. // // StatefulSet nodes represent all Kubernetes Stateful Sets running on hosts running probes.
  108. // // Metadata includes things like Stateful Set id, name, etc. Edges are not
  109. // // present.
  110. // StatefulSet Topology
  111. // // CronJob nodes represent all Kubernetes Cron Jobs running on hosts running probes.
  112. // // Metadata includes things like Cron Job id, name, etc. Edges are not
  113. // // present.
  114. // CronJob Topology
  115. // // Namespace nodes represent all Kubernetes Namespaces running on hosts running probes.
  116. // // Metadata includes things like Namespace id, name, etc. Edges are not
  117. // // present.
  118. // Namespace Topology
  119. // // ContainerImage nodes represent all Docker container images on
  120. // // hosts running probes. Metadata includes things like image id, name etc.
  121. // // Edges are not present.
  122. // ContainerImage Topology
  123. // // Host nodes are physical hosts that run probes. Metadata includes things
  124. // // like operating system, load, etc. The information is scraped by the
  125. // // probes with each published report. Edges are not present.
  126. // Host Topology
  127. // // ECS Task nodes are AWS ECS tasks, which represent a group of containers.
  128. // // Metadata is limited for now, more to come later. Edges are not present.
  129. // ECSTask Topology
  130. // // ECS Service nodes are AWS ECS services, which represent a specification for a
  131. // // desired count of tasks with a task definition template.
  132. // // Metadata is limited for now, more to come later. Edges are not present.
  133. // ECSService Topology
  134. // // Swarm Service nodes are Docker Swarm services, which represent a specification for a
  135. // // group of tasks (either one per host, or a desired count).
  136. // // Edges are not present.
  137. // SwarmService Topology
  138. // // Overlay nodes are active peers in any software-defined network that's
  139. // // overlaid on the infrastructure. The information is scraped by polling
  140. // // their status endpoints. Edges are present.
  141. // Overlay Topology
  142. // // Persistent Volume nodes represent all Kubernetes Persistent Volumes running on hosts running probes.
  143. // // Metadata is limited for now, more to come later.
  144. // PersistentVolume Topology
  145. // // Persistent Volume Claim nodes represent all Kubernetes Persistent Volume Claims running on hosts running probes.
  146. // // Metadata is limited for now, more to come later.
  147. // PersistentVolumeClaim Topology
  148. // // StorageClass nodes represent all Kubernetes Storage Classes on hosts running probes.
  149. // // Metadata is limited for now, more to come later.
  150. // StorageClass Topology
  151. // // VolumeSnapshot represent all Kubernetes Volume Snapshots on hosts running probes.
  152. // VolumeSnapshot Topology
  153. // // VolumeSnapshotData represent all Kubernetes Volume Snapshot Data on hosts running probes.
  154. // VolumeSnapshotData Topology
  155. // // Job nodes represent all Kubernetes Jobs on hosts running probes.
  156. // Job Topology
  157. // DNS DNSRecords `json:"DNS,omitempty" deepequal:"nil==empty"`
  158. // // Backwards-compatibility for an accident in commit 951629a / release 1.11.6.
  159. // BugDNS DNSRecords `json:"nodes,omitempty"`
  160. // // Sampling data for this report.
  161. // Sampling Sampling
  162. // // Window is the amount of time that this report purports to represent.
  163. // // Windows must be carefully merged. They should only be added when
  164. // // reports cover non-overlapping periods of time. By default, we assume
  165. // // that's true, and add windows in merge operations. When that's not true,
  166. // // such as in the app, we expect the component to overwrite the window
  167. // // before serving it to consumers.
  168. // Window time.Duration
  169. // // Shortcut reports should be propagated to the UI as quickly as possible,
  170. // // bypassing the usual spy interval, publish interval and app ws interval.
  171. // Shortcut bool
  172. // Plugins xfer.PluginSpecs
  173. // // ID a random identifier for this report, used when caching
  174. // // rendered views of the report. Reports with the same id
  175. // // must be equal, but we don't require that equal reports have
  176. // // the same id.
  177. // ID string `deepequal:"skip"`
  178. // }
  179. // // MakeReport makes a clean report, ready to Merge() other reports into.
  180. // func MakeReport() Report {
  181. // return Report{
  182. // Endpoint: MakeTopology(),
  183. // Process: MakeTopology().
  184. // WithShape(Square).
  185. // WithLabel("process", "processes"),
  186. // Container: MakeTopology().
  187. // WithShape(Hexagon).
  188. // WithLabel("container", "containers"),
  189. // ContainerImage: MakeTopology().
  190. // WithShape(Hexagon).
  191. // WithLabel("image", "images"),
  192. // Host: MakeTopology().
  193. // WithShape(Circle).
  194. // WithLabel("host", "hosts"),
  195. // Pod: MakeTopology().
  196. // WithShape(Heptagon).
  197. // WithLabel("pod", "pods"),
  198. // Service: MakeTopology().
  199. // WithShape(Heptagon).
  200. // WithLabel("service", "services"),
  201. // Deployment: MakeTopology().
  202. // WithShape(Heptagon).
  203. // WithLabel("deployment", "deployments"),
  204. // ReplicaSet: MakeTopology().
  205. // WithShape(Triangle).
  206. // WithLabel("replica set", "replica sets"),
  207. // DaemonSet: MakeTopology().
  208. // WithShape(Pentagon).
  209. // WithLabel("daemonset", "daemonsets"),
  210. // StatefulSet: MakeTopology().
  211. // WithShape(Octagon).
  212. // WithLabel("stateful set", "stateful sets"),
  213. // CronJob: MakeTopology().
  214. // WithShape(Triangle).
  215. // WithLabel("cron job", "cron jobs"),
  216. // Namespace: MakeTopology(),
  217. // Overlay: MakeTopology().
  218. // WithShape(Circle).
  219. // WithLabel("peer", "peers"),
  220. // ECSTask: MakeTopology().
  221. // WithShape(Heptagon).
  222. // WithLabel("task", "tasks"),
  223. // ECSService: MakeTopology().
  224. // WithShape(Heptagon).
  225. // WithLabel("service", "services"),
  226. // SwarmService: MakeTopology().
  227. // WithShape(Heptagon).
  228. // WithLabel("service", "services"),
  229. // PersistentVolume: MakeTopology().
  230. // WithShape(Cylinder).
  231. // WithLabel("persistent volume", "persistent volumes"),
  232. // PersistentVolumeClaim: MakeTopology().
  233. // WithShape(DottedCylinder).
  234. // WithLabel("persistent volume claim", "persistent volume claims"),
  235. // StorageClass: MakeTopology().
  236. // WithShape(StorageSheet).
  237. // WithLabel("storage class", "storage classes"),
  238. // VolumeSnapshot: MakeTopology().
  239. // WithShape(DottedCylinder).
  240. // WithTag(Camera).
  241. // WithLabel("volume snapshot", "volume snapshots"),
  242. // VolumeSnapshotData: MakeTopology().
  243. // WithShape(Cylinder).
  244. // WithTag(Camera).
  245. // WithLabel("volume snapshot data", "volume snapshot data"),
  246. // Job: MakeTopology().
  247. // WithShape(DottedTriangle).
  248. // WithLabel("job", "jobs"),
  249. // DNS: DNSRecords{},
  250. // Sampling: Sampling{},
  251. // Window: 0,
  252. // Plugins: xfer.MakePluginSpecs(),
  253. // ID: fmt.Sprintf("%d", rand.Int63()),
  254. // }
  255. // }
  256. // // Copy returns a value copy of the report.
  257. // func (r Report) Copy() Report {
  258. // newReport := Report{
  259. // TS: r.TS,
  260. // DNS: r.DNS.Copy(),
  261. // Sampling: r.Sampling,
  262. // Window: r.Window,
  263. // Shortcut: r.Shortcut,
  264. // Plugins: r.Plugins.Copy(),
  265. // ID: fmt.Sprintf("%d", rand.Int63()),
  266. // }
  267. // newReport.WalkPairedTopologies(&r, func(newTopology, oldTopology *Topology) {
  268. // *newTopology = oldTopology.Copy()
  269. // })
  270. // return newReport
  271. // }
  272. // // UnsafeMerge merges another Report into the receiver. The original is modified.
  273. // func (r *Report) UnsafeMerge(other Report) {
  274. // // Merged report has the earliest non-zero timestamp
  275. // if !other.TS.IsZero() && (r.TS.IsZero() || other.TS.Before(r.TS)) {
  276. // r.TS = other.TS
  277. // }
  278. // r.DNS = r.DNS.Merge(other.DNS)
  279. // r.Sampling = r.Sampling.Merge(other.Sampling)
  280. // r.Window = r.Window + other.Window
  281. // r.Plugins = r.Plugins.Merge(other.Plugins)
  282. // r.WalkPairedTopologies(&other, func(ourTopology, theirTopology *Topology) {
  283. // ourTopology.UnsafeMerge(*theirTopology)
  284. // })
  285. // }
  286. // // UnsafeUnMerge removes any information from r that would be added by merging other.
  287. // // The original is modified.
  288. // func (r *Report) UnsafeUnMerge(other Report) {
  289. // // TODO: DNS, Sampling, Plugins
  290. // r.Window = r.Window - other.Window
  291. // r.WalkPairedTopologies(&other, func(ourTopology, theirTopology *Topology) {
  292. // ourTopology.UnsafeUnMerge(*theirTopology)
  293. // })
  294. // }
  295. // // UnsafeRemovePartMergedNodes removes nodes that have not fully re-merged.
  296. // // E.g. if a node is removed from source between two full reports, then we
  297. // // might only have a delta of its last state. Remove that from the set.
  298. // // The original is modified.
  299. // func (r *Report) UnsafeRemovePartMergedNodes(ctx context.Context) {
  300. // dropped := map[string]int{}
  301. // r.WalkNamedTopologies(func(name string, t *Topology) {
  302. // for k, v := range t.Nodes {
  303. // if v.isPartMerged() {
  304. // delete(t.Nodes, k)
  305. // dropped[name]++
  306. // }
  307. // }
  308. // })
  309. // if span := opentracing.SpanFromContext(ctx); span != nil && len(dropped) > 0 {
  310. // msg := ""
  311. // for name, count := range dropped {
  312. // msg += fmt.Sprintf("%s: %d, ", name, count)
  313. // }
  314. // span.LogKV("dropped-part-merged", msg)
  315. // }
  316. // }
  317. // // WalkTopologies iterates through the Topologies of the report,
  318. // // potentially modifying them
  319. // func (r *Report) WalkTopologies(f func(*Topology)) {
  320. // for _, name := range topologyNames {
  321. // f(r.topology(name))
  322. // }
  323. // }
  324. // // WalkNamedTopologies iterates through the Topologies of the report,
  325. // // potentially modifying them.
  326. // func (r *Report) WalkNamedTopologies(f func(string, *Topology)) {
  327. // for _, name := range topologyNames {
  328. // f(name, r.topology(name))
  329. // }
  330. // }
  331. // // WalkPairedTopologies iterates through the Topologies of this and another report,
  332. // // potentially modifying one or both.
  333. // func (r *Report) WalkPairedTopologies(o *Report, f func(*Topology, *Topology)) {
  334. // for _, name := range topologyNames {
  335. // f(r.topology(name), o.topology(name))
  336. // }
  337. // }
  338. // // topology returns a reference to one of the report's topologies,
  339. // // selected by name.
  340. // func (r *Report) topology(name string) *Topology {
  341. // switch name {
  342. // case Endpoint:
  343. // return &r.Endpoint
  344. // case Process:
  345. // return &r.Process
  346. // case Container:
  347. // return &r.Container
  348. // case ContainerImage:
  349. // return &r.ContainerImage
  350. // case Pod:
  351. // return &r.Pod
  352. // case Service:
  353. // return &r.Service
  354. // case Deployment:
  355. // return &r.Deployment
  356. // case ReplicaSet:
  357. // return &r.ReplicaSet
  358. // case DaemonSet:
  359. // return &r.DaemonSet
  360. // case StatefulSet:
  361. // return &r.StatefulSet
  362. // case CronJob:
  363. // return &r.CronJob
  364. // case Namespace:
  365. // return &r.Namespace
  366. // case Host:
  367. // return &r.Host
  368. // case Overlay:
  369. // return &r.Overlay
  370. // case ECSTask:
  371. // return &r.ECSTask
  372. // case ECSService:
  373. // return &r.ECSService
  374. // case SwarmService:
  375. // return &r.SwarmService
  376. // case PersistentVolume:
  377. // return &r.PersistentVolume
  378. // case PersistentVolumeClaim:
  379. // return &r.PersistentVolumeClaim
  380. // case StorageClass:
  381. // return &r.StorageClass
  382. // case VolumeSnapshot:
  383. // return &r.VolumeSnapshot
  384. // case VolumeSnapshotData:
  385. // return &r.VolumeSnapshotData
  386. // case Job:
  387. // return &r.Job
  388. // }
  389. // return nil
  390. // }
  391. // // Topology returns one of the report's topologies, selected by name.
  392. // func (r Report) Topology(name string) (Topology, bool) {
  393. // if t := r.topology(name); t != nil {
  394. // return *t, true
  395. // }
  396. // return Topology{}, false
  397. // }
  398. // // Validate checks the report for various inconsistencies.
  399. // func (r Report) Validate() error {
  400. // var errs []string
  401. // for _, name := range topologyNames {
  402. // if err := r.topology(name).Validate(); err != nil {
  403. // errs = append(errs, err.Error())
  404. // }
  405. // }
  406. // if r.Sampling.Count > r.Sampling.Total {
  407. // errs = append(errs, fmt.Sprintf("sampling count (%d) bigger than total (%d)", r.Sampling.Count, r.Sampling.Total))
  408. // }
  409. // if len(errs) > 0 {
  410. // return fmt.Errorf("%d error(s): %s", len(errs), strings.Join(errs, "; "))
  411. // }
  412. // return nil
  413. // }
  414. // // DropTopologiesOver - as a protection against overloading the app
  415. // // server, drop topologies that have really large node counts. In
  416. // // practice we only see this with runaway numbers of zombie processes.
  417. // func (r Report) DropTopologiesOver(limit int) (Report, []string) {
  418. // dropped := []string{}
  419. // r.WalkNamedTopologies(func(name string, topology *Topology) {
  420. // if topology != nil && len(topology.Nodes) > limit {
  421. // topology.Nodes = Nodes{}
  422. // dropped = append(dropped, name)
  423. // }
  424. // })
  425. // return r, dropped
  426. // }
  427. // // Summary returns a human-readable string summarising the contents, for diagnostic purposes
  428. // func (r Report) Summary() string {
  429. // ret := ""
  430. // if len(r.Host.Nodes) == 1 {
  431. // for k := range r.Host.Nodes {
  432. // ret = k + ": "
  433. // }
  434. // }
  435. // count := 0
  436. // r.WalkNamedTopologies(func(n string, t *Topology) {
  437. // if len(t.Nodes) > 0 {
  438. // count++
  439. // if count > 1 {
  440. // ret = ret + ", "
  441. // }
  442. // ret = ret + fmt.Sprintf("%s:%d", n, len(t.Nodes))
  443. // }
  444. // })
  445. // return ret
  446. // }
  447. // // Sampling describes how the packet data sources for this report were
  448. // // sampled. It can be used to calculate effective sample rates. We can't
  449. // // just put the rate here, because that can't be accurately merged. Counts
  450. // // in e.g. edge metadata structures have already been adjusted to
  451. // // compensate for the sample rate.
  452. // type Sampling struct {
  453. // Count uint64 // observed and processed
  454. // Total uint64 // observed overall
  455. // }
  456. // // Rate returns the effective sampling rate.
  457. // func (s Sampling) Rate() float64 {
  458. // if s.Total <= 0 {
  459. // return 1.0
  460. // }
  461. // return float64(s.Count) / float64(s.Total)
  462. // }
  463. // // Merge combines two sampling structures via simple addition and returns the
  464. // // result. The original is not modified.
  465. // func (s Sampling) Merge(other Sampling) Sampling {
  466. // return Sampling{
  467. // Count: s.Count + other.Count,
  468. // Total: s.Total + other.Total,
  469. // }
  470. // }
  471. // const (
  472. // // HostNodeID is a metadata foreign key, linking a node in any topology to
  473. // // a node in the host topology. That host node is the origin host, where
  474. // // the node was originally detected.
  475. // HostNodeID = "host_node_id"
  476. // // ControlProbeID is the random ID of the probe which controls the specific node.
  477. // ControlProbeID = "control_probe_id"
  478. // )