# metadata.yaml

type: apachespark

status:
  class: receiver
  stability:
    development: [metrics]
  distributions: [contrib]
  codeowners:
    active: [djaglowski, Caleb-Hurshman, mrsillydog]
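
# Usage note (not part of the mdatagen schema): this file drives code
# generation for the receiver's metrics builder. A minimal sketch of wiring
# the receiver into a collector pipeline, where the endpoint and interval
# shown are illustrative assumptions rather than values defined here:
#
#   receivers:
#     apachespark:
#       collection_interval: 60s
#       endpoint: http://localhost:4040
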
resource_attributes:
  spark.application.id:
    description: The ID of the application for which the metric was recorded.
    type: string
    enabled: true
  spark.application.name:
    description: The name of the application for which the metric was recorded.
    type: string
    enabled: true
  spark.stage.id:
    description: The ID of the application stage for which the metric was recorded.
    type: int
    enabled: true
  spark.stage.attempt.id:
    description: The ID of the stage attempt for which the metric was recorded.
    type: int
    enabled: true
  spark.executor.id:
    description: The ID of the executor for which the metric was recorded.
    type: string
    enabled: true
  spark.job.id:
    description: The ID of the job for which the metric was recorded.
    type: int
    enabled: true
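
# Note: deployments can toggle any resource attribute above in the generated
# receiver config. A sketch using the standard mdatagen toggle shape (shown
# for illustration, not defined in this file):
#
#   receivers:
#     apachespark:
#       resource_attributes:
#         spark.application.name:
#           enabled: false
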
attributes:
  stage_active:
    name_override: active
    description: Whether the stage for which the metric was recorded is active.
    type: bool
  stage_complete:
    name_override: complete
    description: Whether the stage for which the metric was recorded is complete.
    type: bool
  stage_pending:
    name_override: pending
    description: Whether the stage for which the metric was recorded is pending.
    type: bool
  stage_failed:
    name_override: failed
    description: Whether the stage for which the metric was recorded has failed.
    type: bool
  stage_task_result:
    name_override: result
    description: The result of the stage tasks for which the metric was recorded.
    type: string
    enum:
      - completed
      - failed
      - killed
  executor_task_result:
    name_override: result
    description: The result of the executor tasks for which the metric was recorded.
    type: string
    enum:
      - completed
      - failed
  job_result:
    name_override: result
    description: The result of the job stages or tasks for which the metric was recorded.
    type: string
    enum:
      - completed
      - failed
      - skipped
  direction:
    description: Whether the metric relates to input or output operations.
    type: string
    enum:
      - in
      - out
  source:
    description: The source from which data was fetched for the metric.
    type: string
    enum:
      - local
      - remote
  location:
    description: The location of the memory for which the metric was recorded.
    type: string
    enum:
      - on_heap
      - off_heap
  state:
    description: The state of the memory for which the metric was recorded.
    type: string
    enum:
      - used
      - free
  scheduler_status:
    name_override: status
    description: The status of the DAGScheduler stages for which the metric was recorded.
    type: string
    enum:
      - waiting
      - running
  pool_memory_type:
    name_override: type
    description: The type of pool memory for which the metric was recorded.
    type: string
    enum:
      - direct
      - mapped
  gc_type:
    description: The type of the garbage collection performed for the metric.
    type: string
    enum:
      - major
      - minor
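
# Note: several attributes above intentionally share a name_override (for
# example, stage_task_result, executor_task_result, and job_result all emit
# as "result"). This should be unambiguous because each attribute is attached
# to a different metric, so the rendered name never collides on one datapoint.
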
metrics:
  # stage
  spark.stage.status:
    description: A one-hot encoding representing the status of this stage.
    enabled: true
    sum:
      monotonic: false
      aggregation_temporality: cumulative
      value_type: int
    unit: "{ status }"
    attributes: [stage_active, stage_complete, stage_pending, stage_failed]
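
  # Note: "one-hot" here is understood to mean each datapoint carries the four
  # boolean status attributes with exactly one expected to be true, so
  # consumers can filter on, e.g., active == true to count active stages. This
  # reading is inferred from the description above, not stated by the schema.
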
  spark.stage.task.active:
    description: Number of active tasks in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ task }"
    attributes: []
  spark.stage.task.result:
    description: Number of tasks with a specific result in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ task }"
    attributes: [stage_task_result]
  spark.stage.executor.run_time:
    description: Amount of time spent by the executor in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: ms
    attributes: []
  spark.stage.executor.cpu_time:
    description: CPU time spent by the executor in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: ns
    attributes: []
  spark.stage.task.result_size:
    description: The amount of data transmitted back to the driver by all the tasks in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: []
  spark.stage.jvm_gc_time:
    description: The amount of time the JVM spent on garbage collection in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: ms
    attributes: []
  spark.stage.memory.spilled:
    description: The amount of memory moved to disk due to size constraints (spilled) in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: []
  spark.stage.disk.spilled:
    description: The amount of disk space used for storing portions of overly large data chunks that couldn't fit in memory in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: []
  spark.stage.memory.peak:
    description: Peak memory used by internal data structures created during shuffles, aggregations and joins in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: []
  spark.stage.io.size:
    description: Amount of data written and read in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: [direction]
  spark.stage.io.records:
    description: Number of records written and read in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ record }"
    attributes: [direction]
  spark.stage.shuffle.blocks_fetched:
    description: Number of blocks fetched in shuffle operations in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ block }"
    attributes: [source]
  spark.stage.shuffle.fetch_wait_time:
    description: Time spent in this stage waiting for remote shuffle blocks.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: ms
    attributes: []
  spark.stage.shuffle.io.disk:
    description: Amount of data read to disk in shuffle operations (sometimes required for large blocks, as opposed to the default behavior of reading into memory).
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: []
  spark.stage.shuffle.io.read.size:
    description: Amount of data read in shuffle operations in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: [source]
  spark.stage.shuffle.io.write.size:
    description: Amount of data written in shuffle operations in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: []
  spark.stage.shuffle.io.records:
    description: Number of records written or read in shuffle operations in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ record }"
    attributes: [direction]
  spark.stage.shuffle.write_time:
    description: Time spent blocking on writes to disk or buffer cache in this stage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: ns
    attributes: []
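
  # Note: individual metrics can likewise be disabled per deployment. A sketch
  # using one of the stage metrics above (standard mdatagen toggle shape,
  # shown for illustration):
  #
  #   receivers:
  #     apachespark:
  #       metrics:
  #         spark.stage.shuffle.write_time:
  #           enabled: false
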
  # executor
  spark.executor.memory.usage:
    description: Storage memory used by this executor.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: bytes
    attributes: []
  spark.executor.disk.usage:
    description: Disk space used by this executor for RDD storage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: bytes
    attributes: []
  spark.executor.task.limit:
    description: Maximum number of tasks that can run concurrently in this executor.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ task }"
    attributes: []
  spark.executor.task.active:
    description: Number of tasks currently running in this executor.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ task }"
    attributes: []
  spark.executor.task.result:
    description: Number of tasks with a specific result in this executor.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ task }"
    attributes: [executor_task_result]
  spark.executor.time:
    description: Elapsed time the JVM spent executing tasks in this executor.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: ms
    attributes: []
  spark.executor.gc_time:
    description: Elapsed time the JVM spent in garbage collection in this executor.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: ms
    attributes: []
  spark.executor.input_size:
    description: Amount of data input for this executor.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: []
  spark.executor.shuffle.io.size:
    description: Amount of data written and read during shuffle operations for this executor.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: bytes
    attributes: [direction]
  spark.executor.storage_memory.usage:
    description: The executor's storage memory usage.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: bytes
    attributes: [location, state]
  # job
  spark.job.task.active:
    description: Number of active tasks in this job.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ task }"
    attributes: []
  spark.job.task.result:
    description: Number of tasks with a specific result in this job.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ task }"
    attributes: [job_result]
  spark.job.stage.active:
    description: Number of active stages in this job.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ stage }"
    attributes: []
  spark.job.stage.result:
    description: Number of stages with a specific result in this job.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ stage }"
    attributes: [job_result]
  # driver
  spark.driver.block_manager.disk.usage:
    description: Disk space used by the BlockManager.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: mb
    attributes: []
  spark.driver.block_manager.memory.usage:
    description: Memory usage for the driver's BlockManager.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: mb
    attributes: [location, state]
  spark.driver.hive_external_catalog.file_cache_hits:
    description: Number of file cache hits on the HiveExternalCatalog.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ hit }"
    attributes: []
  spark.driver.hive_external_catalog.files_discovered:
    description: Number of files discovered while listing the partitions of a table in the Hive metastore.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ file }"
    attributes: []
  spark.driver.hive_external_catalog.hive_client_calls:
    description: Number of calls to the underlying Hive Metastore client made by the Spark application.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ call }"
    attributes: []
  spark.driver.hive_external_catalog.parallel_listing_jobs:
    description: Number of parallel listing jobs initiated by the HiveExternalCatalog when listing partitions of a table.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ listing_job }"
    attributes: []
  spark.driver.hive_external_catalog.partitions_fetched:
    description: Table partitions fetched by the HiveExternalCatalog.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ partition }"
    attributes: []
  spark.driver.code_generator.compilation.count:
    description: Number of source code compilation operations performed by the CodeGenerator.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ compilation }"
    attributes: []
  spark.driver.code_generator.compilation.average_time:
    description: Average time spent during CodeGenerator source code compilation operations.
    enabled: true
    gauge:
      value_type: double
    unit: ms
    attributes: []
  spark.driver.code_generator.generated_class.count:
    description: Number of classes generated by the CodeGenerator.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ class }"
    attributes: []
  spark.driver.code_generator.generated_class.average_size:
    description: Average class size of the classes generated by the CodeGenerator.
    enabled: true
    gauge:
      value_type: double
    unit: bytes
    attributes: []
  spark.driver.code_generator.generated_method.count:
    description: Number of methods generated by the CodeGenerator.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ method }"
    attributes: []
  spark.driver.code_generator.generated_method.average_size:
    description: Average method size of the classes generated by the CodeGenerator.
    enabled: true
    gauge:
      value_type: double
    unit: bytes
    attributes: []
  spark.driver.code_generator.source_code.operations:
    description: Number of source code generation operations performed by the CodeGenerator.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ operation }"
    attributes: []
  spark.driver.code_generator.source_code.average_size:
    description: Average size of the source code generated by a CodeGenerator code generation operation.
    enabled: true
    gauge:
      value_type: double
    unit: bytes
    attributes: []
  spark.driver.dag_scheduler.job.active:
    description: Number of active jobs currently being processed by the DAGScheduler.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ job }"
    attributes: []
  spark.driver.dag_scheduler.job.count:
    description: Number of jobs that have been submitted to the DAGScheduler.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ job }"
    attributes: []
  spark.driver.dag_scheduler.stage.failed:
    description: Number of failed stages run by the DAGScheduler.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ stage }"
    attributes: []
  spark.driver.dag_scheduler.stage.count:
    description: Number of stages the DAGScheduler is either running or needs to run.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ stage }"
    attributes: [scheduler_status]
  spark.driver.live_listener_bus.posted:
    description: Number of events that have been posted on the LiveListenerBus.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ event }"
    attributes: []
  spark.driver.live_listener_bus.processing_time.average:
    description: Average time taken for the LiveListenerBus to process an event posted to it.
    enabled: true
    gauge:
      value_type: double
    unit: ms
    attributes: []
  spark.driver.live_listener_bus.dropped:
    description: Number of events that have been dropped by the LiveListenerBus.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ event }"
    attributes: []
  spark.driver.live_listener_bus.queue_size:
    description: Number of events currently waiting to be processed by the LiveListenerBus.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: "{ event }"
    attributes: []
  spark.driver.jvm_cpu_time:
    description: Current CPU time taken by the Spark driver.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: ns
    attributes: []
  spark.driver.executor.memory.jvm:
    description: Amount of memory used by the driver's JVM.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: bytes
    attributes: [location]
  spark.driver.executor.memory.execution:
    description: Amount of execution memory currently used by the driver.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: bytes
    attributes: [location]
  spark.driver.executor.memory.storage:
    description: Amount of storage memory currently used by the driver.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: bytes
    attributes: [location]
  spark.driver.executor.memory.pool:
    description: Amount of pool memory currently used by the driver.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: false
      value_type: int
    unit: bytes
    attributes: [pool_memory_type]
  spark.driver.executor.gc.operations:
    description: Number of garbage collection operations performed by the driver.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: "{ gc_operation }"
    attributes: [gc_type]
  spark.driver.executor.gc.time:
    description: Total elapsed time during garbage collection operations performed by the driver.
    enabled: true
    sum:
      aggregation_temporality: cumulative
      monotonic: true
      value_type: int
    unit: ms
    attributes: [gc_type]
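
# Regeneration note: after editing this file, the receiver's generated code
# and documentation are typically refreshed with mdatagen (commonly invoked
# via `make generate` in opentelemetry-collector-contrib); the exact target
# is an assumption here, so check the repository's Makefile.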