report.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. import pathlib
  2. from datetime import timedelta, datetime
  3. import jinja2
  4. from ob_jobs.common import human_duration
  5. def daily_report(templates_path, report_dir, err_df, query_df, insert_df, client_df):
  6. # count top 1000, avg_du top 10
  7. top_slow_sql_df = (
  8. query_df
  9. .sort_values(by='cnt', ascending=False)
  10. .head(1000)[['query', 'avg_du']]
  11. .sort_values('avg_du', ascending=False)
  12. )
  13. top_slow_sql_df = top_slow_sql_df[top_slow_sql_df['avg_du'] > 1000]
  14. top_slow_sql_df = (
  15. top_slow_sql_df
  16. .head(10)
  17. .reset_index()
  18. )
  19. top_slow_sql_df['avg_du'] = (top_slow_sql_df['avg_du']
  20. .map(lambda x: human_duration(timedelta(milliseconds=x)))
  21. )
  22. daily_report_content = render_daily_report(templates_path, top_slow_sql=list(top_slow_sql_df.iterrows()))
  23. today = datetime.today().strftime("%Y-%m-%d")
  24. daily_report_file_name = pathlib.Path(report_dir).joinpath(f'{today}.md')
  25. with open(daily_report_file_name, 'w') as f:
  26. f.write(daily_report_content)
  27. def render_daily_report(tmp_path, top_slow_sql):
  28. templateLoader = jinja2.FileSystemLoader(searchpath=tmp_path)
  29. templateEnv = jinja2.Environment(loader=templateLoader)
  30. TEMPLATE_FILE = "daily_report.md"
  31. template = templateEnv.get_template(TEMPLATE_FILE)
  32. return template.render(top_slow_sql=top_slow_sql)