import pathlib
from datetime import timedelta, datetime

import jinja2

from ob_jobs.common import human_duration

# Selection parameters for the "top slow SQL" section of the daily report.
_MOST_FREQUENT_LIMIT = 1000  # rows kept after ranking by the 'cnt' column
_SLOW_THRESHOLD_MS = 1000  # 'avg_du' must be strictly greater than this
_TOP_SLOW_LIMIT = 10  # rows finally handed to the template

# Name of the template looked up inside the templates directory.
_TEMPLATE_FILE = "daily_report.md"


def daily_report(templates_path, report_dir, err_df, query_df, insert_df, client_df):
    """Render today's report and write it to ``<report_dir>/<YYYY-MM-DD>.md``.

    The report lists the slowest frequently-seen queries. Selection steps:

    1. Sort ``query_df`` by ``cnt`` descending and keep the first 1000 rows.
    2. Sort those rows by ``avg_du`` descending.
    3. Keep only rows whose ``avg_du`` is strictly greater than 1000.
    4. Keep the first 10 rows.

    ``avg_du`` is interpreted as milliseconds (it is passed to
    ``timedelta(milliseconds=...)``) and replaced by the result of
    ``human_duration`` before rendering.

    Args:
        templates_path: Directory searched for the ``daily_report.md`` template.
        report_dir: Directory the report is written to. It is created
            (including parents) when it does not exist yet.
        err_df: Currently unused; kept for interface compatibility.
        query_df: DataFrame providing at least the columns ``cnt``, ``query``
            and ``avg_du``.
        insert_df: Currently unused; kept for interface compatibility.
        client_df: Currently unused; kept for interface compatibility.

    Returns:
        None. The rendered report is written to disk, overwriting any
        existing file for the same date.
    """
    # count top 1000, avg_du top 10
    top_slow_sql_df = (
        query_df
        .sort_values(by='cnt', ascending=False)
        .head(_MOST_FREQUENT_LIMIT)[['query', 'avg_du']]
        .sort_values('avg_du', ascending=False)
    )
    top_slow_sql_df = top_slow_sql_df[top_slow_sql_df['avg_du'] > _SLOW_THRESHOLD_MS]
    # reset_index() keeps the original row label as an 'index' column, so the
    # rows handed to the template have the same shape as before.
    top_slow_sql_df = top_slow_sql_df.head(_TOP_SLOW_LIMIT).reset_index()
    top_slow_sql_df['avg_du'] = top_slow_sql_df['avg_du'].map(
        lambda x: human_duration(timedelta(milliseconds=x))
    )

    # The template receives a list of (position, row) tuples from iterrows().
    daily_report_content = render_daily_report(
        templates_path,
        top_slow_sql=list(top_slow_sql_df.iterrows()),
    )

    today = datetime.today().strftime("%Y-%m-%d")
    report_path = pathlib.Path(report_dir)
    # A missing report directory should not abort the job.
    report_path.mkdir(parents=True, exist_ok=True)
    daily_report_file_name = report_path.joinpath(f'{today}.md')

    # Write UTF-8 explicitly: query text may contain non-ASCII characters and
    # the default encoding of open() depends on the platform locale.
    with open(daily_report_file_name, 'w', encoding='utf-8') as f:
        f.write(daily_report_content)


def render_daily_report(tmp_path, top_slow_sql):
    """Render the ``daily_report.md`` template and return the resulting text.

    Args:
        tmp_path: Directory that jinja2's ``FileSystemLoader`` searches for
            the template.
        top_slow_sql: Value exposed to the template as ``top_slow_sql``.

    Returns:
        The rendered template as a string.
    """
    template_loader = jinja2.FileSystemLoader(searchpath=tmp_path)
    template_env = jinja2.Environment(loader=template_loader)
    template = template_env.get_template(_TEMPLATE_FILE)
    return template.render(top_slow_sql=top_slow_sql)