Skip to content

Commit 2c2395c

Browse files
committed
feat: plotly express
The histograms, heatmaps and comparisons have been replaced with interactive Plotly graphs. Plotly.js is used to build the graphs on the fly from JSON. Initial tests show that Plotly reports are smaller in size than their matplotlib counterparts and take far less time to generate. Set the parameter 'online_report' to load plotly.js from a CDN server and use the report online; otherwise, plotly.js is embedded in the report so that it can also be used offline. BREAKING CHANGE: matplotlib-related config is removed
1 parent a1ed9eb commit 2c2395c

22 files changed

+531
-447
lines changed

NOTICE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
# pyyaml: https://github.com/yaml/pyyaml/blob/master/LICENSE
2222
# jinja2: https://github.com/noirbizarre/jinja2/blob/master/LICENSE
2323
# tqdm: https://github.com/tqdm/tqdm/blob/master/LICENCE
24-
# matplotlib: https://github.com/matplotlib/matplotlib/blob/master/LICENSE/LICENSE
24+
# plotly: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
2525
# joblib: https://github.com/joblib/joblib/blob/master/LICENSE.txt
2626
# pybase64: https://github.com/mayeut/pybase64/blob/master/LICENSE
2727
# htmlmin: https://github.com/mankyd/htmlmin/blob/master/LICENSE

popmon/config.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@
2929
# (see https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html for details)
3030
parallel_args = {"n_jobs": 1}
3131

32-
# Usage the `ing_matplotlib_theme`
33-
themed = True
34-
3532

3633
class SectionModel(BaseModel):
3734
name: str
@@ -108,7 +105,7 @@ class HistogramSectionModel(SectionModel):
108105
top_n: int = 20
109106
"""plot heatmap for top 'n' categories. default is 20 (optional)"""
110107

111-
cmap: str = "autumn_r"
108+
cmap: str = "ylorrd"
112109
"""colormap for histogram heatmaps"""
113110

114111

@@ -171,6 +168,9 @@ class Report(BaseModel):
171168
"""if True, show all the generated statistics in the report (optional)
172169
if set to False, then smaller show_stats (see below)"""
173170

171+
online_report: bool = True
172+
"""Use a CDN to host resources, or embed them into the report."""
173+
174174
show_stats: List[str] = [
175175
"distinct*",
176176
"filled*",
@@ -194,6 +194,9 @@ class Report(BaseModel):
194194
]
195195
"""list of statistic name patterns to show in the report. If None, show all (optional)"""
196196

197+
zline_color: List[str] = ["#FF0000", "#FFC800"]
198+
"""Configure the line colors in the barplots of the Comparisons and Profiles sections. The first and second elements of the list (hex color codes) replace the default red and yellow, respectively."""
199+
197200
section: Section = Section()
198201
"""Configuration for the individual sections"""
199202

popmon/notebooks/popmon_tutorial_advanced.ipynb

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,11 @@
467467
" store_key=\"report_sections\",\n",
468468
" settings=report_settings,\n",
469469
" ),\n",
470-
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
470+
" ReportGenerator(\n",
471+
" read_key=\"report_sections\",\n",
472+
" store_key=\"html_report\",\n",
473+
" settings=report_settings,\n",
474+
" ),\n",
471475
" ]\n",
472476
" super().__init__(modules)\n",
473477
"\n",
@@ -525,7 +529,11 @@
525529
" store_key=\"report_sections\",\n",
526530
" settings=report_settings,\n",
527531
" ),\n",
528-
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
532+
" ReportGenerator(\n",
533+
" read_key=\"report_sections\",\n",
534+
" store_key=\"html_report\",\n",
535+
" settings=report_settings,\n",
536+
" ),\n",
529537
" ]\n",
530538
" super().__init__(modules)\n",
531539
"\n",

popmon/pipeline/report_pipelines.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,9 @@ def __init__(
233233
settings=settings,
234234
),
235235
# generate report
236-
ReportGenerator(read_key=sections_key, store_key=store_key),
236+
ReportGenerator(
237+
read_key=sections_key, store_key=store_key, settings=settings
238+
),
237239
]
238240
if (
239241
isinstance(settings.report_filepath, (str, Path))

popmon/resources.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020

2121
# Resources lookup file for popmon
22-
22+
import json
2323
import pathlib
2424

2525
from jinja2 import Environment, FileSystemLoader
@@ -53,6 +53,31 @@
5353
_TEMPLATES_ENV.filters["fmt_metric"] = lambda x: x.replace("_", " ")
5454

5555

56+
def js_list(encoder, data):
57+
pairs = [js_val(encoder, v) for v in data]
58+
return "[" + ", ".join(pairs) + "]"
59+
60+
61+
def js_dict(encoder, data):
62+
pairs = [k + ": " + js_val(encoder, v) for k, v in data.items()]
63+
return "{" + ", ".join(pairs) + "}"
64+
65+
66+
def js_val(encoder, data):
67+
if isinstance(data, dict):
68+
val = js_dict(encoder, data)
69+
elif isinstance(data, list):
70+
val = js_list(encoder, data)
71+
else:
72+
val = encoder.encode(data)
73+
return val
74+
75+
76+
_TEMPLATES_ENV.filters["json_plot"] = lambda x: js_val(
77+
json.JSONEncoder(ensure_ascii=False), x
78+
)
79+
80+
5681
def _resource(resource_type, name: str) -> str:
5782
"""Return the full path filename of a resource.
5883

popmon/visualization/__init__.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,6 @@
2828
TrafficLightSectionGenerator,
2929
)
3030

31-
# set matplotlib backend to batch mode when running in shell
32-
# need to do this *before* matplotlib.pyplot gets imported
33-
from ..visualization.backend import set_matplotlib_backend
34-
35-
set_matplotlib_backend()
36-
37-
3831
__all__ = [
3932
"SectionGenerator",
4033
"HistogramSection",

popmon/visualization/alert_section_generator.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,11 @@ def transform(
150150
plots = [e for e in plots if len(e["plot"])]
151151

152152
features_w_metrics.append(
153-
{"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])}
153+
{
154+
"name": feature,
155+
"plot_type_layouts": {"traffic_lights": ""},
156+
"plots": sorted(plots, key=lambda plot: plot["name"]),
157+
}
154158
)
155159

156160
sections.append(

popmon/visualization/backend.py

Lines changed: 0 additions & 152 deletions
This file was deleted.

popmon/visualization/histogram_section.py

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -140,17 +140,33 @@ def transform(self, data_obj: dict, sections: Optional[list] = None):
140140
]
141141
plots = parallel(_plot_histograms, args)
142142

143+
plot_type_layouts = {}
144+
143145
# filter out potential empty plots
144146
plots = [e for e in plots if len(e["plot"])]
145147
plots = sorted(plots, key=lambda plot: plot["name"])
148+
if len(plots) > 0:
149+
plot_type_layouts["histogram"] = plots[0]["layout"]
146150

147151
# filter out potential empty heatmap plots, then prepend them to the sorted histograms
148-
hplots = [h for h in heatmaps if isinstance(h, dict) and len(h["plot"])]
149-
150-
plots = hplots + plots
152+
hplots = []
153+
for h in heatmaps:
154+
if isinstance(h, dict):
155+
if len(h["plot"]):
156+
hplots.append(h)
151157

152-
features_w_metrics.append({"name": feature, "plots": plots})
158+
if len(hplots) > 0:
159+
plot_type_layouts["heatmap"] = hplots[0]["layout"]
153160

161+
plots = hplots + plots
162+
# print(plot_types,layouts)
163+
features_w_metrics.append(
164+
{
165+
"name": feature,
166+
"plot_type_layouts": plot_type_layouts,
167+
"plots": plots,
168+
}
169+
)
154170
sections.append(
155171
{
156172
"section_title": self.section_name,
@@ -230,11 +246,17 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000):
230246
hists, feature, hist_names, y_label, is_num, is_ts
231247
)
232248
elif hc_list[0].n_dim == 2:
233-
plot = ""
249+
plot = {}
234250
else:
235-
plot = ""
251+
plot = {}
236252

237-
return {"name": date, "description": "", "plot": plot}
253+
return {
254+
"name": date,
255+
"type": "histogram",
256+
"description": "",
257+
"plot": plot.get("data", ""),
258+
"layout": plot.get("layout", ""),
259+
}
238260

239261

240262
def _plot_heatmap(
@@ -321,13 +343,15 @@ def _plot_heatmap(
321343
if isinstance(heatmaps, list):
322344
plot = [hist_lookup(heatmaps, hist_name) for hist_name in hist_names]
323345
elif isinstance(heatmaps, dict):
324-
plot = [heatmaps["plot"]]
346+
plot = [heatmaps]
325347

326348
plots = [
327349
{
328350
"name": hist_names_formatted[hist_name],
329-
"description": descriptions[hist_name],
330-
"plot": pl,
351+
"type": "heatmap",
352+
"description": "",
353+
"plot": pl["plot"],
354+
"layout": pl["layout"],
331355
"full_width": True,
332356
}
333357
for pl, hist_name in zip(plot, hist_names)
@@ -364,4 +388,4 @@ def get_top_categories(entries_list, bins, top_n):
364388
def hist_lookup(plot, hist_name):
365389
for pl in plot:
366390
if pl["name"] == hist_name:
367-
return pl["plot"]
391+
return pl

popmon/visualization/overview_section.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ def _plot_metrics(
177177

178178
return {
179179
"name": "Alert frequency per Feature",
180+
"type": "alert",
180181
"description": "",
181182
"plot": plot,
182183
"full_width": True,

0 commit comments

Comments
 (0)