Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Panic when computing histogram on column with inf value #469

Open
jonmmease opened this issue Mar 6, 2024 · 2 comments
Open

Panic when computing histogram on column with inf value #469

jonmmease opened this issue Mar 6, 2024 · 2 comments

Comments

@jonmmease
Copy link
Collaborator

VegaFusion panics when computing a histogram on a column that contains infinite (`inf`) values. The following script reproduces the panic:

import json
import pandas as pd
import numpy as np
import vegafusion as vf

dataframe = pd.DataFrame({"col": [0, 1, 2, np.inf, 4]})

spec = json.loads(r"""

{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "background": "white",
  "padding": 5,
  "width": 200,
  "height": 200,
  "style": "cell",
  "data": [
    {"name": "interval_intervalselection_0_store"},
    {"name": "click_pointselection_0_store"},
    {"name": "dataframe", "url": "vegafusion+dataset://dataframe"},
    {
      "name": "data_0",
      "source": "dataframe",
      "transform": [
        {
          "type": "extent",
          "field": "col",
          "signal": "layer_0_layer_0_bin_maxbins_50_col_extent"
        },
        {
          "type": "bin",
          "field": "col",
          "as": ["__bin_field_name", "__bin_field_name_end"],
          "signal": "layer_0_layer_0_bin_maxbins_50_col_bins",
          "extent": {"signal": "layer_0_layer_0_bin_maxbins_50_col_extent"},
          "maxbins": 50
        },
        {
          "type": "aggregate",
          "groupby": ["__bin_field_name", "__bin_field_name_end"],
          "ops": ["count"],
          "fields": [null],
          "as": ["__count"]
        },
        {
          "type": "formula",
          "expr": "'[' + toString(datum[\"__bin_field_name\"]) + ', ' + toString(datum[\"__bin_field_name_end\"]) + ')'",
          "as": "__bin_range"
        },
        {
          "type": "filter",
          "expr": "isValid(datum[\"__bin_field_name\"]) && isFinite(+datum[\"__bin_field_name\"]) && isValid(datum[\"__count\"]) && isFinite(+datum[\"__count\"])"
        }
      ]
    }
  ],
  "signals": [
    {
      "name": "unit",
      "value": {},
      "on": [
        {"events": "pointermove", "update": "isTuple(group()) ? group() : unit"}
      ]
    },
    {
      "name": "interval_intervalselection_0",
      "update": "vlSelectionResolve(\"interval_intervalselection_0_store\", \"union\")"
    },
    {
      "name": "click_pointselection_0",
      "update": "vlSelectionResolve(\"click_pointselection_0_store\", \"union\", true, true)"
    },
    {
      "name": "interval_intervalselection_0_x",
      "value": [],
      "on": [
        {
          "events": {
            "source": "scope",
            "type": "pointerdown",
            "filter": [
              "!event.item || event.item.mark.name !== \"interval_intervalselection_0_brush\""
            ]
          },
          "update": "[x(unit), x(unit)]"
        },
        {
          "events": {
            "source": "window",
            "type": "pointermove",
            "consume": true,
            "between": [
              {
                "source": "scope",
                "type": "pointerdown",
                "filter": [
                  "!event.item || event.item.mark.name !== \"interval_intervalselection_0_brush\""
                ]
              },
              {"source": "window", "type": "pointerup"}
            ]
          },
          "update": "[interval_intervalselection_0_x[0], clamp(x(unit), 0, width)]"
        },
        {
          "events": {"signal": "interval_intervalselection_0_scale_trigger"},
          "update": "[scale(\"x\", interval_intervalselection_0___bin_field_name[0]), scale(\"x\", interval_intervalselection_0___bin_field_name[1])]"
        },
        {
          "events": [{"source": "view", "type": "dblclick"}],
          "update": "[0, 0]"
        },
        {
          "events": {"signal": "interval_intervalselection_0_translate_delta"},
          "update": "clampRange(panLinear(interval_intervalselection_0_translate_anchor.extent_x, interval_intervalselection_0_translate_delta.x / span(interval_intervalselection_0_translate_anchor.extent_x)), 0, width)"
        }
      ]
    },
    {
      "name": "interval_intervalselection_0___bin_field_name",
      "on": [
        {
          "events": {"signal": "interval_intervalselection_0_x"},
          "update": "interval_intervalselection_0_x[0] === interval_intervalselection_0_x[1] ? null : invert(\"x\", interval_intervalselection_0_x)"
        }
      ]
    },
    {
      "name": "interval_intervalselection_0_scale_trigger",
      "value": {},
      "on": [
        {
          "events": [{"scale": "x"}],
          "update": "(!isArray(interval_intervalselection_0___bin_field_name) || (+invert(\"x\", interval_intervalselection_0_x)[0] === +interval_intervalselection_0___bin_field_name[0] && +invert(\"x\", interval_intervalselection_0_x)[1] === +interval_intervalselection_0___bin_field_name[1])) ? interval_intervalselection_0_scale_trigger : {}"
        }
      ]
    },
    {
      "name": "interval_intervalselection_0_tuple",
      "on": [
        {
          "events": [
            {"signal": "interval_intervalselection_0___bin_field_name"}
          ],
          "update": "interval_intervalselection_0___bin_field_name ? {unit: \"layer_0_layer_0\", fields: interval_intervalselection_0_tuple_fields, values: [interval_intervalselection_0___bin_field_name]} : null"
        }
      ]
    },
    {
      "name": "interval_intervalselection_0_tuple_fields",
      "value": [{"field": "__bin_field_name", "channel": "x", "type": "R"}]
    },
    {
      "name": "interval_intervalselection_0_translate_anchor",
      "value": {},
      "on": [
        {
          "events": [
            {
              "source": "scope",
              "type": "pointerdown",
              "markname": "interval_intervalselection_0_brush"
            }
          ],
          "update": "{x: x(unit), y: y(unit), extent_x: slice(interval_intervalselection_0_x)}"
        }
      ]
    },
    {
      "name": "interval_intervalselection_0_translate_delta",
      "value": {},
      "on": [
        {
          "events": [
            {
              "source": "window",
              "type": "pointermove",
              "consume": true,
              "between": [
                {
                  "source": "scope",
                  "type": "pointerdown",
                  "markname": "interval_intervalselection_0_brush"
                },
                {"source": "window", "type": "pointerup"}
              ]
            }
          ],
          "update": "{x: interval_intervalselection_0_translate_anchor.x - x(unit), y: interval_intervalselection_0_translate_anchor.y - y(unit)}"
        }
      ]
    },
    {
      "name": "interval_intervalselection_0_modify",
      "on": [
        {
          "events": {"signal": "interval_intervalselection_0_tuple"},
          "update": "modify(\"interval_intervalselection_0_store\", interval_intervalselection_0_tuple, true)"
        }
      ]
    },
    {
      "name": "click_pointselection_0_tuple",
      "on": [
        {
          "events": [{"source": "scope", "type": "click"}],
          "update": "datum && item().mark.marktype !== 'group' && indexof(item().mark.role, 'legend') < 0 && indexof(item().mark.name, 'interval_intervalselection_0_brush') < 0 ? {unit: \"layer_0_layer_0\", fields: click_pointselection_0_tuple_fields, values: [(item().isVoronoi ? datum.datum : datum)[\"__bin_field_name\"]]} : null",
          "force": true
        },
        {"events": [{"source": "view", "type": "dblclick"}], "update": "null"}
      ]
    },
    {
      "name": "click_pointselection_0_tuple_fields",
      "value": [{"field": "__bin_field_name", "channel": "x", "type": "E"}]
    },
    {
      "name": "click_pointselection_0_toggle",
      "value": false,
      "on": [
        {
          "events": [{"source": "scope", "type": "click"}],
          "update": "event.shiftKey"
        },
        {"events": [{"source": "view", "type": "dblclick"}], "update": "false"}
      ]
    },
    {
      "name": "click_pointselection_0_modify",
      "on": [
        {
          "events": {"signal": "click_pointselection_0_tuple"},
          "update": "modify(\"click_pointselection_0_store\", click_pointselection_0_toggle ? null : click_pointselection_0_tuple, click_pointselection_0_toggle ? null : true, click_pointselection_0_toggle ? click_pointselection_0_tuple : null)"
        }
      ]
    }
  ],
  "marks": [
    {
      "name": "interval_intervalselection_0_brush_bg",
      "type": "rect",
      "clip": true,
      "encode": {
        "enter": {"fill": {"value": "#669EFF"}, "fillOpacity": {"value": 0.07}},
        "update": {
          "x": [
            {
              "test": "data(\"interval_intervalselection_0_store\").length && data(\"interval_intervalselection_0_store\")[0].unit === \"layer_0_layer_0\"",
              "signal": "interval_intervalselection_0_x[0]"
            },
            {"value": 0}
          ],
          "y": [
            {
              "test": "data(\"interval_intervalselection_0_store\").length && data(\"interval_intervalselection_0_store\")[0].unit === \"layer_0_layer_0\"",
              "value": 0
            },
            {"value": 0}
          ],
          "x2": [
            {
              "test": "data(\"interval_intervalselection_0_store\").length && data(\"interval_intervalselection_0_store\")[0].unit === \"layer_0_layer_0\"",
              "signal": "interval_intervalselection_0_x[1]"
            },
            {"value": 0}
          ],
          "y2": [
            {
              "test": "data(\"interval_intervalselection_0_store\").length && data(\"interval_intervalselection_0_store\")[0].unit === \"layer_0_layer_0\"",
              "field": {"group": "height"}
            },
            {"value": 0}
          ]
        }
      }
    },
    {
      "name": "layer_0_layer_0_marks",
      "type": "rect",
      "clip": true,
      "style": ["bar"],
      "interactive": true,
      "from": {"data": "data_0"},
      "encode": {
        "update": {
          "cursor": {"value": "pointer"},
          "fill": {"value": "#3e277a"},
          "opacity": [
            {
              "test": "!((!interval_intervalselection_0['__bin_field_name'] && !click_pointselection_0['__bin_field_name']) || ((interval_intervalselection_0['__bin_field_name'] && interval_intervalselection_0['__bin_field_name'][0] <= datum.__bin_field_name_end && datum.__bin_field_name <= interval_intervalselection_0['__bin_field_name'][1])) || ((click_pointselection_0['__bin_field_name'] && indexof(click_pointselection_0['__bin_field_name'] || [], datum.__bin_field_name) >= 0)))",
              "value": 0.3
            },
            {"value": 1}
          ],
          "tooltip": {
            "signal": "{\"Count of Records\": NUMBER_FORMATTER(datum[\"__count\"], {\"columnType\":\"NUMBER\",\"currency\":\"$\",\"format\":\"NUMBER\",\"nanFormat\":\"\",\"numDecimalDigits\":-1}), \"col\": isValid(datum[\"__bin_range\"]) ? datum[\"__bin_range\"] : \"\"+datum[\"__bin_range\"]}"
          },
          "ariaRoleDescription": {"value": "bar"},
          "description": {
            "signal": "\"col: \" + (format(datum[\"__bin_field_name\"], \"\")) + \"; Count of Records: \" + (format(datum[\"__count\"], \"\")) + \"; __bin_field_name_end: \" + (format(datum[\"__bin_field_name_end\"], \"\"))"
          },
          "x": {"scale": "x", "field": "__bin_field_name"},
          "x2": {"scale": "x", "field": "__bin_field_name_end", "offset": -1},
          "y": {"scale": "y", "field": "__count"},
          "y2": {"scale": "y", "value": 0}
        }
      }
    },
    {
      "name": "interval_intervalselection_0_brush",
      "type": "rect",
      "clip": true,
      "encode": {
        "enter": {"fill": {"value": "transparent"}},
        "update": {
          "x": [
            {
              "test": "data(\"interval_intervalselection_0_store\").length && data(\"interval_intervalselection_0_store\")[0].unit === \"layer_0_layer_0\"",
              "signal": "interval_intervalselection_0_x[0]"
            },
            {"value": 0}
          ],
          "y": [
            {
              "test": "data(\"interval_intervalselection_0_store\").length && data(\"interval_intervalselection_0_store\")[0].unit === \"layer_0_layer_0\"",
              "value": 0
            },
            {"value": 0}
          ],
          "x2": [
            {
              "test": "data(\"interval_intervalselection_0_store\").length && data(\"interval_intervalselection_0_store\")[0].unit === \"layer_0_layer_0\"",
              "signal": "interval_intervalselection_0_x[1]"
            },
            {"value": 0}
          ],
          "y2": [
            {
              "test": "data(\"interval_intervalselection_0_store\").length && data(\"interval_intervalselection_0_store\")[0].unit === \"layer_0_layer_0\"",
              "field": {"group": "height"}
            },
            {"value": 0}
          ],
          "stroke": [
            {
              "test": "interval_intervalselection_0_x[0] !== interval_intervalselection_0_x[1]",
              "value": "#669EFF"
            },
            {"value": null}
          ],
          "strokeOpacity": [
            {
              "test": "interval_intervalselection_0_x[0] !== interval_intervalselection_0_x[1]",
              "value": 0.4
            },
            {"value": null}
          ]
        }
      }
    }
  ],
  "scales": [
    {
      "name": "x",
      "type": "linear",
      "domain": {
        "data": "data_0",
        "fields": ["__bin_field_name", "__bin_field_name_end"]
      },
      "range": [0, {"signal": "width"}],
      "nice": true,
      "zero": true
    },
    {
      "name": "y",
      "type": "linear",
      "domain": {"fields": [{"data": "data_0", "field": "__count"}, [0]]},
      "range": [{"signal": "height"}, 0],
      "nice": true,
      "zero": true
    }
  ],
  "axes": [
    {
      "scale": "x",
      "orient": "bottom",
      "grid": true,
      "tickCount": 50,
      "gridScale": "y",
      "domain": false,
      "labels": false,
      "aria": false,
      "maxExtent": 0,
      "minExtent": 0,
      "ticks": false,
      "zindex": 0
    },
    {
      "scale": "y",
      "orient": "left",
      "grid": true,
      "gridScale": "x",
      "tickCount": {"signal": "ceil(height/40)"},
      "domain": false,
      "labels": false,
      "aria": false,
      "maxExtent": 0,
      "minExtent": 0,
      "ticks": false,
      "zindex": 0
    },
    {
      "scale": "x",
      "orient": "bottom",
      "grid": false,
      "title": "col",
      "labelFlush": false,
      "labels": true,
      "tickCount": 50,
      "ticks": true,
      "labelOverlap": true,
      "zindex": 0
    },
    {
      "scale": "y",
      "orient": "left",
      "grid": false,
      "title": "Count of Records",
      "labelFlush": false,
      "labels": true,
      "ticks": true,
      "labelOverlap": true,
      "tickCount": {"signal": "ceil(height/40)"},
      "zindex": 0
    }
  ]
}

""")

vf.runtime.pre_transform_spec(spec, inline_datasets=dict(dataframe=dataframe))
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_12/2474008022.py in <cell line: 449>()
    447 """)
    448 
--> 449 vf.runtime.pre_transform_spec(spec, inline_datasets=dict(dataframe=dataframe))

~/.cache/pypoetry/virtualenvs/python-kernel-OtKFaj5M-py3.9/lib/python3.9/site-packages/vegafusion/runtime.py in pre_transform_spec(self, spec, local_tz, default_input_tz, row_limit, preserve_interactivity, inline_datasets, keep_signals, keep_datasets, data_encoding_threshold, data_encoding_format)
    365             try:
    366                 if data_encoding_threshold is None:
--> 367                     new_spec, warnings = self.embedded_runtime.pre_transform_spec(
    368                         spec,
    369                         local_tz=local_tz,

ValueError: External error: task 2 panicked
    Context[0]: tokio error
    Context[1]: Failed to get node value
@jonmmease
Copy link
Collaborator Author

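I think what's going on is that the extent transform is including `inf` in the extent range. Running only the extent transform (and copying its signal into a column with a formula transform) shows that the computed extent is `[0.0, inf]`: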

import json
import pandas as pd
import numpy as np
import vegafusion as vf

dataframe = pd.DataFrame({"col": [0, 1, 2, np.inf, 4]})

spec = json.loads(r"""

{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "background": "white",
  "padding": 5,
  "width": 200,
  "height": 200,
  "style": "cell",
  "data": [
    {"name": "interval_intervalselection_0_store"},
    {"name": "click_pointselection_0_store"},
    {"name": "dataframe", "url": "vegafusion+dataset://dataframe"},
    {
      "name": "data_0",
      "source": "dataframe",
      "transform": [
        {
          "type": "extent",
          "field": "col",
          "signal": "layer_0_layer_0_bin_maxbins_50_col_extent"
        },
        {
          "type": "formula",
          "expr": "layer_0_layer_0_bin_maxbins_50_col_extent",
          "as": "extent"
        }
      ]
    }
  ]
}
""")

vf.runtime.pre_transform_datasets(spec, ["data_0"], inline_datasets=dict(dataframe=dataframe))
([   col      extent
  0  0.0  [0.0, inf]
  1  1.0  [0.0, inf]
  2  2.0  [0.0, inf]
  3  inf  [0.0, inf]
  4  4.0  [0.0, inf]],
 [])

@jonmmease
Copy link
Collaborator Author

Huh, it looks like in Vega the extent transform returns [null, null] when there are infinite values:

{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "background": "white",
  "padding": 5,
  "width": 200,
  "height": 200,
  "style": "cell",
  "data": [
    {"name": "interval_intervalselection_0_store"},
    {"name": "click_pointselection_0_store"},
    {
      "name": "dataframe", 
      "values": [
        {"col": 0},
        {"col": 1},
        {"col": 2},
        {"col": "Infinity"}, 
        {"col": 4}]
    },
    {
      "name": "data_0",
      "source": "dataframe",
      "transform": [
        {
          "type": "formula",
          "expr": "+datum.col",
          "as": "col"
        },
        {
          "type": "extent",
          "field": "col",
          "signal": "layer_0_layer_0_bin_maxbins_50_col_extent"
        },
        {
          "type": "formula",
          "expr": "layer_0_layer_0_bin_maxbins_50_col_extent",
          "as": "extent"
        }
      ]
    }
  ]
}
[Image: Screenshot 2024-03-06 at 3 13 16 PM]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant