From f2caca51bf8d716e445d7fce6ceb811d6786a792 Mon Sep 17 00:00:00 2001 From: "Egor.Kraev" Date: Mon, 25 Nov 2024 13:13:50 +0000 Subject: [PATCH] minor time series tweaks --- ... interesting segments in time series.ipynb | 218 ++++++++---------- tests/timeseries_wip_entrypoint.py | 3 +- wise_pizza/explain.py | 23 +- wise_pizza/plotting_time_tree.py | 7 +- wise_pizza/slicer.py | 5 + wise_pizza/solve/tree.py | 29 ++- wise_pizza/time.py | 1 + 7 files changed, 149 insertions(+), 137 deletions(-) diff --git a/notebooks/Finding interesting segments in time series.ipynb b/notebooks/Finding interesting segments in time series.ipynb index cc1ad30..f717f1f 100644 --- a/notebooks/Finding interesting segments in time series.ipynb +++ b/notebooks/Finding interesting segments in time series.ipynb @@ -42,20 +42,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "id": "961bc9d1", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\EgorKraev\\AppData\\Local\\Temp\\ipykernel_12296\\3308931027.py:2: DeprecationWarning:\n", - "\n", - "Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython.display\n", - "\n" - ] - }, { "data": { "text/html": [ @@ -71,7 +61,8 @@ ], "source": [ "# this makes the notebook expand to full width of the browser window\n", - "from IPython.core.display import display, HTML\n", + "from IPython.core.display import HTML\n", + "from IPython.display import display\n", "display(HTML(\"\"))" ] }, @@ -152,28 +143,24 @@ "\n", "The most important choice you have to make here is whether you just want to look at time series behavior for the averages, or also to that of the weights - this is controlled by the `fit_sizes` parameter. `max_depth` works as usual, controlling the maximal number of dimensions any segment can constrain.\n", "\n", - "**explain_timeseries**: Find the most unusual segments in the timeseries\n", + "**explain_timeseries**: \n", + "\n", + "This function divides a time series panel dataset into segments that are as distinct as possible.\n", + "\n", + "Parameters:\n", + "\n", + "- **df**: A pandas DataFrame with the time series data.\n", + "- **dims**: Discrete dimensions to segment by.\n", + "- **total_name**: Name of the column containing totals.\n", + "- **time_name**: Name of the column containing the time values.\n", + "- **num_segments**: Number of segments to find.\n", + "- **size_name** (Optional): Name of the column containing the size of the segment.\n", + "- **max_depth** (Optional, defaults to 2): Maximum number of dimensions to constrain per segment.\n", + "- **fit_sizes** (Optional): Whether to fit the sizes of the segments or just the averages.\n", + "- **n_jobs** (Optional, defaults to 10): Number of jobs to run in parallel when finding segments.\n", + "- **num_breaks** (Optional, defaults to 3): Number of breaks in the stylized time series used for comparing segments.\n", + "\n", "\n", - "- `df`: Dataset\n", - "- `dims`: List of discrete dimensions\n", - "- `total_name`: Name of column that contains totals per segment\n", - "- `size_name`: Name of column containing segment sizes\n", - "- `min_segments`: Minimum number of segments to find\n", - "- `max_segments`: Maximum number of segments to find, defaults to min_segments\n", - "- `min_depth`: Minimum number of dimension to constrain in segment definition\n", - "- `max_depth`: Maximum number of dimension to constrain in segment definition\n", - "- `solver`: If this equals to \"lp\" uses the LP solver, else uses the (recommended) Lasso solver\n", - " - `\"lasso\"`: Lasso-based finder of unusual segments\n", - " - `\"lp\"`: LP-based finder of unusual segments\n", - "- `cluster_values`: In addition to single-value slices, consider slices that consist of a\n", - " group of segments from the same dimension with similar naive averages\n", - " - `True`: to use cluster values, you can them using `sf.relevant_cluster_names`\n", - " - `False`: to use simple segments\n", - "- `verbose`: If set to a truish value, lots of debug info is printed to console, also you can check progressbar\n", - " - `True`: to get info\n", - " - `False`: to get result without info\n", - " \n", - " \n", "- Use `.plot()` to see the plot after fitting:\n", " - `plot_is_static`: static (True) or dynamic (False) plotly result\n", " - `True`: to get static plots (Doesn't work on all platforms yet)\n", @@ -184,43 +171,47 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 7, "id": "0d57a44a", "metadata": { "scrolled": false }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Adding node 1...\n", - "Done!\n", - "Adding node 2...\n", - "Done!\n", - "Adding node 3...\n", - "Done!\n", - "Adding node 4...\n", - "Done!\n", - "Adding node 5...\n", - "Done!\n", - "Adding node 6...\n", - "Done!\n", - "0 {'segment': {'PRODUCT': 'Spend', 'TARGET_CURRENCY': 'TARGET_CURRENCY_cluster_2'}, 'index': 5, 'orig_i': 5, 'total': 22349930640.964485, 'seg_size': 58287375.0, 'naive_avg': 383.44376704156747, 'dummy': array([0, 0, 0, ..., 0, 0, 0])}\n", - "1 {'segment': {'PRODUCT': 'Credit', 'SOURCE_CURRENCY': 'SOURCE_CURRENCY_cluster_1'}, 'index': 0, 'orig_i': 0, 'total': 9418280996.490507, 'seg_size': 23444615.0, 'naive_avg': 401.7247029431069, 'dummy': array([1, 1, 1, ..., 0, 0, 0])}\n", - "2 {'segment': {'PRODUCT': 'Spend', 'TARGET_CURRENCY': 'BRL;CLP;MAD;NZD;XOF'}, 'index': 2, 'orig_i': 2, 'total': 1765844363.260038, 'seg_size': 2330795.0, 'naive_avg': 757.6146178707427, 'dummy': array([0, 0, 0, ..., 0, 0, 0])}\n", - "3 {'segment': {'PRODUCT': 'Transfer', 'TARGET_CURRENCY': 'TARGET_CURRENCY_cluster_2'}, 'index': 6, 'orig_i': 6, 'total': 1760778920.4430783, 'seg_size': 8148355.0, 'naive_avg': 216.09010903956423, 'dummy': array([0, 0, 0, ..., 1, 1, 1])}\n", - "4 {'segment': {'PRODUCT': 'Transfer', 'TARGET_CURRENCY': 'TARGET_CURRENCY_cluster_1'}, 'index': 4, 'orig_i': 4, 'total': 702549273.7995473, 'seg_size': 3479735.0, 'naive_avg': 201.89734959689383, 'dummy': array([0, 0, 0, ..., 0, 0, 0])}\n", - "5 {'segment': {'PRODUCT': 'Spend', 'TARGET_CURRENCY': 'TARGET_CURRENCY_cluster_3'}, 'index': 3, 'orig_i': 3, 'total': 408786271.34145737, 'seg_size': 815440.0, 'naive_avg': 501.30760244954547, 'dummy': array([0, 0, 0, ..., 0, 0, 0])}\n", - "6 {'segment': {'PRODUCT': 'Credit', 'SOURCE_CURRENCY': 'UYU'}, 'index': 1, 'orig_i': 1, 'total': 145067544.57249302, 'seg_size': 320370.0, 'naive_avg': 452.81251232166875, 'dummy': array([0, 0, 0, ..., 0, 0, 0])}\n" - ] + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" }, { "data": { "text/html": [ - "