From fe5f8547a332b99c48b8149961ddeb0ef181f5cf Mon Sep 17 00:00:00 2001
From: yvictor
Date: Thu, 6 Jun 2024 15:44:10 +0800
Subject: [PATCH] feat: add bench plot

---
 examples/bench.ipynb | 411 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 411 insertions(+)
 create mode 100644 examples/bench.ipynb

diff --git a/examples/bench.ipynb b/examples/bench.ipynb
new file mode 100644
index 0000000..137ecd4
--- /dev/null
+++ b/examples/bench.ipynb
@@ -0,0 +1,411 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "import polars as pl\n",
+    "import polars_talib as plta\n",
+    "import pandas as pd\n",
+    "import talib.abstract as ta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lazy scan: Date, Ticker (aliased to Symbol) and all float columns with lower-cased names.\n",
+    "p = pl.scan_parquet(\"us_market_cap2000.parquet\").select(\n",
+    "    pl.col(\"Date\"), pl.col(\"Ticker\").alias(\"Symbol\"), pl.selectors.float().name.to_lowercase()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Elapsed seconds per benchmark, keyed as <pl|pd>_<op>.\n",
+    "spend_records = {}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# polars baseline: time materializing the lazy scan alone (perf_counter: monotonic timer).\n",
+    "start_t = time.perf_counter()\n",
+    "p.collect()\n",
+    "end_t = time.perf_counter()\n",
+    "pl_read_spend = end_t - start_t\n",
+    "spend_records[\"pl_read\"] = pl_read_spend"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# polars: SMA(5) per Symbol.\n",
+    "start_t = time.perf_counter()\n",
+    "p.with_columns(\n",
+    "    plta.sma(timeperiod=5).over(\"Symbol\").alias(\"sma5\"),\n",
+    ").collect()\n",
+    "end_t = time.perf_counter()\n",
+    "pl_sma_spend_t = end_t - start_t\n",
+    "spend_records[\"pl_sma\"] = pl_sma_spend_t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pandas baseline: read the same file, index by (Ticker, Date), lower-case the OHLC names.\n",
+    "start_t = time.perf_counter()\n",
+    "df = (\n",
+    "    pd.read_parquet(\"us_market_cap2000.parquet\")\n",
+    "    .set_index([\"Ticker\", \"Date\"])\n",
+    "    .rename(columns={c: c.lower() for c in [\"Open\", \"High\", \"Low\", \"Close\"]})\n",
+    ")\n",
+    "end_t = time.perf_counter()\n",
+    "pd_read_spend = end_t - start_t\n",
+    "spend_records[\"pd_read\"] = pd_read_spend"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pandas: SMA(5) of close per Ticker.\n",
+    "start_t = time.perf_counter()\n",
+    "df[\"sma5\"] = df.groupby(\"Ticker\")[\"close\"].transform(lambda x: ta.SMA(x, timeperiod=5))\n",
+    "end_t = time.perf_counter()\n",
+    "pd_sma_spend_t = end_t - start_t\n",
+    "spend_records[\"pd_sma\"] = pd_sma_spend_t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# polars: MACD(10, 20, 5) per Symbol, then unpack the struct fields into columns.\n",
+    "start_t = time.perf_counter()\n",
+    "p.with_columns(\n",
+    "    plta.macd(fastperiod=10, slowperiod=20, signalperiod=5).over(\"Symbol\").alias(\"macd\"),\n",
+    ").with_columns(\n",
+    "    pl.col(\"macd\").struct.field(\"macd\"),\n",
+    "    pl.col(\"macd\").struct.field(\"macdsignal\"),\n",
+    "    pl.col(\"macd\").struct.field(\"macdhist\"),\n",
+    ").collect()\n",
+    "end_t = time.perf_counter()\n",
+    "pl_macd_spend_t = end_t - start_t\n",
+    "spend_records[\"pl_macd\"] = pl_macd_spend_t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pandas: one MACD pass per ticker yields all three outputs (previously three full passes,\n",
+    "# which inflated the pandas timing); re-run the notebook to refresh the recorded results.\n",
+    "start_t = time.perf_counter()\n",
+    "g = df.groupby(\"Ticker\")[\"close\"]\n",
+    "macd_names = [\"macd\", \"macdsignal\", \"macdhist\"]\n",
+    "macd = g.apply(\n",
+    "    lambda x: pd.DataFrame(\n",
+    "        dict(zip(macd_names, ta.MACD(x, fastperiod=10, slowperiod=20, signalperiod=5))),\n",
+    "        index=x.index,\n",
+    "    )\n",
+    ").droplevel(0)\n",
+    "for name in macd_names:\n",
+    "    df[name] = macd[name]\n",
+    "end_t = time.perf_counter()\n",
+    "pd_macd_spend_t = end_t - start_t\n",
+    "spend_records[\"pd_macd\"] = pd_macd_spend_t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# polars: STOCH(14, 7, 7) per Symbol; keep slowk/slowd and drop the intermediate struct.\n",
+    "start_t = time.perf_counter()\n",
+    "p.with_columns(\n",
+    "    plta.stoch(\n",
+    "        pl.col(\"high\"),\n",
+    "        pl.col(\"low\"),\n",
+    "        pl.col(\"close\"),\n",
+    "        fastk_period=14,\n",
+    "        slowk_period=7,\n",
+    "        slowd_period=7,\n",
+    "    )\n",
+    "    .over(\"Symbol\")\n",
+    "    .alias(\"stoch\"),\n",
+    ").with_columns(\n",
+    "    pl.col(\"stoch\").struct.field(\"slowk\"),\n",
+    "    pl.col(\"stoch\").struct.field(\"slowd\"),\n",
+    ").select(pl.exclude(\"stoch\")).collect()\n",
+    "end_t = time.perf_counter()\n",
+    "pl_stoch_spend_t = end_t - start_t\n",
+    "spend_records[\"pl_stoch\"] = pl_stoch_spend_t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pandas: one STOCH pass per ticker yields both slowk and slowd (previously two passes).\n",
+    "start_t = time.perf_counter()\n",
+    "g = df.groupby(\"Ticker\")\n",
+    "stoch = g.apply(\n",
+    "    lambda x: ta.STOCH(x, fastk_period=14, slowk_period=7, slowd_period=7)\n",
+    ").droplevel(0)\n",
+    "df[\"slowk\"] = stoch[\"slowk\"]\n",
+    "df[\"slowd\"] = stoch[\"slowd\"]\n",
+    "end_t = time.perf_counter()\n",
+    "pd_stoch_spend_t = end_t - start_t\n",
+    "spend_records[\"pd_stoch\"] = pd_stoch_spend_t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# polars: WCLPRICE per Symbol.\n",
+    "start_t = time.perf_counter()\n",
+    "p.with_columns(\n",
+    "    plta.wclprice().over(\"Symbol\").alias(\"wclprice\"),\n",
+    ").collect()\n",
+    "end_t = time.perf_counter()\n",
+    "pl_wclprice_spend_t = end_t - start_t\n",
+    "spend_records[\"pl_wclprice\"] = pl_wclprice_spend_t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pandas: WCLPRICE per Ticker.\n",
+    "start_t = time.perf_counter()\n",
+    "df[\"wclprice\"] = df.groupby(\"Ticker\").apply(lambda x: ta.WCLPRICE(x)).droplevel(0)\n",
+    "end_t = time.perf_counter()\n",
+    "pd_wclprice_spend_t = end_t - start_t\n",
+    "spend_records[\"pd_wclprice\"] = pd_wclprice_spend_t"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Tabulate the timings: key prefix -> kind, key suffix -> op, plus a readable stack label.\n",
+    "df_bench = pl.DataFrame(\n",
+    "    {\n",
+    "        \"kind\": [k.split(\"_\")[0] for k in spend_records],\n",
+    "        \"op\": [k.split(\"_\")[1] for k in spend_records],\n",
+    "        \"time\": list(spend_records.values()),\n",
+    "    }\n",
+    ").with_columns(\n",
+    "    pl.when(pl.col(\"kind\") == \"pl\").then(pl.lit(\"polars\")).otherwise(pl.lit(\"pandas\")).alias(\"stack\")\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "shape: (10, 4)
kindoptimestack
strstrf64str
"pl""read"0.427506"polars"
"pl""sma"0.399857"polars"
"pd""read"1.490565"pandas"
"pd""sma"1.955835"pandas"
"pl""macd"0.63583"polars"
"pd""macd"5.529319"pandas"
"pl""stoch"1.062676"polars"
"pd""stoch"8.159397"pandas"
"pl""wclprice"0.607319"polars"
"pd""wclprice"3.896856"pandas"
" + ], + "text/plain": [ + "shape: (10, 4)\n", + "┌──────┬──────────┬──────────┬────────┐\n", + "│ kind ┆ op ┆ time ┆ stack │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ f64 ┆ str │\n", + "╞══════╪══════════╪══════════╪════════╡\n", + "│ pl ┆ read ┆ 0.427506 ┆ polars │\n", + "│ pl ┆ sma ┆ 0.399857 ┆ polars │\n", + "│ pd ┆ read ┆ 1.490565 ┆ pandas │\n", + "│ pd ┆ sma ┆ 1.955835 ┆ pandas │\n", + "│ pl ┆ macd ┆ 0.63583 ┆ polars │\n", + "│ pd ┆ macd ┆ 5.529319 ┆ pandas │\n", + "│ pl ┆ stoch ┆ 1.062676 ┆ polars │\n", + "│ pd ┆ stoch ┆ 8.159397 ┆ pandas │\n", + "│ pl ┆ wclprice ┆ 0.607319 ┆ polars │\n", + "│ pd ┆ wclprice ┆ 3.896856 ┆ pandas │\n", + "└──────┴──────────┴──────────┴────────┘" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_bench" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + ":Bars [op,stack] (time)" + ] + }, + "execution_count": 27, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "p1193" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "df_bench.plot.bar(x=\"op\", y=\"time\", by=\"stack\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}