diff --git a/examples/bench.ipynb b/examples/bench.ipynb new file mode 100644 index 0000000..137ecd4 --- /dev/null +++ b/examples/bench.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import polars as pl\n", + "import polars_talib as plta\n", + "import pandas as pd\n", + "import talib.abstract as ta" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "p = pl.scan_parquet(\"us_market_cap2000.parquet\").select(\n", + " pl.col(\"Date\"), pl.col(\"Ticker\").alias(\"Symbol\"), pl.selectors.float().name.to_lowercase()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "spend_records = {}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "p.collect()\n", + "end_t = time.time()\n", + "pl_read_spend = end_t - start_t\n", + "spend_records[\"pl_read\"] = pl_read_spend" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "p.with_columns(\n", + " plta.sma(timeperiod=5).over(\"Symbol\").alias(\"sma5\"),\n", + ").collect()\n", + "end_t = time.time()\n", + "pl_sma_spend_t = end_t - start_t\n", + "spend_records[\"pl_sma\"] = pl_sma_spend_t" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "df = (\n", + " pd.read_parquet(\"us_market_cap2000.parquet\")\n", + " .set_index([\"Ticker\", \"Date\"])\n", + " .rename(columns={c: c.lower() for c in [\"Open\", \"High\", \"Low\", \"Close\"]})\n", + ")\n", + "end_t = time.time()\n", + "pd_read_spend = end_t - start_t\n", + "spend_records[\"pd_read\"] = pd_read_spend" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "df[\"sma5\"] = df.groupby(\"Ticker\")[\"close\"].transform(lambda x: ta.SMA(x, timeperiod=5))\n", + "end_t = time.time()\n", + "pd_sma_spend_t = end_t - start_t\n", + "spend_records[\"pd_sma\"] = pd_sma_spend_t" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "p.with_columns(\n", + " plta.macd(fastperiod=10, slowperiod=20, signalperiod=5).over(\"Symbol\").alias(\"macd\"),\n", + ").with_columns(\n", + " pl.col(\"macd\").struct.field(\"macd\"),\n", + " pl.col(\"macd\").struct.field(\"macdsignal\"),\n", + " pl.col(\"macd\").struct.field(\"macdhist\"),\n", + ").collect()\n", + "end_t = time.time()\n", + "pl_macd_spend_t = end_t - start_t\n", + "spend_records[\"pl_macd\"] = pl_macd_spend_t" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "g = df.groupby(\"Ticker\")[\"close\"]\n", + "df[\"macd\"] = g.transform(lambda x: ta.MACD(x, fastperiod=10, slowperiod=20, signalperiod=5)[0])\n", + "df[\"macdsignal\"] = g.transform(\n", + " lambda x: ta.MACD(x, fastperiod=10, slowperiod=20, signalperiod=5)[1]\n", + ")\n", + "df[\"macdhist\"] = g.transform(lambda x: ta.MACD(x, fastperiod=10, slowperiod=20, signalperiod=5)[2])\n", + "end_t = time.time()\n", + "pd_macd_spend_t = end_t - start_t\n", + "spend_records[\"pd_macd\"] = pd_macd_spend_t" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "p.with_columns(\n", + " plta.stoch(\n", + " pl.col(\"high\"),\n", + " pl.col(\"low\"),\n", + " pl.col(\"close\"),\n", + " fastk_period=14,\n", + " slowk_period=7,\n", + " slowd_period=7,\n", + " )\n", + " .over(\"Symbol\")\n", + " .alias(\"stoch\"),\n", + ").with_columns(\n", + " pl.col(\"stoch\").struct.field(\"slowk\"),\n", + " pl.col(\"stoch\").struct.field(\"slowd\"),\n", + ").select(pl.exclude(\"stoch\")).collect()\n", + "end_t = time.time()\n", + "pl_stoch_spend_t = end_t - start_t\n", + "spend_records[\"pl_stoch\"] = pl_stoch_spend_t" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "g = df.groupby(\"Ticker\")\n", + "df[\"slowk\"] = g.apply(\n", + " lambda x: ta.STOCH(x, fastk_period=14, slowk_period=7, slowd_period=7)\n", + ").droplevel(0)[\"slowk\"]\n", + "df[\"slowd\"] = g.apply(\n", + " lambda x: ta.STOCH(x, fastk_period=14, slowk_period=7, slowd_period=7)\n", + ").droplevel(0)[\"slowd\"]\n", + "end_t = time.time()\n", + "pd_stoch_spend_t = end_t - start_t\n", + "spend_records[\"pd_stoch\"] = pd_stoch_spend_t" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "p.with_columns(\n", + " plta.wclprice().over(\"Symbol\").alias(\"wclprice\"),\n", + ").collect()\n", + "end_t = time.time()\n", + "pl_wclprice_spend_t = end_t - start_t\n", + "spend_records[\"pl_wclprice\"] = pl_wclprice_spend_t" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "start_t = time.time()\n", + "df[\"wclprice\"] = df.groupby(\"Ticker\").apply(lambda x: ta.WCLPRICE(x)).droplevel(0)\n", + "end_t = time.time()\n", + "pd_wclprice_spend_t = end_t - start_t\n", + "spend_records[\"pd_wclprice\"] = pd_wclprice_spend_t" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "df_bench = pl.DataFrame(\n", + " {\n", + " \"kind\": [k.split(\"_\")[0] for k in spend_records.keys()],\n", + " \"op\": [k.split(\"_\")[1] for k in spend_records.keys()],\n", + " \"time\": [v for v in spend_records.values()],\n", + " }\n", + ").with_columns(\n", + " pl.when(pl.col(\"kind\")==\"pl\").then(pl.lit(\"polars\")).otherwise(pl.lit(\"pandas\")).alias(\"stack\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
kind | op | time | stack |
---|---|---|---|
str | str | f64 | str |
"pl" | "read" | 0.427506 | "polars" |
"pl" | "sma" | 0.399857 | "polars" |
"pd" | "read" | 1.490565 | "pandas" |
"pd" | "sma" | 1.955835 | "pandas" |
"pl" | "macd" | 0.63583 | "polars" |
"pd" | "macd" | 5.529319 | "pandas" |
"pl" | "stoch" | 1.062676 | "polars" |
"pd" | "stoch" | 8.159397 | "pandas" |
"pl" | "wclprice" | 0.607319 | "polars" |
"pd" | "wclprice" | 3.896856 | "pandas" |