From 715d59784007002356333ce42a5bb25419586305 Mon Sep 17 00:00:00 2001 From: Serge Retkowsky Date: Mon, 11 Sep 2023 12:44:32 +0200 Subject: [PATCH] Add files via upload --- Utilities/Tokenizer.ipynb | 1313 ++++++++++++++++++++++++ Utilities/meeting_notes_from_audio.txt | 35 + 2 files changed, 1348 insertions(+) create mode 100644 Utilities/Tokenizer.ipynb create mode 100644 Utilities/meeting_notes_from_audio.txt diff --git a/Utilities/Tokenizer.ipynb b/Utilities/Tokenizer.ipynb new file mode 100644 index 0000000..83ec1e2 --- /dev/null +++ b/Utilities/Tokenizer.ipynb @@ -0,0 +1,1313 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bbeeb8bc", + "metadata": {}, + "source": [ + "# Tokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "eeefd124", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import openai\n", + "import tiktoken\n", + "\n", + "from dotenv import load_dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "023347ac", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "openai version = 0.28.0\n" + ] + } + ], + "source": [ + "print(\"openai version =\", openai.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1eb66c65", + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv(\"azure.env\")\n", + "\n", + "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n", + "openai.api_base = os.getenv(\"OPENAI_API_BASE\")\n", + "openai.api_type = \"azure\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "691e1935", + "metadata": {}, + "outputs": [], + "source": [ + "encoding = tiktoken.get_encoding(\"cl100k_base\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8905048a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "encoding = tiktoken.encoding_for_model(\"gpt-3.5-turbo\")\n", + "print(encoding)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "06547221", + "metadata": {}, + "outputs": [], + "source": [ + "file_path = \"meeting_notes_from_audio.txt\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "09ea8601", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " with open(file_path, \"r\") as file:\n", + " meeting_notes = file.read()\n", + "\n", + "except FileNotFoundError:\n", + " print(f\"The file '{file_path}' was not found.\")\n", + "except Exception as e:\n", + " print(f\"An error occurred: {str(e)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "74b82bf3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[5159,\n", + " 6574,\n", + " 8554,\n", + " 15590,\n", + " 555,\n", + " 35219,\n", + " 5377,\n", + " 15592,\n", + " 323,\n", + " 35219,\n", + " 39841,\n", + " 3600,\n", + " 13,\n", + " 2696,\n", + " 25,\n", + " 220,\n", + " 806,\n", + " 96253,\n", + " 12,\n", + " 2366,\n", + " 18,\n", + " 220,\n", + " 806,\n", + " 25,\n", + " 2970,\n", + " 25,\n", + " 2491,\n", + " 271,\n", + " 19791,\n", + " 1473,\n", + " 37,\n", + " 396,\n", + " 4842,\n", + " 12623,\n", + " 30037,\n", + " 1047,\n", + " 264,\n", + " 6992,\n", + " 2132,\n", + " 8502,\n", + " 315,\n", + " 220,\n", + " 2366,\n", + " 18,\n", + " 449,\n", + " 264,\n", + " 13254,\n", + " 315,\n", + " 220,\n", + " 6549,\n", + " 3610,\n", + " 323,\n", + " 264,\n", + " 20547,\n", + " 11626,\n", + " 4850,\n", + " 315,\n", + " 220,\n", + " 2970,\n", + " 14697,\n", + " 11205,\n", + " 4272,\n", + " 8070,\n", + " 574,\n", + " 220,\n", + " 845,\n", + " 3610,\n", + " 11,\n", + " 264,\n", + " 5199,\n", + " 5376,\n", + " 505,\n", + " 279,\n", + " 220,\n", + " 605,\n", + " 3610,\n", + " 3970,\n", + " 304,\n", + " 279,\n", + " 1890,\n", + " 8502,\n", + " 315,\n", + " 279,\n", + " 3766,\n", + " 1060,\n", + " 13,\n", + " 23212,\n", + " 11,\n", + " 435,\n", + " 396,\n", + " 4842,\n", + " 12623,\n", + " 706,\n", + " 85957,\n", + " 872,\n", + " 9513,\n", + " 46128,\n", + " 34919,\n", + " 20136,\n", + " 323,\n", + " 29091,\n", + " 304,\n", + " 48197,\n", + " 22359,\n", + " 13166,\n", + " 27460,\n", + " 311,\n", + " 18885,\n", + " 5326,\n", + " 60684,\n", + " 291,\n", + " 4780,\n", + " 13,\n", + " 24296,\n", + " 11,\n", + " 814,\n", + " 617,\n", + " 3970,\n", + " 17808,\n", + " 1217,\n", + " 6650,\n", + " 449,\n", + " 264,\n", + " 445,\n", + " 16027,\n", + " 11547,\n", + " 1741,\n", + " 11595,\n", + " 315,\n", + " 220,\n", + " 18,\n", + " 13,\n", + " 20,\n", + " 13689,\n", + " 323,\n", + " 872,\n", + " 18057,\n", + " 369,\n", + " 279,\n", + " 5108,\n", + " 8502,\n", + " 374,\n", + " 6928,\n", + " 449,\n", + " 220,\n", + " 23,\n", + " 4,\n", + " 8502,\n", + " 29352,\n", + " 58414,\n", + " 6650,\n", + " 13,\n", + " 17830,\n", + " 11,\n", + " 872,\n", + " 435,\n", + " 396,\n", + " 4842,\n", + " 41164,\n", + " 374,\n", + " 743,\n", + " 311,\n", + " 6929,\n", + " 459,\n", + " 67992,\n", + " 902,\n", + " 374,\n", + " 3685,\n", + " 311,\n", + " 4933,\n", + " 220,\n", + " 1049,\n", + " 3610,\n", + " 382,\n", + " 6334,\n", + " 21387,\n", + " 512,\n", + " 12,\n", + " 435,\n", + " 396,\n", + " 4842,\n", + " 12623,\n", + " 30037,\n", + " 1047,\n", + " 264,\n", + " 3831,\n", + " 2132,\n", + " 8502,\n", + " 449,\n", + " 264,\n", + " 220,\n", + " 914,\n", + " 4,\n", + " 44188,\n", + " 56,\n", + " 13254,\n", + " 5376,\n", + " 198,\n", + " 12,\n", + " 43907,\n", + " 11626,\n", + " 4850,\n", + " 574,\n", + " 220,\n", + " 2970,\n", + " 4,\n", + " 4245,\n", + " 311,\n", + " 2853,\n", + " 92126,\n", + " 198,\n", + " 12,\n", + " 13315,\n", + " 300,\n", + " 85007,\n", + " 311,\n", + " 220,\n", + " 1806,\n", + " 13,\n", + " 20,\n", + " 3610,\n", + " 67371,\n", + " 311,\n", + " 264,\n", + " 220,\n", + " 966,\n", + " 4,\n", + " 469,\n", + " 21587,\n", + " 6486,\n", + " 4850,\n", + " 198,\n", + " 12,\n", + " 9558,\n", + " 8070,\n", + " 315,\n", + " 220,\n", + " 845,\n", + " 3610,\n", + " 11,\n", + " 709,\n", + " 505,\n", + " 220,\n", + " 605,\n", + " 3610,\n", + " 198,\n", + " 12,\n", + " 10884,\n", + " 2686,\n", + " 481,\n", + " 3157,\n", + " 14264,\n", + " 4245,\n", + " 311,\n", + " 17626,\n", + " 2027,\n", + " 1584,\n", + " 198,\n", + " 12,\n", + " 13038,\n", + " 291,\n", + " 304,\n", + " 9513,\n", + " 46128,\n", + " 34919,\n", + " 323,\n", + " 13166,\n", + " 27460,\n", + " 198,\n", + " 12,\n", + " 31946,\n", + " 11071,\n", + " 12032,\n", + " 315,\n", + " 220,\n", + " 16,\n", + " 13,\n", + " 20,\n", + " 7239,\n", + " 449,\n", + " 25452,\n", + " 315,\n", + " 220,\n", + " 5067,\n", + " 3610,\n", + " 198,\n", + " 12,\n", + " 58654,\n", + " 311,\n", + " 25452,\n", + " 11595,\n", + " 315,\n", + " 220,\n", + " 16,\n", + " 13,\n", + " 20,\n", + " 720,\n", + " 12,\n", + " 62697,\n", + " 17808,\n", + " 1217,\n", + " 6650,\n", + " 323,\n", + " 4827,\n", + " 6130,\n", + " 24279,\n", + " 2853,\n", + " 198,\n", + " 12,\n", + " 445,\n", + " 16027,\n", + " 356,\n", + " 1741,\n", + " 11595,\n", + " 315,\n", + " 220,\n", + " 18,\n", + " 13,\n", + " 20,\n", + " 14062,\n", + " 12,\n", + " 5273,\n", + " 520,\n", + " 5326,\n", + " 1646,\n", + " 304,\n", + " 2035,\n", + " 198,\n", + " 12,\n", + " 56775,\n", + " 369,\n", + " 5108,\n", + " 8502,\n", + " 315,\n", + " 220,\n", + " 8878,\n", + " 3610,\n", + " 13254,\n", + " 198,\n", + " 12,\n", + " 3216,\n", + " 5065,\n", + " 67992,\n", + " 315,\n", + " 11728,\n", + " 12623,\n", + " 198,\n", + " 12,\n", + " 56775,\n", + " 19738,\n", + " 6650,\n", + " 15174,\n", + " 271,\n", + " 2573,\n", + " 19974,\n", + " 1473,\n", + " 16,\n", + " 13,\n", + " 13038,\n", + " 220,\n", + " 914,\n", + " 11,\n", + " 931,\n", + " 11,\n", + " 931,\n", + " 304,\n", + " 48197,\n", + " 22359,\n", + " 13166,\n", + " 27460,\n", + " 13,\n", + " 720,\n", + " 17,\n", + " 13,\n", + " 423,\n", + " 1986,\n", + " 1463,\n", + " 9513,\n", + " 22126,\n", + " 34919,\n", + " 20136,\n", + " 13,\n", + " 720,\n", + " 18,\n", + " 13,\n", + " 13038,\n", + " 17345,\n", + " 304,\n", + " 52508,\n", + " 1534,\n", + " 11897,\n", + " 30255,\n", + " 323,\n", + " 20658,\n", + " 21515,\n", + " 46128,\n", + " 34919,\n", + " 13,\n", + " 720,\n", + " 19,\n", + " 13,\n", + " 51241,\n", + " 1579,\n", + " 7692,\n", + " 19523,\n", + " 2027,\n", + " 1584,\n", + " 13,\n", + " 720,\n", + " 20,\n", + " 13,\n", + " 8000,\n", + " 502,\n", + " 4997,\n", + " 78,\n", + " 37713,\n", + " 5452,\n", + " 13,\n", + " 720,\n", + " 21,\n", + " 13,\n", + " 32175,\n", + " 907,\n", + " 520,\n", + " 5326,\n", + " 1646,\n", + " 449,\n", + " 264,\n", + " 220,\n", + " 1484,\n", + " 4,\n", + " 12410,\n", + " 2237,\n", + " 13,\n", + " 720,\n", + " 22,\n", + " 13,\n", + " 86380,\n", + " 15692,\n", + " 5603,\n", + " 311,\n", + " 18646,\n", + " 33164,\n", + " 13,\n", + " 720,\n", + " 23,\n", + " 13,\n", + " 24083,\n", + " 435,\n", + " 396,\n", + " 4842,\n", + " 41164,\n", + " 11728,\n", + " 12623,\n", + " 67992,\n", + " 311,\n", + " 4933,\n", + " 220,\n", + " 1049,\n", + " 3610,\n", + " 13,\n", + " 720,\n", + " 24,\n", + " 13,\n", + " 32175,\n", + " 19738,\n", + " 6650,\n", + " 15174,\n", + " 382,\n", + " 791,\n", + " 27065,\n", + " 315,\n", + " 279,\n", + " 1495,\n", + " 374,\n", + " 8965,\n", + " 6928,\n", + " 13,\n", + " 578,\n", + " 4221,\n", + " 1511,\n", + " 390,\n", + " 50369,\n", + " 36232,\n", + " 323,\n", + " 54508,\n", + " 11,\n", + " 39686,\n", + " 279,\n", + " 16358,\n", + " 6650,\n", + " 323,\n", + " 6020,\n", + " 5178,\n", + " 315,\n", + " 279,\n", + " 2883,\n", + " 13,\n", + " 578,\n", + " 12432,\n", + " 1101,\n", + " 61120,\n", + " 46135,\n", + " 311,\n", + " 41777,\n", + " 11,\n", + " 902,\n", + " 15151,\n", + " 264,\n", + " 6928,\n", + " 19451,\n", + " 13,\n", + " 23212,\n", + " 11,\n", + " 279,\n", + " 2317,\n", + " 315,\n", + " 279,\n", + " 10430,\n", + " 374,\n", + " 6928,\n", + " 11,\n", + " 449,\n", + " 279,\n", + " 12432,\n", + " 38787,\n", + " 264,\n", + " 6992,\n", + " 8502,\n", + " 323,\n", + " 264,\n", + " 6992,\n", + " 14827,\n", + " 67992,\n", + " 13]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokens_integer = encoding.encode(meeting_notes)\n", + "tokens_integer" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fbe71cce", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "535 is the number of tokens in my text\n" + ] + } + ], + "source": [ + "print(f\"{len(tokens_integer)} is the number of tokens in my text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7e44a2fe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[b'My',\n", + " b' meeting',\n", + " b' notes',\n", + " b' processed',\n", + " b' by',\n", + " b' Azure',\n", + " b' Open',\n", + " b' AI',\n", + " b' and',\n", + " b' Azure',\n", + " b' Speech',\n", + " b' services',\n", + " b'.',\n", + " b' Date',\n", + " b':',\n", + " b' ',\n", + " b'11',\n", + " b'-Sep',\n", + " b'-',\n", + " b'202',\n", + " b'3',\n", + " b' ',\n", + " b'11',\n", + " b':',\n", + " b'58',\n", + " b':',\n", + " b'49',\n", + " b'\\n\\n',\n", + " b'Summary',\n", + " b':\\n\\n',\n", + " b'F',\n", + " b'int',\n", + " b'ech',\n", + " b' Plus',\n", + " b' Sync',\n", + " b' had',\n", + " b' a',\n", + " b' successful',\n", + " b' second',\n", + " b' quarter',\n", + " b' of',\n", + " b' ',\n", + " b'202',\n", + " b'3',\n", + " b' with',\n", + " b' a',\n", + " b' revenue',\n", + " b' of',\n", + " b' ',\n", + " b'125',\n", + " b' million',\n", + " b' and',\n", + " b' a',\n", + " b' gross',\n", + " b' profit',\n", + " b' margin',\n", + " b' of',\n", + " b' ',\n", + " b'58',\n", + " b'%.',\n", + " b' Their',\n", + " b' net',\n", + " b' income',\n", + " b' was',\n", + " b' ',\n", + " b'16',\n", + " b' million',\n", + " b',',\n", + " b' a',\n", + " b' significant',\n", + " b' increase',\n", + " b' from',\n", + " b' the',\n", + " b' ',\n", + " b'10',\n", + " b' million',\n", + " b' seen',\n", + " b' in',\n", + " b' the',\n", + " b' same',\n", + " b' quarter',\n", + " b' of',\n", + " b' the',\n", + " b' previous',\n", + " b' year',\n", + " b'.',\n", + " b' Additionally',\n", + " b',',\n", + " b' F',\n", + " b'int',\n", + " b'ech',\n", + " b' Plus',\n", + " b' has',\n", + " b' diversified',\n", + " b' their',\n", + " b' asset',\n", + " b'-backed',\n", + " b' securities',\n", + " b' portfolio',\n", + " b' and',\n", + " b' invested',\n", + " b' in',\n", + " b' AAA',\n", + " b' rated',\n", + " b' corporate',\n", + " b' bonds',\n", + " b' to',\n", + " b' enhance',\n", + " b' risk',\n", + " b'-adjust',\n", + " b'ed',\n", + " b' returns',\n", + " b'.',\n", + " b' Furthermore',\n", + " b',',\n", + " b' they',\n", + " b' have',\n", + " b' seen',\n", + " b' organic',\n", + " b' user',\n", + " b' growth',\n", + " b' with',\n", + " b' a',\n", + " b' L',\n", + " b'TV',\n", + " b'/C',\n", + " b'AC',\n", + " b' ratio',\n", + " b' of',\n", + " b' ',\n", + " b'3',\n", + " b'.',\n", + " b'5',\n", + " b'%,',\n", + " b' and',\n", + " b' their',\n", + " b' forecast',\n", + " b' for',\n", + " b' the',\n", + " b' coming',\n", + " b' quarter',\n", + " b' is',\n", + " b' positive',\n", + " b' with',\n", + " b' ',\n", + " b'8',\n", + " b'%',\n", + " b' quarter',\n", + " b'-over',\n", + " b'-quarter',\n", + " b' growth',\n", + " b'.',\n", + " b' Finally',\n", + " b',',\n", + " b' their',\n", + " b' F',\n", + " b'int',\n", + " b'ech',\n", + " b' subsidiary',\n", + " b' is',\n", + " b' set',\n", + " b' to',\n", + " b' conduct',\n", + " b' an',\n", + " b' IPO',\n", + " b' which',\n", + " b' is',\n", + " b' expected',\n", + " b' to',\n", + " b' raise',\n", + " b' ',\n", + " b'200',\n", + " b' million',\n", + " b'.\\n\\n',\n", + " b'Main',\n", + " b' Points',\n", + " b':\\n',\n", + " b'-',\n", + " b' F',\n", + " b'int',\n", + " b'ech',\n", + " b' Plus',\n", + " b' Sync',\n", + " b' had',\n", + " b' a',\n", + " b' strong',\n", + " b' second',\n", + " b' quarter',\n", + " b' with',\n", + " b' a',\n", + " b' ',\n", + " b'25',\n", + " b'%',\n", + " b' Yo',\n", + " b'Y',\n", + " b' revenue',\n", + " b' increase',\n", + " b'\\n',\n", + " b'-',\n", + " b' Gross',\n", + " b' profit',\n", + " b' margin',\n", + " b' was',\n", + " b' ',\n", + " b'58',\n", + " b'%',\n", + " b' due',\n", + " b' to',\n", + " b' cost',\n", + " b' efficiencies',\n", + " b'\\n',\n", + " b'-',\n", + " b' Bet',\n", + " b'as',\n", + " b' surged',\n", + " b' to',\n", + " b' ',\n", + " b'37',\n", + " b'.',\n", + " b'5',\n", + " b' million',\n", + " b' translating',\n", + " b' to',\n", + " b' a',\n", + " b' ',\n", + " b'30',\n", + " b'%',\n", + " b' E',\n", + " b'BIT',\n", + " b'DA',\n", + " b' margin',\n", + " b'\\n',\n", + " b'-',\n", + " b' Net',\n", + " b' income',\n", + " b' of',\n", + " b' ',\n", + " b'16',\n", + " b' million',\n", + " b',',\n", + " b' up',\n", + " b' from',\n", + " b' ',\n", + " b'10',\n", + " b' million',\n", + " b'\\n',\n", + " b'-',\n", + " b' Total',\n", + " b' address',\n", + " b'able',\n", + " b' market',\n", + " b' grew',\n", + " b' due',\n", + " b' to',\n", + " b' expanded',\n", + " b' product',\n", + " b' line',\n", + " b'\\n',\n", + " b'-',\n", + " b' Invest',\n", + " b'ed',\n", + " b' in',\n", + " b' asset',\n", + " b'-backed',\n", + " b' securities',\n", + " b' and',\n", + " b' corporate',\n", + " b' bonds',\n", + " b'\\n',\n", + " b'-',\n", + " b' Balance',\n", + " b' sheet',\n", + " b' assets',\n", + " b' of',\n", + " b' ',\n", + " b'1',\n", + " b'.',\n", + " b'5',\n", + " b' billion',\n", + " b' with',\n", + " b' equity',\n", + " b' of',\n", + " b' ',\n", + " b'600',\n", + " b' million',\n", + " b'\\n',\n", + " b'-',\n", + " b' Debt',\n", + " b' to',\n", + " b' equity',\n", + " b' ratio',\n", + " b' of',\n", + " b' ',\n", + " b'1',\n", + " b'.',\n", + " b'5',\n", + " b' \\n',\n", + " b'-',\n", + " b' Increased',\n", + " b' organic',\n", + " b' user',\n", + " b' growth',\n", + " b' and',\n", + " b' lower',\n", + " b' customer',\n", + " b' acquisition',\n", + " b' cost',\n", + " b'\\n',\n", + " b'-',\n", + " b' L',\n", + " b'TV',\n", + " b' C',\n", + " b'AC',\n", + " b' ratio',\n", + " b' of',\n", + " b' ',\n", + " b'3',\n", + " b'.',\n", + " b'5',\n", + " b'%\\n',\n", + " b'-',\n", + " b' Value',\n", + " b' at',\n", + " b' risk',\n", + " b' model',\n", + " b' in',\n", + " b' place',\n", + " b'\\n',\n", + " b'-',\n", + " b' Forecast',\n", + " b' for',\n", + " b' coming',\n", + " b' quarter',\n", + " b' of',\n", + " b' ',\n", + " b'135',\n", + " b' million',\n", + " b' revenue',\n", + " b'\\n',\n", + " b'-',\n", + " b' Up',\n", + " b'coming',\n", + " b' IPO',\n", + " b' of',\n", + " b' Pay',\n", + " b' Plus',\n", + " b'\\n',\n", + " b'-',\n", + " b' Forecast',\n", + " b' aggressive',\n", + " b' growth',\n", + " b' strategies',\n", + " b'\\n\\n',\n", + " b'Action',\n", + " b' Items',\n", + " b':\\n\\n',\n", + " b'1',\n", + " b'.',\n", + " b' Invest',\n", + " b' ',\n", + " b'25',\n", + " b',',\n", + " b'000',\n", + " b',',\n", + " b'000',\n", + " b' in',\n", + " b' AAA',\n", + " b' rated',\n", + " b' corporate',\n", + " b' bonds',\n", + " b'.',\n", + " b' \\n',\n", + " b'2',\n", + " b'.',\n", + " b' D',\n", + " b'ivers',\n", + " b'ify',\n", + " b' asset',\n", + " b' backed',\n", + " b' securities',\n", + " b' portfolio',\n", + " b'.',\n", + " b' \\n',\n", + " b'3',\n", + " b'.',\n", + " b' Invest',\n", + " b' heavily',\n", + " b' in',\n", + " b' collateral',\n", + " b'ized',\n", + " b' debt',\n", + " b' obligations',\n", + " b' and',\n", + " b' residential',\n", + " b' mortgage',\n", + " b'-backed',\n", + " b' securities',\n", + " b'.',\n", + " b' \\n',\n", + " b'4',\n", + " b'.',\n", + " b' Expand',\n", + " b' high',\n", + " b' yield',\n", + " b' savings',\n", + " b' product',\n", + " b' line',\n", + " b'.',\n", + " b' \\n',\n", + " b'5',\n", + " b'.',\n", + " b' Develop',\n", + " b' new',\n", + " b' Rob',\n", + " b'o',\n", + " b' advisor',\n", + " b' platform',\n", + " b'.',\n", + " b' \\n',\n", + " b'6',\n", + " b'.',\n", + " b' Implement',\n", + " b' value',\n", + " b' at',\n", + " b' risk',\n", + " b' model',\n", + " b' with',\n", + " b' a',\n", + " b' ',\n", + " b'99',\n", + " b'%',\n", + " b' confidence',\n", + " b' level',\n", + " b'.',\n", + " b' \\n',\n", + " b'7',\n", + " b'.',\n", + " b' Adopt',\n", + " b' conservative',\n", + " b' approach',\n", + " b' to',\n", + " b' managing',\n", + " b' leverage',\n", + " b'.',\n", + " b' \\n',\n", + " b'8',\n", + " b'.',\n", + " b' Launch',\n", + " b' F',\n", + " b'int',\n", + " b'ech',\n", + " b' subsidiary',\n", + " b' Pay',\n", + " b' Plus',\n", + " b' IPO',\n", + " b' to',\n", + " b' raise',\n", + " b' ',\n", + " b'200',\n", + " b' million',\n", + " b'.',\n", + " b' \\n',\n", + " b'9',\n", + " b'.',\n", + " b' Implement',\n", + " b' aggressive',\n", + " b' growth',\n", + " b' strategies',\n", + " b'.\\n\\n',\n", + " b'The',\n", + " b' sentiment',\n", + " b' of',\n", + " b' the',\n", + " b' text',\n", + " b' is',\n", + " b' generally',\n", + " b' positive',\n", + " b'.',\n", + " b' The',\n", + " b' language',\n", + " b' used',\n", + " b' con',\n", + " b'veys',\n", + " b' enthusiasm',\n", + " b' and',\n", + " b' optimism',\n", + " b',',\n", + " b' highlighting',\n", + " b' the',\n", + " b' impressive',\n", + " b' growth',\n", + " b' and',\n", + " b' financial',\n", + " b' performance',\n", + " b' of',\n", + " b' the',\n", + " b' company',\n", + " b'.',\n", + " b' The',\n", + " b' CEO',\n", + " b' also',\n", + " b' expresses',\n", + " b' gratitude',\n", + " b' to',\n", + " b' shareholders',\n", + " b',',\n", + " b' which',\n", + " b' indicates',\n", + " b' a',\n", + " b' positive',\n", + " b' attitude',\n", + " b'.',\n", + " b' Additionally',\n", + " b',',\n", + " b' the',\n", + " b' context',\n", + " b' of',\n", + " b' the',\n", + " b' discussion',\n", + " b' is',\n", + " b' positive',\n", + " b',',\n", + " b' with',\n", + " b' the',\n", + " b' CEO',\n", + " b' announcing',\n", + " b' a',\n", + " b' successful',\n", + " b' quarter',\n", + " b' and',\n", + " b' a',\n", + " b' successful',\n", + " b' upcoming',\n", + " b' IPO',\n", + " b'.']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokens_string = [encoding.decode_single_token_bytes(token) for token in tokens_integer]\n", + "tokens_string" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3c8cdf73", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 is the number of token included in role\n", + "14 is the number of token included in content\n", + "22 number of tokens to be sent in our request\n" + ] + } + ], + "source": [ + "message = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Explain to me how tokenization is working in OpenAI models?\",\n", + " }\n", + "]\n", + "\n", + "tokens_per_message = 4\n", + "\n", + "num_tokens = 0\n", + "num_tokens += tokens_per_message\n", + "\n", + "for key, value in message[0].items():\n", + " text = value\n", + " num_tokens += len(encoding.encode(value))\n", + " print(f\"{len(encoding.encode(value))} is the number of token included in {key}\")\n", + "\n", + "num_tokens += 3\n", + "# every reply is primed with <|start|>assistant<|message|>\n", + "\n", + "print(f\"{num_tokens} number of tokens to be sent in our request\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9236a00", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10 - SDK v2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Utilities/meeting_notes_from_audio.txt b/Utilities/meeting_notes_from_audio.txt new file mode 100644 index 0000000..5cdfec7 --- /dev/null +++ b/Utilities/meeting_notes_from_audio.txt @@ -0,0 +1,35 @@ +My meeting notes processed by Azure Open AI and Azure Speech services. Date: 11-Sep-2023 11:58:49 + +Summary: + +Fintech Plus Sync had a successful second quarter of 2023 with a revenue of 125 million and a gross profit margin of 58%. Their net income was 16 million, a significant increase from the 10 million seen in the same quarter of the previous year. Additionally, Fintech Plus has diversified their asset-backed securities portfolio and invested in AAA rated corporate bonds to enhance risk-adjusted returns. Furthermore, they have seen organic user growth with a LTV/CAC ratio of 3.5%, and their forecast for the coming quarter is positive with 8% quarter-over-quarter growth. Finally, their Fintech subsidiary is set to conduct an IPO which is expected to raise 200 million. + +Main Points: +- Fintech Plus Sync had a strong second quarter with a 25% YoY revenue increase +- Gross profit margin was 58% due to cost efficiencies +- Betas surged to 37.5 million translating to a 30% EBITDA margin +- Net income of 16 million, up from 10 million +- Total addressable market grew due to expanded product line +- Invested in asset-backed securities and corporate bonds +- Balance sheet assets of 1.5 billion with equity of 600 million +- Debt to equity ratio of 1.5 +- Increased organic user growth and lower customer acquisition cost +- LTV CAC ratio of 3.5% +- Value at risk model in place +- Forecast for coming quarter of 135 million revenue +- Upcoming IPO of Pay Plus +- Forecast aggressive growth strategies + +Action Items: + +1. Invest 25,000,000 in AAA rated corporate bonds. +2. Diversify asset backed securities portfolio. +3. Invest heavily in collateralized debt obligations and residential mortgage-backed securities. +4. Expand high yield savings product line. +5. Develop new Robo advisor platform. +6. Implement value at risk model with a 99% confidence level. +7. Adopt conservative approach to managing leverage. +8. Launch Fintech subsidiary Pay Plus IPO to raise 200 million. +9. Implement aggressive growth strategies. + +The sentiment of the text is generally positive. The language used conveys enthusiasm and optimism, highlighting the impressive growth and financial performance of the company. The CEO also expresses gratitude to shareholders, which indicates a positive attitude. Additionally, the context of the discussion is positive, with the CEO announcing a successful quarter and a successful upcoming IPO. \ No newline at end of file