diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 71e23b2..eaf304c 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -8,14 +8,14 @@ pool:
vmImage: 'ubuntu-latest'
strategy:
matrix:
- Python38:
- python.version: '3.8'
Python39:
python.version: '3.9'
Python310:
python.version: '3.10'
Python311:
python.version: '3.11'
+ Python312:
+ python.version: '3.12'
steps:
- task: UsePythonVersion@0
inputs:
diff --git a/notebooks/1_Generate_data.ipynb b/notebooks/1_Generate_data.ipynb
index f685cc1..78da83c 100644
--- a/notebooks/1_Generate_data.ipynb
+++ b/notebooks/1_Generate_data.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {
"is_executing": true
},
@@ -29,9 +29,8 @@
"from pathlib import Path\n",
"from typing import Dict, List\n",
"\n",
- "import numpy as np\n",
"import pandas as pd\n",
- "import tqdm\n",
+ "import numpy as np\n",
"\n",
"from presidio_evaluator import InputSample\n",
"from presidio_evaluator.data_generator import PresidioSentenceFaker"
@@ -70,11 +69,43 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {
"is_executing": true
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Using default entity providers\n",
+ "Using default entity mapping between the entities in the templates and the ones in the output dataset\n",
+ "Using default provider aliases\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Sampling: 100%|██████████| 10/10 [00:00<00:00, 3959.88it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Please send it to {{STREET_ADDRESS}}\n",
+ "[Span(type: address, value: the corner of Καλαμπάκα 33 and Stefan Land, char_span: [18: 60])]\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
"source": [
"sentence_templates = [\n",
" \"My name is {{name}}\",\n",
@@ -83,7 +114,9 @@
"]\n",
"\n",
"\n",
- "sentence_faker = PresidioSentenceFaker('en_US', lower_case_ratio=0.05, sentence_templates=sentence_templates)\n",
+ "sentence_faker = PresidioSentenceFaker('en_US', \n",
+ " lower_case_ratio=0.05, \n",
+ " sentence_templates=sentence_templates)\n",
"fake_sentence_results = sentence_faker.generate_new_fake_sentences(10)\n",
"\n",
"# Print the spans of the first sample\n",
@@ -103,7 +136,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {
"is_executing": true,
"scrolled": true
@@ -138,18 +171,242 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Using default entity providers\n",
+ "Using default entity mapping between the entities in the templates and the ones in the output dataset\n",
+ "Using default provider aliases\n"
+ ]
+ }
+ ],
"source": [
"sentence_faker = PresidioSentenceFaker('en_US', lower_case_ratio=0.05)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " number | \n",
+ " gender | \n",
+ " nationality | \n",
+ " prefix | \n",
+ " first_name | \n",
+ " middle_initial | \n",
+ " last_name | \n",
+ " street_name | \n",
+ " city | \n",
+ " state_abbr | \n",
+ " ... | \n",
+ " company | \n",
+ " domain_name | \n",
+ " person | \n",
+ " name | \n",
+ " first_name_female | \n",
+ " first_name_male | \n",
+ " prefix_female | \n",
+ " prefix_male | \n",
+ " last_name_female | \n",
+ " last_name_male | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " female | \n",
+ " Czech | \n",
+ " Mrs. | \n",
+ " Marie | \n",
+ " J | \n",
+ " Hamanová | \n",
+ " P.O. Box 255 | \n",
+ " Kangerlussuaq | \n",
+ " QE | \n",
+ " ... | \n",
+ " Simple Solutions | \n",
+ " MarathonDancing.gl | \n",
+ " Marie Hamanová | \n",
+ " Marie Hamanová | \n",
+ " Marie | \n",
+ " | \n",
+ " Mrs. | \n",
+ " | \n",
+ " Hamanová | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " female | \n",
+ " French | \n",
+ " Ms. | \n",
+ " Patricia | \n",
+ " G | \n",
+ " Desrosiers | \n",
+ " Avenida Noruega 42 | \n",
+ " Vila Real | \n",
+ " VR | \n",
+ " ... | \n",
+ " Formula Gray | \n",
+ " LostMillions.com.pt | \n",
+ " Patricia G. Desrosiers | \n",
+ " Patricia G. Desrosiers | \n",
+ " Patricia | \n",
+ " | \n",
+ " Ms. | \n",
+ " | \n",
+ " Desrosiers | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " female | \n",
+ " American | \n",
+ " Ms. | \n",
+ " Debra | \n",
+ " O | \n",
+ " Neal | \n",
+ " 1659 Hoog St | \n",
+ " Brakpan | \n",
+ " GA | \n",
+ " ... | \n",
+ " Dahlkemper's | \n",
+ " MediumTube.co.za | \n",
+ " Debra O. Neal | \n",
+ " Debra O. Neal | \n",
+ " Debra | \n",
+ " | \n",
+ " Ms. | \n",
+ " | \n",
+ " Neal | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " male | \n",
+ " French | \n",
+ " Mr. | \n",
+ " Peverell | \n",
+ " C | \n",
+ " Racine | \n",
+ " 183 Epimenidou Street | \n",
+ " Limassol | \n",
+ " LI | \n",
+ " ... | \n",
+ " Quickbiz | \n",
+ " ImproveLook.com.cy | \n",
+ " Peverell C. Racine | \n",
+ " Peverell C. Racine | \n",
+ " | \n",
+ " Peverell | \n",
+ " | \n",
+ " Mr. | \n",
+ " | \n",
+ " Racine | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " female | \n",
+ " Slovenian | \n",
+ " Mrs. | \n",
+ " Iolanda | \n",
+ " S | \n",
+ " Tratnik | \n",
+ " Karu põik 61 | \n",
+ " Pärnu | \n",
+ " PR | \n",
+ " ... | \n",
+ " Dubrow's Cafeteria | \n",
+ " PostTan.com.ee | \n",
+ " Iolanda S. Tratnik | \n",
+ " Iolanda S. Tratnik | \n",
+ " Iolanda | \n",
+ " | \n",
+ " Mrs. | \n",
+ " | \n",
+ " Tratnik | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 37 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " number gender nationality prefix first_name middle_initial last_name \\\n",
+ "0 1 female Czech Mrs. Marie J Hamanová \n",
+ "1 2 female French Ms. Patricia G Desrosiers \n",
+ "2 3 female American Ms. Debra O Neal \n",
+ "3 4 male French Mr. Peverell C Racine \n",
+ "4 5 female Slovenian Mrs. Iolanda S Tratnik \n",
+ "\n",
+ " street_name city state_abbr ... company \\\n",
+ "0 P.O. Box 255 Kangerlussuaq QE ... Simple Solutions \n",
+ "1 Avenida Noruega 42 Vila Real VR ... Formula Gray \n",
+ "2 1659 Hoog St Brakpan GA ... Dahlkemper's \n",
+ "3 183 Epimenidou Street Limassol LI ... Quickbiz \n",
+ "4 Karu põik 61 Pärnu PR ... Dubrow's Cafeteria \n",
+ "\n",
+ " domain_name person name \\\n",
+ "0 MarathonDancing.gl Marie Hamanová Marie Hamanová \n",
+ "1 LostMillions.com.pt Patricia G. Desrosiers Patricia G. Desrosiers \n",
+ "2 MediumTube.co.za Debra O. Neal Debra O. Neal \n",
+ "3 ImproveLook.com.cy Peverell C. Racine Peverell C. Racine \n",
+ "4 PostTan.com.ee Iolanda S. Tratnik Iolanda S. Tratnik \n",
+ "\n",
+ " first_name_female first_name_male prefix_female prefix_male \\\n",
+ "0 Marie Mrs. \n",
+ "1 Patricia Ms. \n",
+ "2 Debra Ms. \n",
+ "3 Peverell Mr. \n",
+ "4 Iolanda Mrs. \n",
+ "\n",
+ " last_name_female last_name_male \n",
+ "0 Hamanová \n",
+ "1 Desrosiers \n",
+ "2 Neal \n",
+ "3 Racine \n",
+ "4 Tratnik \n",
+ "\n",
+ "[5 rows x 37 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"pd.DataFrame(sentence_faker._sentence_faker.records).head()"
]
@@ -159,16 +416,53 @@
"metadata": {},
"source": [
"`PresidioSentenceFaker` adds additional providers by default, which are not included in the Faker package.\n",
- "These can be found in `presidio_evaluator.data_generator.faker_extensions.providers`"
+ "These can be found in `presidio_evaluator.data_generator.faker_extensions.providers`\n",
+ "\n",
+ "It is possible to create providers for additional entity types by extending Faker's `BaseProvider` class, \n",
+ "and calling `add_provider` on the `PresidioSentenceFaker` instance.\n",
+ "For example:"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import random\n",
+ "from faker.providers import BaseProvider\n",
+ "\n",
+ "class MarsIdProvider(BaseProvider):\n",
+ " def mars_id(self):\n",
+ " # Generate a random row number between 1 and 50\n",
+ " row = random.randint(1, 50)\n",
+ " # Generate a random letter for the seat location from A-K\n",
+ " location = random.choice('ABCDEFGHIJK')\n",
+ " # Return the seat in the format \"row-letter\" (e.g., \"25A\")\n",
+ " return f\"{row}{location}\"\n",
+ "\n",
+ "sentence_faker.add_provider(MarsIdProvider)\n",
+ "# Now a new `mars_id` entity can be generated if a template has `mars_id` in it.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
"metadata": {
"is_executing": true
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "presidio_evaluator.data_generator.faker_extensions.providers.ReligionProvider"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from presidio_evaluator.data_generator.faker_extensions.providers import *\n",
"\n",
@@ -196,17 +490,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {
"is_executing": true,
"pycharm": {
"name": "#%%\n"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[('name', 'person'),\n",
+ " ('credit_card_number', 'credit_card'),\n",
+ " ('date_of_birth', 'birthday')]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Create entity aliases (e.g. if your provider supports \"name\" but templates contain \"person\").\n",
- "PresidioSentenceFaker.PROVIDER_ALIASES"
+ "provider_aliases = PresidioSentenceFaker.PROVIDER_ALIASES\n",
+ "provider_aliases\n",
+ "\n",
+ "# To customize, call `PresidioSentenceFaker(locale=\"en_US\",...,provider_aliases=provider_aliases)`"
]
},
{
@@ -222,9 +532,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Sampling: 100%|██████████| 1500/1500 [00:00<00:00, 13821.21it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Full text: The title refers to Riddersporen 1 street in STAVANGER. It was on this street that many of the clubs where Metallica first played were situated. \"Battery is found in me\" shows that these early shows on Everardus Mountains Street were important to them. Battery is where \"lunacy finds you\" and you \"smash through the boundaries.\"\n",
+ "Spans: [Span(type: street_name, value: Everardus Mountains, char_span: [202: 221]), Span(type: city, value: STAVANGER, char_span: [45: 54]), Span(type: street_name, value: Riddersporen 1, char_span: [20: 34])]\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
"source": [
"fake_records = sentence_faker.generate_new_fake_sentences(num_samples=number_of_samples)\n",
"pprint.pprint(fake_records[0])"
@@ -239,12 +573,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {
"is_executing": true,
"scrolled": true
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total: 1500\n",
+ "Avg # of records per template: 7.142857142857143\n",
+ "Median # of records per template: 7.0\n",
+ "Std: 2.6812526263406258\n"
+ ]
+ }
+ ],
"source": [
"count_per_template_id = Counter([sample.template_id for sample in fake_records])\n",
"\n",
@@ -267,14 +612,41 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {
"is_executing": true,
"pycharm": {
"name": "#%%\n"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Counter({'PERSON': 874,\n",
+ " 'STREET_ADDRESS': 609,\n",
+ " 'GPE': 442,\n",
+ " 'ORGANIZATION': 253,\n",
+ " 'CREDIT_CARD': 131,\n",
+ " 'PHONE_NUMBER': 117,\n",
+ " 'DATE_TIME': 106,\n",
+ " 'TITLE': 91,\n",
+ " 'AGE': 79,\n",
+ " 'NRP': 66,\n",
+ " 'ZIP_CODE': 42,\n",
+ " 'EMAIL_ADDRESS': 33,\n",
+ " 'DOMAIN_NAME': 30,\n",
+ " 'IBAN_CODE': 26,\n",
+ " 'IP_ADDRESS': 18,\n",
+ " 'US_SSN': 18,\n",
+ " 'US_DRIVER_LICENSE': 9})"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"count_per_entity = Counter()\n",
"for record in fake_records:\n",
@@ -285,33 +657,60 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
- "outputs": [],
- "source": [
- "import json\n",
- "import dataclasses\n",
- "def get_json(result) -> str:\n",
- " spans_dict = json.dumps([dataclasses.asdict(span) for span in result.spans])\n",
- " return dict(fake=result.fake, spans=spans_dict, template=result.template, template_id=result.template_id)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "is_executing": true
- },
- "outputs": [],
- "source": [
- "len(fake_records)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Full text: The title refers to Riddersporen 1 street in STAVANGER. It was on this street that many of the clubs where Metallica first played were situated. \"Battery is found in me\" shows that these early shows on Everardus Mountains Street were important to them. Battery is where \"lunacy finds you\" and you \"smash through the boundaries.\"\n",
+ "Spans: [Span(type: street_name, value: Everardus Mountains, char_span: [202: 221]), Span(type: city, value: STAVANGER, char_span: [45: 54]), Span(type: street_name, value: Riddersporen 1, char_span: [20: 34])]\n",
+ "\n",
+ "Full text: The Ilta T Ryhänen version recorded for Weatherford International Inc became the first celebrity recording by a classical musician to sell one million copies. The song was awarded the seventh gold disc ever granted.\n",
+ "Spans: [Span(type: organization, value: Weatherford International Inc, char_span: [40: 69]), Span(type: person, value: Ilta T Ryhänen, char_span: [4: 18])]\n",
+ "\n",
+ "Full text: We'll meet Monday at JAPAN PULP AND PAPER COMPANY LIMITED, 5931 84 Cassinia Street, GUNDAGAI\n",
+ "Spans: [Span(type: city, value: GUNDAGAI, char_span: [84: 92]), Span(type: street_name, value: 84 Cassinia Street, char_span: [64: 82]), Span(type: building_number, value: 5931, char_span: [59: 63]), Span(type: organization, value: JAPAN PULP AND PAPER COMPANY LIMITED, char_span: [21: 57]), Span(type: day_of_week, value: Monday, char_span: [11: 17])]\n",
+ "\n",
+ "Full text: Can someone call me on 0377 7151585? I have some questions about opening an account.\n",
+ "Spans: [Span(type: phone_number, value: 0377 7151585, char_span: [23: 35])]\n",
+ "\n",
+ "Full text: Leena R Filppula\\nTelephone and Data Systems Inc.\\nServidão Fernando Albrecht 673 Szemere Radial\n",
+ " Suite 538\n",
+ " Joinville\n",
+ " Brazil 27518\\n032 627 37 30 office\\n(07700)331659 fax\\n+41 47 717 21 68 mobile\\n\n",
+ "Spans: [Span(type: phone_number, value: +41 47 717 21 68, char_span: [175: 191]), Span(type: phone_number, value: (07700)331659, char_span: [156: 169]), Span(type: phone_number, value: 032 627 37 30, char_span: [134: 147]), Span(type: address, value: Servidão Fernando Albrecht 673 Szemere Radial\n",
+ " Suite 538\n",
+ " Joinville\n",
+ " Brazil 27518, char_span: [51: 132]), Span(type: organization, value: Telephone and Data Systems Inc., char_span: [18: 49]), Span(type: name, value: Leena R Filppula, char_span: [0: 16])]\n",
+ "\n",
+ "Full text: Bot: Where would you like this to be sent to? User: 11129 Rua Forno 76\n",
+ " Suite 599\n",
+ " Quinta do Passadouro de Cima\n",
+ " Portugal 66984\n",
+ "Spans: [Span(type: address, value: 11129 Rua Forno 76\n",
+ " Suite 599\n",
+ " Quinta do Passadouro de Cima\n",
+ " Portugal 66984, char_span: [52: 127])]\n",
+ "\n",
+ "Full text: One of the most depressing songs on the list. He's injured from the waist down from Spain, but Alexander just has to get laid. Don't go to town, Christopher!\n",
+ "Spans: [Span(type: first_name, value: Christopher, char_span: [145: 156]), Span(type: first_name, value: Alexander, char_span: [95: 104]), Span(type: country, value: Spain, char_span: [84: 89])]\n",
+ "\n",
+ "Full text: Our offices are located at Romina and Müürivahe 27\n",
+ "Spans: [Span(type: address, value: Romina and Müürivahe 27, char_span: [27: 50])]\n",
+ "\n",
+ "Full text: Meet me at Unit 8161 Box 6817\n",
+ "DPO AE 26241\n",
+ "Spans: [Span(type: address, value: Unit 8161 Box 6817\n",
+ "DPO AE 26241, char_span: [11: 42])]\n",
+ "\n",
+ "Full text: How do I open my credit card statement?\n",
+ "Spans: []\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"for record in fake_records[:10]:\n",
" print(record)"
@@ -330,7 +729,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {
"is_executing": true,
"pycharm": {
@@ -344,9 +743,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'../data/generated_size_1500_date_January_06_2025.json'"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"output_file"
]
@@ -364,30 +774,204 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {
"is_executing": true,
"pycharm": {
"name": "#%%\n"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 0%| | 0/1500 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading model en_core_web_sm\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1500/1500 [00:03<00:00, 386.94it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " text | \n",
+ " pos | \n",
+ " tag | \n",
+ " template_id | \n",
+ " label | \n",
+ " sentence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " The | \n",
+ " DET | \n",
+ " DT | \n",
+ " 110 | \n",
+ " O | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " title | \n",
+ " NOUN | \n",
+ " NN | \n",
+ " 110 | \n",
+ " O | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " refers | \n",
+ " VERB | \n",
+ " VBZ | \n",
+ " 110 | \n",
+ " O | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " to | \n",
+ " ADP | \n",
+ " IN | \n",
+ " 110 | \n",
+ " O | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Riddersporen | \n",
+ " PROPN | \n",
+ " NNP | \n",
+ " 110 | \n",
+ " B-street_name | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 1 | \n",
+ " NUM | \n",
+ " CD | \n",
+ " 110 | \n",
+ " I-street_name | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " street | \n",
+ " NOUN | \n",
+ " NN | \n",
+ " 110 | \n",
+ " O | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " in | \n",
+ " ADP | \n",
+ " IN | \n",
+ " 110 | \n",
+ " O | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " STAVANGER | \n",
+ " PROPN | \n",
+ " NNP | \n",
+ " 110 | \n",
+ " B-city | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " . | \n",
+ " PUNCT | \n",
+ " . | \n",
+ " 110 | \n",
+ " O | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " text pos tag template_id label sentence\n",
+ "0 The DET DT 110 O 0\n",
+ "1 title NOUN NN 110 O 0\n",
+ "2 refers VERB VBZ 110 O 0\n",
+ "3 to ADP IN 110 O 0\n",
+ "4 Riddersporen PROPN NNP 110 B-street_name 0\n",
+ "5 1 NUM CD 110 I-street_name 0\n",
+ "6 street NOUN NN 110 O 0\n",
+ "7 in ADP IN 110 O 0\n",
+ "8 STAVANGER PROPN NNP 110 B-city 0\n",
+ "9 . PUNCT . 110 O 0"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "conll = InputSample.create_conll_dataset(fake_records)"
+ "conll = InputSample.create_conll_dataset(dataset=fake_records)\n",
+ "conll.head(10)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {
"is_executing": true,
"pycharm": {
"name": "#%%\n"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CoNLL2003 dataset structure output location: ../data/generated_size_1500_date_January_06_2025.tsv\n"
+ ]
+ }
+ ],
"source": [
- "conll.to_csv(output_conll, sep=\"\\t\")"
+ "conll.to_csv(output_conll, sep=\"\\t\")\n",
+ "print(f\"CoNLL2003 dataset structure output location: {output_conll}\")"
]
},
{
@@ -396,7 +980,7 @@
"source": [
"### Next steps\n",
"\n",
- "- Evaluate Presidio using this fake data: [Sample](4_Evaluate_Presidio_Analyzer.ipynb)\n",
+ "- Evaluate Presidio using fake data: [Sample](4_Evaluate_Presidio_Analyzer.ipynb)\n",
"- Split to train/test/validation while ensuring sentences originiating from the same template are all on the same subset: [Sample](3_Split_by_pattern_#.ipynb)\n",
"- Conduct a small exploratory data analysis on the generated data: [Sample](2_PII_EDA.ipynb)"
]
@@ -417,9 +1001,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "presidio",
+ "display_name": "presidio-research",
"language": "python",
- "name": "python3"
+ "name": "presidio_research"
},
"language_info": {
"codemirror_mode": {
@@ -431,7 +1015,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.15"
+ "version": "3.9.6"
}
},
"nbformat": 4,
diff --git a/presidio_evaluator/data_generator/faker_extensions/sentences.py b/presidio_evaluator/data_generator/faker_extensions/sentences.py
index fee59bc..087052c 100644
--- a/presidio_evaluator/data_generator/faker_extensions/sentences.py
+++ b/presidio_evaluator/data_generator/faker_extensions/sentences.py
@@ -195,9 +195,9 @@ def parse(
return fake_pattern
except Exception as err:
raise AttributeError(
- f'Failed to generate fake data based on template "{template}".'
- f"You might need to add a new Faker provider! "
- f"{err}"
+ f'Failed to generate fake data based on template "{template}". '
+ f"Add a new Faker provider or create an alias "
+ f"for the entity name. {err}"
)
@staticmethod
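Reviewer note: the reworded error now names both remedies. A hedged sketch of how it can be triggered and fixed (the `{{employee_id}}` entity is a made-up example with no Faker provider):

```python
from presidio_evaluator.data_generator import PresidioSentenceFaker

faker = PresidioSentenceFaker(
    "en_US",
    lower_case_ratio=0.0,
    sentence_templates=["Badge number: {{employee_id}}"],
)
try:
    faker.generate_new_fake_sentences(1)
except AttributeError as err:
    print(err)  # ...Add a new Faker provider or create an alias for the entity name...

# Remedy: alias an existing Faker provider to the template's entity name.
# Without an entity_type_mapping entry, the new warning path will map the
# span type to EMPLOYEE_ID in the output dataset.
faker.add_provider_alias(provider_name="ssn", new_name="employee_id")
faker.generate_new_fake_sentences(1)
```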
diff --git a/presidio_evaluator/data_generator/presidio_sentence_faker.py b/presidio_evaluator/data_generator/presidio_sentence_faker.py
index 7a85cae..abc3205 100644
--- a/presidio_evaluator/data_generator/presidio_sentence_faker.py
+++ b/presidio_evaluator/data_generator/presidio_sentence_faker.py
@@ -1,7 +1,8 @@
import json
import random
from pathlib import Path
-from typing import List, Optional, Union, Dict
+from typing import List, Optional, Tuple, Union, Dict
+import re
import numpy as np
import pandas as pd
@@ -52,12 +53,17 @@ class PresidioSentenceFaker:
:param: entity_providers: Defaults to presidio_additional_entity_providers, a provided argument overrides this
:param: base_records: A DataFrame with entity types as columns and each row corresponding to a fake individual.
Defaults to presidio_evaluator.data_generator.faker_extensions.datasets.load_fake_person_df()
+ :param: entity_type_mapping: A dictionary mapping entity types to Presidio entity types
+    :param: provider_aliases: A list of (provider_name, alias) tuples, adding each provider under an additional entity name.
+ Useful if the templates contain a different name for the entity type than the one supported by Faker or PresidioSentenceFaker.
:param: random_seed: A seed to make results reproducible between runs
"""
- PROVIDER_ALIASES = dict(
- name="person", credit_card_number="credit_card", date_of_birth="birthday"
- )
+ PROVIDER_ALIASES = [
+ ("name", "person"),
+ ("credit_card_number", "credit_card"),
+ ("date_of_birth", "birthday"),
+ ]
ENTITY_TYPE_MAPPING = dict(
person="PERSON",
ip_address="IP_ADDRESS",
@@ -111,6 +117,8 @@ def __init__(
sentence_templates: Optional[List[str]] = None,
entity_providers: Optional[List[BaseProvider]] = None,
base_records: Optional[Union[pd.DataFrame, List[Dict]]] = None,
+ entity_type_mapping: Optional[Dict[str, str]] = None,
+ provider_aliases: Optional[List[Tuple[str, str]]] = None,
random_seed: Optional[SeedType] = None,
):
self._sentence_templates = sentence_templates
@@ -120,6 +128,7 @@ def __init__(
for line in presidio_templates_file_path.read_text().splitlines()
]
if entity_providers is None:
+ print("Using default entity providers")
entity_providers = presidio_additional_entity_providers
if base_records is None:
base_records = load_fake_person_df()
@@ -131,33 +140,101 @@ def __init__(
self._sentence_faker.add_provider(entity_provider)
self.seed(random_seed)
- for provider, alias in self.PROVIDER_ALIASES.items():
+
+ if not entity_type_mapping:
+ print(
+ "Using default entity mapping between the entities "
+ "in the templates and the ones in the output dataset"
+ )
+ entity_type_mapping = self.ENTITY_TYPE_MAPPING
+
+ self._entity_type_mapping = entity_type_mapping
+
+ if not provider_aliases:
+ print("Using default provider aliases")
+ provider_aliases = self.PROVIDER_ALIASES
+
+ for provider, alias in provider_aliases:
self._sentence_faker.add_provider_alias(
provider_name=provider, new_name=alias
)
self.fake_sentence_results = None
def generate_new_fake_sentences(self, num_samples: int) -> List[InputSample]:
+ """Generate fake sentences based on the templates, input data and entity providers."""
self.fake_sentence_results = []
# Map faker generated entity types to Presidio entity types
for _ in tqdm(range(num_samples), desc="Sampling"):
template_id = random.choice(range(len(self._sentence_templates)))
template = self._sentence_templates[template_id]
+ template = self._preprocess_template(template)
fake_sentence_result = self._sentence_faker.parse(template, template_id)
for span in fake_sentence_result.spans:
- span.type = self.ENTITY_TYPE_MAPPING[span.type]
- for key, value in self.ENTITY_TYPE_MAPPING.items():
+ if span.type in self._entity_type_mapping.keys():
+ # Use the mapped entity type if exists
+ span.type = self._entity_type_mapping[span.type]
+ else:
+ # Otherwise, capitalize the entity type and add to the mapping
+ print(
+ f"Warning: Non-mapped entity type found: {span.type}. "
+ f"Non-mapped entities will be mapped to {span.type.upper()} "
+ f"in the output dataset. If you prefer a different mapping, "
+ f"pass the `entity_type_mapping` argument with a mapping for this entity type."
+ )
+ self._entity_type_mapping[span.type] = span.type.upper()
+ for key, value in self._entity_type_mapping.items():
fake_sentence_result.masked = fake_sentence_result.masked.replace(
"{{%s}}" % key, "{{%s}}" % value
)
self.fake_sentence_results.append(fake_sentence_result)
return self.fake_sentence_results
- def seed(self, seed_value=42):
+ @staticmethod
+ def seed(seed_value=42) -> None:
+ """Seed the faker and random modules for reproducibility."""
Faker.seed(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)
+ def add_provider(self, provider: BaseProvider) -> None:
+ """
+ Add a provider to the sentence faker
+ :param provider: A faker provider inheriting from BaseProvider
+ """
+ self._sentence_faker.add_provider(provider)
+
+ def add_provider_alias(self, provider_name: str, new_name: str) -> None:
+ """
+ Adds a copy of a provider, with a different name
+ :param provider_name: Name of original provider
+ :param new_name: New name
+ :example:
+    >>> self.add_provider_alias(provider_name="name", new_name="person")
+    >>> self.person()
+ """
+ self._sentence_faker.add_provider_alias(
+ provider_name=provider_name, new_name=new_name
+ )
+
+ def add_entity_type_mapping(
+ self, input_entity_type: str, output_entity_type: str
+ ) -> None:
+ self._entity_type_mapping[input_entity_type] = output_entity_type
+
+ @staticmethod
+ def _preprocess_template(template: str) -> str:
+ """Lowercase the entity names within double curly braces in the template, and replace < and > with {{ and }}.""" # noqa: E501
+
+ def lowercase_within_braces(s):
+ return re.sub(
+ r"{{(.*?)}}", lambda match: f"{{{{{match.group(1).lower()}}}}}", s
+ )
+
+ template = template.replace("<", "{{").replace(">", "}}")
+ template = lowercase_within_braces(template)
+
+ return template
+
if __name__ == "__main__":
sentence_faker = PresidioSentenceFaker(
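Reviewer note: the new `entity_type_mapping` and `provider_aliases` arguments make the previously hard-coded class constants overridable per instance. A hedged sketch (the `student` entity is invented for illustration):

```python
from presidio_evaluator.data_generator import PresidioSentenceFaker

# PROVIDER_ALIASES is now a list of (provider_name, alias) tuples
aliases = list(PresidioSentenceFaker.PROVIDER_ALIASES)
aliases.append(("name", "student"))  # let {{student}} draw from Faker's name()

# Map the custom template entity onto a Presidio entity type,
# avoiding the non-mapped-entity warning added in this PR
mapping = dict(PresidioSentenceFaker.ENTITY_TYPE_MAPPING)
mapping["student"] = "PERSON"

faker = PresidioSentenceFaker(
    "en_US",
    lower_case_ratio=0.05,
    sentence_templates=["{{student}} handed in the assignment"],
    provider_aliases=aliases,
    entity_type_mapping=mapping,
)
print(faker.generate_new_fake_sentences(3)[0])
```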
diff --git a/presidio_evaluator/data_objects.py b/presidio_evaluator/data_objects.py
index 9e52601..0dd7d2f 100644
--- a/presidio_evaluator/data_objects.py
+++ b/presidio_evaluator/data_objects.py
@@ -179,7 +179,13 @@ def from_json(cls, data, **kwargs):
data["spans"] = [Span.from_json(span) for span in data["spans"]]
return cls(**data, create_tags_from_span=True, **kwargs)
- def get_tags(self, scheme="IOB", model_version="en_core_web_sm"):
+ def get_tags(self, scheme: str = "IOB", model_version: str = "en_core_web_sm"):
+ """Extract the tokens and tags from the spans.
+
+    :param scheme: IO, BIO/IOB or BILUO
+ :param model_version: The name of the spaCy model to use for tokenization
+ """
+
start_indices = [span.start_position for span in self.spans]
end_indices = [span.end_position for span in self.spans]
tags = [span.entity_type for span in self.spans]
@@ -192,19 +198,27 @@ def get_tags(self, scheme="IOB", model_version="en_core_web_sm"):
starts=start_indices,
ends=end_indices,
tokens=tokens,
+ token_model_version=model_version,
)
return tokens, labels
- def to_conll(self, translate_tags: bool) -> List[Dict[str, Any]]:
+ def to_conll(
+ self, translate_tags: bool, tokenizer: str = "en_core_web_sm"
+ ) -> List[Dict[str, Any]]:
"""
Turns a list of InputSample objects to a dictionary
containing text, pos, tag, template_id and label.
:param translate_tags: Whether to translate tags using the PRESIDIO_SPACY_ENTITIES dictionary
+ :param tokenizer: The name of the spaCy model to use for tokenization
:return: Dict
"""
conll = []
+
+ if len(self.tokens) == 0:
+ self.tokens, self.tags = self.get_tags(model_version=tokenizer)
+
for i, token in enumerate(self.tokens):
if translate_tags:
label = self.translate_tag(
@@ -233,7 +247,7 @@ def create_conll_dataset(
dataset: List["InputSample"],
translate_tags=False,
to_bio=True,
- token_model_version="en_core_web_sm",
+ tokenizer: str = "en_core_web_sm",
) -> pd.DataFrame:
if len(dataset) <= 1:
raise ValueError("Dataset should contain multiple records")
@@ -243,7 +257,7 @@ def create_conll_dataset(
for sample in tqdm(dataset):
if to_bio:
sample.biluo_to_bio()
- conll = sample.to_conll(translate_tags=translate_tags)
+ conll = sample.to_conll(translate_tags=translate_tags, tokenizer=tokenizer)
for token in conll:
token["sentence"] = i
conlls.append(token)
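Reviewer note: with the rename to `tokenizer` and the lazy tokenization added to `to_conll`, samples loaded without precomputed tokens can still be exported. A minimal sketch (assuming `fake_records` is a list of `InputSample`, as in the notebook above):

```python
from presidio_evaluator import InputSample

conll = InputSample.create_conll_dataset(
    dataset=fake_records,        # list of InputSample
    translate_tags=False,
    tokenizer="en_core_web_sm",  # spaCy model used when samples lack tokens
)
print(conll.head(10))
```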
diff --git a/pyproject.toml b/pyproject.toml
index 99cd13a..eca721e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "presidio_evaluator"
-version = "0.2.0"
+version = "0.2.1"
description = ""
authors = ["Microsoft"]
readme = "README.md"
diff --git a/tests/test_presidio_sentence_faker.py b/tests/test_presidio_sentence_faker.py
index 206d5fd..34d2d9e 100644
--- a/tests/test_presidio_sentence_faker.py
+++ b/tests/test_presidio_sentence_faker.py
@@ -24,13 +24,13 @@ def test_generate_new_fake_sentences(num_sentences: int):
expected_providers = deepcopy(default_faker_providers)
expected_providers.extend(presidio_providers)
- expected_providers.extend([standard_faker.__getattr__(key)
- for key in PresidioSentenceFaker.PROVIDER_ALIASES.keys()])
+ expected_providers.extend([standard_faker.__getattr__(alias[0])
+ for alias in PresidioSentenceFaker.PROVIDER_ALIASES])
actual_providers = sentence_faker._sentence_faker.providers
num_aliases = len(PresidioSentenceFaker.PROVIDER_ALIASES)
actual_num_providers = len(actual_providers)
- expected_aliases = set(getattr(standard_faker, provider_name)
- for provider_name in PresidioSentenceFaker.PROVIDER_ALIASES.keys())
+ expected_aliases = set(getattr(standard_faker, provider_name[0])
+ for provider_name in PresidioSentenceFaker.PROVIDER_ALIASES)
assert actual_num_providers == len(expected_providers), \
f'Expected {len(presidio_providers)} presidio providers to be used and {num_aliases} aliases. ' \
f'Faker has been extended with {actual_num_providers - len(default_faker_providers)} providers/aliases. ' \
@@ -43,3 +43,14 @@ def test_generate_new_fake_sentences(num_sentences: int):
assert fake_sentence_result.full_text
assert fake_sentence_result.masked
assert fake_sentence_result.template_id >= 0
+
+
+@pytest.mark.parametrize("template_before, template_after", [
+ ("I just moved to {{CiTY}} from {{Country}}",
+ "I just moved to {{city}} from {{country}}"),
+ ("I just moved to from .",
+ "I just moved to {{city}} from {{country}}.")
+])
+def test_preprocess_template(template_before: str, template_after: str):
+ sentence_faker = PresidioSentenceFaker(locale='en', lower_case_ratio=0)
+ assert sentence_faker._preprocess_template(template_before) == template_after