Skip to content

Commit

Permalink
Prep for 0.1b3 release (#184)
Browse files Browse the repository at this point in the history
* Get tests working on Pandas 1.0.x

* Re-enable Feather example in intro notebook

* Rerun notebooks prior to release

* Rerun tutorial notebooks prior to release

* Update version number
  • Loading branch information
frreiss authored Apr 1, 2021
1 parent dd521f7 commit 844a525
Show file tree
Hide file tree
Showing 11 changed files with 216 additions and 163 deletions.
2 changes: 1 addition & 1 deletion notebooks/Analyze_Text.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@
{
"data": {
"text/plain": [
"<ibm_watson.natural_language_understanding_v1.NaturalLanguageUnderstandingV1 at 0x7fb258940150>"
"<ibm_watson.natural_language_understanding_v1.NaturalLanguageUnderstandingV1 at 0x7ff68869a510>"
]
},
"execution_count": 3,
Expand Down
116 changes: 58 additions & 58 deletions notebooks/Integrate_NLP_Libraries.ipynb

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions notebooks/Model_Training_with_BERT.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1643,7 +1643,7 @@
{
"data": {
"text/plain": [
"<text_extensions_for_pandas.array.tensor.TensorDtype at 0x7ff8d05e0290>"
"<text_extensions_for_pandas.array.tensor.TensorDtype at 0x7fb13131ee10>"
]
},
"execution_count": 13,
Expand Down Expand Up @@ -1996,7 +1996,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "061593082c6e43f8bbbdab066a447502",
"model_id": "9e44a9af650543e59d81dfd8d5baa4ed",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -2017,7 +2017,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d2e59e90113648dfb0a929c90ff7d1fb",
"model_id": "acc0673fceae466f94cc16ccfbd67fdd",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -2038,7 +2038,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "716b10c7c7a840048c1a780ff1723d84",
"model_id": "a0f64dff11304556ad22cd8df77954b7",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -3183,8 +3183,8 @@
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 9.1min remaining: 0.0s\n",
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 9.1min finished\n"
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 11.9min remaining: 0.0s\n",
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 11.9min finished\n"
]
},
{
Expand Down Expand Up @@ -5041,7 +5041,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0a775ec7ee9f42ccb4367432d97f6958",
"model_id": "fde70dc5306b41f09a4844106b127aa1",
"version_major": 2,
"version_minor": 0
},
Expand Down
94 changes: 80 additions & 14 deletions notebooks/Text_Extensions_for_Pandas_Overview.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1522,7 +1522,7 @@
" [4, 5],\n",
" [6, 7],\n",
" [8, 9]]),\n",
" <text_extensions_for_pandas.array.tensor.TensorDtype at 0x7fb2e82c5d10>)"
" <text_extensions_for_pandas.array.tensor.TensorDtype at 0x7fe6a86432d0>)"
]
},
"execution_count": 22,
Expand Down Expand Up @@ -1903,7 +1903,7 @@
" <tr>\n",
" <th>0</th>\n",
" <td>[0, 2): 'In'</td>\n",
" <td>[0, 1, 0, 0]</td>\n",
" <td>[0, 0, 1, 0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
Expand All @@ -1918,24 +1918,24 @@
" <tr>\n",
" <th>3</th>\n",
" <td>[11, 15): 'King'</td>\n",
" <td>[0, 0, 0, 1]</td>\n",
" <td>[0, 1, 0, 0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>[16, 22): 'Arthur'</td>\n",
" <td>[0, 1, 0, 0]</td>\n",
" <td>[0, 0, 1, 0]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" span features\n",
"0 [0, 2): 'In' [0, 1, 0, 0]\n",
"0 [0, 2): 'In' [0, 0, 1, 0]\n",
"1 [3, 5): 'AD' [0, 1, 0, 0]\n",
"2 [6, 9): '932' [0, 0, 0, 1]\n",
"3 [11, 15): 'King' [0, 0, 0, 1]\n",
"4 [16, 22): 'Arthur' [0, 1, 0, 0]"
"3 [11, 15): 'King' [0, 1, 0, 0]\n",
"4 [16, 22): 'Arthur' [0, 0, 1, 0]"
]
},
"execution_count": 32,
Expand All @@ -1958,22 +1958,88 @@
"# Save DataFrame to a feather file.\n",
"# Feather is a lightweight, fast binary columnar format, with basic\n",
"# compression and support built into Pandas.\n",
"\n",
"# TODO: Temporarily disabled while we revamp Feather support to handle multi-doc span arrays\n",
"#df.to_feather(\"outputs/tp_overview.feather\")"
"df.to_feather(\"outputs/tp_overview.feather\")"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>span</th>\n",
" <th>features</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>[0, 2): 'In'</td>\n",
" <td>[0, 0, 1, 0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>[3, 5): 'AD'</td>\n",
" <td>[0, 1, 0, 0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>[6, 9): '932'</td>\n",
" <td>[0, 0, 0, 1]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>[11, 15): 'King'</td>\n",
" <td>[0, 1, 0, 0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>[16, 22): 'Arthur'</td>\n",
" <td>[0, 0, 1, 0]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" span features\n",
"0 [0, 2): 'In' [0, 0, 1, 0]\n",
"1 [3, 5): 'AD' [0, 1, 0, 0]\n",
"2 [6, 9): '932' [0, 0, 0, 1]\n",
"3 [11, 15): 'King' [0, 1, 0, 0]\n",
"4 [16, 22): 'Arthur' [0, 0, 1, 0]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the file back into a new DataFrame.\n",
"\n",
"# TODO: Temporarily disabled while we revamp Feather support to handle multi-doc span arrays\n",
"#df_load = pd.read_feather(\"outputs/tp_overview.feather\")\n",
"#df_load.head()"
"df_load = pd.read_feather(\"outputs/tp_overview.feather\")\n",
"df_load.head()"
]
},
{
Expand Down
12 changes: 1 addition & 11 deletions notebooks/Understand_Tables.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

setuptools.setup(
name="text_extensions_for_pandas",
version="0.1b2",
version="0.1b3",
author="IBM",
author_email="[email protected]",
description="Natural language processing support for Pandas dataframes.",
Expand Down
11 changes: 4 additions & 7 deletions text_extensions_for_pandas/array/test_token_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,8 @@ def data_for_grouping(dtype):
return pd.array([b, b, na, na, a, a, b, c], dtype=dtype)


# Can't import due to dependencies, taken from pandas.conftest import all_compare_operators
# Can't import due to dependencies, taken
# from pandas.conftest import all_compare_operators
@pytest.fixture(params=["__eq__", "__ne__", "__lt__", "__gt__", "__le__", "__ge__"])
def all_compare_operators(request):
return request.param
Expand Down Expand Up @@ -552,14 +553,10 @@ class TestPandasConstructors(base.BaseConstructorsTests):
def test_series_constructor_no_data_with_index(self, dtype, na_value):
pass

@pytest.mark.skipif(pd.__version__.startswith("1.0"),
reason="Test added in Pandas 1.1.0")
def test_construct_empty_dataframe(self, dtype):
super().test_construct_empty_dataframe(dtype)
# try:
# with pytest.raises(TypeError, match="Expected SpanArray as tokens"):
# super().test_construct_empty_dataframe(dtype)
# except AttributeError:
# # Test added in Pandas 1.1.0, ignore for earlier versions
# pass


class TestPandasGetitem(base.BaseGetitemTests):
Expand Down
8 changes: 4 additions & 4 deletions tutorials/corpus/CoNLL_2.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3730,7 +3730,7 @@
"\n",
"<div id=\"spanArray\">\n",
" <div id=\"spans\" \n",
" style=\"background-color:#F0F0F0; border: 1px solid #E0E0E0; float:left; padding:10px;\">\n",
" style=\"color: var(--jp-layout-color2); border: 1px solid var(--jp-border-color0); float:left; padding:10px;\">\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
Expand Down Expand Up @@ -3899,11 +3899,11 @@
"</table>\n",
" </div>\n",
" <div id=\"text\"\n",
" style=\"float:right; background-color:#F5F5F5; border: 1px solid #E0E0E0; width: 60%;\">\n",
" style=\"float:right; border: 1px solid var(--jp-border-color0); width: 60%;\">\n",
"\n",
" <div style=\"float:center; padding:10px\">\n",
" <p style=\"font-family:monospace\">\n",
" -DOCSTART-<br><span style=\"background-color:yellow\">Belgian</span> police smash major drugs rings, 30 arrested.<br><span style=\"background-color:yellow\">BRUSSELS</span> 1996-12-06<br>Police smashed two drugs smuggling rings and arrested 30 people after a taxidriver in <span style=\"background-color:yellow\">Spain</span> alerted them to a suitcase of heroin left in his cab, <span style=\"background-color:yellow\">Belgian</span> police said on Friday.<br>Police seized dozens of kilos of heroin with a street value of hundreds of millions of <span style=\"background-color:yellow\">Belgian</span> francs, a public prosecutor&#39;s office spokesman in the port city of <span style=\"background-color:yellow\">Antwerp</span> said.<br>He said a 24-year-old <span style=\"background-color:yellow\">Belgian</span> woman left a suitcase containing 13 kg (29 lb) of heroin in a taxi in <span style=\"background-color:yellow\">Barcelona</span>.<br>The taxidriver alerted police who arrested a 33-year-old <span style=\"background-color:yellow\">Turkish</span> man when he came to pick up the suitcase at a lost luggage office.<br>The woman was later arrested in <span style=\"background-color:yellow\">Belgium</span>.<br>She and the <span style=\"background-color:yellow\">Turkish</span> man smuggled heroin from <span style=\"background-color:yellow\">Turkey</span> to <span style=\"background-color:yellow\">Antwerp</span> from where it was taken to <span style=\"background-color:yellow\">Spain</span>, <span style=\"background-color:yellow\">France</span> and <span style=\"background-color:yellow\">Germany</span> by others, the spokesman said.<br>He said 14 people were arrested in <span style=\"background-color:yellow\">Belgium</span> and 16 others in other <span style=\"background-color:yellow\">European</span> nations after an investigation lasting nearly a year.<br>(<span>&#36;</span>1=32.14 <span style=\"background-color:yellow\">Belgian</span> Franc)\n",
" <p style=\"font-family:var(--jp-code-font-family); font-size:var(--jp-code-font-size)\">\n",
" -DOCSTART-<br><span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> police smash major drugs rings, 30 arrested.<br><span style=\"background-color:rgba(255, 215, 0, 0.5)\">BRUSSELS</span> 1996-12-06<br>Police smashed two drugs smuggling rings and arrested 30 people after a taxidriver in <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Spain</span> alerted them to a suitcase of heroin left in his cab, <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> police said on Friday.<br>Police seized dozens of kilos of heroin with a street value of hundreds of millions of <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> francs, a public prosecutor&#39;s office spokesman in the port city of <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Antwerp</span> said.<br>He said a 24-year-old <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> woman left a suitcase containing 13 kg (29 lb) of heroin in a taxi in <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Barcelona</span>.<br>The taxidriver alerted police who arrested a 33-year-old <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Turkish</span> man when he came to pick up the suitcase at a lost luggage office.<br>The woman was later arrested in <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgium</span>.<br>She and the <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Turkish</span> man smuggled heroin from <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Turkey</span> to <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Antwerp</span> from where it was taken to <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Spain</span>, <span style=\"background-color:rgba(255, 215, 0, 0.5)\">France</span> and <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Germany</span> by others, the spokesman said.<br>He said 14 people were arrested in <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgium</span> and 16 others in other <span style=\"background-color:rgba(255, 215, 0, 0.5)\">European</span> nations after an investigation lasting nearly a year.<br>(<span>&#36;</span>1=32.14 <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> Franc)\n",
" </p>\n",
" </div>\n",
"\n",
Expand Down
28 changes: 14 additions & 14 deletions tutorials/corpus/CoNLL_3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1805,7 +1805,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "815b01606369445c892dedefbfd4916b",
"model_id": "0a612388df9249dab67efb5a4d358d5c",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -1826,7 +1826,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "daa8e13738e2453c93e334d08b6d251b",
"model_id": "cae2fc8df4a44049be700f26f4f20e88",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -1847,7 +1847,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f5e610805fac4f44b706c223dc091820",
"model_id": "83c8f7f605eb4a55ab194aad964e947f",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -3027,8 +3027,8 @@
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 41.7min remaining: 0.0s\n",
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 41.7min finished\n"
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 46.1min remaining: 0.0s\n",
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 46.1min finished\n"
]
},
{
Expand Down Expand Up @@ -6006,7 +6006,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "49d7275934fe48f3a27017d92225844a",
"model_id": "af7f756dbfc2467c9cf525caa83b83eb",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -6499,12 +6499,12 @@
{
"data": {
"text/plain": [
"{'num_true_positives': 4169,\n",
"{'num_true_positives': 4329,\n",
" 'num_entities': 5648,\n",
" 'num_extracted': 4929,\n",
" 'precision': 0.8458105092310814,\n",
" 'recall': 0.7381373937677054,\n",
" 'F1': 0.7883142668053323}"
" 'num_extracted': 5163,\n",
" 'precision': 0.8384660081348053,\n",
" 'recall': 0.7664660056657224,\n",
" 'F1': 0.8008509851077606}"
]
},
"execution_count": 38,
Expand Down Expand Up @@ -6965,7 +6965,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "571d376cdc19420d93df405970d42435",
"model_id": "63868a5aeb4e4847ba9b7df10e6d28b5",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -8598,7 +8598,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ab550997976f4d9b9ea777894d28fd12",
"model_id": "13b1edab9e1241ccb22166e9c0c8ca40",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -10072,7 +10072,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bd25f50dd1aa4bed9c9fc148b87603b5",
"model_id": "7110dc85e13145a2a9ab455aaa167948",
"version_major": 2,
"version_minor": 0
},
Expand Down
Loading

0 comments on commit 844a525

Please sign in to comment.