Skip to content

Commit 844a525

Browse files
authored
Prep for 0.1b3 release (#184)
* Get tests working on Pandas 1.0.x
* Re-enable Feather example in intro notebook
* Rerun notebooks prior to release
* Rerun tutorial notebooks prior to release
* Update version number
1 parent dd521f7 commit 844a525

File tree

11 files changed

+216
-163
lines changed

11 files changed

+216
-163
lines changed

notebooks/Analyze_Text.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@
143143
{
144144
"data": {
145145
"text/plain": [
146-
"<ibm_watson.natural_language_understanding_v1.NaturalLanguageUnderstandingV1 at 0x7fb258940150>"
146+
"<ibm_watson.natural_language_understanding_v1.NaturalLanguageUnderstandingV1 at 0x7ff68869a510>"
147147
]
148148
},
149149
"execution_count": 3,

notebooks/Integrate_NLP_Libraries.ipynb

Lines changed: 58 additions & 58 deletions
Large diffs are not rendered by default.

notebooks/Model_Training_with_BERT.ipynb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1643,7 +1643,7 @@
16431643
{
16441644
"data": {
16451645
"text/plain": [
1646-
"<text_extensions_for_pandas.array.tensor.TensorDtype at 0x7ff8d05e0290>"
1646+
"<text_extensions_for_pandas.array.tensor.TensorDtype at 0x7fb13131ee10>"
16471647
]
16481648
},
16491649
"execution_count": 13,
@@ -1996,7 +1996,7 @@
19961996
{
19971997
"data": {
19981998
"application/vnd.jupyter.widget-view+json": {
1999-
"model_id": "061593082c6e43f8bbbdab066a447502",
1999+
"model_id": "9e44a9af650543e59d81dfd8d5baa4ed",
20002000
"version_major": 2,
20012001
"version_minor": 0
20022002
},
@@ -2017,7 +2017,7 @@
20172017
{
20182018
"data": {
20192019
"application/vnd.jupyter.widget-view+json": {
2020-
"model_id": "d2e59e90113648dfb0a929c90ff7d1fb",
2020+
"model_id": "acc0673fceae466f94cc16ccfbd67fdd",
20212021
"version_major": 2,
20222022
"version_minor": 0
20232023
},
@@ -2038,7 +2038,7 @@
20382038
{
20392039
"data": {
20402040
"application/vnd.jupyter.widget-view+json": {
2041-
"model_id": "716b10c7c7a840048c1a780ff1723d84",
2041+
"model_id": "a0f64dff11304556ad22cd8df77954b7",
20422042
"version_major": 2,
20432043
"version_minor": 0
20442044
},
@@ -3183,8 +3183,8 @@
31833183
"output_type": "stream",
31843184
"text": [
31853185
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
3186-
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 9.1min remaining: 0.0s\n",
3187-
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 9.1min finished\n"
3186+
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 11.9min remaining: 0.0s\n",
3187+
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 11.9min finished\n"
31883188
]
31893189
},
31903190
{
@@ -5041,7 +5041,7 @@
50415041
{
50425042
"data": {
50435043
"application/vnd.jupyter.widget-view+json": {
5044-
"model_id": "0a775ec7ee9f42ccb4367432d97f6958",
5044+
"model_id": "fde70dc5306b41f09a4844106b127aa1",
50455045
"version_major": 2,
50465046
"version_minor": 0
50475047
},

notebooks/Text_Extensions_for_Pandas_Overview.ipynb

Lines changed: 80 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,7 +1522,7 @@
15221522
" [4, 5],\n",
15231523
" [6, 7],\n",
15241524
" [8, 9]]),\n",
1525-
" <text_extensions_for_pandas.array.tensor.TensorDtype at 0x7fb2e82c5d10>)"
1525+
" <text_extensions_for_pandas.array.tensor.TensorDtype at 0x7fe6a86432d0>)"
15261526
]
15271527
},
15281528
"execution_count": 22,
@@ -1903,7 +1903,7 @@
19031903
" <tr>\n",
19041904
" <th>0</th>\n",
19051905
" <td>[0, 2): 'In'</td>\n",
1906-
" <td>[0, 1, 0, 0]</td>\n",
1906+
" <td>[0, 0, 1, 0]</td>\n",
19071907
" </tr>\n",
19081908
" <tr>\n",
19091909
" <th>1</th>\n",
@@ -1918,24 +1918,24 @@
19181918
" <tr>\n",
19191919
" <th>3</th>\n",
19201920
" <td>[11, 15): 'King'</td>\n",
1921-
" <td>[0, 0, 0, 1]</td>\n",
1921+
" <td>[0, 1, 0, 0]</td>\n",
19221922
" </tr>\n",
19231923
" <tr>\n",
19241924
" <th>4</th>\n",
19251925
" <td>[16, 22): 'Arthur'</td>\n",
1926-
" <td>[0, 1, 0, 0]</td>\n",
1926+
" <td>[0, 0, 1, 0]</td>\n",
19271927
" </tr>\n",
19281928
" </tbody>\n",
19291929
"</table>\n",
19301930
"</div>"
19311931
],
19321932
"text/plain": [
19331933
" span features\n",
1934-
"0 [0, 2): 'In' [0, 1, 0, 0]\n",
1934+
"0 [0, 2): 'In' [0, 0, 1, 0]\n",
19351935
"1 [3, 5): 'AD' [0, 1, 0, 0]\n",
19361936
"2 [6, 9): '932' [0, 0, 0, 1]\n",
1937-
"3 [11, 15): 'King' [0, 0, 0, 1]\n",
1938-
"4 [16, 22): 'Arthur' [0, 1, 0, 0]"
1937+
"3 [11, 15): 'King' [0, 1, 0, 0]\n",
1938+
"4 [16, 22): 'Arthur' [0, 0, 1, 0]"
19391939
]
19401940
},
19411941
"execution_count": 32,
@@ -1958,22 +1958,88 @@
19581958
"# Save DataFrame to a feather file.\n",
19591959
"# Feather is a lightweight, fast binary columnar format, with basic\n",
19601960
"# compression and support built into Pandas.\n",
1961-
"\n",
1962-
"# TODO: Temporarily disabled while we revamp Feather support to handle multi-doc span arrays\n",
1963-
"#df.to_feather(\"outputs/tp_overview.feather\")"
1961+
"df.to_feather(\"outputs/tp_overview.feather\")"
19641962
]
19651963
},
19661964
{
19671965
"cell_type": "code",
19681966
"execution_count": 34,
19691967
"metadata": {},
1970-
"outputs": [],
1968+
"outputs": [
1969+
{
1970+
"data": {
1971+
"text/html": [
1972+
"<div>\n",
1973+
"<style scoped>\n",
1974+
" .dataframe tbody tr th:only-of-type {\n",
1975+
" vertical-align: middle;\n",
1976+
" }\n",
1977+
"\n",
1978+
" .dataframe tbody tr th {\n",
1979+
" vertical-align: top;\n",
1980+
" }\n",
1981+
"\n",
1982+
" .dataframe thead th {\n",
1983+
" text-align: right;\n",
1984+
" }\n",
1985+
"</style>\n",
1986+
"<table border=\"1\" class=\"dataframe\">\n",
1987+
" <thead>\n",
1988+
" <tr style=\"text-align: right;\">\n",
1989+
" <th></th>\n",
1990+
" <th>span</th>\n",
1991+
" <th>features</th>\n",
1992+
" </tr>\n",
1993+
" </thead>\n",
1994+
" <tbody>\n",
1995+
" <tr>\n",
1996+
" <th>0</th>\n",
1997+
" <td>[0, 2): 'In'</td>\n",
1998+
" <td>[0, 0, 1, 0]</td>\n",
1999+
" </tr>\n",
2000+
" <tr>\n",
2001+
" <th>1</th>\n",
2002+
" <td>[3, 5): 'AD'</td>\n",
2003+
" <td>[0, 1, 0, 0]</td>\n",
2004+
" </tr>\n",
2005+
" <tr>\n",
2006+
" <th>2</th>\n",
2007+
" <td>[6, 9): '932'</td>\n",
2008+
" <td>[0, 0, 0, 1]</td>\n",
2009+
" </tr>\n",
2010+
" <tr>\n",
2011+
" <th>3</th>\n",
2012+
" <td>[11, 15): 'King'</td>\n",
2013+
" <td>[0, 1, 0, 0]</td>\n",
2014+
" </tr>\n",
2015+
" <tr>\n",
2016+
" <th>4</th>\n",
2017+
" <td>[16, 22): 'Arthur'</td>\n",
2018+
" <td>[0, 0, 1, 0]</td>\n",
2019+
" </tr>\n",
2020+
" </tbody>\n",
2021+
"</table>\n",
2022+
"</div>"
2023+
],
2024+
"text/plain": [
2025+
" span features\n",
2026+
"0 [0, 2): 'In' [0, 0, 1, 0]\n",
2027+
"1 [3, 5): 'AD' [0, 1, 0, 0]\n",
2028+
"2 [6, 9): '932' [0, 0, 0, 1]\n",
2029+
"3 [11, 15): 'King' [0, 1, 0, 0]\n",
2030+
"4 [16, 22): 'Arthur' [0, 0, 1, 0]"
2031+
]
2032+
},
2033+
"execution_count": 34,
2034+
"metadata": {},
2035+
"output_type": "execute_result"
2036+
}
2037+
],
19712038
"source": [
19722039
"# Read the file back into a new DataFrame.\n",
19732040
"\n",
1974-
"# TODO: Temporarily disabled while we revamp Feather support to handle multi-doc span arrays\n",
1975-
"#df_load = pd.read_feather(\"outputs/tp_overview.feather\")\n",
1976-
"#df_load.head()"
2041+
"df_load = pd.read_feather(\"outputs/tp_overview.feather\")\n",
2042+
"df_load.head()"
19772043
]
19782044
},
19792045
{

notebooks/Understand_Tables.ipynb

Lines changed: 1 addition & 11 deletions
Large diffs are not rendered by default.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
setuptools.setup(
2626
name="text_extensions_for_pandas",
27-
version="0.1b2",
27+
version="0.1b3",
2828
author="IBM",
2929
author_email="[email protected]",
3030
description="Natural language processing support for Pandas dataframes.",

text_extensions_for_pandas/array/test_token_span.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,8 @@ def data_for_grouping(dtype):
518518
return pd.array([b, b, na, na, a, a, b, c], dtype=dtype)
519519

520520

521-
# Can't import due to dependencies, taken from pandas.conftest import all_compare_operators
521+
# Can't import due to dependencies, taken
522+
# from pandas.conftest import all_compare_operators
522523
@pytest.fixture(params=["__eq__", "__ne__", "__lt__", "__gt__", "__le__", "__ge__"])
523524
def all_compare_operators(request):
524525
return request.param
@@ -552,14 +553,10 @@ class TestPandasConstructors(base.BaseConstructorsTests):
552553
def test_series_constructor_no_data_with_index(self, dtype, na_value):
553554
pass
554555

556+
@pytest.mark.skipif(pd.__version__.startswith("1.0"),
557+
reason="Test added in Pandas 1.1.0")
555558
def test_construct_empty_dataframe(self, dtype):
556559
super().test_construct_empty_dataframe(dtype)
557-
# try:
558-
# with pytest.raises(TypeError, match="Expected SpanArray as tokens"):
559-
# super().test_construct_empty_dataframe(dtype)
560-
# except AttributeError:
561-
# # Test added in Pandas 1.1.0, ignore for earlier versions
562-
# pass
563560

564561

565562
class TestPandasGetitem(base.BaseGetitemTests):

tutorials/corpus/CoNLL_2.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3730,7 +3730,7 @@
37303730
"\n",
37313731
"<div id=\"spanArray\">\n",
37323732
" <div id=\"spans\" \n",
3733-
" style=\"background-color:#F0F0F0; border: 1px solid #E0E0E0; float:left; padding:10px;\">\n",
3733+
" style=\"color: var(--jp-layout-color2); border: 1px solid var(--jp-border-color0); float:left; padding:10px;\">\n",
37343734
" <table border=\"1\" class=\"dataframe\">\n",
37353735
" <thead>\n",
37363736
" <tr style=\"text-align: right;\">\n",
@@ -3899,11 +3899,11 @@
38993899
"</table>\n",
39003900
" </div>\n",
39013901
" <div id=\"text\"\n",
3902-
" style=\"float:right; background-color:#F5F5F5; border: 1px solid #E0E0E0; width: 60%;\">\n",
3902+
" style=\"float:right; border: 1px solid var(--jp-border-color0); width: 60%;\">\n",
39033903
"\n",
39043904
" <div style=\"float:center; padding:10px\">\n",
3905-
" <p style=\"font-family:monospace\">\n",
3906-
" -DOCSTART-<br><span style=\"background-color:yellow\">Belgian</span> police smash major drugs rings, 30 arrested.<br><span style=\"background-color:yellow\">BRUSSELS</span> 1996-12-06<br>Police smashed two drugs smuggling rings and arrested 30 people after a taxidriver in <span style=\"background-color:yellow\">Spain</span> alerted them to a suitcase of heroin left in his cab, <span style=\"background-color:yellow\">Belgian</span> police said on Friday.<br>Police seized dozens of kilos of heroin with a street value of hundreds of millions of <span style=\"background-color:yellow\">Belgian</span> francs, a public prosecutor&#39;s office spokesman in the port city of <span style=\"background-color:yellow\">Antwerp</span> said.<br>He said a 24-year-old <span style=\"background-color:yellow\">Belgian</span> woman left a suitcase containing 13 kg (29 lb) of heroin in a taxi in <span style=\"background-color:yellow\">Barcelona</span>.<br>The taxidriver alerted police who arrested a 33-year-old <span style=\"background-color:yellow\">Turkish</span> man when he came to pick up the suitcase at a lost luggage office.<br>The woman was later arrested in <span style=\"background-color:yellow\">Belgium</span>.<br>She and the <span style=\"background-color:yellow\">Turkish</span> man smuggled heroin from <span style=\"background-color:yellow\">Turkey</span> to <span style=\"background-color:yellow\">Antwerp</span> from where it was taken to <span style=\"background-color:yellow\">Spain</span>, <span style=\"background-color:yellow\">France</span> and <span style=\"background-color:yellow\">Germany</span> by others, the spokesman said.<br>He said 14 people were arrested in <span style=\"background-color:yellow\">Belgium</span> and 16 others in other <span style=\"background-color:yellow\">European</span> nations after an investigation lasting nearly a year.<br>(<span>&#36;</span>1=32.14 <span style=\"background-color:yellow\">Belgian</span> Franc)\n",
3905+
" <p style=\"font-family:var(--jp-code-font-family); font-size:var(--jp-code-font-size)\">\n",
3906+
" -DOCSTART-<br><span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> police smash major drugs rings, 30 arrested.<br><span style=\"background-color:rgba(255, 215, 0, 0.5)\">BRUSSELS</span> 1996-12-06<br>Police smashed two drugs smuggling rings and arrested 30 people after a taxidriver in <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Spain</span> alerted them to a suitcase of heroin left in his cab, <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> police said on Friday.<br>Police seized dozens of kilos of heroin with a street value of hundreds of millions of <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> francs, a public prosecutor&#39;s office spokesman in the port city of <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Antwerp</span> said.<br>He said a 24-year-old <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> woman left a suitcase containing 13 kg (29 lb) of heroin in a taxi in <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Barcelona</span>.<br>The taxidriver alerted police who arrested a 33-year-old <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Turkish</span> man when he came to pick up the suitcase at a lost luggage office.<br>The woman was later arrested in <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgium</span>.<br>She and the <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Turkish</span> man smuggled heroin from <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Turkey</span> to <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Antwerp</span> from where it was taken to <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Spain</span>, <span style=\"background-color:rgba(255, 215, 0, 0.5)\">France</span> and <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Germany</span> by others, the spokesman said.<br>He said 14 people were arrested in <span style=\"background-color:rgba(255, 215, 0, 
0.5)\">Belgium</span> and 16 others in other <span style=\"background-color:rgba(255, 215, 0, 0.5)\">European</span> nations after an investigation lasting nearly a year.<br>(<span>&#36;</span>1=32.14 <span style=\"background-color:rgba(255, 215, 0, 0.5)\">Belgian</span> Franc)\n",
39073907
" </p>\n",
39083908
" </div>\n",
39093909
"\n",

tutorials/corpus/CoNLL_3.ipynb

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1805,7 +1805,7 @@
18051805
{
18061806
"data": {
18071807
"application/vnd.jupyter.widget-view+json": {
1808-
"model_id": "815b01606369445c892dedefbfd4916b",
1808+
"model_id": "0a612388df9249dab67efb5a4d358d5c",
18091809
"version_major": 2,
18101810
"version_minor": 0
18111811
},
@@ -1826,7 +1826,7 @@
18261826
{
18271827
"data": {
18281828
"application/vnd.jupyter.widget-view+json": {
1829-
"model_id": "daa8e13738e2453c93e334d08b6d251b",
1829+
"model_id": "cae2fc8df4a44049be700f26f4f20e88",
18301830
"version_major": 2,
18311831
"version_minor": 0
18321832
},
@@ -1847,7 +1847,7 @@
18471847
{
18481848
"data": {
18491849
"application/vnd.jupyter.widget-view+json": {
1850-
"model_id": "f5e610805fac4f44b706c223dc091820",
1850+
"model_id": "83c8f7f605eb4a55ab194aad964e947f",
18511851
"version_major": 2,
18521852
"version_minor": 0
18531853
},
@@ -3027,8 +3027,8 @@
30273027
"output_type": "stream",
30283028
"text": [
30293029
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n",
3030-
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 41.7min remaining: 0.0s\n",
3031-
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 41.7min finished\n"
3030+
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 46.1min remaining: 0.0s\n",
3031+
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 46.1min finished\n"
30323032
]
30333033
},
30343034
{
@@ -6006,7 +6006,7 @@
60066006
{
60076007
"data": {
60086008
"application/vnd.jupyter.widget-view+json": {
6009-
"model_id": "49d7275934fe48f3a27017d92225844a",
6009+
"model_id": "af7f756dbfc2467c9cf525caa83b83eb",
60106010
"version_major": 2,
60116011
"version_minor": 0
60126012
},
@@ -6499,12 +6499,12 @@
64996499
{
65006500
"data": {
65016501
"text/plain": [
6502-
"{'num_true_positives': 4169,\n",
6502+
"{'num_true_positives': 4329,\n",
65036503
" 'num_entities': 5648,\n",
6504-
" 'num_extracted': 4929,\n",
6505-
" 'precision': 0.8458105092310814,\n",
6506-
" 'recall': 0.7381373937677054,\n",
6507-
" 'F1': 0.7883142668053323}"
6504+
" 'num_extracted': 5163,\n",
6505+
" 'precision': 0.8384660081348053,\n",
6506+
" 'recall': 0.7664660056657224,\n",
6507+
" 'F1': 0.8008509851077606}"
65086508
]
65096509
},
65106510
"execution_count": 38,
@@ -6965,7 +6965,7 @@
69656965
{
69666966
"data": {
69676967
"application/vnd.jupyter.widget-view+json": {
6968-
"model_id": "571d376cdc19420d93df405970d42435",
6968+
"model_id": "63868a5aeb4e4847ba9b7df10e6d28b5",
69696969
"version_major": 2,
69706970
"version_minor": 0
69716971
},
@@ -8598,7 +8598,7 @@
85988598
{
85998599
"data": {
86008600
"application/vnd.jupyter.widget-view+json": {
8601-
"model_id": "ab550997976f4d9b9ea777894d28fd12",
8601+
"model_id": "13b1edab9e1241ccb22166e9c0c8ca40",
86028602
"version_major": 2,
86038603
"version_minor": 0
86048604
},
@@ -10072,7 +10072,7 @@
1007210072
{
1007310073
"data": {
1007410074
"application/vnd.jupyter.widget-view+json": {
10075-
"model_id": "bd25f50dd1aa4bed9c9fc148b87603b5",
10075+
"model_id": "7110dc85e13145a2a9ab455aaa167948",
1007610076
"version_major": 2,
1007710077
"version_minor": 0
1007810078
},

0 commit comments

Comments
 (0)