Commit be8f8d0

Feature/GitHub workflows (#8)
Adds GitHub Actions for building and uploading a package
1 parent ec1faa5 commit be8f8d0

8 files changed: +112 -30 lines changed
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+name: Python package
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    branches:
+      - master
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      max-parallel: 4
+      matrix:
+        python-version: [3.6, 3.7, 3.8]
+
+    steps:
+    - uses: actions/checkout@v1
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v1
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+    - name: Lint with flake8
+      run: |
+        pip install flake8
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 **/*.py --count --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 **/*.py --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pip install pytest
+        python -m pytest -vvv test
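The workflow above installs the requirements, lints with flake8 (failing the build on syntax errors and undefined names), and runs the test suite with pytest on Python 3.6, 3.7, and 3.8. Below is a minimal local sketch of the same steps, not part of the commit; it assumes it is run from the repository root and uses `flake8 .` in place of the shell glob `**/*.py`.

import subprocess
import sys


def run(*cmd):
    # stop at the first failing step, mirroring CI behaviour
    subprocess.run(list(cmd), check=True)


run(sys.executable, "-m", "pip", "install", "--upgrade", "pip")
run(sys.executable, "-m", "pip", "install", "-r", "requirements.txt")
run(sys.executable, "-m", "pip", "install", "flake8", "pytest")
# fail on Python syntax errors or undefined names, as the first flake8 call does
run(sys.executable, "-m", "flake8", ".", "--count", "--show-source", "--statistics")
run(sys.executable, "-m", "pytest", "-vvv", "test")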
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v1
+    - name: Set up Python
+      uses: actions/setup-python@v1
+      with:
+        python-version: '3.6'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: |
+        python setup.py sdist bdist_wheel
+        twine upload dist/*
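This workflow builds an sdist and a wheel and uploads them with twine whenever a GitHub release is created, authenticating through the PYPI_USERNAME and PYPI_PASSWORD repository secrets. A rough local sketch of the same run block, not part of the commit; it assumes setuptools, wheel, and twine are installed and that TWINE_USERNAME/TWINE_PASSWORD are already exported in the environment.

import glob
import subprocess
import sys

# build source and wheel distributions, as in the workflow's run block
subprocess.run([sys.executable, "setup.py", "sdist", "bdist_wheel"], check=True)

# twine reads TWINE_USERNAME / TWINE_PASSWORD from the environment,
# which is how the workflow passes the PyPI secrets to it
subprocess.run([sys.executable, "-m", "twine", "upload", *glob.glob("dist/*")],
               check=True)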

README.md

Lines changed: 2 additions & 1 deletion
@@ -43,7 +43,8 @@ Pull requests are welcome. For major changes, please open an issue first to disc
 
 Please make sure to update tests as appropriate.
 
-To start the tests, just run [`pytest`](https://docs.pytest.org/en/latest/contents.html) in the root source directory.
+Tests are run automatically for each pull request on the master branch.
+To start the tests locally, just run [`pytest`](https://docs.pytest.org/en/latest/contents.html) in the root source directory.
 
 ## License
 [MIT](https://choosealicense.com/licenses/mit/) © TakeLab

setup.py

Lines changed: 2 additions & 2 deletions
@@ -14,12 +14,12 @@
 with open(langs_path, "r") as f:
     LANGUAGES = json.load(f)
 
-ENTRY_LANGS = set("udpipe_{} = spacy_udpipe:UDPipeLanguage".format(s.split('-')[0])
+ENTRY_LANGS = set(f"udpipe_{s.split('-')[0]} = spacy_udpipe:UDPipeLanguage"
                   for s in LANGUAGES.keys())
 
 setuptools.setup(
     name="spacy-udpipe",
-    version="0.0.5",
+    version="0.1.0",
     description="Use fast UDPipe models directly in spaCy",
     long_description=long_description,
     long_description_content_type="text/markdown",
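The new f-string produces the same entry-point strings as the `.format()` call it replaces; the version bump to 0.1.0 is the metadata change. A small standalone illustration of what ENTRY_LANGS contains (the hyphenated key below is hypothetical, added only to show why `split('-')[0]` is needed):

LANGUAGES = {
    "ur": "urdu-udtb-ud-2.4-190531.udpipe",       # key taken from languages.json
    "xx-sample": "hypothetical-treebank.udpipe",  # hypothetical hyphenated key
}

ENTRY_LANGS = set(f"udpipe_{s.split('-')[0]} = spacy_udpipe:UDPipeLanguage"
                  for s in LANGUAGES.keys())

print(sorted(ENTRY_LANGS))
# ['udpipe_ur = spacy_udpipe:UDPipeLanguage',
#  'udpipe_xx = spacy_udpipe:UDPipeLanguage']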

spacy_udpipe/language.py

Lines changed: 9 additions & 9 deletions
@@ -82,10 +82,10 @@ class UDPipeTokenizer(object):
     >>> nlp = spacy.load('/path/to/model', udpipe_model=udpipe_model)
     """
 
-    to_disk = lambda self, *args, **kwargs: None
-    from_disk = lambda self, *args, **kwargs: None
-    to_bytes = lambda self, *args, **kwargs: None
-    from_bytes = lambda self, *args, **kwargs: None
+    to_disk = lambda self, *args, **kwargs: None  # noqa: E731
+    from_disk = lambda self, *args, **kwargs: None  # noqa: E731
+    to_bytes = lambda self, *args, **kwargs: None  # noqa: E731
+    from_bytes = lambda self, *args, **kwargs: None  # noqa: E731
     _ws_pattern = re.compile(r"\s+")
 
     def __init__(self, model, vocab):
@@ -217,15 +217,15 @@ def __init__(self, lang, path=None, meta=None):
             raise Exception(msg)
         self._lang = lang.split('-')[0]
         if meta is None:
-            self._meta = {'authors': ("Milan Straka, "
-                                      "Jana Straková"),
+            self._meta = {'author': ("Milan Straka & "
+                                     "Jana Straková"),
                           'description': "UDPipe pretrained model.",
                           'email': '[email protected]',
                           'lang': 'udpipe_' + self._lang,
                           'license': 'CC BY-NC-SA 4.0',
                           'name': path.split('/')[-1],
                           'parent_package': 'spacy_udpipe',
-                          'pipeline': 'Tokenizer, POS Tagger, Lemmatizer, Parser',
+                          'pipeline': 'Tokenizer, Tagger, Lemmatizer, Parser',
                           'source': 'Universal Dependencies 2.4',
                           'url': 'http://ufal.mff.cuni.cz/udpipe',
                           'version': '1.2.0'
@@ -277,15 +277,15 @@ def tokenize(self, text):
         return self._read(text, tokenizer)
 
     def tag(self, sentence):
-        """Assing part-of-speech tags (inplace).
+        """Assign part-of-speech tags (inplace).
 
         sentence (ufal.udpipe.Sentence): Input sentence.
         RETURNS (ufal.udpipe.Sentence): Tagged sentence.
         """
         self.model.tag(sentence, self.model.DEFAULT)
 
     def parse(self, sentence):
-        """Assing dependency parse relations (inplace).
+        """Assign dependency parse relations (inplace).
 
         sentence (ufal.udpipe.Sentence): Input sentence.
         RETURNS (ufal.udpipe.Sentence): Tagged sentence.
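The `# noqa: E731` comments silence flake8's "do not assign a lambda expression, use a def" warning instead of rewriting the no-op serialization hooks. For comparison, a def-based sketch that flake8 would accept without the comment (illustrative only, with a hypothetical class name; not the code used in this file):

class NoOpSerialization:
    """Serialization hooks that intentionally do nothing."""

    def to_disk(self, *args, **kwargs):
        return None

    def from_disk(self, *args, **kwargs):
        return None

    def to_bytes(self, *args, **kwargs):
        return None

    def from_bytes(self, *args, **kwargs):
        return None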

spacy_udpipe/languages.json

Lines changed: 1 addition & 1 deletion
@@ -109,4 +109,4 @@
     "vi": "vietnamese-vtb-ud-2.4-190531.udpipe",
     "ug": "uyghur-udt-ud-2.4-190531.udpipe",
     "ur": "urdu-udtb-ud-2.4-190531.udpipe"
-}
\ No newline at end of file
+}

spacy_udpipe/util.py

Lines changed: 2 additions & 2 deletions
@@ -7,7 +7,7 @@
 from spacy.language import Language
 from spacy.util import get_lang_class
 
-BASE_URL = "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-2998/"
+BASE_URL = "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-2998/"  # noqa: E501
 MODELS_DIR = os.path.join(Path(__file__).parent, "models")
 langs_path = os.path.join(Path(__file__).parent, "languages.json")
 with open(langs_path, "r") as f:
@@ -32,7 +32,7 @@ def download(lang):
     _check_language(lang)
     try:
         _check_models_dir(lang)
-    except:
+    except Exception:
         os.makedirs(MODELS_DIR)
     if LANGUAGES[lang] in os.listdir(MODELS_DIR):
         msg = "Already downloaded a model for the" \
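Replacing the bare `except:` with `except Exception:` addresses flake8's E722 warning and keeps the fallback from swallowing exceptions that derive only from BaseException, such as KeyboardInterrupt and SystemExit. A small standalone sketch of the difference:

def fragile():
    raise FileNotFoundError("models directory is missing")

try:
    fragile()
except Exception:
    # ordinary errors land here, so download() can recover by creating MODELS_DIR
    print("recovered")

# A bare `except:` would also catch KeyboardInterrupt and SystemExit (which derive
# from BaseException, not Exception), making the process hard to interrupt.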

test/test_language.py

Lines changed: 32 additions & 15 deletions
@@ -3,18 +3,25 @@
 from spacy.lang.en import EnglishDefaults
 from spacy.language import BaseDefaults
 
+from spacy_udpipe import download
 from spacy_udpipe.language import load
 from spacy_udpipe.util import get_defaults
 
 
+@pytest.fixture(autouse=True)
+def download_en():
+    download("en")
+
+
 @pytest.fixture
 def lang():
     return "en"
 
 
 def tags_equal(act, exp):
-    """Check if each actual tag in act is equal to one or more expected tags in exp."""
-    return all(a == e if isinstance(e, str) else a in e for a, e in zip(act, exp))
+    """Check if each actual tag is equal to one or more expected tags."""
+    return all(a == e if isinstance(e, str) else a in e
+               for a, e in zip(act, exp))
 
 
 def test_get_defaults():
@@ -29,19 +36,30 @@ def test_spacy_udpipe(lang):
     text = "Testing one, two, three. This is a test."
     doc = nlp(text)
 
-    pos_actual = ['VERB', 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM', 'PUNCT',
-                  ('PRON', 'DET'), ('AUX', 'VERB'), 'DET', 'NOUN', 'PUNCT']
+    pos_actual = [('VERB', 'PROPN'), 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM',
+                  'PUNCT',
+                  ('PRON', 'DET'), ('AUX', 'VERB'), 'DET', 'NOUN',
+                  'PUNCT']
     # test token attributes
-    assert [t.text for t in doc] == ['Testing', 'one', ',', 'two', ',', 'three', '.',
-                                     'This', 'is', 'a', 'test', '.']
-    assert [t.lemma_ for t in doc] == ['test', 'one', ',', 'two', ',', 'three', '.',
-                                       'this', 'be', 'a', 'test', '.']
+    assert [t.text for t in doc] == ['Testing', 'one', ',', 'two', ',', 'three',  # noqa: E501
+                                     '.',
+                                     'This', 'is', 'a', 'test',
+                                     '.']
+    assert [t.lemma_ for t in doc] == ['test', 'one', ',', 'two', ',', 'three',
+                                       '.',
+                                       'this', 'be', 'a', 'test',
+                                       '.']
     assert tags_equal([t.pos_ for t in doc], pos_actual)
-    assert [t.tag_ for t in doc] == ['V', 'N', 'FF', 'N', 'FF', 'N', 'FS',
-                                     'PD', 'V', 'RI', 'S', 'FS']  # CoNNL xpostag-s, custom for each UD treebank
-    assert [t.dep_ for t in doc] == ['ROOT', 'nummod', 'punct', 'nummod', 'punct', 'nummod', 'punct',
-                                     'nsubj', 'cop', 'det', 'ROOT', 'punct']
-    assert [t.is_sent_start for t in doc] == [True, None, None, None, None, None, None,
+    # CoNNL xpostag-s, custom for each UD treebank
+    assert [t.tag_ for t in doc] == ['NNP', 'CD', ',', 'CD', ',', 'CD',
+                                     '.',
+                                     'DT', 'VBZ', 'DT', 'NN',
+                                     '.']
+    assert [t.dep_ for t in doc] == ['ROOT', 'nummod', 'punct', 'nummod', 'punct', 'nummod',  # noqa: E501
+                                     'punct',
+                                     'nsubj', 'cop', 'det', 'ROOT',
+                                     'punct']
+    assert [t.is_sent_start for t in doc] == [True, None, None, None, None, None, None,  # noqa: E501
                                               True, None, None, None, None]
     assert any([t.is_stop for t in doc])
     # test doc attributes
@@ -52,7 +70,6 @@ def test_spacy_udpipe(lang):
     # test pipe
     docs = list(nlp.pipe(["Testing one, two, three.", "This is a test."]))
     assert docs[0].text == "Testing one, two, three."
-    assert [t.pos_ for t in docs[0]] == [
-        'VERB', 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM', 'PUNCT']
+    assert [t.pos_ for t in docs[0]] == ['PROPN', 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM', 'PUNCT']  # noqa: E501
     assert docs[1].text == "This is a test."
     assert tags_equal([t.pos_ for t in docs[1]], pos_actual[-5:])
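The updated tests download the English model once through an autouse fixture and allow alternative tags per position via tuples in pos_actual. A standalone illustration of how the reworked tags_equal helper treats a tuple as a set of acceptable alternatives (the helper is copied from the test file above; the sample lists are illustrative):

def tags_equal(act, exp):
    """Check if each actual tag is equal to one or more expected tags."""
    return all(a == e if isinstance(e, str) else a in e
               for a, e in zip(act, exp))

# 'Testing' may be tagged either VERB or PROPN, matching the updated pos_actual
assert tags_equal(['PROPN', 'NUM'], [('VERB', 'PROPN'), 'NUM'])
assert not tags_equal(['NOUN', 'NUM'], [('VERB', 'PROPN'), 'NUM'])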
