Feature/GitHub workflows (#8)

asajatovic · web-flow · commit be8f8d0c9127 · 2019-12-16T20:58:28.000+01:00
Adds GitHub Actions for building and uploading a package
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
@@ -0,0 +1,38 @@
+name: Python package
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    branches:
+      - master  # only pull requests targeting master trigger this workflow
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      max-parallel: 4
+      matrix:
+        python-version: [3.6, 3.7, 3.8]  # the steps below run once per listed version
+
+    steps:
+    - uses: actions/checkout@v1
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v1
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+    - name: Lint with flake8
+      run: |
+        pip install flake8
+        # fails the build on any flake8 violation (no --select / --exit-zero); NOTE(review): without globstar, bash expands **/*.py like */*.py, so root-level files (e.g. setup.py) are not linted - confirm intended
+        flake8 **/*.py --count --show-source --statistics
+        # report-only pass: --exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 **/*.py --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pip install pytest
+        python -m pytest -vvv test
diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml
@@ -0,0 +1,26 @@
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]  # runs when a GitHub release is created
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v1
+    - name: Set up Python
+      uses: actions/setup-python@v1
+      with:
+        python-version: '3.6'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:  # twine takes its upload credentials from these variables; the values come from repository secrets
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: |
+        python setup.py sdist bdist_wheel
+        twine upload dist/*
diff --git a/README.md b/README.md
@@ -43,7 +43,8 @@ Pull requests are welcome. For major changes, please open an issue first to disc
 
 Please make sure to update tests as appropriate.
 
-To start the tests, just run [`pytest`](https://docs.pytest.org/en/latest/contents.html) in the root source directory.
+Tests are run automatically for each pull request targeting the master branch.
+To start the tests locally, just run [`pytest`](https://docs.pytest.org/en/latest/contents.html) in the root source directory.
 
 ## License
 [MIT](https://choosealicense.com/licenses/mit/)  © TakeLab
diff --git a/setup.py b/setup.py
@@ -14,12 +14,12 @@
 with open(langs_path, "r") as f:
     LANGUAGES = json.load(f)
 
-ENTRY_LANGS = set("udpipe_{} = spacy_udpipe:UDPipeLanguage".format(s.split('-')[0])
+ENTRY_LANGS = set(f"udpipe_{s.split('-')[0]} = spacy_udpipe:UDPipeLanguage"
                   for s in LANGUAGES.keys())
 
 setuptools.setup(
     name="spacy-udpipe",
-    version="0.0.5",
+    version="0.1.0",
     description="Use fast UDPipe models directly in spaCy",
     long_description=long_description,
     long_description_content_type="text/markdown",
diff --git a/spacy_udpipe/language.py b/spacy_udpipe/language.py
@@ -82,10 +82,10 @@ class UDPipeTokenizer(object):
     >>> nlp = spacy.load('/path/to/model', udpipe_model=udpipe_model)
     """
 
-    to_disk = lambda self, *args, **kwargs: None
-    from_disk = lambda self, *args, **kwargs: None
-    to_bytes = lambda self, *args, **kwargs: None
-    from_bytes = lambda self, *args, **kwargs: None
+    to_disk = lambda self, *args, **kwargs: None  # noqa: E731
+    from_disk = lambda self, *args, **kwargs: None  # noqa: E731
+    to_bytes = lambda self, *args, **kwargs: None  # noqa: E731
+    from_bytes = lambda self, *args, **kwargs: None  # noqa: E731
     _ws_pattern = re.compile(r"\s+")
 
     def __init__(self, model, vocab):
@@ -217,15 +217,15 @@ def __init__(self, lang, path=None, meta=None):
             raise Exception(msg)
         self._lang = lang.split('-')[0]
         if meta is None:
-            self._meta = {'authors': ("Milan Straka, "
-                                      "Jana Straková"),
+            self._meta = {'author': ("Milan Straka & "
+                                     "Jana Straková"),
                           'description': "UDPipe pretrained model.",
                           'email': 'straka@ufal.mff.cuni.cz',
                           'lang': 'udpipe_' + self._lang,
                           'license': 'CC BY-NC-SA 4.0',
                           'name': path.split('/')[-1],
                           'parent_package': 'spacy_udpipe',
-                          'pipeline': 'Tokenizer, POS Tagger, Lemmatizer, Parser',
+                          'pipeline': 'Tokenizer, Tagger, Lemmatizer, Parser',
                           'source': 'Universal Dependencies 2.4',
                           'url': 'http://ufal.mff.cuni.cz/udpipe',
                           'version': '1.2.0'
@@ -277,15 +277,15 @@ def tokenize(self, text):
         return self._read(text, tokenizer)
 
     def tag(self, sentence):
-        """Assing part-of-speech tags (inplace).
+        """Assign part-of-speech tags (inplace).
 
         sentence (ufal.udpipe.Sentence): Input sentence.
         RETURNS (ufal.udpipe.Sentence): Tagged sentence.
         """
         self.model.tag(sentence, self.model.DEFAULT)
 
     def parse(self, sentence):
-        """Assing dependency parse relations (inplace).
+        """Assign dependency parse relations (inplace).
 
         sentence (ufal.udpipe.Sentence): Input sentence.
         RETURNS (ufal.udpipe.Sentence): Tagged sentence.
diff --git a/spacy_udpipe/languages.json b/spacy_udpipe/languages.json
@@ -109,4 +109,4 @@
     "vi": "vietnamese-vtb-ud-2.4-190531.udpipe",
     "ug": "uyghur-udt-ud-2.4-190531.udpipe",
     "ur": "urdu-udtb-ud-2.4-190531.udpipe"
-}
+}
diff --git a/spacy_udpipe/util.py b/spacy_udpipe/util.py
@@ -7,7 +7,7 @@
 from spacy.language import Language
 from spacy.util import get_lang_class
 
-BASE_URL = "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-2998/"
+BASE_URL = "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-2998/"  # noqa: E501
 MODELS_DIR = os.path.join(Path(__file__).parent, "models")
 langs_path = os.path.join(Path(__file__).parent, "languages.json")
 with open(langs_path, "r") as f:
@@ -32,7 +32,7 @@ def download(lang):
     _check_language(lang)
     try:
         _check_models_dir(lang)
-    except:
+    except Exception:
         os.makedirs(MODELS_DIR)
     if LANGUAGES[lang] in os.listdir(MODELS_DIR):
         msg = "Already downloaded a model for the" \
diff --git a/test/test_language.py b/test/test_language.py
@@ -3,18 +3,25 @@
 from spacy.lang.en import EnglishDefaults
 from spacy.language import BaseDefaults
 
+from spacy_udpipe import download
 from spacy_udpipe.language import load
 from spacy_udpipe.util import get_defaults
 
 
+@pytest.fixture(autouse=True)
+def download_en():
+    download("en")
+
+
 @pytest.fixture
 def lang():
     return "en"
 
 
 def tags_equal(act, exp):
-    """Check if each actual tag in act is equal to one or more expected tags in exp."""
-    return all(a == e if isinstance(e, str) else a in e for a, e in zip(act, exp))
+    """Check if each actual tag is equal to one or more expected tags."""
+    return all(a == e if isinstance(e, str) else a in e
+               for a, e in zip(act, exp))
 
 
 def test_get_defaults():
@@ -29,19 +36,30 @@ def test_spacy_udpipe(lang):
     text = "Testing one, two, three. This is a test."
     doc = nlp(text)
 
-    pos_actual = ['VERB', 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM', 'PUNCT',
-                  ('PRON', 'DET'), ('AUX', 'VERB'), 'DET', 'NOUN', 'PUNCT']
+    pos_actual = [('VERB', 'PROPN'), 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM',
+                  'PUNCT',
+                  ('PRON', 'DET'), ('AUX', 'VERB'), 'DET', 'NOUN',
+                  'PUNCT']
     # test token attributes
-    assert [t.text for t in doc] == ['Testing', 'one', ',', 'two', ',', 'three', '.',
-                                     'This', 'is', 'a', 'test', '.']
-    assert [t.lemma_ for t in doc] == ['test', 'one', ',', 'two', ',', 'three', '.',
-                                       'this', 'be', 'a', 'test', '.']
+    assert [t.text for t in doc] == ['Testing', 'one', ',', 'two', ',', 'three',  # noqa: E501
+                                     '.',
+                                     'This', 'is', 'a', 'test',
+                                     '.']
+    assert [t.lemma_ for t in doc] == ['test', 'one', ',', 'two', ',', 'three',
+                                       '.',
+                                       'this', 'be', 'a', 'test',
+                                       '.']
     assert tags_equal([t.pos_ for t in doc], pos_actual)
-    assert [t.tag_ for t in doc] == ['V', 'N', 'FF', 'N', 'FF', 'N', 'FS',
-                                     'PD', 'V', 'RI', 'S', 'FS']  # CoNNL xpostag-s, custom for each UD treebank
-    assert [t.dep_ for t in doc] == ['ROOT', 'nummod', 'punct', 'nummod', 'punct', 'nummod', 'punct',
-                                     'nsubj', 'cop', 'det', 'ROOT', 'punct']
-    assert [t.is_sent_start for t in doc] == [True, None, None, None, None, None, None,
+    # CoNLL xpostag-s, custom for each UD treebank
+    assert [t.tag_ for t in doc] == ['NNP', 'CD', ',', 'CD', ',', 'CD',
+                                     '.',
+                                     'DT', 'VBZ', 'DT', 'NN',
+                                     '.']
+    assert [t.dep_ for t in doc] == ['ROOT', 'nummod', 'punct', 'nummod', 'punct', 'nummod',  # noqa: E501
+                                     'punct',
+                                     'nsubj', 'cop', 'det', 'ROOT',
+                                     'punct']
+    assert [t.is_sent_start for t in doc] == [True, None, None, None, None, None, None,  # noqa: E501
                                               True, None, None, None, None]
     assert any([t.is_stop for t in doc])
     # test doc attributes
@@ -52,7 +70,6 @@ def test_spacy_udpipe(lang):
     # test pipe
     docs = list(nlp.pipe(["Testing one, two, three.", "This is a test."]))
     assert docs[0].text == "Testing one, two, three."
-    assert [t.pos_ for t in docs[0]] == [
-        'VERB', 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM', 'PUNCT']
+    assert [t.pos_ for t in docs[0]] == ['PROPN', 'NUM', 'PUNCT', 'NUM', 'PUNCT', 'NUM', 'PUNCT']  # noqa: E501
     assert docs[1].text == "This is a test."
     assert tags_equal([t.pos_ for t in docs[1]], pos_actual[-5:])