3
3
from spacy .lang .en import EnglishDefaults
4
4
from spacy .language import BaseDefaults
5
5
6
+ from spacy_udpipe import download
6
7
from spacy_udpipe .language import load
7
8
from spacy_udpipe .util import get_defaults
8
9
9
10
11
@pytest.fixture(scope="session", autouse=True)
def download_en():
    """Make sure the English model has been requested before tests run.

    The fixture is autouse, so tests do not need to name it. It is
    session-scoped so that ``download`` is called once per test run rather
    than once before every individual test.
    """
    download("en")
14
+
15
+
10
16
@pytest.fixture
def lang():
    """Provide the language code used by the tests (English)."""
    language_code = "en"
    return language_code
13
19
14
20
15
21
def tags_equal(act, exp):
    """Check if each actual tag is equal to one or more expected tags.

    Args:
        act: Iterable of actual tags, one per token.
        exp: Iterable of expectations, one per token. Each entry is either
            a single tag (``str``) or a collection of acceptable tags.

    Returns:
        True only if both inputs have the same length and every actual tag
        matches its expectation; False otherwise.
    """
    act, exp = list(act), list(exp)
    # zip() stops at the shorter input, so without this check a missing or
    # extra token would be silently accepted as a match.
    if len(act) != len(exp):
        return False
    return all(a == e if isinstance(e, str) else a in e
               for a, e in zip(act, exp))
18
25
19
26
20
27
def test_get_defaults ():
@@ -29,19 +36,30 @@ def test_spacy_udpipe(lang):
29
36
text = "Testing one, two, three. This is a test."
30
37
doc = nlp (text )
31
38
32
- pos_actual = ['VERB' , 'NUM' , 'PUNCT' , 'NUM' , 'PUNCT' , 'NUM' , 'PUNCT' ,
33
- ('PRON' , 'DET' ), ('AUX' , 'VERB' ), 'DET' , 'NOUN' , 'PUNCT' ]
39
+ pos_actual = [('VERB' , 'PROPN' ), 'NUM' , 'PUNCT' , 'NUM' , 'PUNCT' , 'NUM' ,
40
+ 'PUNCT' ,
41
+ ('PRON' , 'DET' ), ('AUX' , 'VERB' ), 'DET' , 'NOUN' ,
42
+ 'PUNCT' ]
34
43
# test token attributes
35
- assert [t .text for t in doc ] == ['Testing' , 'one' , ',' , 'two' , ',' , 'three' , '.' ,
36
- 'This' , 'is' , 'a' , 'test' , '.' ]
37
- assert [t .lemma_ for t in doc ] == ['test' , 'one' , ',' , 'two' , ',' , 'three' , '.' ,
38
- 'this' , 'be' , 'a' , 'test' , '.' ]
44
+ assert [t .text for t in doc ] == ['Testing' , 'one' , ',' , 'two' , ',' , 'three' , # noqa: E501
45
+ '.' ,
46
+ 'This' , 'is' , 'a' , 'test' ,
47
+ '.' ]
48
+ assert [t .lemma_ for t in doc ] == ['test' , 'one' , ',' , 'two' , ',' , 'three' ,
49
+ '.' ,
50
+ 'this' , 'be' , 'a' , 'test' ,
51
+ '.' ]
39
52
assert tags_equal ([t .pos_ for t in doc ], pos_actual )
40
- assert [t .tag_ for t in doc ] == ['V' , 'N' , 'FF' , 'N' , 'FF' , 'N' , 'FS' ,
41
- 'PD' , 'V' , 'RI' , 'S' , 'FS' ] # CoNNL xpostag-s, custom for each UD treebank
42
- assert [t .dep_ for t in doc ] == ['ROOT' , 'nummod' , 'punct' , 'nummod' , 'punct' , 'nummod' , 'punct' ,
43
- 'nsubj' , 'cop' , 'det' , 'ROOT' , 'punct' ]
44
- assert [t .is_sent_start for t in doc ] == [True , None , None , None , None , None , None ,
53
+ # CoNLL xpostag-s, custom for each UD treebank
54
+ assert [t .tag_ for t in doc ] == ['NNP' , 'CD' , ',' , 'CD' , ',' , 'CD' ,
55
+ '.' ,
56
+ 'DT' , 'VBZ' , 'DT' , 'NN' ,
57
+ '.' ]
58
+ assert [t .dep_ for t in doc ] == ['ROOT' , 'nummod' , 'punct' , 'nummod' , 'punct' , 'nummod' , # noqa: E501
59
+ 'punct' ,
60
+ 'nsubj' , 'cop' , 'det' , 'ROOT' ,
61
+ 'punct' ]
62
+ assert [t .is_sent_start for t in doc ] == [True , None , None , None , None , None , None , # noqa: E501
45
63
True , None , None , None , None ]
46
64
assert any ([t .is_stop for t in doc ])
47
65
# test doc attributes
@@ -52,7 +70,6 @@ def test_spacy_udpipe(lang):
52
70
# test pipe
53
71
docs = list (nlp .pipe (["Testing one, two, three." , "This is a test." ]))
54
72
assert docs [0 ].text == "Testing one, two, three."
55
- assert [t .pos_ for t in docs [0 ]] == [
56
- 'VERB' , 'NUM' , 'PUNCT' , 'NUM' , 'PUNCT' , 'NUM' , 'PUNCT' ]
73
+ assert [t .pos_ for t in docs [0 ]] == ['PROPN' , 'NUM' , 'PUNCT' , 'NUM' , 'PUNCT' , 'NUM' , 'PUNCT' ] # noqa: E501
57
74
assert docs [1 ].text == "This is a test."
58
75
assert tags_equal ([t .pos_ for t in docs [1 ]], pos_actual [- 5 :])
0 commit comments