@@ -18,13 +18,27 @@ def load(lang):
18
18
mimics spacy.load.
19
19
20
20
lang (unicode): ISO 639-1 language code or shorthand UDPipe model name.
21
- RETURNS (spacy.language.Language): The UDPipeLanguage object.
21
+ RETURNS (spacy.language.Language): The UDPipeLanguage object.
22
22
"""
23
23
model = UDPipeModel (lang )
24
24
nlp = UDPipeLanguage (model )
25
25
return nlp
26
26
27
27
28
def load_from_path(lang, path, meta=None):
    """Build a UDPipeLanguage pipeline backed by a custom UDPipe model file.

    Counterpart of `load` for models that are given explicitly through the
    path argument.

    lang (unicode): ISO 639-1 language code.
    path (unicode): Path to the UDPipe model.
    meta (dict): Meta-information about the UDPipe model.
    RETURNS (spacy.language.Language): The UDPipeLanguage object.
    """
    # Wrap the custom model first, then hand it to the Language subclass.
    custom_model = UDPipeModel(lang, path, meta)
    return UDPipeLanguage(custom_model)
40
+
41
+
28
42
class UDPipeLanguage (Language ):
29
43
30
44
def __init__ (self , udpipe_model , meta = None , ** kwargs ):
@@ -93,7 +107,7 @@ def __call__(self, text):
93
107
udpipe_sents = self .model (text ) if text else [Sentence ()]
94
108
text = " " .join (s .getText () for s in udpipe_sents )
95
109
tokens , heads = self .get_tokens_with_heads (udpipe_sents )
96
- if not len ( tokens ) :
110
+ if not tokens :
97
111
return Doc (self .vocab )
98
112
99
113
words = []
@@ -186,32 +200,38 @@ def check_aligned(self, text, tokens):
186
200
187
201
class UDPipeModel :
188
202
189
def __init__(self, lang, path=None, meta=None):
    """Load UDPipe model for given language.

    lang (unicode): ISO 639-1 language code or shorthand UDPipe model name.
    path (unicode): Path to UDPipe model. When None, the path is looked up
        from `lang` via `get_path`.
    meta (dict): Meta-information about the UDPipe model. When None, a
        default meta dict describing the pretrained UDPipe model is built.
    RETURNS (UDPipeModel): Language specific UDPipeModel.
    RAISES (Exception): If `Model.load` yields None for the given path.
    """
    # Function-scope import keeps this block self-contained; only needed
    # for deriving the model file name below.
    import os

    if path is None:
        path = get_path(lang)
    self.model = Model.load(path)
    # A None result is treated as a failed load.
    if self.model is None:
        msg = "Cannot load UDPipe model from " \
              "file '{}'".format(path)
        raise Exception(msg)
    # Keep only the part before the first '-' (e.g. the bare language code
    # of a shorthand model name).
    self._lang = lang.split('-')[0]
    if meta is None:
        self._meta = {'authors': ("Milan Straka, "
                                  "Jana Straková"),
                      'description': "UDPipe pretrained model.",
                      'lang': 'udpipe_' + self._lang,
                      'license': 'CC BY-NC-SA 4.0',
                      # basename handles the platform's path separator,
                      # unlike splitting on '/' which breaks on Windows.
                      'name': os.path.basename(path),
                      'parent_package': 'spacy_udpipe',
                      'pipeline': 'Tokenizer, POS Tagger, Lemmatizer, Parser',
                      'source': 'Universal Dependencies 2.4',
                      'url': 'http://ufal.mff.cuni.cz/udpipe',
                      'version': '1.2.0'
                      }
    else:
        # Caller-supplied meta is stored as-is (not copied).
        self._meta = meta
215
235
216
236
def __call__ (self , text ):
217
237
"""Tokenize, tag and parse the text and return it in an UDPipe
0 commit comments