3
3
"""
4
4
5
5
import sys
6
+ from importlib import import_module
7
+ from warnings import warn
6
8
7
9
import six
8
- from lxml import etree , html
10
+ from lxml import etree
9
11
10
12
from .utils import flatten , iflatten , extract_regex , shorten
11
13
from .csstranslator import HTMLTranslator , GenericTranslator
12
14
13
15
16
def _load_object(path):
    """Load an object given its absolute object path, and return it.

    `path` can point to a class, function, variable or a class instance. For
    example: ``'parsel.parser.html.HTML_PARSER'``.

    Raises ``ValueError`` when `path` has no module part (no dot at all, or
    nothing before the last dot) and ``NameError`` when the imported module
    does not define the requested name. Errors raised by ``import_module``
    itself propagate unchanged.
    """
    # rpartition never raises, so the "not a full path" error is raised
    # outside of any ``except`` block (no chained traceback on Python 3) and
    # also covers inputs such as '.name', whose module part is empty.
    module, _, name = path.rpartition('.')
    if not module:
        raise ValueError("Error loading object '%s': not a full path" % path)

    mod = import_module(module)

    try:
        return getattr(mod, name)
    except AttributeError:
        raise NameError("Module '%s' doesn't define any object named '%s'" % (module, name))
37
+
38
+
14
39
class CannotRemoveElementWithoutRoot (Exception ):
15
40
pass
16
41
@@ -21,14 +46,16 @@ class CannotRemoveElementWithoutParent(Exception):
21
46
22
47
class SafeXMLParser(etree.XMLParser):
    """Deprecated ``etree.XMLParser`` subclass with entity resolution off by default.

    Instantiating it emits a ``DeprecationWarning``. ``resolve_entities`` is
    set to ``False`` unless the caller passes that keyword explicitly.
    """

    def __init__(self, *args, **kwargs):
        # stacklevel=2 attributes the warning to the code instantiating the
        # parser rather than to this constructor.
        warn(
            'parsel.selector.SafeXMLParser is deprecated',
            DeprecationWarning,
            stacklevel=2,
        )
        # Only fill in the default; an explicit caller choice is kept.
        if 'resolve_entities' not in kwargs:
            kwargs['resolve_entities'] = False
        super(SafeXMLParser, self).__init__(*args, **kwargs)
26
53
27
54
# Per-selector-type settings. '_parser' holds the dotted path of the parser
# object, which Selector resolves through _load_object(); the path must not
# contain whitespace or the module cannot be imported.
_ctgroup = {
    'html': {'_parser': 'parsel.parser.html.HTML_PARSER',
             '_csstranslator': HTMLTranslator(),
             '_tostring_method': 'html'},
    'xml': {'_parser': 'parsel.parser.xml.XML_PARSER',
            '_csstranslator': GenericTranslator(),
            '_tostring_method': 'xml'},
}
@@ -46,6 +73,8 @@ def _st(st):
46
73
def create_root_node (text , parser_cls , base_url = None ):
47
74
"""Create root node for text using given parser class.
48
75
"""
76
+ warn ('parsel.selector.create_root_node is deprecated' ,
77
+ DeprecationWarning , stacklevel = 2 )
49
78
body = text .strip ().replace ('\x00 ' , '' ).encode ('utf8' ) or b'<html/>'
50
79
parser = parser_cls (recover = True , encoding = 'utf8' )
51
80
root = etree .fromstring (body , parser = parser , base_url = base_url )
@@ -198,7 +227,7 @@ class Selector(object):
198
227
def __init__ (self , text = None , type = None , namespaces = None , root = None ,
199
228
base_url = None , _expr = None ):
200
229
self .type = st = _st (type or self ._default_type )
201
- self ._parser = _ctgroup [st ]['_parser' ]
230
+ self ._parser = _load_object ( _ctgroup [st ]['_parser' ])
202
231
self ._csstranslator = _ctgroup [st ]['_csstranslator' ]
203
232
self ._tostring_method = _ctgroup [st ]['_tostring_method' ]
204
233
@@ -219,7 +248,7 @@ def __getstate__(self):
219
248
raise TypeError ("can't pickle Selector objects" )
220
249
221
250
def _get_root(self, text, base_url=None):
    """Return the result of ``self._parser.parse`` for ``text``.

    ``text`` and ``base_url`` are forwarded unchanged as keyword arguments.
    """
    parser = self._parser
    root = parser.parse(text=text, base_url=base_url)
    return root
223
252
224
253
def xpath (self , query , namespaces = None , ** kwargs ):
225
254
"""
0 commit comments