|
9 | 9 |
|
10 | 10 | from . import get_corpus_dir |
11 | 11 | from .transformations import (filename_to_doi, _get_base_page, LANDING_PAGE_SUFFIX, |
12 | | - URL_SUFFIX, plos_page_dict, doi_url) |
| 12 | + URL_SUFFIX, plos_page_dict, doi_url, doi_to_url, doi_to_path) |
13 | 13 | from .plos_regex import validate_doi |
14 | 14 | from .elements import (parse_article_date, get_contrib_info, |
15 | 15 | Journal, License, match_contribs_to_dicts) |
| 16 | +from .utils import dedent |
16 | 17 |
|
17 | 18 |
|
18 | | -class Article(): |
| 19 | +class Article: |
19 | 20 | """The primary object of a PLOS article, initialized by a valid PLOS DOI. |
20 | 21 |
|
21 | 22 | """ |
@@ -44,6 +45,61 @@ def __init__(self, doi, directory=None): |
44 | 45 | self.reset_memoized_attrs() |
45 | 46 | self._editor = None |
46 | 47 |
|
| 48 | + def __str__(self, exclude_refs=True): |
| 49 | + """Output when you print an article object on the command line. |
| 50 | +
|
| 51 | + For parsing and viewing the XML of a local article. Should not be used for hashing |
| 52 | + Excludes <back> element (including references list) for easier viewing |
| 53 | + :param exclude_refs: remove references from the article tree (eases print viewing) |
| 54 | + """ |
| 55 | + parser = et.XMLParser(remove_blank_text=True) |
| 56 | + tree = et.parse(self.filename, parser) |
| 57 | + if exclude_refs: |
| 58 | + root = tree.getroot() |
| 59 | + back = tree.xpath('./back') |
| 60 | + root.remove(back[0]) |
| 61 | + local_xml = et.tostring(tree, |
| 62 | + method='xml', |
| 63 | + encoding='unicode', |
| 64 | + pretty_print=True) |
| 65 | + return local_xml |
| 66 | + |
| 67 | + def __repr__(self): |
| 68 | + """Value of an article object when you call it directly on the command line. |
| 69 | +
|
| 70 | + Shows the DOI and title of the article |
| 71 | + :returns: DOI and title |
| 72 | + :rtype: {str} |
| 73 | + """ |
| 74 | + out = "DOI: {0}\nTitle: {1}".format(self.doi, self.title) |
| 75 | + return out |
| 76 | + |
| 77 | + |
| 78 | + def _repr_html_(self): |
| 79 | + """Nice display for Jupyter notebook""" |
| 80 | + |
| 81 | + titlestyle = 'display:inline-flex;' |
| 82 | + titletextstyle = 'margin-left:.5em;' |
| 83 | + titlelink = ('<span style="{titlestyle}"><a href="{url}">' |
| 84 | + '<em>{title}</em></a></span>').format( |
| 85 | + url=self.page, |
| 86 | + title=self.title, |
| 87 | + titlestyle=titlestyle+titletextstyle, |
| 88 | + ) |
| 89 | + |
| 90 | + doilink = '<span><a href="{url}"><code>{doi}</code></a></span>'.format( |
| 91 | + url=self.doi_link(), |
| 92 | + doi=self.doi, |
| 93 | + ) |
| 94 | + out = dedent("""<div> |
| 95 | + <span style="{titlestyle}">Title: {titlelink}</span></br> |
| 96 | + <span>DOI: <span>{doilink} |
| 97 | + </div> |
| 98 | + """).format(doilink=doilink, titlelink=titlelink, titlestyle=titlestyle) |
| 99 | + |
| 100 | + return out |
| 101 | + |
| 102 | + |
47 | 103 | def reset_memoized_attrs(self): |
48 | 104 | """Reset attributes to None when instantiating a new article object. |
49 | 105 |
|
@@ -109,34 +165,6 @@ def doi(self, d): |
109 | 165 | self.reset_memoized_attrs() |
110 | 166 | self._doi = d |
111 | 167 |
|
112 | | - def __str__(self, exclude_refs=True): |
113 | | - """Output when you print an article object on the command line. |
114 | | -
|
115 | | - For parsing and viewing the XML of a local article. Should not be used for hashing |
116 | | - Excludes <back> element (including references list) for easier viewing |
117 | | - :param exclude_refs: remove references from the article tree (eases print viewing) |
118 | | - """ |
119 | | - parser = et.XMLParser(remove_blank_text=True) |
120 | | - tree = et.parse(self.filename, parser) |
121 | | - if exclude_refs: |
122 | | - root = tree.getroot() |
123 | | - back = tree.xpath('./back') |
124 | | - root.remove(back[0]) |
125 | | - local_xml = et.tostring(tree, |
126 | | - method='xml', |
127 | | - encoding='unicode', |
128 | | - pretty_print=True) |
129 | | - return local_xml |
130 | | - |
131 | | - def __repr__(self): |
132 | | - """Value of an article object when you call it directly on the command line. |
133 | | -
|
134 | | - Shows the DOI and title of the article |
135 | | - :returns: DOI and title |
136 | | - :rtype: {str} |
137 | | - """ |
138 | | - out = "DOI: {0}\nTitle: {1}".format(self.doi, self.title) |
139 | | - return out |
140 | 168 |
|
141 | 169 | def doi_link(self): |
142 | 170 | """The link of the DOI, which redirects to the journal URL.""" |
|
0 commit comments