Skip to content

Commit 7963d2b

Browse files
committed
fix filename_to_doi and download functions
`filename_to_doi` no longer passes a DOI through unchanged; input that is not a valid filename now yields an empty string. A new `download_xml` function takes a DOI rather than a file path, and `download_updated_xml` now calls it to fetch the remote XML.
1 parent 862bedd commit 7963d2b

File tree

2 files changed

+17
-23
lines changed

2 files changed

+17
-23
lines changed

allofplos/corpus/plos_corpus.py

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -278,9 +278,16 @@ def compare_article_pubdate(doi, days=22, directory=None):
278278
print("Pubdate error in {}".format(doi))
279279

280280

281+
def download_xml(doi, tempdir=newarticledir):
282+
"""For a given DOI, download its remote XML file to tempdir."""
283+
art = Article(doi, directory=tempdir)
284+
with open(art.filename, 'w', encoding='utf8') as f:
285+
f.write(art.get_remote_xml())
286+
return art
287+
288+
281289
def download_updated_xml(article_file,
282-
tempdir=newarticledir,
283-
vor_check=False):
290+
tempdir=newarticledir):
284291
"""
285292
For an article file, compare local XML to remote XML
286293
If they're different, download new version of article
@@ -305,21 +312,9 @@ def download_updated_xml(article_file,
305312

306313
if articleXML_remote == articleXML_local:
307314
updated = False
308-
get_new = False
309315
else:
310-
get_new = True
311-
if vor_check:
312-
# make sure that update is to a VOR for uncorrected proof
313-
get_new = False
314-
if article.remote_proof == 'vor_update':
315-
get_new = True
316-
# else:
317-
# updated = False
318-
if get_new:
319-
article_new = Article(article.doi, directory=tempdir)
320-
with open(article_new.filename, 'w', encoding='utf8') as f:
321-
f.write(articleXML_remote)
322-
updated = True
316+
article_new = download_xml(article.doi, tempdir=tempdir)
317+
updated = True
323318
return updated
324319

325320

@@ -496,10 +491,10 @@ def download_vor_updates(directory=None, tempdir=newarticledir,
496491
if vor_updates_available is None:
497492
vor_updates_available = check_for_vor_updates()
498493
vor_updated_article_list = []
499-
for article in tqdm(vor_updates_available, disable=None):
500-
updated = download_updated_xml(article, vor_check=True)
494+
for doi in tqdm(vor_updates_available, disable=None):
495+
updated = download_updated_xml(doi_to_path(doi), tempdir=tempdir)
501496
if updated:
502-
vor_updated_article_list.append(article)
497+
vor_updated_article_list.append(doi)
503498

504499
old_uncorrected_proofs = get_uncorrected_proofs()
505500
new_uncorrected_proofs_list = list(old_uncorrected_proofs - set(vor_updated_article_list))
@@ -547,7 +542,7 @@ def remote_proofs_direct_check(tempdir=newarticledir, article_list=None):
547542
print("Checking directly for additional VOR updates...")
548543
for doi in tqdm(article_list, disable=None):
549544
f = doi_to_path(doi)
550-
updated = download_updated_xml(f, vor_check=True)
545+
updated = download_updated_xml(f)
551546
if updated:
552547
proofs_download_list.append(doi)
553548
if proofs_download_list:

allofplos/transformations.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,8 @@ def filename_to_doi(filename):
100100
doi = PREFIX + article
101101
elif validate_filename(filename):
102102
doi = PREFIX + os.path.splitext(filename)[0]
103-
# NOTE: A filename should never validate as a DOI, so the next elif is wrong.
104-
elif validate_doi(filename):
105-
doi = filename
103+
else:
104+
doi = ''
106105
return doi
107106

108107

0 commit comments

Comments
 (0)