Skip to content

Commit 883bc77

Browse files
authored
v0.1 (#5)
* better errors
* error handling when missing metadata
* remove test ticket
* original format tabular files
1 parent 18e5238 commit 883bc77

File tree

8 files changed

+68
-22
lines changed

8 files changed

+68
-22
lines changed

.github/workflows/build.yml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
name: Build
2-
on: push
1+
on:
2+
push:
3+
tags:
4+
- 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10
35

6+
name: Build
47
jobs:
58

69
createrelease:
@@ -15,7 +18,7 @@ jobs:
1518
with:
1619
tag_name: ${{ github.ref }}
1720
release_name: Release ${{ github.ref }}
18-
draft: false
21+
draft: true
1922
prerelease: false
2023
- name: Output Release URL File
2124
run: echo "${{ steps.create_release.outputs.upload_url }}" > release_url.txt
@@ -42,15 +45,15 @@ jobs:
4245
ASSET_MIME: application/zip
4346
- os: windows-latest
4447
TARGET: windows
45-
CMD_BUILD: pyinstaller -F -w -n dvcurator gui.spec
48+
CMD_BUILD: pyinstaller gui.spec
4649
OUT_FILE_NAME: dvcurator.exe
4750
ASSET_MIME: application/vnd.microsoft.portable-executable
4851
steps:
4952
- uses: actions/checkout@v1
50-
- name: Set up Python 3.8
53+
- name: Set up Python 3.10
5154
uses: actions/setup-python@v2
5255
with:
53-
python-version: 3.8
56+
python-version: 3.10.4
5457
- name: Install dependencies
5558
run: |
5659
python -m pip install --upgrade pip

dvcurator/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
import sys, gui
2-
sys.exit(gui.main())
1+
import sys, dvcurator.gui
2+
sys.exit(dvcurator.gui.main())

dvcurator/dataverse.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
def get_citation(host, doi, token=""):
66
import requests
77

8+
if not doi.startswith("doi:"):
9+
print("Error: DOIs should start with \"doi:\"")
10+
return None
11+
812
# Scrape data and metadata from dataverse
913
dataset_url = 'https://' + host
1014
dataset_url += '/api/datasets/:persistentId/?persistentId=' + doi
@@ -13,6 +17,11 @@ def get_citation(host, doi, token=""):
1317
else:
1418
key = {'X-Dataverse-Key': token}
1519
dataset = requests.get(dataset_url, headers=key)
20+
21+
if (dataset.json()['status']=="ERROR"):
22+
print("Error: " + dataset.json()['message'])
23+
return None
24+
1625
citation=dataset.json()['data']['latestVersion']['metadataBlocks']['citation']['fields']
1726
fields = [] # Make an index of all the metadata fields
1827
values = []
@@ -36,6 +45,7 @@ def download_dataset(host, doi, token, folder_name, dropbox):
3645

3746
zip_url = 'https://' + host
3847
zip_url += '/api/access/dataset/:persistentId/?persistentId=' + doi
48+
zip_url += '&format=original'
3949
if token:
4050
key = {'X-Dataverse-Key': token}
4151
r = requests.get(zip_url, headers=key, allow_redirects=True, stream=True)

dvcurator/github.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,21 @@
22
# -*- coding: utf-8 -*-
33
#
44

5+
def check_repo(repo, key=None):
    """Report whether a GitHub repository can be found.

    Sends a GET request to the repository's issues endpoint on
    api.github.com and looks only at the HTTP status code of the reply.

    Args:
        repo: Repository path appended after "/repos/" in the API URL
            (presumably in "owner/name" form -- confirm against callers).
        key: Optional GitHub token. When truthy it is sent as an
            "Authorization: token <key>" header; otherwise the request
            is made without an Authorization header.

    Returns:
        False if GitHub answers with status 404, True for any other
        status code.
    """
    import requests

    github = 'https://api.github.com'
    # Only the status code is inspected, so ask for a single issue rather
    # than a full page of 100.
    project_url = github + "/repos/" + repo + "/issues?per_page=1"

    if key:
        headers = {'Authorization': "token " + key}
        projects = requests.get(project_url, headers=headers)
    else:
        projects = requests.get(project_url)

    # Every non-404 status (including auth or rate-limit failures) is
    # deliberately still reported as "repository exists".
    return projects.status_code != 404
19+
520
def search_existing(project_name, repo, key=None):
621
import json, requests
722
github='https://api.github.com'
@@ -15,7 +30,7 @@ def search_existing(project_name, repo, key=None):
1530
else:
1631
key = {'Authorization': "token " + key}
1732
projects = requests.get(project_url + "?per_page=100", headers=key)
18-
33+
1934
# Take the first three words ("lastname - first-of-title") to search
2035
project_name = ' '.join(project_name.split()[:3])
2136

dvcurator/gui.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import tkinter as tk
1010
import sys
11-
import github, dataverse, pdf_metadata
11+
import dvcurator.github, dvcurator.dataverse, dvcurator.pdf_metadata
1212

1313
def project_name(last_name, title):
1414
import re
@@ -80,9 +80,17 @@ def load_citation(self):
8080
return
8181

8282
#from . import dataverse
83-
self.citation = dataverse.get_citation(self.host.get(), self.doi.get(), self.dv_token.get())
83+
self.citation = dvcurator.dataverse.get_citation(self.host.get(), self.doi.get(), self.dv_token.get())
84+
if (not self.citation):
85+
return
86+
8487
# citation['depositor'].split(', ')[0] is the last name of the depositor
8588
self.folder_name = project_name(self.citation['depositor'].split(', ')[0], self.citation['title'])
89+
90+
special_characters = ['!','#','$','%', '&','@','[',']',']','_',':',';',"'"]
91+
for i in special_characters:
92+
self.folder_name = self.folder_name.replace(i,'')
93+
8694
print(self.folder_name)
8795

8896
# Enable the next step buttons
@@ -93,7 +101,7 @@ def load_citation(self):
93101

94102
def download_extract(self):
95103
#from . import dataverse
96-
extracted_path = dataverse.download_dataset(self.host.get(), self.doi.get(), self.dv_token.get(), self.folder_name, self.dropbox.get())
104+
extracted_path = dvcurator.dataverse.download_dataset(self.host.get(), self.doi.get(), self.dv_token.get(), self.folder_name, self.dropbox.get())
97105
if not extracted_path:
98106
print("Error: folder may already exist")
99107
else:
@@ -110,15 +118,19 @@ def make_github(self):
110118
print("Error: no github repository specified")
111119
return
112120

121+
if (not dvcurator.github.check_repo(self.repo.get())):
122+
print("Error: github repository doesn't exist")
123+
return
124+
113125
#from . import github
114-
existing = github.search_existing(self.folder_name, self.repo.get(), self.gh_token.get())
126+
existing = dvcurator.github.search_existing(self.folder_name, self.repo.get(), self.gh_token.get())
115127
if (existing):
116128
print("Error: existing github issues!!")
117129
return
118130

119131

120132
# Create github project + issues
121-
self.project = github.create_project(self.doi.get(), self.citation, self.folder_name, self.repo.get(), self.gh_token.get())
133+
self.project = dvcurator.github.create_project(self.doi.get(), self.citation, self.folder_name, self.repo.get(), self.gh_token.get())
122134
print("Created project: " + self.folder_name)
123135
# Get internal issue templates from selected checkboxes
124136
for issue in self.issues_selected:
@@ -128,7 +140,7 @@ def make_github(self):
128140
path = os.path.join(sys._MEIPASS, "issues", path)
129141
else:
130142
path = resource_filename("dvcurator", "issues/" + path)
131-
github.add_issue(self.folder_name, path, self.repo.get(), self.project, self.gh_token.get())
143+
dvcurator.github.add_issue(self.folder_name, path, self.repo.get(), self.project, self.gh_token.get())
132144
print(issue.get() + " added to project")
133145

134146
def set_metadata(self):
@@ -138,7 +150,7 @@ def set_metadata(self):
138150
metadata_path = pdf_metadata.make_metadata_folder(self.dropbox.get(), self.folder_name)
139151
if (not metadata_path): # Errors are outputted by pdf_metadata
140152
return
141-
pdf_metadata.standard_metadata(metadata_path, self.citation['depositor'])
153+
dvcurator.pdf_metadata.standard_metadata(metadata_path, self.citation['depositor'])
142154
print("PDF metadata updated in new folder")
143155
else:
144156
print("Error: Dropbox folder invalid")

dvcurator/issues/test.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

dvcurator/pdf_metadata.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,18 @@ def standard_metadata(edit_path, author):
4949
for path in pdfs:
5050
pdf = pikepdf.open(path, allow_overwriting_input=True)
5151
# Clean out all existing metadata
52-
#del pdf.Root.Metadata
53-
#def pdf.docinfo
52+
try:
53+
del pdf.Root.Metadata
54+
except:
55+
pass
56+
57+
del pdf.docinfo
5458

59+
# Write new metadata
5560
with pdf.open_metadata() as meta:
5661
if meta.pdfa_status:
5762
print("Warning: Edited PDF claims PDF/A")
58-
#meta['dc:title'] = os.path.basename(path)
63+
meta['dc:title'] = os.path.basename(path)
5964
meta['dc:creator'] = author
6065
meta['pdf:Author'] = author
6166
meta['dc:description'] = "QDR Data Project"
@@ -64,4 +69,4 @@ def standard_metadata(edit_path, author):
6469

6570
pdf.save(path)
6671
return True
67-
#print("Metadata written to '%s'" %path)
72+
print("Metadata written to '%s'" %path)

gui.spec

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ a = Analysis(['dvcurator\\gui.py'],
88
pathex=['dvcurator/'],
99
binaries=[],
1010
datas=[('dvcurator\\issues', 'issues')],
11-
hiddenimports=[],
11+
hiddenimports=['dvcurator'],
1212
hookspath=[],
1313
hooksconfig={},
1414
runtime_hooks=[],
@@ -27,6 +27,8 @@ exe = EXE(pyz,
2727
a.datas,
2828
[],
2929
name='dvcurator',
30+
onefile=True,
31+
windowed=True,
3032
debug=False,
3133
bootloader_ignore_signals=False,
3234
strip=False,

0 commit comments

Comments
 (0)