Skip to content

Commit 259c7d3

Browse files
committed
improved project name handling
1 parent 80e41bd commit 259c7d3

File tree

2 files changed

+39
-6
lines changed

2 files changed

+39
-6
lines changed

dvcurator/rename.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,22 @@ def project_name(citation):
2424
# What did this line do?
2525
# title = re.match("^(.+?\\s){1,5}", title).group(0).rstrip()
2626
title = re.sub("^[^a-zA-Z]?", "", title) # strip at most one leading non-letter character (the "?" matches zero or one)
27+
# limit to only the content before the first colon
2728
title = re.sub(":.+", '', title)
28-
29+
# if there's more than 5 words, limit it to the first 5
30+
words = title.split()
31+
if len(words) > 5:
32+
title = ' '.join(words[:5])
33+
2934
folder_name = author1_last_name + " - " + title
3035

31-
special_characters = ['!','#','$','%', '&','@','[',']',']','_',':',';',"'"]
32-
for i in special_characters:
33-
folder_name = folder_name.replace(i,'')
36+
# remove special characters that are not allowed in folder names
37+
invalid_chars = r'[<>:"/\\|?*\'\[\]\{\}!#$%&@;=+,]'
38+
folder_name = re.sub(invalid_chars, '', folder_name)
39+
40+
# replace multiple spaces with one space
41+
folder_name = re.sub(r'\s+', ' ', folder_name).strip()
42+
folder_name = folder_name.strip() # Remove leading/trailing whitespace
3443

3544
return folder_name
3645

test.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,18 @@ def test_download(self):
5454
self.assertTrue(os.path.isdir(path))
5555
self.assertTrue(os.path.exists(os.path.join(path, os.pardir, os.pardir, "Original Deposit.zip")))
5656
self.assertTrue(os.path.exists(os.path.join(path, os.pardir, os.pardir, "Original metadata.json")))
57-
5857
self.assertTrue(os.path.exists(os.path.join(path, "README_VandeVusse-Mueller.txt")))
59-
6058
# Try again, we should fail the second time
6159
self.assertIsNone(dataverse.download_dataset(metadata, f.name))
60+
f.cleanup()
6261

62+
# try a harder example, with messed up filenames
63+
f = tempfile.TemporaryDirectory()
64+
metadata = dataverse.get_metadata("doi:10.5064/F68G8HMM")
65+
path = dataverse.download_dataset(metadata, f.name)
66+
self.assertTrue(os.path.isdir(path))
67+
self.assertTrue(os.path.exists(os.path.join(path, os.pardir, os.pardir, "Original Deposit.zip")))
68+
self.assertTrue(os.path.exists(os.path.join(path, os.pardir, os.pardir, "Original metadata.json")))
6369
f.cleanup()
6470

6571
class TestGithubAPI(unittest.TestCase):
@@ -77,10 +83,28 @@ def test_version(self):
7783
class TestRename(unittest.TestCase):
7884

7985
def test_projectname(self):
86+
# test limiting to before the first colon and removing "Data for:"
8087
metadata = dataverse.get_metadata("doi:10.5064/F6AQGERV")
8188
citation = dataverse.get_citation(metadata)
8289
self.assertIsNotNone(citation)
8390
self.assertEqual(rename.project_name(citation), "Haney - Child Support Adjudication")
91+
# test that titles longer than 5 words are limited to the first 5
92+
metadata = dataverse.get_metadata("doi:10.5064/F6ZXIJS5")
93+
citation = dataverse.get_citation(metadata)
94+
self.assertIsNotNone(citation)
95+
self.assertEqual(rename.project_name(citation), "Guastaferro - Adapting a Selective Parent-Focused Child")
96+
# test special character removal
97+
metadata = dataverse.get_metadata("doi:10.5064/F6MBCJ8M")
98+
citation = dataverse.get_citation(metadata)
99+
self.assertIsNotNone(citation)
100+
self.assertEqual(rename.project_name(citation), "Berntzen - Monster or Hero Far-right Responses")
101+
# create a folder named after the project name inside a temporary directory and check that it exists
102+
d = tempfile.TemporaryDirectory()
103+
new_folder_name = rename.project_name(citation)
104+
new_folder_path = os.path.join(d.name, new_folder_name, "QDR Prepared", "1_extract")
105+
os.makedirs(new_folder_path)
106+
self.assertTrue(os.path.exists(new_folder_path)) # Ensure it was successfully made
107+
d.cleanup()
84108

85109
def test_rename(self):
86110
f = tempfile.TemporaryDirectory()

0 commit comments

Comments
 (0)