@@ -34,9 +34,16 @@ class Builder(tfds.core.GeneratorBasedBuilder):
34
34
VERSION = tfds .core .Version ("2.1.0" )
35
35
RELEASE_NOTES = {
36
36
"1.0.0" : "Initial release." ,
37
- "2.0.0" : "Update the dataset with valid URLs." ,
38
- "2.1.0" : "Update the dataset with cleaned URLs." ,
37
+ "2.0.0" : "[Do not use] Update the dataset with valid URLs." ,
38
+ "2.1.0" : (
39
+ "Update the dataset with the correct URLs. The URLs in this version"
40
+ " come from HuggingFace's dataset repo, which is curated by the same"
41
+ " author: https://huggingface.co/datasets/alexfabbri/multi_news."
42
+ ),
39
43
}
44
+ BLOCKED_VERSIONS = tfds .core .utils .BlockedVersions (
45
+ versions = {"2.0.0" : "The URLs of this version are invalid." }
46
+ )
40
47
41
48
def _info (self ) -> tfds .core .DatasetInfo :
42
49
"""Returns the dataset metadata."""
@@ -77,9 +84,10 @@ def _generate_examples(self, src_file, tgt_file):
77
84
).open () as tgt_f :
78
85
for i , (src_line , tgt_line ) in enumerate (zip (src_f , tgt_f )):
79
86
yield i , {
80
- # In original file, each line has one example and natural newline
81
- # tokens "\n" are being replaced with "NEWLINE_CHAR". Here restore
82
- # the natural newline token to avoid special vocab "NEWLINE_CHAR".
87
+ # In the original file, each line has one example and natural
88
+ # newline tokens "\n" are being replaced with "NEWLINE_CHAR".
89
+ # Here, we restore the natural newline token to avoid the special
90
+ # vocab token "NEWLINE_CHAR".
83
91
_DOCUMENT : src_line .strip ().replace ("NEWLINE_CHAR" , "\n " ),
84
92
_SUMMARY : tgt_line .strip ().lstrip (),
85
93
}
0 commit comments