@@ -201,12 +201,12 @@ def flatten(self, max_depth=16) -> "DatasetDict":
201
201
>>> from datasets import load_dataset
202
202
>>> ds = load_dataset("rajpurkar/squad")
203
203
>>> ds["train"].features
204
- {'id': Value(dtype= 'string'),
205
- 'title': Value(dtype= 'string'),
206
- 'context': Value(dtype= 'string'),
207
- 'question': Value(dtype= 'string'),
208
- 'answers.text': List(feature= Value(dtype= 'string'), length=-1 ),
209
- 'answers.answer_start': List(feature= Value(dtype= 'int32'), length=-1 )}
204
+ {'id': Value('string'),
205
+ 'title': Value('string'),
206
+ 'context': Value('string'),
207
+ 'question': Value('string'),
208
+ 'answers.text': List(Value('string')),
209
+ 'answers.answer_start': List(Value('int32'))}
210
210
>>> ds.flatten()
211
211
DatasetDict({
212
212
train: Dataset({
@@ -290,14 +290,14 @@ def cast(self, features: Features) -> "DatasetDict":
290
290
>>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes")
291
291
>>> ds["train"].features
292
292
{'label': ClassLabel(names=['neg', 'pos']),
293
- 'text': Value(dtype= 'string')}
293
+ 'text': Value('string')}
294
294
>>> new_features = ds["train"].features.copy()
295
295
>>> new_features['label'] = ClassLabel(names=['bad', 'good'])
296
296
>>> new_features['text'] = Value('large_string')
297
297
>>> ds = ds.cast(new_features)
298
298
>>> ds["train"].features
299
299
{'label': ClassLabel(names=['bad', 'good']),
300
- 'text': Value(dtype= 'large_string')}
300
+ 'text': Value('large_string')}
301
301
```
302
302
"""
303
303
self._check_values_type()
@@ -322,11 +322,11 @@ def cast_column(self, column: str, feature) -> "DatasetDict":
322
322
>>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes")
323
323
>>> ds["train"].features
324
324
{'label': ClassLabel(names=['neg', 'pos']),
325
- 'text': Value(dtype= 'string')}
325
+ 'text': Value('string')}
326
326
>>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good']))
327
327
>>> ds["train"].features
328
328
{'label': ClassLabel(names=['bad', 'good']),
329
- 'text': Value(dtype= 'string')}
329
+ 'text': Value('string')}
330
330
```
331
331
"""
332
332
self._check_values_type()
@@ -513,14 +513,14 @@ def class_encode_column(self, column: str, include_nulls: bool = False) -> "Data
513
513
>>> from datasets import load_dataset
514
514
>>> ds = load_dataset("boolq")
515
515
>>> ds["train"].features
516
- {'answer': Value(dtype= 'bool'),
517
- 'passage': Value(dtype= 'string'),
518
- 'question': Value(dtype= 'string')}
516
+ {'answer': Value('bool'),
517
+ 'passage': Value('string'),
518
+ 'question': Value('string')}
519
519
>>> ds = ds.class_encode_column("answer")
520
520
>>> ds["train"].features
521
521
{'answer': ClassLabel(num_classes=2, names=['False', 'True']),
522
- 'passage': Value(dtype= 'string'),
523
- 'question': Value(dtype= 'string')}
522
+ 'passage': Value('string'),
523
+ 'question': Value('string')}
524
524
```
525
525
"""
526
526
self._check_values_type()
@@ -2381,11 +2381,11 @@ def cast_column(self, column: str, feature: FeatureType) -> "IterableDatasetDict
2381
2381
>>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", streaming=True)
2382
2382
>>> ds["train"].features
2383
2383
{'label': ClassLabel(names=['neg', 'pos']),
2384
- 'text': Value(dtype= 'string')}
2384
+ 'text': Value('string')}
2385
2385
>>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good']))
2386
2386
>>> ds["train"].features
2387
2387
{'label': ClassLabel(names=['bad', 'good']),
2388
- 'text': Value(dtype= 'string')}
2388
+ 'text': Value('string')}
2389
2389
```
2390
2390
"""
2391
2391
return IterableDatasetDict(
@@ -2417,14 +2417,14 @@ def cast(
2417
2417
>>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", streaming=True)
2418
2418
>>> ds["train"].features
2419
2419
{'label': ClassLabel(names=['neg', 'pos']),
2420
- 'text': Value(dtype= 'string')}
2420
+ 'text': Value('string')}
2421
2421
>>> new_features = ds["train"].features.copy()
2422
2422
>>> new_features['label'] = ClassLabel(names=['bad', 'good'])
2423
2423
>>> new_features['text'] = Value('large_string')
2424
2424
>>> ds = ds.cast(new_features)
2425
2425
>>> ds["train"].features
2426
2426
{'label': ClassLabel(names=['bad', 'good']),
2427
- 'text': Value(dtype= 'large_string')}
2427
+ 'text': Value('large_string')}
2428
2428
```
2429
2429
"""
2430
2430
return IterableDatasetDict({k: dataset.cast(features=features) for k, dataset in self.items()})
0 commit comments