@@ -1282,6 +1282,66 @@ def analyzer_fn(inputs):
1282
1282
self .assertAnalyzerOutputs (
1283
1283
input_data , input_metadata , analyzer_fn , expected_outputs )
1284
1284
1285
def testMaxWithSparseTensorReduceTrue(self):
  """tft.max over a SparseTensor with full reduction yields one scalar."""

  def analyzer_fn(inputs):
    return {'max': tft.max(inputs['sparse'])}

  # Two instances of a [4]-shaped sparse feature, given as (indices, values).
  batches = [
      {'sparse': ([0, 1], [0., 1.])},
      {'sparse': ([1, 3], [2., 3.])},
  ]
  metadata = dataset_metadata.DatasetMetadata({
      'sparse':
          sch.ColumnSchema(
              tf.float32, [4],
              sch.SparseColumnRepresentation(
                  'val', [sch.SparseIndexField('idx', False)]))
  })
  # Maximum taken across every instance and every dense position.
  expected = {'max': np.array(3., np.float32)}
  self.assertAnalyzerOutputs(batches, metadata, analyzer_fn, expected)
1305
+
1306
@tft_unit.parameters(
    (tf.int32,),
    (tf.int64,),
    (tf.float32,),
    (tf.float64,),
)
def testMaxWithSparseTensorReduceFalse(self, input_dtype):
  """tft.max without reduction yields a per-position max over instances."""

  def analyzer_fn(inputs):
    return {'max': tft.max(inputs['sparse'], False)}

  # Two instances of a [4]-shaped sparse feature; position 2 is never set.
  batches = [
      {'sparse': ([0, 1], [-1., 1.])},
      {'sparse': ([1, 3], [2., 3.])},
  ]
  metadata = dataset_metadata.DatasetMetadata({
      'sparse':
          sch.ColumnSchema(
              input_dtype, [4],
              sch.SparseColumnRepresentation(
                  'val', [sch.SparseIndexField('idx', False)]))
  })
  out_dtype = input_dtype.as_numpy_dtype
  if input_dtype in (tf.float32, tf.float64):
    # A position with no values present maps to NaN for float dtypes.
    expected = {'max': np.array([-1., 2., float('nan'), 3.], out_dtype)}
  else:
    # For integer dtypes the missing position maps to the dtype's minimum.
    expected = {'max': np.array([-1, 2, np.iinfo(out_dtype).min, 3], out_dtype)}

  self.assertAnalyzerOutputs(batches, metadata, analyzer_fn, expected)
1344
+
1285
1345
def testNumericMeanWithSparseTensorReduceTrue (self ):
1286
1346
1287
1347
def analyzer_fn (inputs ):
@@ -1341,6 +1401,70 @@ def analyzer_fn(inputs):
1341
1401
self .assertAnalyzerOutputs (input_data , input_metadata , analyzer_fn ,
1342
1402
expected_outputs )
1343
1403
1404
@tft_unit.parameters(
    (tf.int32,),
    (tf.int64,),
    (tf.float32,),
    (tf.float64,),
)
def testVarWithSparseTensorReduceInstanceDimsTrue(self, input_dtype):
  """tft.var over a SparseTensor with full reduction yields one scalar."""

  def analyzer_fn(inputs):
    return {'var': tft.var(inputs['sparse'])}

  # Two instances of a [4]-shaped sparse feature, given as (indices, values).
  batches = [
      {'sparse': ([0, 1], [0., 1.])},
      {'sparse': ([1, 3], [2., 3.])},
  ]
  metadata = dataset_metadata.DatasetMetadata({
      'sparse':
          sch.ColumnSchema(
              input_dtype, [4],
              sch.SparseColumnRepresentation(
                  'val', [sch.SparseIndexField('idx', False)]))
  })
  # Variance of the present values {0, 1, 2, 3} is 1.25; only float64 input
  # keeps float64 output, every other dtype comes back as float32.
  out_dtype = np.float64 if input_dtype == tf.float64 else np.float32
  expected = {'var': np.array(1.25, out_dtype)}
  self.assertAnalyzerOutputs(batches, metadata, analyzer_fn, expected)
1433
+
1434
@tft_unit.parameters(
    (tf.int32,),
    (tf.int64,),
    (tf.float32,),
    (tf.float64,),
)
def testVarWithSparseTensorReduceInstanceDimsFalse(self, input_dtype):
  """tft.var without reduction yields a per-position variance."""

  def analyzer_fn(inputs):
    return {'var': tft.var(inputs['sparse'], reduce_instance_dims=False)}

  # Two instances of a [4]-shaped sparse feature; position 2 is never set.
  batches = [
      {'sparse': ([0, 1], [0., 1.])},
      {'sparse': ([1, 3], [2., 3.])},
  ]
  metadata = dataset_metadata.DatasetMetadata({
      'sparse':
          sch.ColumnSchema(
              input_dtype, [4],
              sch.SparseColumnRepresentation(
                  'val', [sch.SparseIndexField('idx', False)]))
  })
  # Position 1 holds {1, 2} (variance .25); positions 0 and 3 hold a single
  # value (variance 0); the empty position 2 yields NaN. Only float64 input
  # keeps float64 output, every other dtype comes back as float32.
  out_dtype = np.float64 if input_dtype == tf.float64 else np.float32
  expected = {'var': np.array([0., .25, float('nan'), 0.], out_dtype)}
  self.assertAnalyzerOutputs(batches, metadata, analyzer_fn, expected)
1467
+
1344
1468
def testNumericAnalyzersWithSparseInputs (self ):
1345
1469
def repeat (in_tensor , value ):
1346
1470
batch_size = tf .shape (in_tensor )[0 ]
@@ -1358,11 +1482,6 @@ def min_fn(inputs):
1358
1482
return {'min' : repeat (inputs ['a' ], tft .min (inputs ['a' ]))}
1359
1483
_ = input_dataset | beam_impl .AnalyzeDataset (min_fn )
1360
1484
1361
- with self .assertRaises (TypeError ):
1362
- def max_fn (inputs ):
1363
- return {'max' : repeat (inputs ['a' ], tft .max (inputs ['a' ]))}
1364
- _ = input_dataset | beam_impl .AnalyzeDataset (max_fn )
1365
-
1366
1485
with self .assertRaises (TypeError ):
1367
1486
def sum_fn (inputs ):
1368
1487
return {'sum' : repeat (inputs ['a' ], tft .sum (inputs ['a' ]))}
@@ -1373,11 +1492,6 @@ def size_fn(inputs):
1373
1492
return {'size' : repeat (inputs ['a' ], tft .size (inputs ['a' ]))}
1374
1493
_ = input_dataset | beam_impl .AnalyzeDataset (size_fn )
1375
1494
1376
- with self .assertRaises (TypeError ):
1377
- def var_fn (inputs ):
1378
- return {'var' : repeat (inputs ['a' ], tft .var (inputs ['a' ]))}
1379
- _ = input_dataset | beam_impl .AnalyzeDataset (var_fn )
1380
-
1381
1495
def testStringToTFIDF (self ):
1382
1496
def preprocessing_fn (inputs ):
1383
1497
inputs_as_ints = tft .compute_and_apply_vocabulary (
0 commit comments