|
8305 | 8305 | ),
|
8306 | 8306 | ),
|
8307 | 8307 |
|
# Test case for the rotary_emb_v2 customized op (rotary positional
# embedding, v2 interface). query/key carry the flattened hidden state;
# cos/sin are the precomputed rotation tables.
'rotary_emb_v2': dict(
    name=['rotary_emb_v2'],
    interface=['CustomizedTest'],
    # Each case runs once per dtype listed here.
    dtype=[np.float32, np.float16],
    para=dict(
        # Per-head dimension. NOTE(review): 4096 = 32 * 128, so the
        # (8, 4096) tensors below presumably pack 32 heads of dim 128
        # for 8 tokens — confirm against the op implementation.
        dim=[128,]
    ),
    tensor_para=dict(
        # Random-normal generator for all args without an explicit value.
        gen_fn='Genfunc.randn',
        args=[
            {
                "ins": ['query'],
                "shape": ((8, 4096),),
            },
            {
                "ins": ['key'],
                "shape": ((8, 4096),),
            },
            {
                # Rotation tables: one (1, dim) slice per token position.
                "ins": ['cos'],
                "shape": ((8, 1, 128),),
            },
            {
                "ins": ['sin'],
                "shape": ((8, 1, 128),),
            },
        ],
    ),
),

8308 | 8338 | 'rms_norm_default': dict(
|
8309 | 8339 | name=['rms_norm'],
|
8310 | 8340 | atol=1e-4,
|
|
8551 | 8581 | ),
|
8552 | 8582 | ),
|
8553 | 8583 |
|
# Test case for the prompt_flash_attention customized op (prefill-phase
# flash attention). Loose fp16 tolerances (1e-2) are used.
'prompt_flash_attention': dict(
    name=['prompt_flash_attention'],
    interface=['CustomizedTest'],
    atol=1e-2,
    rtol=1e-2,
    para=dict(
        maxInputLen=[2,],
        # Two sequences of length 2 each; total token count 2 + 2 = 4
        # matches the leading dim of query/key/value below.
        actualSeqLengths=[[2,2],],
        numHeads=[32,],
        numKeyValueHeads=[32,],
        # Per-head dim; 32 heads * 128 = 4096 hidden size.
        dim=[128,],
    ),
    tensor_para=dict(
        args=[
            {
                "ins": ["query"],
                "shape": ((4, 4096),),
                "dtype": [np.float16,],
            },
            {
                "ins": ["key"],
                "shape": ((4, 4096),),
                "dtype": [np.float16,],
            },
            {
                "ins": ["value"],
                "shape": ((4, 4096),),
                "dtype": [np.float16,],
            },
            {
                # One (seq, seq) mask per sequence. NOTE(review): the
                # upper-triangular True pattern looks like a causal mask
                # with True = masked-out — confirm the op's convention.
                "ins": ["attenMask"],
                "value": ([[[False, True],
                            [False, False]],
                           [[False, True],
                            [False, False]]],),
                "dtype": [np.bool_,],
                # Build the tensor from the literal value above instead
                # of the random generator.
                "gen_policy": "gen_tensor_by_value"
            },
        ]
    ),
),

# Test case for the paged_attention customized op (decode-phase attention
# over a paged KV cache). Loose fp16 tolerances (1e-2) are used.
'paged_attention': dict(
    name=['paged_attention'],
    interface=['CustomizedTest'],
    atol=1e-2,
    rtol=1e-2,
    para=dict(
        # One sequence with 150 cached tokens.
        actualSeqLengths=[[150,],],
        numHeads=[32,],
        numKeyValueHeads=[32,],
        # Per-head dim; 32 heads * 128 = 4096 hidden size.
        dim=[128,],
        # KV-cache page size; 2 blocks * 128 = 256 slots cover the
        # 150-token sequence.
        blockSize=[128,],
    ),
    tensor_para=dict(
        args=[
            {
                # Single decode-step query token.
                "ins": ["query"],
                "shape": ((1, 4096),),
                "dtype": [np.float16,],
            },
            {
                # NOTE(review): 1026 cache slots; only the first two
                # 128-slot blocks are referenced by blockTable — confirm
                # the extra slots are intentionally unused.
                "ins": ["key"],
                "shape": ((1026, 4096),),
                "dtype": [np.float16,],
            },
            {
                "ins": ["value"],
                "shape": ((1026, 4096),),
                "dtype": [np.float16,],
            },
            {
                # Per-sequence list of physical block indices.
                "ins": ["blockTable"],
                "value": ([[0, 1],],),
                "dtype": [np.int32,],
                "gen_policy": "gen_tensor_by_value"
            },
        ]
    ),
),

# Test case for the apply_penalty_v2 customized op (sampling penalties:
# presence / frequency / repetition) applied to a batch of logits.
'apply_penalty_v2': dict(
    name=['apply_penalty_v2'],
    interface=['CustomizedTest'],
    tensor_para=dict(
        args=[
            {
                # Batch of 3 sequences over a vocab of 5 (15 values when
                # flattened). Each case runs once per dtype pair below.
                "ins": ['logits'],
                "value": ([[0.1, 0.5, 0.4, 0.3, 0.5],
                           [0.2, 0.4, 0.0, 0.0, 0.0],
                           [0.3, 0.4, 0.5, 0.3, 0.0]],),
                "dtype": [np.float16, np.float32],
                "gen_policy": "gen_tensor_by_value"
            },
            {
                # NOTE(review): the three penalty tensors and the token
                # id/count tensors all have 11 entries, i.e. one per
                # penalized token (not one per batch row) — confirm
                # against the v2 op signature.
                "ins": ["presence_penalty"],
                "value": ([0.1, 0.1, 0.1, 0.1, 0.8, 0.8, 0.8, 0.8, 1.0, 1.0, 1.0],),
                "dtype": [np.float16, np.float32],
                "gen_policy": "gen_tensor_by_value"
            },
            {
                "ins": ["frequency_penalty"],
                "value": ([0.1, 0.1, 0.1, 0.1, 0.8, 0.8, 0.8, 0.8, 1.0, 1.0, 1.0],),
                "dtype": [np.float16, np.float32],
                "gen_policy": "gen_tensor_by_value"
            },
            {
                "ins": ["repetition_penalty"],
                "value": ([0.1, 0.1, 0.1, 0.1, 0.8, 0.8, 0.8, 0.8, 1.0, 1.0, 1.0],),
                "dtype": [np.float16, np.float32],
                "gen_policy": "gen_tensor_by_value"
            },
            {
                # NOTE(review): ids reach 11 (> vocab size 5), so they
                # presumably index the flattened 15-element logits;
                # index 7 is deliberately skipped — confirm.
                "ins": ["p_token_ids"],
                "value": ([0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11],),
                # int32 for both dtype runs (paired with fp16 / fp32 logits).
                "dtype": [np.int32, np.int32],
                "gen_policy": "gen_tensor_by_value"
            },
            {
                # Occurrence count for each id in p_token_ids.
                "ins": ["p_token_counts"],
                "value": ([3, 3, 2, 2, 1, 3, 3, 3, 3, 2, 2],),
                "dtype": [np.int32, np.int32],
                "gen_policy": "gen_tensor_by_value"
            },
        ]
    )
),

8554 | 8712 | 'token_attention': dict(
|
8555 | 8713 | name=['token_attention'],
|
8556 | 8714 | interface=['CustomizedTest'],
|
|
0 commit comments