Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .github/workflows/_runs-on-nv-step1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,13 @@ jobs:
&& source ${ENV_PATH}/pt2.0_diopi \
&& python main.py --mode gen_data" \
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1} && git clean -xdf ${GEN_DATA} && exit 1 )
source ~/Aoss_env.sh
ads-cli cp ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/ s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss.cn-sh-01c.sensecoreapi-oss.cn${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/
elif [[ "${GETRUNNER}" == *diopi* ]];then
ssh SH1424 """
set -e
source ${ENV_PATH}/pt2.0_diopi
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && cd ${BUILD_TEST1} && cd diopi_test/python &&
srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_V100} --time=20 --gres=gpu:1 bash -c 'python main.py --mode gen_data' \
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1} && git clean -xdf ${GEN_DATA} && exit 1 )
source ~/Aoss_env.sh
ads-cli cp ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/ s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss.cn-sh-01c.sensecoreapi-oss.cn${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/
"""
else
ln -s ${GEN_DATA_PATH}/${GEN_DATA}/diopi ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/
Expand Down
29 changes: 7 additions & 22 deletions .github/workflows/data-cron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,25 +93,10 @@ jobs:
mkdir -p ${DATA_DIR}/source/${GEN_DATA}
rsync -a --delete ${CLUSTER_V100}:${DATA_DIR}/source/${GEN_DATA}/diopi/ ${DATA_DIR}/source/${GEN_DATA}/diopi/
"""
# ssh ${CLUSTER_ASCEND_910B} """
# mkdir -p ${DATA_DIR}/source/${GEN_DATA}
# rsync -a --delete ${CLUSTER_V100}:${DATA_DIR}/source/${GEN_DATA}/diopi/ ${DATA_DIR}/source/${GEN_DATA}/diopi/
# """
source ~/Aoss_env.sh
ads-cli --dryrun --deleteSrc cp s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss.cn-sh-01c.sensecoreapi-oss.cn${DATA_DIR}/source/${GEN_DATA}/diopi/ /dev/null 2>&1 >/dev/null
ads-cli cp ${DATA_DIR}/source/${GEN_DATA}/diopi/ s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss.cn-sh-01c.sensecoreapi-oss.cn${DATA_DIR}/source/${GEN_DATA}/diopi/
# ssh ${CLUSTER_SUPA} """
# rsync -a ${CLUSTER_V100}:${DATA_DIR}/source/${GEN_DATA}/diopi/ ${DATA_DIR}/source/${GEN_DATA}/diopi/
# """

Copy-Gen-Data-Ascend-910b:
name: Copy-Gen-Data-Ascend-910b
runs-on: tps-ascend-ci-910b
needs: CheckAndRsync
if: needs.CheckAndRsync.outputs.to_gen_data == 'true'
steps:
- name: Copy Gen-Data
run: |
set -e
source ~/Aoss_env.sh
ads-cli cp s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss-internal.cn-sh-01c.sensecoreapi-oss.cn${DATA_DIR}/source/${GEN_DATA}/diopi/ ${DATA_DIR}/source/${GEN_DATA}/diopi/
ssh ${CLUSTER_ASCEND_910B} """
mkdir -p ${DATA_DIR}/source/${GEN_DATA}
rsync -a --delete ${CLUSTER_V100}:${DATA_DIR}/source/${GEN_DATA}/diopi/ ${DATA_DIR}/source/${GEN_DATA}/diopi/
"""
# ssh ${CLUSTER_SUPA} """
# rsync -a ${CLUSTER_V100}:${DATA_DIR}/source/${GEN_DATA}/diopi/ ${DATA_DIR}/source/${GEN_DATA}/diopi/
# """
17 changes: 3 additions & 14 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,8 @@ jobs:
&& rsync -a --delete ${GITHUB_WORKSPACE}/source/ ${CLUSTER_SUPA}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ || echo "failure to connect to supa"
ssh ${CLUSTER_1424} "mkdir -p ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source" \
&& rsync -a --delete ${GITHUB_WORKSPACE}/source/ ${CLUSTER_1424}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ || echo "failure to connect to sh1424"
# ssh ${CLUSTER_ASCEND_910B} "mkdir -p ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source" \
# && rsync -a --delete ${GITHUB_WORKSPACE}/source/ ${CLUSTER_ASCEND_910B}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ || echo "failure to connect to ascend 910b"
source ~/Aoss_env.sh
ads-cli cp ${GITHUB_WORKSPACE}/source/ s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss.cn-sh-01c.sensecoreapi-oss.cn${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/
ssh ${CLUSTER_ASCEND_910B} "mkdir -p ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source" \
&& rsync -a --delete ${GITHUB_WORKSPACE}/source/ ${CLUSTER_ASCEND_910B}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ || echo "failure to connect to ascend 910b"
# ssh ${CLUSTER_KLX} "mkdir -p ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source" \
# && rsync -a --delete ${GITHUB_WORKSPACE}/source/ ${CLUSTER_KLX}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ || echo "failure to connect to kunlunxin"

Expand Down Expand Up @@ -295,12 +293,6 @@ jobs:
needs: [Rsync]
if: ${{ contains( needs.Rsync.outputs.output, 'ASCEND' ) }}
steps:
- name: COPY Source
run: |
set -e
source ~/Aoss_env.sh
ads-cli cp s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss-internal.cn-sh-01c.sensecoreapi-oss.cn${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/
ads-cli --dryrun --deleteSrc cp s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss-internal.cn-sh-01c.sensecoreapi-oss.cn${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source/ /dev/null 2>&1 >/dev/null
- name: build
run: |
set -e
Expand All @@ -327,10 +319,7 @@ jobs:
set -e
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}
if [[ \"${{ needs.Rsync.outputs.output }}\" == *GENDATA* ]];then
# rsync -a ${CLUSTER_V100}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/${GEN_DATA}/diopi ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/
source ~/Aoss_env.sh
ads-cli cp s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss-internal.cn-sh-01c.sensecoreapi-oss.cn${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/ ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/
ads-cli --dryrun --deleteSrc cp s3://${Platform_ci_aoss_name}:${Platform_ci_aoss_url}@platform.aoss-internal.cn-sh-01c.sensecoreapi-oss.cn${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/ /dev/null 2>&1 >/dev/null
rsync -a ${CLUSTER_V100}:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/${GEN_DATA}/diopi ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/
else
ln -s ${GEN_DATA_PATH}/${GEN_DATA}/diopi ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}/diopi_test/python/cache/
fi
Expand Down
8 changes: 7 additions & 1 deletion diopi_test/python/configs/diopi_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8629,7 +8629,7 @@
atol=1e-2,
rtol=1e-2,
para=dict(
actualSeqLengths=[[150,],],
actualSeqLengths=[[5,],],
numHeads=[32,],
numKeyValueHeads=[32,],
dim=[128,],
Expand All @@ -8652,6 +8652,12 @@
"shape": ((1026, 4096),),
"dtype": [np.float16,],
},
{
"ins": ["attenMask"],
"value": [[False, False, False, False, False]],
"dtype": [np.bool_,],
"gen_policy": "gen_tensor_by_value",
},
{
"ins": ["blockTable"],
"value": ([[0, 1],],),
Expand Down
1 change: 1 addition & 0 deletions diopi_test/python/conformance/customized_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,7 @@ def paged_attention(
query,
key,
value,
attenMask,
actualSeqLengths,
numHeads,
numKeyValueHeads,
Expand Down
2 changes: 2 additions & 0 deletions diopi_test/python/conformance/diopi_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6057,6 +6057,7 @@ def paged_attention(
query,
key,
value,
attenMask,
actualSeqLengths,
numHeads,
numKeyValueHeads,
Expand All @@ -6075,6 +6076,7 @@ def paged_attention(
query,
key,
value,
attenMask,
actualSeqLengths,
numHeads,
numKeyValueHeads,
Expand Down
4 changes: 4 additions & 0 deletions impl/ascend/convert_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -458,3 +458,7 @@
- diopiMaxPool2dBackward:
tensor_dtype:
indices: (int64)->int32

- diopiStd:
dtype: (float64)->float32
layout: ND
26 changes: 26 additions & 0 deletions impl/ascend/functions/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* @author DeepLink
* @copyright (c) 2023, DeepLink.
*/
#include <numeric>

#include "../aclnn/acl_scalar.hpp"
#include "../aclnn/adaptor.hpp"
Expand Down Expand Up @@ -52,6 +53,31 @@ diopiError_t diopiMean(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC
return diopiSuccess;
}

/**
 * @brief Computes the standard deviation of `input` along `dim` and writes it to `out` through aclnnStd.
 *
 * @param[in]  ctx      Context handle forwarded to the aclnn call.
 * @param[out] out      Result tensor; its rank is also used to infer keepdim (see below).
 * @param[in]  input    Tensor to reduce.
 * @param[in]  dim      Dimensions to reduce. A null or empty list means "reduce over every dimension of input".
 * @param[in]  unbiased When true a correction of 1 is passed to aclnnStd, otherwise 0.
 * @return diopiSuccess once the aclnn call has been issued.
 */
diopiError_t diopiStd(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t dim, bool unbiased) {
    AscendTensor inputAt(input);
    AscendTensor outAt(out);

    // keepdim is not part of the signature: it is deduced from whether `out` has the same rank as `input`.
    bool keepdim = (inputAt.dim() == outAt.dim());

    int64_t correction = unbiased ? 1 : 0;

    bool hasExplicitDims = (dim.data != nullptr) && (dim.len != 0);
    if (hasExplicitDims) {
        DIOPI_ASCEND_CALL_ACLNN(aclnnStd, ctx, input, dim, correction, keepdim, out);
        return diopiSuccess;
    }

    // No dimensions supplied: build the list 0, 1, ..., rank-1 so the reduction covers the whole tensor.
    std::vector<int64_t> everyAxis(inputAt.dim());
    std::iota(everyAxis.begin(), everyAxis.end(), 0);
    diopiSize_t reduceDims = vectorToDiopiSize(everyAxis);
    DIOPI_ASCEND_CALL_ACLNN(aclnnStd, ctx, input, reduceDims, correction, keepdim, out);
    return diopiSuccess;
}

diopiError_t diopiAll(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const int64_t* dim) {
diopiSize_t inputSize, outSize;
diopiGetTensorShape(input, &inputSize);
Expand Down
1 change: 1 addition & 0 deletions impl/ascend_npu/ascend_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ ascend:
- diopiSqrt
- diopiSqrtInp
- diopiStack
- diopiStd
- diopiTanh
- diopiTanhBackward
- diopiTanhInp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ diopiError_t diopiTokenAttentionInference(diopiContextHandle_t ctx, diopiTensorH
}

diopiError_t diopiPagedAttention(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t q, diopiConstTensorHandle_t k,
diopiConstTensorHandle_t v, diopiSize_t actualSeqLengths, int64_t numHeads, int64_t numKeyValueHeads, int64_t dim,
diopiConstTensorHandle_t blockTable, int64_t blockSize) {
BEGIN_CALL_ACL_OP(out, q, k, v, blockTable);
diopiConstTensorHandle_t v, diopiConstTensorHandle_t attenMask, diopiSize_t actualSeqLengths, int64_t numHeads,
int64_t numKeyValueHeads, int64_t dim, diopiConstTensorHandle_t blockTable, int64_t blockSize) {
BEGIN_CALL_ACL_OP(out, q, k, v, blockTable, attenMask);
at::IntArrayRef actSeqLen(actualSeqLengths.data, actualSeqLengths.len);
TORCH_CHECK(actualSeqLengths.len == qAt.size(0), "The size of the first dimension of q must be equal to the length of actualSeqLengths!");
TORCH_CHECK(actualSeqLengths.len == outAt.size(0), "The size of the first dimension of out must be equal to the length of actualSeqLengths!");
Expand All @@ -59,13 +59,13 @@ diopiError_t diopiPagedAttention(diopiContextHandle_t ctx, diopiTensorHandle_t o
at::TensorList keyTensors = kAt;
at::TensorList valueTensors = vAt;
int64_t innerPrecise = 1;
at::Tensor paddingMask, attenMask, dequantScale1, quantScale1, dequantScale2, quantScale2, quantOffset2, antiquantScale, antiquantOffset, kvPaddingSize;
at::Tensor paddingMask, dequantScale1, quantScale1, dequantScale2, quantScale2, quantOffset2, antiquantScale, antiquantOffset, kvPaddingSize;
EXEC_NPU_NO_FORMAT_CHECK_CMD(aclnnIncreFlashAttentionV4,
qAt,
keyTensors,
valueTensors,
paddingMask,
attenMask,
attenMaskAt,
actSeqLen,
dequantScale1,
quantScale1,
Expand Down
5 changes: 3 additions & 2 deletions proto/include/diopi/functions_ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,7 @@ DIOPI_API diopiError_t diopiTokenSoftmaxReduceVInference(diopiContextHandle_t ct
* @param[in] q Tensor representing the query matrix in the attention mechanism. shape = [sum_batch_seq_len, head_num * head_dim].
* @param[in] k Tensor representing the key matrix in the attention mechanism. shape = [sum_batch_seq_len, head_num * head_dim]
* @param[in] v Tensor representing the value matrix in the attention mechanism. shape = [sum_batch_seq_len, head_num * head_dim]
* @param[in] attenMask Tensor representing the mask matrix in the attention mechanism. shape = [1, single_seq_len]
* @param[in] actual_seq_lengths Array (diopiSize_t) holding the actual sequence length of each batch. shape = [batch_size]
* @param[in] num_heads head number of q and out.
* @param[in] num_kv_heads head number of key and value.
Expand All @@ -646,8 +647,8 @@ DIOPI_API diopiError_t diopiTokenSoftmaxReduceVInference(diopiContextHandle_t ct
* @param[in] block_size Size of each block unit.
*/
DIOPI_API diopiError_t diopiPagedAttention(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t q, diopiConstTensorHandle_t k,
diopiConstTensorHandle_t v, diopiSize_t actual_seq_lengths, int64_t num_heads, int64_t num_kv_heads, int64_t dim,
diopiConstTensorHandle_t block_table, int64_t block_size);
diopiConstTensorHandle_t v, diopiConstTensorHandle_t attenMask, diopiSize_t actual_seq_lengths, int64_t num_heads,
int64_t num_kv_heads, int64_t dim, diopiConstTensorHandle_t block_table, int64_t block_size);
/**
* @brief The no pad implementation of
* \text{context_attention_out}(\mathrm{q},\mathrm{k},\mathrm{v})=\text{softmax}(\frac{\mathrm{qk}^\mathrm{T}}{\sqrt{\mathrm{d_k}}})\mathrm{v}. For details,
Expand Down