diff --git a/.cirun.yml b/.cirun.yml index bfc6494d05..0d979ba0f2 100644 --- a/.cirun.yml +++ b/.cirun.yml @@ -6,7 +6,7 @@ runners: cloud: "aws" region: "us-east-1" # Cheapest VM on AWS - instance_type: "c7g.large" + instance_type: "c8g.large" # Ubuntu-22.04, ami image machine_image: "ami-0a0c8eebcdd6dcbd0" preemptible: false diff --git a/.github/workflows/arm64_graviton.yml b/.github/workflows/arm64_graviton.yml index 6928312b56..223bdd33cc 100644 --- a/.github/workflows/arm64_graviton.yml +++ b/.github/workflows/arm64_graviton.yml @@ -88,6 +88,7 @@ jobs: run: | case "${{ matrix.build }}" in "make") + sed -i -e "s/while (nthreads_m/if(0)while (nthreads_m/" driver/level3/level3_thread.c make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" ;; "cmake") @@ -119,15 +120,18 @@ jobs: case "${{ matrix.build }}" in "make") MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0' - echo "::group::Tests in 'test' directory" - make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - echo "::group::Tests in 'ctest' directory" - make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - echo "::group::Tests in 'utest' directory" - make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" + wget https://gist.github.com/martin-frbg/96d2252fbb71d0d898ede14691b51657/raw/5c6a89cb1560dce50005dd8484483508fabc039f/issue4939.c + gcc -I. -O2 issue4939.c libopenblas.a -lgfortran -lm -o issue4939 + ./issue4939 + #echo "::group::Tests in 'test' directory" + #make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" + #echo "::endgroup::" + #echo "::group::Tests in 'ctest' directory" + #make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" + #echo "::endgroup::" + #echo "::group::Tests in 'utest' directory" + #make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" + #echo "::endgroup::" ;; "cmake") cd build && ctest diff --git a/interface/gemm.c b/interface/gemm.c index 576e94593c..4b290437f6 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -566,7 +566,11 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS #if USE_SMALL_MATRIX_OPT #if !defined(COMPLEX) +#if defined(ARCH_ARM64) + if(args.m*args.n*args.k<=64*64*64){ +#else if(GEMM_SMALL_MATRIX_PERMIT(transa, transb, args.m, args.n, args.k, *(FLOAT *)(args.alpha), *(FLOAT *)(args.beta))){ +#endif if(*(FLOAT *)(args.beta) == 0.0){ (GEMM_SMALL_KERNEL_B0((transb << 2) | transa))(args.m, args.n, args.k, args.a, args.lda, *(FLOAT *)(args.alpha), args.b, args.ldb, args.c, args.ldc); }else{ diff --git a/param.h b/param.h index fee9195d02..6392421773 100644 --- a/param.h +++ b/param.h @@ -3548,10 +3548,8 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #if defined(XDOUBLE) || defined(DOUBLE) #define SWITCH_RATIO 8 -#define GEMM_PREFERED_SIZE 4 #else #define SWITCH_RATIO 16 -#define GEMM_PREFERED_SIZE 8 #endif #define SGEMM_DEFAULT_UNROLL_M 16