diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index a6d25b50bd..508dbcd0e6 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -229,3 +229,6 @@ In chronological order: * Christopher Daley * [2024-01-24] Optimize GEMV forwarding on ARM64 systems + +* Aniket P. Garade Sushil Pratap Singh Juliya James + * [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE diff --git a/kernel/arm64/KERNEL.ARMV8SVE b/kernel/arm64/KERNEL.ARMV8SVE index bfadf5cba9..133fab9d62 100644 --- a/kernel/arm64/KERNEL.ARMV8SVE +++ b/kernel/arm64/KERNEL.ARMV8SVE @@ -64,8 +64,8 @@ DAXPYKERNEL = daxpy_thunderx2t99.S CAXPYKERNEL = zaxpy.S ZAXPYKERNEL = zaxpy.S -SROTKERNEL = rot.S -DROTKERNEL = rot.S +SROTKERNEL = rot.c +DROTKERNEL = rot.c CROTKERNEL = zrot.S ZROTKERNEL = zrot.S @@ -94,8 +94,8 @@ DCOPYKERNEL = copy_thunderx2t99.c CCOPYKERNEL = copy_thunderx2t99.c ZCOPYKERNEL = copy_thunderx2t99.c -SSWAPKERNEL = swap_thunderx2t99.S -DSWAPKERNEL = swap_thunderx2t99.S +SSWAPKERNEL = swap.c +DSWAPKERNEL = swap.c CSWAPKERNEL = swap_thunderx2t99.S ZSWAPKERNEL = swap_thunderx2t99.S diff --git a/kernel/arm64/rot.c b/kernel/arm64/rot.c new file mode 100644 index 0000000000..09b708494c --- /dev/null +++ b/kernel/arm64/rot.c @@ -0,0 +1,40 @@ +/******************************************************************************* +Copyright (c) 2015, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ +#include "common.h" +#include "rot_kernel_sve.c" +#include "rot_kernel_c.c" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) +{ + if (n <= 0) + return (0); + if (inc_x == 1 && inc_y == 1) + rot_kernel_sve(n, x, y, c, s); + else + rot_kernel_c(n, x, inc_x, y, inc_y, c, s); + return (0); +} \ No newline at end of file diff --git a/kernel/arm64/rot_kernel_c.c b/kernel/arm64/rot_kernel_c.c new file mode 100644 index 0000000000..788beed7a5 --- /dev/null +++ b/kernel/arm64/rot_kernel_c.c @@ -0,0 +1,44 @@ +/******************************************************************************* +Copyright (c) 2015, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ +#include "common.h" + +static int rot_kernel_c(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) +{ + BLASLONG i = 0; + BLASLONG ix = 0, iy = 0; + FLOAT temp; + while (i < n) + { + temp = c * x[ix] + s * y[iy]; + y[iy] = c * y[iy] - s * x[ix]; + x[ix] = temp; + ix += inc_x; + iy += inc_y; + i++; + } + return (0); +} \ No newline at end of file diff --git a/kernel/arm64/rot_kernel_sve.c b/kernel/arm64/rot_kernel_sve.c new file mode 100644 index 0000000000..1d54a2907e --- /dev/null +++ b/kernel/arm64/rot_kernel_sve.c @@ -0,0 +1,59 @@ +/******************************************************************************* +Copyright (c) 2015, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ +#include "common.h" +#include + +#ifdef DOUBLE +#define SVE_TYPE svfloat64_t +#define SVE_ZERO svdup_f64(0.0) +#define SVE_WHILELT svwhilelt_b64 +#define SVE_ALL svptrue_b64() +#define SVE_WIDTH svcntd() +#else +#define SVE_TYPE svfloat32_t +#define SVE_ZERO svdup_f32(0.0) +#define SVE_WHILELT svwhilelt_b32 +#define SVE_ALL svptrue_b32() +#define SVE_WIDTH svcntw() +#endif + +static int rot_kernel_sve(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) +{ + for (BLASLONG i = 0; i < n; i += SVE_WIDTH) + { + svbool_t pg = SVE_WHILELT((uint64_t)i, (uint64_t)n); + SVE_TYPE x_vec = svld1(pg, &x[i]); + SVE_TYPE y_vec = svld1(pg, &y[i]); + SVE_TYPE cx_vec = svmul_z(pg, x_vec, c); + SVE_TYPE sy_vec = svmul_z(pg, y_vec, s); + SVE_TYPE sx_vec = svmul_z(pg, x_vec, s); + SVE_TYPE cy_vec = svmul_z(pg, y_vec, c); + svst1(pg, &x[i], svadd_z(pg, cx_vec, sy_vec)); + svst1(pg, &y[i], svsub_z(pg, cy_vec, sx_vec)); + } + return (0); +} \ No newline at end of file diff --git a/kernel/arm64/swap.c b/kernel/arm64/swap.c new file mode 100644 index 0000000000..6a9117cf0e --- /dev/null +++ b/kernel/arm64/swap.c @@ -0,0 +1,40 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ +#include "common.h" +#include "swap_kernel_sve.c" +#include "swap_kernel_c.c" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + if (n <= 0) + return 0; + if (inc_x == 1 && inc_y == 1) + swap_kernel_sve(n, x, y); + else + swap_kernel_c(n, x, inc_x, y, inc_y); + return (0); +} \ No newline at end of file diff --git a/kernel/arm64/swap_kernel_c.c b/kernel/arm64/swap_kernel_c.c new file mode 100644 index 0000000000..4029350962 --- /dev/null +++ b/kernel/arm64/swap_kernel_c.c @@ -0,0 +1,46 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ +#include "common.h" +#include + +static int swap_kernel_c(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i = 0; + BLASLONG ix = 0, iy = 0; + FLOAT temp; + + while (i < n) + { + temp = x[ix]; + x[ix] = y[iy]; + y[iy] = temp; + ix += inc_x; + iy += inc_y; + i++; + } + return (0); +} \ No newline at end of file diff --git a/kernel/arm64/swap_kernel_sve.c b/kernel/arm64/swap_kernel_sve.c new file mode 100644 index 0000000000..1efdce48bd --- /dev/null +++ b/kernel/arm64/swap_kernel_sve.c @@ -0,0 +1,62 @@ +/******************************************************************************* +Copyright (c) 2015, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ +#include "common.h" +#include + +#ifdef DOUBLE +#define SVE_TYPE svfloat64_t +#define SVE_ZERO svdup_f64(0.0) +#define SVE_WHILELT svwhilelt_b64 +#define SVE_ALL svptrue_b64() +#define SVE_WIDTH svcntd() +#else +#define SVE_TYPE svfloat32_t +#define SVE_ZERO svdup_f32(0.0) +#define SVE_WHILELT svwhilelt_b32 +#define SVE_ALL svptrue_b32() +#define SVE_WIDTH svcntw() +#endif + +static int swap_kernel_sve(BLASLONG n, FLOAT *x, FLOAT *y) +{ + BLASLONG sve_width = SVE_WIDTH; + + for (BLASLONG i = 0; i < n; i += sve_width * 2) + { + svbool_t pg_a = SVE_WHILELT((uint64_t)i, (uint64_t)n); + svbool_t pg_b = SVE_WHILELT((uint64_t)(i + sve_width), (uint64_t)n); + SVE_TYPE x_vec_a = svld1(pg_a, &x[i]); + SVE_TYPE y_vec_a = svld1(pg_a, &y[i]); + SVE_TYPE x_vec_b = svld1(pg_b, &x[i + sve_width]); + SVE_TYPE y_vec_b = svld1(pg_b, &y[i + sve_width]); + svst1(pg_a, &x[i], y_vec_a); + svst1(pg_a, &y[i], x_vec_a); + svst1(pg_b, &x[i + sve_width], y_vec_b); + svst1(pg_b, &y[i + sve_width], x_vec_b); + } + return (0); +}