We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 644ea07 commit 43d38d3Copy full SHA for 43d38d3
common_level3.h
@@ -89,6 +89,27 @@ void strmm_direct_LTLN(BLASLONG M, BLASLONG N,
89
float * A, BLASLONG strideA,
90
float * B, BLASLONG strideB);
91
92
+void ssyrk_direct_alpha_betaUN(BLASLONG N, BLASLONG K,
93
+ float alpha,
94
+ float * A, BLASLONG strideA,
95
+ float beta,
96
+ float * C, BLASLONG strideC);
97
+void ssyrk_direct_alpha_betaUT(BLASLONG N, BLASLONG K,
98
99
100
101
102
+void ssyrk_direct_alpha_betaLN(BLASLONG N, BLASLONG K,
103
104
105
106
107
+void ssyrk_direct_alpha_betaLT(BLASLONG N, BLASLONG K,
108
109
110
111
112
+
113
int sgemm_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
114
115
int shgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
common_param.h
@@ -263,6 +263,10 @@ int (*shgemm_otcopy )(BLASLONG, BLASLONG, hfloat16 *, BLASLONG, hfloat16 *);
263
void (*strmm_direct_LNLN) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
264
void (*strmm_direct_LTUN) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
265
void (*strmm_direct_LTLN) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
266
+ void (*ssyrk_direct_alpha_betaUN) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
267
+ void (*ssyrk_direct_alpha_betaUT) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
268
+ void (*ssyrk_direct_alpha_betaLN) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
269
+ void (*ssyrk_direct_alpha_betaLT) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
270
#endif
271
272
common_s.h
@@ -56,6 +56,10 @@
56
#define STRMM_DIRECT_LNLN strmm_direct_LNLN
57
#define STRMM_DIRECT_LTUN strmm_direct_LTUN
58
#define STRMM_DIRECT_LTLN strmm_direct_LTLN
59
+#define SSYRK_DIRECT_ALPHA_BETA_UN ssyrk_direct_alpha_betaUN
60
+#define SSYRK_DIRECT_ALPHA_BETA_UT ssyrk_direct_alpha_betaUT
61
+#define SSYRK_DIRECT_ALPHA_BETA_LN ssyrk_direct_alpha_betaLN
62
+#define SSYRK_DIRECT_ALPHA_BETA_LT ssyrk_direct_alpha_betaLT
63
64
#define SGEMM_ONCOPY sgemm_oncopy
65
#define SGEMM_OTCOPY sgemm_otcopy
@@ -232,6 +236,10 @@
232
236
#define STRMM_DIRECT_LNLN gotoblas -> strmm_direct_LNLN
233
237
#define STRMM_DIRECT_LTUN gotoblas -> strmm_direct_LTUN
234
238
#define STRMM_DIRECT_LTLN gotoblas -> strmm_direct_LTLN
239
+#define SSYRK_DIRECT_ALPHA_BETA_UN gotoblas -> ssyrk_direct_alpha_betaUN
240
+#define SSYRK_DIRECT_ALPHA_BETA_UT gotoblas -> ssyrk_direct_alpha_betaUT
241
+#define SSYRK_DIRECT_ALPHA_BETA_LN gotoblas -> ssyrk_direct_alpha_betaLN
242
+#define SSYRK_DIRECT_ALPHA_BETA_LT gotoblas -> ssyrk_direct_alpha_betaLT
235
243
244
245
#define SGEMM_ONCOPY gotoblas -> sgemm_oncopy
interface/syrk.c
@@ -338,6 +338,23 @@ double NNK;
338
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
339
return;
340
}
341
+#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16)
342
+#if defined(ARCH_ARM64) && (defined(USE_SSYRK_KERNEL_DIRECT)||defined(DYNAMIC_ARCH))
343
+#if defined(DYNAMIC_ARCH)
344
+ if (support_sme1())
345
+#endif
346
+ if (args.n == 0) return;
347
+ if (order == CblasRowMajor && n == ldc) {
348
+ if (Trans == CblasNoTrans && k == lda) {
349
+ (Uplo == CblasUpper ? SSYRK_DIRECT_ALPHA_BETA_UN : SSYRK_DIRECT_ALPHA_BETA_LN)(n, k, alpha, a, lda, beta, c, ldc);
350
+ return;
351
+ } else if (Trans == CblasTrans && n == lda){
352
+ (Uplo == CblasUpper ? SSYRK_DIRECT_ALPHA_BETA_UT : SSYRK_DIRECT_ALPHA_BETA_LT)(n, k, alpha, a, lda, beta, c, ldc);
353
354
+ }
355
356
357
358
359
360
kernel/CMakeLists.txt
@@ -241,6 +241,10 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
if (ARM64)
set(USE_DIRECT_STRMM true)
endif()
+ set(USE_DIRECT_SSYRK false)
+ if (ARM64)
246
+ set(USE_DIRECT_SSYRK true)
247
+ endif()
248
set(USE_DIRECT_SGEMM false)
249
if (X86_64 OR ARM64)
250
set(USE_DIRECT_SGEMM true)
@@ -293,6 +297,16 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
293
297
endif ()
294
298
295
299
300
+ if (USE_DIRECT_SSYRK)
301
302
+ set (SSYRKDIRECTKERNEL_ALPHA_BETA ssyrk_direct_alpha_beta_arm64_sme1.c)
303
+ GenerateNamedObjects("${KERNELDIR}/${SSYRKDIRECTKERNEL_ALPHA_BETA}" "" "syrk_direct_alpha_betaUN" false "" "" false SINGLE)
304
+ GenerateNamedObjects("${KERNELDIR}/${SSYRKDIRECTKERNEL_ALPHA_BETA}" "" "syrk_direct_alpha_betaUT" false "" "" false SINGLE)
305
+ GenerateNamedObjects("${KERNELDIR}/${SSYRKDIRECTKERNEL_ALPHA_BETA}" "" "syrk_direct_alpha_betaLN" false "" "" false SINGLE)
306
+ GenerateNamedObjects("${KERNELDIR}/${SSYRKDIRECTKERNEL_ALPHA_BETA}" "" "syrk_direct_alpha_betaLT" false "" "" false SINGLE)
307
+ endif ()
308
309
296
310
foreach (float_type SINGLE DOUBLE)
311
string(SUBSTRING ${float_type} 0 1 float_char)
312
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
kernel/Makefile.L3
@@ -54,6 +54,7 @@ USE_TRMM = 1
54
USE_DIRECT_SGEMM = 1
55
USE_DIRECT_SSYMM = 1
USE_DIRECT_STRMM = 1
+USE_DIRECT_SSYRK = 1
endif
ifeq ($(ARCH), riscv64)
@@ -161,6 +162,16 @@ endif
161
162
163
164
165
+ifdef USE_DIRECT_SSYRK
166
+ifndef SSYRKDIRECTKERNEL_ALPHA_BETA
167
+ifeq ($(ARCH), arm64)
168
+ifeq ($(TARGET_CORE), ARMV9SME)
169
+HAVE_SME = 1
170
+endif
171
+SSYRKDIRECTKERNEL_ALPHA_BETA = ssyrk_direct_alpha_beta_arm64_sme1.c
172
173
174
175
176
ifeq ($(BUILD_BFLOAT16), 1)
177
ifndef BGEMMKERNEL
@@ -261,6 +272,14 @@ SKERNELOBJS += \
261
262
273
274
275
276
277
+SKERNELOBJS += \
278
+ ssyrk_direct_alpha_betaUN$(TSUFFIX).$(SUFFIX) ssyrk_direct_alpha_betaUT$(TSUFFIX).$(SUFFIX) \
279
+ ssyrk_direct_alpha_betaLN$(TSUFFIX).$(SUFFIX) ssyrk_direct_alpha_betaLT$(TSUFFIX).$(SUFFIX)
280
281
282
283
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
284
DKERNELOBJS += \
285
dgemm_beta$(TSUFFIX).$(SUFFIX) \
@@ -1158,6 +1177,21 @@ $(KDIR)xgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMD
1158
1177
$(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND)
1159
1178
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $@
1160
1179
1180
1181
1182
+$(KDIR)ssyrk_direct_alpha_betaUN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1183
+ $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DUPPER -UTRANSA $< -o $@
1184
1185
+$(KDIR)ssyrk_direct_alpha_betaUT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1186
+ $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DUPPER -DTRANSA $< -o $@
1187
1188
+$(KDIR)ssyrk_direct_alpha_betaLN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1189
+ $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UUPPER -UTRANSA $< -o $@
1190
1191
+$(KDIR)ssyrk_direct_alpha_betaLT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYRKDIRECTKERNEL_ALPHA_BETA)
1192
+ $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UUPPER -DTRANSA $< -o $@
1193
1194
1161
1195
1162
1196
ifdef USE_TRMM
1163
1197
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
0 commit comments