 #include <ggml.h>
 #include <ggml-alloc.h>
 #include <ggml-backend.h>
-#include <ggml-backend-impl.h>

 #include <algorithm>
 #include <array>
@@ -80,14 +79,22 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
                 im = nullptr;
             }
         }
+
         ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
         GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
+        // TODO: other cases
+        // #pragma omp parallel for
+        // for (int i = 0; i < tensor->ne[1]; i++) {
+        //     ggml_quantize_chunk(tensor->type, data.data(), dataq.data(),
+        //         i * tensor->ne[0], 1, tensor->ne[0], im);
+        // }
+
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
         // This is going to create some weird integers though.
         ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

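A note on the quantized-init path in the hunk above: the whole tensor is quantized in one call. A sketch of how the arguments map onto the parameters of ggml_quantize_chunk as declared in ggml.h (parameter names taken from that header):

    // ggml_quantize_chunk(type, src, dst, start, nrows, n_per_row, imatrix)
    //   start     = 0                    -> begin at the first element
    //   nrows     = size / tensor->ne[0] -> quantize every row of the tensor at once
    //   n_per_row = tensor->ne[0]        -> row length in elements
    //   imatrix   = im                   -> importance matrix, or nullptr when optional

The commented-out TODO sketches the alternative: quantizing row by row in a parallel loop, which would also exercise chunked quantization with nonzero start offsets.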
@@ -125,7 +132,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
                     tt.to_float(&buf[i], vq.data(), bs);
                     tv.insert(tv.end(), vq.begin(), vq.end());
                 } else {
-                    GGML_ASSERT(false);
+                    GGML_ABORT("fatal error");
                 }
             }
         }
@@ -760,7 +767,7 @@ struct test_dup : public test_case {
760767 }
761768
762769 test_dup (ggml_type type = GGML_TYPE_F32,
763- std::array<int64_t , 4 > ne = {10 , 10 , 10 , 1 },
770+ std::array<int64_t , 4 > ne = {10 , 10 , 20 , 1 },
764771 std::array<int64_t , 4 > permute = {0 , 0 , 0 , 0 })
765772 : type(type), ne(ne), permute(permute),
766773 _use_permute (permute[0 ] + permute[1 ] + permute[2 ] + permute[3 ] > 0 ) {}
@@ -780,9 +787,11 @@ struct test_cpy : public test_case {
     const ggml_type type_src;
     const ggml_type type_dst;
     const std::array<int64_t, 4> ne;
+    const std::array<int64_t, 4> permute;
+    bool _src_use_permute;

     std::string vars() override {
-        return VARS_TO_STR3(type_src, type_dst, ne);
+        return VARS_TO_STR4(type_src, type_dst, ne, permute);
     }

     double max_nmse_err() override {
@@ -794,12 +803,17 @@ struct test_cpy : public test_case {
     }

     test_cpy(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10, 10, 10, 1})
-        : type_src(type_src), type_dst(type_dst), ne(ne) {}
+            std::array<int64_t, 4> ne = {10, 10, 10, 1},
+            std::array<int64_t, 4> permute = {0, 0, 0, 0})
+        : type_src(type_src), type_dst(type_dst), ne(ne), permute(permute),
+          _src_use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}

     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-        ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, ne.data());
+        if (_src_use_permute) {
+            src = ggml_permute(ctx, src, permute[0], permute[1], permute[2], permute[3]);
+        }
+        ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, src->ne);
         ggml_tensor * out = ggml_cpy(ctx, src, dst);
         return out;
     }
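For context on the new permute path: ggml_permute only rewrites the view's shape and byte strides, so the permuted src is non-contiguous while dst is allocated contiguously from src->ne. A minimal sketch of the axis mapping, assuming ggml's convention that argument i names the destination axis of source dimension i:

    // src->ne = {256, 2, 3, 4}
    // ggml_tensor * view = ggml_permute(ctx, src, 0, 2, 1, 3);
    // view->ne = {256, 3, 2, 4}  -- dims 1 and 2 trade places
    // view->nb keeps src's original byte strides, so the view is not contiguous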
@@ -1175,6 +1189,7 @@ struct test_soft_max : public test_case {
     }
 };

+
 // GGML_OP_ROPE
 struct test_rope : public test_case {
     const ggml_type type;
@@ -1267,6 +1282,32 @@ struct test_pool2d : public test_case {
     }
 };

+// GGML_OP_CONV_TRANSPOSE_1D
+struct test_conv_transpose_1d : public test_case {
+    const std::array<int64_t, 4> ne_input;
+    const std::array<int64_t, 4> ne_kernel;
+
+    const int s0; // stride
+    const int p0; // padding
+    const int d0; // dilation
+
+    std::string vars() override {
+        return VARS_TO_STR5(ne_input, ne_kernel, s0, p0, d0);
+    }
+
+    test_conv_transpose_1d(std::array<int64_t, 4> ne_input = {197, 32, 1, 1}, // [input_width, input_height, input_channels, 1]
+                           std::array<int64_t, 4> ne_kernel = {16, 32, 32, 1}, // [kernel_width, kernel_height, input_channels, 1]
+                           int s0 = 1, int p0 = 0, int d0 = 1)
+        : ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), p0(p0), d0(d0) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
+        ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
+        ggml_tensor * out = ggml_conv_transpose_1d(ctx, kernel, input, s0, p0, d0);
+        return out;
+    }
+};
+
 // GGML_OP_IM2COL
 struct test_im2col : public test_case {
     const ggml_type type_input;
@@ -1280,7 +1321,7 @@ struct test_im2col : public test_case {
     // padding
     const int p0;
     const int p1;
-    // dilatation
+    // dilation
     const int d0;
     const int d1;
     // mode
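A quick sanity check for the new transposed-convolution tests: the expected output width follows the usual transposed-conv relation (a sketch; ggml's reference implementation should agree, but verify against ggml.c):

    // out_w = (in_w - 1)*s0 - 2*p0 + d0*(k_w - 1) + 1
    // with the defaults above: in_w = 197, k_w = 16, s0 = 1, p0 = 0, d0 = 1
    //   out_w = 196 - 0 + 15 + 1 = 212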
@@ -1393,7 +1434,7 @@ struct test_argsort : public test_case {
                     ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         }
     }
@@ -1470,19 +1511,21 @@ struct test_group_norm : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
     const int32_t num_groups;
+    const float eps;

     std::string vars() override {
         return VARS_TO_STR3(type, ne, num_groups);
     }

     test_group_norm(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {64, 64, 320, 1},
-            int32_t num_groups = 32)
-        : type(type), ne(ne), num_groups(num_groups) {}
+            int32_t num_groups = 32,
+            float eps = 1e-6f)
+        : type(type), ne(ne), num_groups(num_groups), eps(eps) {}

     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_tensor * out = ggml_group_norm(ctx, a, num_groups);
+        ggml_tensor * out = ggml_group_norm(ctx, a, num_groups, eps);
         return out;
     }
 };
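The eps now threaded through test_group_norm is the standard numerical stabilizer in group normalization; schematically (the textbook formula, not necessarily ggml's exact kernel):

    // for each group g, over the elements x assigned to it:
    //   y = (x - mean_g) / sqrt(var_g + eps)
    // eps (1e-6f by default here) keeps the division well-defined for near-constant groups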
@@ -2053,6 +2096,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
         GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
         GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
+        GGML_TYPE_BF16,
     };

     // unary ops
@@ -2097,6 +2141,19 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op

     test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32));
     test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16));
+    // test cases for 1D im2col
+    test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
+    test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
+
+    test_cases.emplace_back(new test_conv_transpose_1d());
+    test_cases.emplace_back(new test_conv_transpose_1d({3, 2, 1, 1}, {2, 3, 2, 1}, 3, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3, 2, 1, 1}, {2, 3, 2, 1}, 2, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3, 2, 1, 1}, {2, 3, 2, 1}, 1, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3, 2, 1, 1}, {3, 2, 2, 1}, 2, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3, 2, 1, 1}, {3, 2, 2, 1}, 1, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({3, 2, 1, 1}, {3, 1, 2, 1}, 1, 0, 1));
+    test_cases.emplace_back(new test_conv_transpose_1d({2, 1, 1, 1}, {3, 1, 1, 1}, 1, 0, 1));
+

     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 1}));
     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {2, 1, 1, 1}));
@@ -2110,12 +2167,22 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
     test_cases.emplace_back(new test_dup(GGML_TYPE_I32));
     test_cases.emplace_back(new test_dup(GGML_TYPE_I16));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {0, 2, 1, 3}));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {0, 2, 1, 3})); // dup by rows
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {1, 0, 2, 3}));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {1, 0, 2, 3})); // dup dst not-contiguous
     test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
     test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));

     for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
         for (ggml_type type_dst : all_types) {
             test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4}));
+            test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows
+        }
+    }
+    for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
+        for (ggml_type type_dst : {GGML_TYPE_F16, GGML_TYPE_F32}) {
+            test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {1, 0, 2, 3})); // cpy not-contiguous
         }
     }

@@ -2165,6 +2232,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 10, 10, 10}, eps));
     }

+#if 1
     for (ggml_type type_a : base_types) {
         for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
@@ -2184,10 +2252,31 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 10}, {2, 2}));
         }
     }
+#else
+    // m = a rows
+    // n = b rows
+    // k = cols
+    std::uniform_int_distribution<> dist_m(1, 128);
+    std::uniform_int_distribution<> dist_n(16, 128);
+    std::uniform_int_distribution<> dist_k(1, 16);
+    for (int i = 0; i < 1000; i++) {
+        for (ggml_type type_a : all_types) {
+            for (ggml_type type_b : {GGML_TYPE_F32}) {
+                int m = dist_m(rng);
+                int n = dist_n(rng);
+                int k = dist_k(rng) * ggml_blck_size(type_a);
+                test_cases.emplace_back(new test_mul_mat(type_a, type_b, m, n, k, {1, 1}, {1, 1}));
+            }
+        }
+    }
+#endif

     for (ggml_type type_a : other_types) {
         for (ggml_type type_b : {GGML_TYPE_F32}) {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
+            if (ggml_blck_size(type_a) != 256) {
+                test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, ggml_blck_size(type_a), {1, 1}, {1, 1}));
+            }
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
         }
     }

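One detail worth noting in the hunk above: in the disabled fuzzing branch, k is multiplied by ggml_blck_size(type_a) because quantized types pack weights into fixed-size blocks, so the reduction dimension must be a whole number of blocks. The same constraint explains the new ggml_blck_size(type_a) != 256 guard: a single-block k case is only distinct from the existing k = 256 case for types whose block is smaller than 256. For illustration (block sizes per ggml's type traits; e.g. Q4_0 uses 32-wide blocks, while the IQ families use 256-wide superblocks):

    // k is always block-aligned for type_a:
    int k = dist_k(rng) * ggml_blck_size(type_a);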
@@ -2247,7 +2336,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         for (int n = 0; n < 10; ++n) {
             int64_t ne0 = dist_ne0(rng);
             int64_t ne1 = dist_ne1(rng);
-            test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, n/2 == 0, 0.1f, ne0 < 1000 ? 4.0f : 0.0f));
+            test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, GGML_TYPE_F32, {ne0, ne1, 1, 1}, n/2 == 0, 0.1f, ne0 < 1000 ? 4.0f : 0.0f));
         }

         exponent <<= 1;
@@ -2266,7 +2355,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
             }
         }
     }
-
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, 0.1f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, 0.1f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 8.0f));
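For readers of the soft_max hunks: the trailing arguments here are (mask, scale, max_bias), and a nonzero max_bias exercises the ALiBi-style slope path of ggml_soft_max_ext. Roughly (a sketch of the intent, not the exact kernel):

    // out = softmax(x*scale + slope*mask), with slope derived per head from max_bias
    // max_bias = 0.0f disables the bias, leaving a plain masked, scaled softmax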
@@ -2380,7 +2469,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         return true;
     }

-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
     return false;
 }
