Skip to content

Commit a55510a

Browse files
committed
Merge remote-tracking branch 'origin/develop' into vpietila/ckb-consistent-naming-of-cmake-test-targets
2 parents a267b42 + cafaeb6 commit a55510a

29 files changed

+1980
-292
lines changed

Jenkinsfile

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,28 @@ def failurePatterns = [
2020
[pattern: /cat: .* No such file or directory/, description: "GPU not found"],
2121
]
2222

23+
// Searches `log` for the first line that matches `pattern` and returns that
// line together with a small window of surrounding lines.
//
// Parameters:
//   pattern - regular expression applied to each line with Groovy's `=~` operator.
//   log     - the log text; it is split on '\n' before scanning.
//
// Returns a map with three keys:
//   found       - true if some line matched, false otherwise.
//   matchedLine - the first matching line, or "" when nothing matched.
//   context     - the matching line plus up to two lines before and after it,
//                 joined with '\n', or "" when nothing matched.
//
// Only the first match is reported; scanning stops at the first matching line.
// An `echo` message is emitted in both the match and the no-match case.
24+
def checkForPattern(pattern, log) {
25+
def lines = log.split('\n')
26+
for (int i = 0; i < lines.size(); i++) {
27+
if (lines[i] =~ pattern) {
28+
echo "Found pattern match in log for ${pattern}"
29+
30+
// Collect up to two lines before and after the matching line; the window is
// clamped to [0, lines.size() - 1] so it never runs off either end of the log.
31+
def contextStart = Math.max(0, i - 2)
32+
def contextEnd = Math.min(lines.size() - 1, i + 2)
33+
def contextLines = []
34+
for (int j = contextStart; j <= contextEnd; j++) {
35+
contextLines.add(lines[j])
36+
}
37+
38+
return [found: true, matchedLine: lines[i], context: contextLines.join('\n')]
39+
}
40+
}
41+
echo "No pattern match found in log for ${pattern}"
42+
return [found: false, matchedLine: "", context: ""]
43+
}
44+
2345
class Version {
2446
int major, minor, patch
2547
@Override

example/ck_tile/20_grouped_convolution/gemm_configs.hpp renamed to example/ck_tile/20_grouped_convolution/conv_configs.hpp

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
#define CK_TILE_PIPELINE_COMPUTE_V4 3
1818
#define CK_TILE_PIPELINE_COMPUTE_V5 4
1919

20-
struct GemmConfigBase
20+
struct ConvConfigBase
2121
{
2222
static constexpr bool kPadM = true;
2323
static constexpr bool kPadN = true;
@@ -29,6 +29,10 @@ struct GemmConfigBase
2929
static constexpr bool TransposeC = false;
3030
static constexpr bool UseStructuredSparsity = false;
3131

32+
static constexpr ck_tile::index_t VectorSizeA = 4;
33+
static constexpr ck_tile::index_t VectorSizeB = 8;
34+
static constexpr ck_tile::index_t VectorSizeC = 8;
35+
3236
static constexpr int kBlockPerCu = 1;
3337
static constexpr ck_tile::index_t TileParitionerGroupNum = 8;
3438
static constexpr ck_tile::index_t TileParitionerM01 = 4;
@@ -37,10 +41,12 @@ struct GemmConfigBase
3741
static constexpr ck_tile::index_t NumWaveGroups = 1;
3842
static constexpr bool Preshuffle = false;
3943
static constexpr bool TiledMMAPermuteN = false;
44+
45+
static constexpr ck_tile::index_t NumGroupsToMerge = 1;
4046
};
4147

4248
template <typename PrecType>
43-
struct GemmConfigMemoryInterwave : public GemmConfigBase
49+
struct ConvConfigMemoryInterwave : public ConvConfigBase
4450
{
4551
// Memory friendly for Interwave scheduler
4652
static constexpr ck_tile::index_t M_Tile = 128;
@@ -61,7 +67,7 @@ struct GemmConfigMemoryInterwave : public GemmConfigBase
6167
};
6268

6369
template <typename PrecType>
64-
struct GemmConfigMemoryIntrawave : public GemmConfigBase
70+
struct ConvConfigMemoryIntrawave : public ConvConfigBase
6571
{
6672
static constexpr ck_tile::index_t M_Tile = 128;
6773
static constexpr ck_tile::index_t N_Tile = 32;
@@ -80,7 +86,7 @@ struct GemmConfigMemoryIntrawave : public GemmConfigBase
8086
};
8187

8288
template <typename PrecType>
83-
struct GemmConfigComputeV3 : public GemmConfigBase
89+
struct ConvConfigComputeV3 : public ConvConfigBase
8490
{
8591
// Compute V3 only support Intrawave scheduler
8692
static constexpr ck_tile::index_t M_Tile = 16;
@@ -100,7 +106,7 @@ struct GemmConfigComputeV3 : public GemmConfigBase
100106
};
101107

102108
template <typename PrecType>
103-
struct GemmConfigComputeV3_1 : public GemmConfigBase
109+
struct ConvConfigComputeV3_1 : public ConvConfigBase
104110
{
105111
static constexpr ck_tile::index_t M_Tile = 256;
106112
static constexpr ck_tile::index_t N_Tile = 256;
@@ -119,7 +125,7 @@ struct GemmConfigComputeV3_1 : public GemmConfigBase
119125
};
120126

121127
template <typename PrecType>
122-
struct GemmConfigComputeV3_2 : public GemmConfigBase
128+
struct ConvConfigComputeV3_2 : public ConvConfigBase
123129
{
124130
static constexpr ck_tile::index_t M_Tile = 128;
125131
static constexpr ck_tile::index_t N_Tile = 128;
@@ -140,7 +146,7 @@ struct GemmConfigComputeV3_2 : public GemmConfigBase
140146
};
141147

142148
template <typename PrecType>
143-
struct GemmConfigComputeV3_WMMA : public GemmConfigBase
149+
struct ConvConfigComputeV3_WMMA : public ConvConfigBase
144150
{
145151
static constexpr ck_tile::index_t M_Tile = 128;
146152
static constexpr ck_tile::index_t N_Tile = 128;
@@ -161,7 +167,7 @@ struct GemmConfigComputeV3_WMMA : public GemmConfigBase
161167
};
162168

163169
template <typename PrecType>
164-
struct GemmConfigComputeV4 : public GemmConfigBase
170+
struct ConvConfigComputeV4 : public ConvConfigBase
165171
{
166172
// Compute V4 only support Intrawave scheduler
167173
// Using the ping pong reader in the lds level
@@ -182,7 +188,7 @@ struct GemmConfigComputeV4 : public GemmConfigBase
182188
};
183189

184190
template <typename PrecType>
185-
struct GemmConfigComputeV4_1 : public GemmConfigBase
191+
struct ConvConfigComputeV4_1 : public ConvConfigBase
186192
{
187193
static constexpr ck_tile::index_t M_Tile = 256;
188194
static constexpr ck_tile::index_t N_Tile = 256;
@@ -201,7 +207,7 @@ struct GemmConfigComputeV4_1 : public GemmConfigBase
201207
};
202208

203209
template <typename PrecType>
204-
struct GemmConfigComputeV5 : public GemmConfigBase
210+
struct ConvConfigComputeV5 : public ConvConfigBase
205211
{
206212
static constexpr ck_tile::index_t M_Tile = 128;
207213
static constexpr ck_tile::index_t N_Tile = 128;
@@ -220,6 +226,31 @@ struct GemmConfigComputeV5 : public GemmConfigBase
220226
static constexpr ck_tile::index_t NumWaNumWaveGroups = 2;
221227
};
222228

229+
// Compile-time configuration derived from ConvConfigBase that selects the
// CK_TILE_PIPELINE_COMPUTE_V3 pipeline and sets NumGroupsToMerge to 2.
//
// The struct holds only static constexpr constants: block tile sizes
// (M/N/K_Tile), warp counts (M/N/K_Warp), per-warp tile sizes
// (M/N/K_Warp_Tile), the shared-memory double-buffering flag and the pipeline
// selector. The PrecType template parameter is not referenced in the body.
template <typename PrecType>
230+
struct ConvConfigComputeV3_merged_groups : public ConvConfigBase
231+
{
// These re-declare the vector sizes with the same values ConvConfigBase
// already provides (4 / 8 / 8), so they hide the base members without
// changing them.
232+
static constexpr ck_tile::index_t VectorSizeA = 4;
233+
static constexpr ck_tile::index_t VectorSizeB = 8;
234+
static constexpr ck_tile::index_t VectorSizeC = 8;
235+
236+
static constexpr ck_tile::index_t M_Tile = 16;
237+
static constexpr ck_tile::index_t N_Tile = 32;
238+
static constexpr ck_tile::index_t K_Tile = 32;
239+
240+
static constexpr ck_tile::index_t M_Warp = 1;
241+
static constexpr ck_tile::index_t N_Warp = 2;
242+
static constexpr ck_tile::index_t K_Warp = 1;
243+
244+
static constexpr ck_tile::index_t M_Warp_Tile = 16;
245+
static constexpr ck_tile::index_t N_Warp_Tile = 16;
246+
static constexpr ck_tile::index_t K_Warp_Tile = 32;
247+
248+
static constexpr bool DoubleSmemBuffer = false;
249+
static constexpr ck_tile::index_t Pipeline = CK_TILE_PIPELINE_COMPUTE_V3;
250+
251+
// Hides the ConvConfigBase value of 1. The backward-weight invoker in this
// commit passes this constant as the last template argument of
// ck_tile::GroupedConvTraits.
// NOTE(review): presumably the number of convolution groups processed
// together - confirm against GroupedConvTraits.
static constexpr ck_tile::index_t NumGroupsToMerge = 2;
252+
};
253+
223254
template <typename InDataType, typename WeiDataType = InDataType, typename OutDataType = InDataType>
224255
struct ConvTypeConfig;
225256

example/ck_tile/20_grouped_convolution/grouped_convolution_backward_data.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ int run_grouped_conv_bwd_data_example(int argc, char* argv[])
5151
int main(int argc, char* argv[])
5252
{
5353
#if CK_TILE_USE_WMMA
54-
return !run_grouped_conv_bwd_data_example<GemmConfigComputeV3_WMMA>(argc, argv);
54+
return !run_grouped_conv_bwd_data_example<ConvConfigComputeV3_WMMA>(argc, argv);
5555
#else
56-
return !run_grouped_conv_bwd_data_example<GemmConfigComputeV3>(argc, argv);
56+
return !run_grouped_conv_bwd_data_example<ConvConfigComputeV3>(argc, argv);
5757
#endif
5858
}

example/ck_tile/20_grouped_convolution/grouped_convolution_backward_weight.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "grouped_convolution_backward_weight_invoker.hpp"
1515
#include "run_grouped_convolution_bwd_weight_example.inc"
1616

17-
template <template <typename PrecType> typename GemmConfig>
17+
template <template <typename PrecType> typename ConvConfig>
1818
int run_grouped_conv_bwd_weight_example(ck_tile::ArgParser& arg_parser)
1919
{
2020
using Invoker = GroupedConvolutionBackwardWeightInvoker;
@@ -27,14 +27,14 @@ int run_grouped_conv_bwd_weight_example(ck_tile::ArgParser& arg_parser)
2727
if(data_type == "fp16")
2828
{
2929
return run_grouped_conv_bwd_weight_example_prec_type<Invoker,
30-
GemmConfig<ck_tile::half_t>,
30+
ConvConfig<ck_tile::half_t>,
3131
ck_tile::half_t>(
3232
in_layout, wei_layout, out_layout, arg_parser);
3333
}
3434
else if(data_type == "bf16")
3535
{
3636
return run_grouped_conv_bwd_weight_example_prec_type<Invoker,
37-
GemmConfig<ck_tile::bf16_t>,
37+
ConvConfig<ck_tile::bf16_t>,
3838
ck_tile::bf16_t>(
3939
in_layout, wei_layout, out_layout, arg_parser);
4040
}
@@ -54,9 +54,9 @@ int main(int argc, char* argv[])
5454
try
5555
{
5656
#if CK_TILE_USE_WMMA
57-
return !run_grouped_conv_bwd_weight_example<GemmConfigComputeV3_WMMA>(arg_parser);
57+
return !run_grouped_conv_bwd_weight_example<ConvConfigComputeV3_WMMA>(arg_parser);
5858
#else
59-
return !run_grouped_conv_bwd_weight_example<GemmConfigComputeV3>(arg_parser);
59+
return !run_grouped_conv_bwd_weight_example<ConvConfigComputeV3>(arg_parser);
6060
#endif
6161
}
6262
catch(const std::runtime_error& e)

example/ck_tile/20_grouped_convolution/grouped_convolution_backward_weight_invoker.hpp

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
struct GroupedConvolutionBackwardWeightInvoker
88
{
99
template <ck_tile::index_t NDimSpatial,
10-
typename GemmConfig,
10+
typename ConvConfig,
1111
typename InDataType,
1212
typename WeiDataType,
1313
typename AccDataType,
@@ -25,22 +25,22 @@ struct GroupedConvolutionBackwardWeightInvoker
2525

2626
// Implicit GEMM Traits
2727
using GemmShape = ck_tile::TileGemmShape<
28-
ck_tile::sequence<GemmConfig::M_Tile, GemmConfig::N_Tile, GemmConfig::K_Tile>,
29-
ck_tile::sequence<GemmConfig::M_Warp, GemmConfig::N_Warp, GemmConfig::K_Warp>,
28+
ck_tile::sequence<ConvConfig::M_Tile, ConvConfig::N_Tile, ConvConfig::K_Tile>,
29+
ck_tile::sequence<ConvConfig::M_Warp, ConvConfig::N_Warp, ConvConfig::K_Warp>,
3030
ck_tile::
31-
sequence<GemmConfig::M_Warp_Tile, GemmConfig::N_Warp_Tile, GemmConfig::K_Warp_Tile>,
32-
GemmConfig::PermuteA,
33-
GemmConfig::PermuteB>;
31+
sequence<ConvConfig::M_Warp_Tile, ConvConfig::N_Warp_Tile, ConvConfig::K_Warp_Tile>,
32+
ConvConfig::PermuteA,
33+
ConvConfig::PermuteB>;
3434

35-
constexpr ck_tile::index_t VectorSizeA = 4;
36-
constexpr ck_tile::index_t VectorSizeB = 8;
37-
constexpr ck_tile::index_t VectorSizeC = 8;
35+
constexpr ck_tile::index_t VectorSizeA = ConvConfig::VectorSizeA;
36+
constexpr ck_tile::index_t VectorSizeB = ConvConfig::VectorSizeB;
37+
constexpr ck_tile::index_t VectorSizeC = ConvConfig::VectorSizeC;
3838

3939
constexpr auto ConvSpec = ck_tile::ConvolutionSpecialization::Default;
4040
using TilePartitioner =
4141
ck_tile::GemmSpatiallyLocalTilePartitioner<GemmShape,
42-
GemmConfig::TileParitionerGroupNum,
43-
GemmConfig::TileParitionerM01>;
42+
ConvConfig::TileParitionerGroupNum,
43+
ConvConfig::TileParitionerM01>;
4444
using GroupedConvTraitsType = ck_tile::GroupedConvTraits<NDimSpatial,
4545
ConvSpec,
4646
InLayout,
@@ -49,20 +49,21 @@ struct GroupedConvolutionBackwardWeightInvoker
4949
OutLayout,
5050
VectorSizeA,
5151
VectorSizeB,
52-
VectorSizeC>;
52+
VectorSizeC,
53+
ConvConfig::NumGroupsToMerge>;
5354

5455
using GemmUniversalTraits = ck_tile::TileGemmUniversalTraits<
55-
GemmConfig::kPadM,
56-
GemmConfig::kPadN,
57-
GemmConfig::kPadK,
58-
GemmConfig::DoubleSmemBuffer,
56+
ConvConfig::kPadM,
57+
ConvConfig::kPadN,
58+
ConvConfig::kPadK,
59+
ConvConfig::DoubleSmemBuffer,
5960
typename GroupedConvTraitsType::GroupedConvImplicitGemmTraitsBwdWeight::AsLayout,
6061
typename GroupedConvTraitsType::GroupedConvImplicitGemmTraitsBwdWeight::BsLayout,
6162
typename GroupedConvTraitsType::GroupedConvImplicitGemmTraitsBwdWeight::CLayout,
62-
GemmConfig::TransposeC,
63-
GemmConfig::UseStructuredSparsity,
63+
ConvConfig::TransposeC,
64+
ConvConfig::UseStructuredSparsity,
6465
false, // Persistent,
65-
GemmConfig::NumWaveGroups>;
66+
ConvConfig::NumWaveGroups>;
6667

6768
using GemmPipelineProblem = ck_tile::GemmPipelineProblem<
6869
OutDataType,
@@ -78,16 +79,16 @@ struct GroupedConvolutionBackwardWeightInvoker
7879
VectorSizeB>;
7980

8081
using BaseGemmPipeline = typename PipelineTypeTraits<
81-
GemmConfig::Pipeline>::template UniversalGemmPipeline<GemmPipelineProblem>;
82+
ConvConfig::Pipeline>::template UniversalGemmPipeline<GemmPipelineProblem>;
8283

8384
const ck_tile::index_t gemm_k =
8485
args.N_ * std::accumulate(args.output_spatial_lengths_.begin(),
8586
args.output_spatial_lengths_.end(),
8687
1,
8788
std::multiplies<ck_tile::index_t>());
8889

89-
const ck_tile::index_t k_grain = args.k_batch * GemmConfig::K_Tile;
90-
const ck_tile::index_t K_split = (gemm_k + k_grain - 1) / k_grain * GemmConfig::K_Tile;
90+
const ck_tile::index_t k_grain = args.k_batch * ConvConfig::K_Tile;
91+
const ck_tile::index_t K_split = (gemm_k + k_grain - 1) / k_grain * ConvConfig::K_Tile;
9192
const ck_tile::index_t num_loop = TilePartitioner::GetLoopNum(K_split);
9293
const bool has_hot_loop = BaseGemmPipeline::BlockHasHotloop(num_loop);
9394
const ck_tile::TailNumber tail_num = BaseGemmPipeline::GetBlockLoopTailNum(num_loop);
@@ -98,7 +99,7 @@ struct GroupedConvolutionBackwardWeightInvoker
9899
const auto memory_operation_) {
99100
constexpr bool has_hot_loop_v = has_hot_loop_.value;
100101
constexpr auto tail_number_v = tail_number_.value;
101-
constexpr auto scheduler = GemmConfig::Scheduler;
102+
constexpr auto scheduler = ConvConfig::Scheduler;
102103
constexpr auto memory_operation = memory_operation_.value;
103104

104105
using UniversalGemmProblem =
@@ -118,7 +119,7 @@ struct GroupedConvolutionBackwardWeightInvoker
118119
VectorSizeB>;
119120

120121
using GemmPipeline = typename PipelineTypeTraits<
121-
GemmConfig::Pipeline>::template GemmPipeline<UniversalGemmProblem>;
122+
ConvConfig::Pipeline>::template GemmPipeline<UniversalGemmProblem>;
122123

123124
using ConvEpilogue = ck_tile::CShuffleEpilogue<ck_tile::CShuffleEpilogueProblem<
124125
OutDataType,
@@ -131,12 +132,12 @@ struct GroupedConvolutionBackwardWeightInvoker
131132
CDEElementWise,
132133
TilePartitioner::MPerBlock,
133134
TilePartitioner::NPerBlock,
134-
GemmConfig::M_Warp,
135-
GemmConfig::N_Warp,
136-
GemmConfig::M_Warp_Tile,
137-
GemmConfig::N_Warp_Tile,
138-
GemmConfig::K_Warp_Tile,
139-
GemmConfig::TransposeC,
135+
ConvConfig::M_Warp,
136+
ConvConfig::N_Warp,
137+
ConvConfig::M_Warp_Tile,
138+
ConvConfig::N_Warp_Tile,
139+
ConvConfig::K_Warp_Tile,
140+
ConvConfig::TransposeC,
140141
memory_operation,
141142
1,
142143
true,

example/ck_tile/20_grouped_convolution/grouped_convolution_backward_weight_two_stage.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
#include "grouped_convolution_utils.hpp"
1414
#include "grouped_convolution_backward_weight_two_stage_invoker.hpp"
1515
#include "run_grouped_convolution_bwd_weight_example.inc"
16-
#include "gemm_configs.hpp"
16+
#include "conv_configs.hpp"
1717

18-
template <template <typename PrecType> typename GemmConfig>
18+
template <template <typename PrecType> typename ConvConfig>
1919
int run_grouped_conv_bwd_weight_example(ck_tile::ArgParser& arg_parser)
2020
{
2121
using Invoker = GroupedConvolutionBackwardWeightTwoStageInvoker;
@@ -28,14 +28,14 @@ int run_grouped_conv_bwd_weight_example(ck_tile::ArgParser& arg_parser)
2828
if(data_type == "fp16")
2929
{
3030
return run_grouped_conv_bwd_weight_example_prec_type<Invoker,
31-
GemmConfig<ck_tile::half_t>,
31+
ConvConfig<ck_tile::half_t>,
3232
ck_tile::half_t>(
3333
in_layout, wei_layout, out_layout, arg_parser);
3434
}
3535
else if(data_type == "bf16")
3636
{
3737
return run_grouped_conv_bwd_weight_example_prec_type<Invoker,
38-
GemmConfig<ck_tile::bf16_t>,
38+
ConvConfig<ck_tile::bf16_t>,
3939
ck_tile::bf16_t>(
4040
in_layout, wei_layout, out_layout, arg_parser);
4141
}
@@ -55,9 +55,9 @@ int main(int argc, char* argv[])
5555
try
5656
{
5757
#if CK_TILE_USE_WMMA
58-
return !run_grouped_conv_bwd_weight_example<GemmConfigComputeV3_WMMA>(arg_parser);
58+
return !run_grouped_conv_bwd_weight_example<ConvConfigComputeV3_WMMA>(arg_parser);
5959
#else
60-
return !run_grouped_conv_bwd_weight_example<GemmConfigComputeV3>(arg_parser);
60+
return !run_grouped_conv_bwd_weight_example<ConvConfigComputeV3>(arg_parser);
6161
#endif
6262
}
6363
catch(const std::runtime_error& e)

0 commit comments

Comments
 (0)