Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 31 additions & 12 deletions src/operator-run.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,10 +770,13 @@ void xnn_compute_dq_zero_buffer_igemm(struct igemm_context* restrict context,
}

// Fills the per-batch zero buffers for a dynamically-quantized subconvolution.
// For each of the `batch_size` batches starting at `batch_index`, the batch's
// zero buffer is memset to that batch's quantization zero point so that padded
// (out-of-image) taps read as "zero" in the quantized domain.
//
// NOTE(review): the diff snapshot interleaved the old single-batch signature
// with the new ranged one; this is the post-change (batch-ranged) version,
// matching the pthreadpool tile-1d-dynamic task shape
// (context, start_index, count).
void xnn_compute_dq_zero_buffer_subconv(
    struct subconv_context* restrict context, size_t batch_index,
    size_t batch_size) {
  for (size_t k = 0; k < batch_size; k++) {
    memset(context->zero_buffers[batch_index + k],
           context->quantization_params[batch_index + k].zero_point,
           context->zero_size);
  }
}

void xnn_compute_grouped_batch_dqigemm(struct igemm_context* restrict context,
Expand Down Expand Up @@ -1041,14 +1044,20 @@ void xnn_compute_dqigemm(struct igemm_context* restrict context,
// Initializes a slice of the IGEMM indirection buffer for 2D convolution.
// The assigned range [output_tile_start, output_tile_start + output_tile_size)
// is processed in steps of at most `context->mr` output pixels, so that each
// call to xnn_indirection_init_conv2d covers one microkernel tile regardless
// of how large a range the (dynamic) thread-pool scheduler hands us.
//
// NOTE(review): the diff snapshot interleaved the old single-call body with
// the new mr-stepped loop; this is the post-change (looped) version, which is
// required now that the task is scheduled as 1d_tile_1d_dynamic and may
// receive ranges larger than mr.
void xnn_compute_conv2d_igemm_indirection(
    struct conv2d_igemm_indirection_init_context* restrict context,
    size_t output_tile_start, size_t output_tile_size) {
  while (output_tile_size > 0) {
    // Never hand the init routine more than one mr-sized output tile at once.
    const size_t mr_step = min(output_tile_size, context->mr);
    xnn_indirection_init_conv2d(
        mr_step, output_tile_start, output_tile_start + mr_step,
        context->indirection_buffer, context->input, context->zero_buffer,
        context->input_pixel_stride, context->input_height,
        context->input_width, context->output_height, context->output_width,
        context->kernel_height, context->kernel_width, context->stride_height,
        context->stride_width, context->dilation_height,
        context->dilation_width, context->input_padding_top,
        context->input_padding_left);
    output_tile_size -= mr_step;
    output_tile_start += mr_step;
  }
}

void xnn_compute_grouped_subgemm2d(struct subgemm_context* restrict context,
Expand Down Expand Up @@ -2494,6 +2503,16 @@ enum xnn_status xnn_run_operator_with_index(xnn_operator_t op,
threadpool, compute->task_3d_tile_1d, context, compute->range[0],
compute->range[1], compute->range[2], compute->tile[0], flags);
break;
case xnn_parallelization_type_3d_tile_1d_dynamic:
assert(compute->range[0] != 0);
assert(compute->range[1] != 0);
assert(compute->range[2] != 0);
assert(compute->tile[0] != 0);
pthreadpool_parallelize_3d_tile_1d_dynamic(
threadpool, compute->task_3d_tile_1d_dynamic, context,
compute->range[0], compute->range[1], compute->range[2],
compute->tile[0], flags);
break;
case xnn_parallelization_type_3d_tile_1d_with_thread:
assert(compute->range[0] != 0);
assert(compute->range[1] != 0);
Expand Down
33 changes: 14 additions & 19 deletions src/operators/convolution-nhwc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1882,13 +1882,14 @@ static enum xnn_status reshape_igemm(
.dilation_width = convolution_op->convolution_op->dilation_width,
.input_padding_top = convolution_op->convolution_op->padding_top,
.input_padding_left = convolution_op->convolution_op->padding_left,
.mr = mr,
};

indirection_compute->type = xnn_parallelization_type_1d_tile_1d;
indirection_compute->type = xnn_parallelization_type_1d_tile_1d_dynamic;
indirection_compute->context_offset =
offsetof(struct igemm_op_context, conv2d_igemm_indirection_init);
indirection_compute->task_1d_tile_1d =
(pthreadpool_task_1d_tile_1d_t)xnn_compute_conv2d_igemm_indirection;
indirection_compute->task_1d_tile_1d_dynamic =
(pthreadpool_task_1d_tile_1d_dynamic_t)xnn_compute_conv2d_igemm_indirection;
indirection_compute->range[0] = tiled_output_size;
indirection_compute->tile[0] = mr;
} else {
Expand Down Expand Up @@ -2279,20 +2280,14 @@ static enum xnn_status reshape_dwconv(
.tile_size = primary_tile,
};

convolution_op->compute[0].type = xnn_parallelization_type_1d_tile_1d;
convolution_op->compute[0].type = xnn_parallelization_type_1d_tile_1d_dynamic;
convolution_op->compute[0].context_offset =
offsetof(struct dwconv_op_context, dwconv_indirection_init);
convolution_op->compute[0].task_1d_tile_1d =
(pthreadpool_task_1d_tile_1d_t)xnn_compute_dwconv_indirection;
convolution_op->compute[0].task_1d_tile_1d_dynamic =
(pthreadpool_task_1d_tile_1d_dynamic_t)xnn_compute_dwconv_indirection;
convolution_op->compute[0].range[0] = output_height;
convolution_op->compute[0].tile[0] = 1;

if (num_threads > 1) {
const size_t target_tiles_per_thread = 5;
convolution_op->compute[0].tile[0] =
divide_round_up(output_height, num_threads * target_tiles_per_thread);
} else {
convolution_op->compute[0].tile[0] = output_height;
}
} else {
dwconv_compute_index = 0;

Expand Down Expand Up @@ -2403,9 +2398,9 @@ static enum xnn_status reshape_dwconv(
convolution_op->compute[dwconv_compute_index].tile[0] =
max(tile_size, channel_tile);
convolution_op->compute[dwconv_compute_index].type =
xnn_parallelization_type_3d_tile_1d;
convolution_op->compute[dwconv_compute_index].task_3d_tile_1d =
(pthreadpool_task_3d_tile_1d_t)xnn_compute_dwconv_unipass;
xnn_parallelization_type_3d_tile_1d_dynamic;
convolution_op->compute[dwconv_compute_index].task_3d_tile_1d_dynamic =
(pthreadpool_task_3d_tile_1d_dynamic_t)xnn_compute_dwconv_unipass;
convolution_op->dynamic_context.dwconv->dwconv.ukernel =
convolution_op->ukernel.dwconv.ukernel;

Expand Down Expand Up @@ -2446,9 +2441,9 @@ static enum xnn_status reshape_vmulcaddc(xnn_operator_t convolution_op,
}
}

convolution_op->compute[0].type = xnn_parallelization_type_1d_tile_1d;
convolution_op->compute[0].task_1d_tile_1d =
(pthreadpool_task_1d_tile_1d_t)xnn_compute_vmulcaddc;
convolution_op->compute[0].type = xnn_parallelization_type_1d_tile_1d_dynamic;
convolution_op->compute[0].task_1d_tile_1d_dynamic =
(pthreadpool_task_1d_tile_1d_dynamic_t)xnn_compute_vmulcaddc;
convolution_op->compute[0].range[0] = batch_output_size;
convolution_op->compute[0].tile[0] = mc;
convolution_op->state = xnn_run_state_needs_setup;
Expand Down
7 changes: 4 additions & 3 deletions src/operators/deconvolution-nhwc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1694,10 +1694,11 @@ static enum xnn_status reshape_subconv2d_path(
size_t igemm_compute_index = 0;
if (dynamic_quantization) {
deconvolution_op->compute[igemm_compute_index].type =
xnn_parallelization_type_1d;
deconvolution_op->compute[igemm_compute_index].task_1d =
(pthreadpool_task_1d_t)xnn_compute_dq_zero_buffer_subconv;
xnn_parallelization_type_1d_tile_1d_dynamic;
deconvolution_op->compute[igemm_compute_index].task_1d_tile_1d_dynamic =
(pthreadpool_task_1d_tile_1d_dynamic_t)xnn_compute_dq_zero_buffer_subconv;
deconvolution_op->compute[igemm_compute_index].range[0] = batch_size;
deconvolution_op->compute[igemm_compute_index].tile[0] = 1;
++igemm_compute_index;
}
if (groups == 1) {
Expand Down
5 changes: 4 additions & 1 deletion src/xnnpack/compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ enum xnn_parallelization_type {
xnn_parallelization_type_2d_tile_2d_dynamic_with_thread,
xnn_parallelization_type_3d,
xnn_parallelization_type_3d_tile_1d,
xnn_parallelization_type_3d_tile_1d_dynamic,
xnn_parallelization_type_3d_tile_1d_with_thread,
xnn_parallelization_type_3d_tile_1d_dynamic_with_thread,
xnn_parallelization_type_3d_tile_2d,
Expand Down Expand Up @@ -83,6 +84,7 @@ struct compute_parameters {
pthreadpool_task_3d_t task_3d;
pthreadpool_task_3d_tile_1d_t task_3d_tile_1d;
pthreadpool_task_3d_tile_1d_with_thread_t task_3d_tile_1d_with_thread;
pthreadpool_task_3d_tile_1d_dynamic_t task_3d_tile_1d_dynamic;
pthreadpool_task_3d_tile_1d_dynamic_with_id_t
task_3d_tile_1d_dynamic_with_id;
pthreadpool_task_3d_tile_2d_t task_3d_tile_2d;
Expand Down Expand Up @@ -491,6 +493,7 @@ struct conv2d_igemm_indirection_init_context {
size_t dilation_width;
size_t input_padding_top;
size_t input_padding_left;
size_t mr;
};

// Context for Indirect Dense Matrix Multiplication.
Expand Down Expand Up @@ -728,7 +731,7 @@ struct subconv_context {
};

XNN_PRIVATE void xnn_compute_dq_zero_buffer_subconv(
struct subconv_context* context, size_t batch_index);
struct subconv_context* context, size_t batch_index, size_t batch_size);

XNN_PRIVATE void xnn_compute_grouped_subconv2d(
struct subconv_context* context, size_t batch_index, size_t group_index,
Expand Down
Loading