Skip to content

Commit 6839a4f

Browse files
committed
Update base for Update on "[ET-VK] 5/n Split dispatches between multiple command buffers. Track previously submitted command buffers in context and add function to execute all previous command buffers."
This diff stores command buffers that are submitted with `final_use` set to false. Storing these buffers is necessary for the `execute()` function. Since the `encode_execute()` function is typically called once but `execute()` can be called multiple times, the `submit_all_non_final_cmds` function is added so that all recorded command buffers with `final_use = False` can be submitted multiple times in `execute()`. #### Key Changes * Added a flag `execute_pending_first_submission` to the `ComputeGraph` class to track whether execute nodes have been freshly encoded and need to be submitted first. * Added a new function `submit_all_non_final_cmds` to the `Context` class, which submits all non-final command buffers to the GPU. * Modified the `submit_cmd_to_gpu` function to add the submitted command buffer to the `non_final_cmds_` list if it's not marked as final use. * Updated the `execute` function in `ComputeGraph` to submit all non-final command buffers before executing the graph. Differential Revision: [D78360038](https://our.internmc.facebook.com/intern/diff/D78360038/) [ghstack-poisoned]
2 parents b7c5cab + b77e412 commit 6839a4f

File tree

83 files changed

+4130
-216
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+4130
-216
lines changed

.github/workflows/trunk.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,37 @@ jobs:
302302
exit 1
303303
fi
304304
305+
test-arm-ootb-linux:
306+
name: test-arm-ootb-linux
307+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
308+
permissions:
309+
id-token: write
310+
contents: read
311+
with:
312+
runner: linux.2xlarge
313+
docker-image: executorch-ubuntu-22.04-arm-sdk
314+
submodules: 'recursive'
315+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
316+
timeout: 90
317+
script: |
318+
# The generic Linux job chooses to use base env, not the one setup by the image
319+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
320+
conda activate "${CONDA_ENV}"
321+
322+
# Follow the steps required before running the notebooks
323+
# Try to mirror these as closely as possible
324+
source .ci/scripts/utils.sh
325+
install_executorch "--use-pt-pinned-commit"
326+
327+
.ci/scripts/setup-arm-baremetal-tools.sh
328+
source examples/arm/ethos-u-scratch/setup_path.sh
329+
330+
# Install requirements for converting notebooks
331+
pip install notebook
332+
333+
# Run OOTB tests
334+
backends/arm/test/test_arm_ootb.sh
335+
305336
test-coreml-delegate:
306337
name: test-coreml-delegate
307338
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

CMakeLists.txt

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,6 @@
4848
cmake_minimum_required(VERSION 3.24)
4949
project(executorch)
5050

51-
# MARK: - Start EXECUTORCH_H12025_BUILD_MIGRATION
52-
5351
include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
5452
include(${PROJECT_SOURCE_DIR}/tools/cmake/Utils.cmake)
5553
include(CMakeDependentOption)
@@ -82,6 +80,7 @@ announce_configured_options(BUCK2)
8280

8381
announce_configured_options(CMAKE_CXX_COMPILER_ID)
8482
announce_configured_options(CMAKE_TOOLCHAIN_FILE)
83+
announce_configured_options(BUILD_TESTING)
8584

8685
load_build_preset()
8786
include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake)
@@ -97,11 +96,6 @@ else()
9796
endif()
9897
announce_configured_options(CCACHE_PROGRAM)
9998

100-
# Print all the configs that were called with announce_configured_options.
101-
print_configured_options()
102-
103-
# MARK: - End EXECUTORCH_H12025_BUILD_MIGRATION
104-
10599
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
106100

107101
# Setup RPATH. See
@@ -750,3 +744,6 @@ if(EXECUTORCH_BUILD_ANDROID_JNI)
750744
endif()
751745

752746
include(Test.cmake)
747+
748+
# Print all the configs that were called with announce_configured_options.
749+
print_configured_options()

CMakePresets.json

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
},
99
{
1010
"name": "macos",
11-
"displayName": "Build everything buildable on macOS",
11+
"displayName": "Build ExecuTorch for macOS",
1212
"inherits": ["common"],
1313
"generator": "Xcode",
1414
"cacheVariables": {
@@ -25,7 +25,7 @@
2525
},
2626
{
2727
"name": "ios",
28-
"displayName": "Build everything buildable on iOS",
28+
"displayName": "Build ExecuTorch for iOS",
2929
"inherits": ["common"],
3030
"generator": "Xcode",
3131
"cacheVariables": {
@@ -42,7 +42,7 @@
4242
},
4343
{
4444
"name": "ios-simulator",
45-
"displayName": "Build everything buildable on iOS simulator",
45+
"displayName": "Build ExecuTorch for iOS Simulator",
4646
"inherits": ["common"],
4747
"generator": "Xcode",
4848
"cacheVariables": {
@@ -59,7 +59,7 @@
5959
},
6060
{
6161
"name": "linux",
62-
"displayName": "Build everything buildable on Linux",
62+
"displayName": "Build ExecuTorch for Linux",
6363
"inherits": ["common"],
6464
"cacheVariables": {
6565
"CMAKE_SYSTEM_NAME": "Linux",
@@ -88,29 +88,21 @@
8888
{
8989
"name": "llm",
9090
"displayName": "Build LLM libraries",
91-
"inherits": [
92-
"common"
93-
],
91+
"inherits": ["common"],
9492
"cacheVariables": {
9593
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/llm.cmake",
9694
"CMAKE_OSX_DEPLOYMENT_TARGET": "12.0"
9795
},
9896
"condition": {
9997
"type": "inList",
10098
"string": "${hostSystemName}",
101-
"list": [
102-
"Darwin",
103-
"Linux",
104-
"Windows"
105-
]
99+
"list": ["Darwin", "Linux", "Windows"]
106100
}
107101
},
108102
{
109103
"name": "zephyr",
110-
"displayName": "Build everything buildable on Zephyr RTOS",
111-
"inherits": [
112-
"common"
113-
],
104+
"displayName": "Build ExecuTorch for Zephyr RTOS",
105+
"inherits": ["common"],
114106
"cacheVariables": {
115107
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/zephyr.cmake",
116108
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake"

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from .decompose_adaptive_avg_pool2d_pass import DecomposeAdaptiveAvgPool2dPass # noqa
2727
from .decompose_asin_pass import DecomposeAsinPass # noqa
2828
from .decompose_atan_pass import DecomposeAtanPass # noqa
29+
from .decompose_atanh_pass import DecomposeAtanhPass # noqa
2930
from .decompose_avg_pool2d import DecomposeAvgPool2d # noqa
3031
from .decompose_batch_norm_no_stats import DecomposeBatchNormNoStatsPass # noqa
3132
from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
DecomposeAcoshPass,
3131
DecomposeAdaptiveAvgPool2dPass,
3232
DecomposeAsinPass,
33+
DecomposeAtanhPass,
3334
DecomposeAtanPass,
3435
DecomposeAvgPool2d,
3536
DecomposeBatchNormNoStatsPass,
@@ -163,6 +164,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
163164
self.add_pass(DecomposeAsinPass())
164165
self.add_pass(DecomposeSqrtPass())
165166
self.add_pass(DecomposeAtanPass())
167+
self.add_pass(DecomposeAtanhPass())
166168
self.add_pass(ConvertIntPowToMuls())
167169
self.add_pass(CastBoolToInt8Pass())
168170
self.add_pass(DecomposeSinhPass())

backends/arm/_passes/cast_int64_pass.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def _to_int32(self, graph_module: torch.fx.GraphModule):
4747
buffer_name = self.exported_program.graph_signature.inputs_to_buffers[
4848
node.name
4949
]
50-
buffer = self.exported_program.state_dict[node.name]
50+
buffer = self.exported_program.state_dict[buffer_name]
5151
self._assert_within_int32(buffer, node)
5252
logger.warning(
5353
f"Casting buffer {node.name} from torch.int64 to torch.int32"

backends/arm/_passes/decompose_asin_pass.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,11 @@ def _build_polynomial(
8585
return result
8686

8787
def call_operator(self, op, args, kwargs, meta):
88+
if op not in edge_asin_op:
89+
return super().call_operator(op, args, kwargs, meta)
8890
logging.info(
8991
f"Approximating asin. This may introduce small numerical errors. For details, see {__file__}."
9092
)
91-
if op not in edge_asin_op:
92-
return super().call_operator(op, args, kwargs, meta)
93-
9493
x = args[0]
9594
half = 0.5
9695
one = 1.0
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from executorch.backends.arm._passes import ArmPass
7+
from executorch.exir.dialects._ops import ops as exir_ops
8+
9+
10+
edge_atanh = exir_ops.edge.aten.atanh.default # MI case
11+
12+
13+
def _get_atanh_ops(op):
14+
"""Return the primitive ops required."""
15+
if op is not edge_atanh:
16+
raise RuntimeError(f"Can't decompose atanh for op {op}")
17+
return (
18+
exir_ops.edge.aten.mul.Tensor,
19+
exir_ops.edge.aten.mul.Scalar,
20+
exir_ops.edge.aten.add.Scalar,
21+
exir_ops.edge.aten.reciprocal.default,
22+
exir_ops.edge.aten.log.default,
23+
exir_ops.edge.aten.neg.default,
24+
)
25+
26+
27+
class DecomposeAtanhPass(ArmPass):
28+
"""
29+
Decomposes the atanh operator into primitive ops.
30+
atanh(x) = 0.5 * log((1 + x) / (1 - x))
31+
"""
32+
33+
def call_operator(self, op, args, kwargs, meta):
34+
if op is not edge_atanh:
35+
return super().call_operator(op, args, kwargs, meta, updated=False)
36+
37+
ops = _get_atanh_ops(op)
38+
(
39+
op_mul_tensor,
40+
op_mul_scalar,
41+
op_add_scalar,
42+
op_reciprocal,
43+
op_log,
44+
op_neg,
45+
) = ops
46+
47+
x = args[0]
48+
49+
nom = super().call_operator(op_add_scalar, (x, 1.0), {}, meta, updated=True)
50+
51+
neg_x = super().call_operator(op_neg, (x,), {}, meta, updated=True)
52+
denom = super().call_operator(
53+
op_add_scalar, (neg_x, 1.0), {}, meta, updated=True
54+
)
55+
recip = super().call_operator(op_reciprocal, (denom,), {}, meta, updated=True)
56+
57+
log_input = super().call_operator(
58+
op_mul_tensor, (nom, recip), {}, meta, updated=True
59+
)
60+
log = super().call_operator(op_log, (log_input,), {}, meta, updated=True)
61+
62+
return super().call_operator(op_mul_scalar, (log, 0.5), {}, meta, updated=True)

backends/arm/_passes/insert_table_ops.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class TableOps:
5252
exir_ops.edge.aten.sin.default: torch.sin,
5353
exir_ops.edge.aten.tanh.default: torch.tanh,
5454
exir_ops.edge.aten.atan.default: torch.atan,
55+
exir_ops.edge.aten.atanh.default: torch.atanh,
5556
exir_ops.edge.aten.hardsigmoid.default: torch.nn.functional.hardsigmoid,
5657
exir_ops.edge.aten.hardswish.default: torch.nn.functional.hardswish,
5758
exir_ops.edge.aten.sinh.default: torch.sinh,

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ def is_node_supported(
252252
exir_ops.edge.aten._adaptive_avg_pool2d.default,
253253
exir_ops.edge.aten.sign.default,
254254
exir_ops.edge.aten.asin.default,
255+
exir_ops.edge.aten.atanh.default,
255256
]
256257

257258
return supported

0 commit comments

Comments
 (0)