
Commit b2e3bd9

Merge branch 'main' into io_params
2 parents 62db20b + fc435fa commit b2e3bd9


54 files changed: +990, -721 lines

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-9b498d3bb28b8e3411ce464dd2755c5b96d92c8f
+7cda4017ddda554752e89069ae205be5e8388f59

.ci/scripts/check_c10_sync.sh

Lines changed: 1 addition & 1 deletion

@@ -12,4 +12,4 @@ pushd pytorch
 git checkout "$pytorch_pin"
 popd
 "$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/c10 pytorch/c10
-"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/standalone pytorch/torch/standalone
+"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/headeronly pytorch/torch/headeronly

.github/workflows/trunk.yml

Lines changed: 3 additions & 3 deletions

@@ -240,11 +240,11 @@ jobs:
 
 cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0"
 setup_script_args=""
-if [[ ${{ matrix.os}} == "bare_metal" ]]; then
+if [[ ${{ matrix.os}} == "bare_metal" ]]; then
   toolchain_prefix=arm-none-eabi-
-  threshold="103268" # ~100KiB
+  threshold="104000" # should be ~103.7KB, set threshold to 104KB.
   toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
-elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
+elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
   setup_script_args="--target-toolchain zephyr"
   toolchain_prefix=arm-zephyr-eabi-
   threshold="133120" # should be ~125KB, set threshold to 130KB

CMakeLists.txt

Lines changed: 5 additions & 1 deletion

@@ -490,7 +490,7 @@ install(
   INCLUDES
   DESTINATION ${_common_include_directories}
 )
-install(FILES tools/cmake/executorch-config.cmake
+install(FILES tools/cmake/Utils.cmake tools/cmake/executorch-config.cmake
   DESTINATION lib/cmake/ExecuTorch
 )
 
@@ -732,4 +732,8 @@ if(EXECUTORCH_BUILD_VULKAN)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan)
 endif()
 
+if(EXECUTORCH_BUILD_ANDROID_JNI)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/android)
+endif()
+
 include(Test.cmake)

backends/cadence/aot/compiler.py

Lines changed: 12 additions & 28 deletions

@@ -8,7 +8,7 @@
 
 import logging
 from pathlib import Path
-from typing import Callable, cast, Optional
+from typing import Optional
 
 import executorch.backends.cadence.aot.ops_registrations  # noqa
 import torch
@@ -32,7 +32,6 @@
     ExecutorchBackendConfig,
     ExecutorchProgramManager,
 )
-from executorch.exir.pass_base import PassResult
 from executorch.exir.passes import ToOutVarPass
 from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
 from executorch.exir.program._program import to_edge_with_preserved_ops
@@ -41,7 +40,7 @@
 from torch.export.exported_program import ExportedProgram
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 
-from .passes import get_cadence_passes
+from .passes import apply_exir_ops_passes, apply_torch_ops_passes
 
 from .utils import print_ops_info
 
@@ -262,14 +261,20 @@ def export_to_edge(
     inputs: tuple[object, ...],
     dump_graphs: bool = False,
     constant_methods: Optional[dict[str, object]] = None,
+    core_aten_exceptions: Optional[list[torch._ops.OpOverload]] = None,
 ) -> EdgeProgramManager:
     assert isinstance(model, torch.nn.Module), "model should be an nn.Module"
 
     # Export the model into an ExportedProgram.
     expo_program = trace(model, inputs)
 
+    # Apply passes which transform the ExportedProgram before it gets lowered to edge.
+    expo_program = apply_torch_ops_passes(expo_program)
+
     # Lower the model to edge IR.
-    edge_prog_manager = _lower_ep_to_edge(expo_program, dump_graphs, constant_methods)
+    edge_prog_manager = _lower_ep_to_edge(
+        expo_program, dump_graphs, constant_methods, core_aten_exceptions
+    )
 
     return edge_prog_manager
 
@@ -311,14 +316,7 @@ def _lower_ep_to_cadence(
     Lower an existing ExportedProgram to edge IR and apply frontend optimization passes.
     """
     edge_prog_manager = _lower_ep_to_edge(program, dump_graphs=dump_graphs)
-    cadence_passes = get_cadence_passes(opt_level)
-
-    # Run a couple required passes for quant/dequant ops
-    cadence_prog_manager = edge_prog_manager.transform(
-        cast(
-            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
-        )
-    )
+    cadence_prog_manager = apply_exir_ops_passes(opt_level, edge_prog_manager)
     return cadence_prog_manager
 
 
@@ -329,14 +327,7 @@ def export_to_cadence(
     opt_level: int = 1,
 ) -> EdgeProgramManager:
     edge_prog_manager = export_to_edge(model, inputs, dump_graphs=dump_graphs)
-    cadence_passes = get_cadence_passes(opt_level)
-
-    # Run a couple required passes for quant/dequant ops
-    cadence_prog_manager = edge_prog_manager.transform(
-        cast(
-            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
-        )
-    )
+    cadence_prog_manager = apply_exir_ops_passes(opt_level, edge_prog_manager)
     return cadence_prog_manager
 
 
@@ -373,15 +364,8 @@ def export_to_executorch_gen_etrecord(
     memory_config: Optional[MemoryConfig] = None,
     dump_graphs: bool = False,
 ) -> ExecutorchProgramManager:
-    cadence_passes = get_cadence_passes(opt_level)
     edge_prog_manager = export_to_edge(model, inputs, dump_graphs)
-
-    # Run a couple required passes for quant/dequant ops
-    cadence_prog_manager = edge_prog_manager.transform(
-        cast(
-            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
-        )
-    )
+    cadence_prog_manager = apply_exir_ops_passes(opt_level, edge_prog_manager)
 
     # Print some information to terminal
     print_ops_info(
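Taken together, export_to_edge now runs torch-level passes before lowering and can forward a list of ops to keep out of core-ATen decomposition. A minimal usage sketch; the model, inputs, and exempted op are illustrative, and it assumes _lower_ep_to_edge accepts the forwarded list as its fourth parameter, as the call above implies:

import torch
from executorch.backends.cadence.aot.compiler import export_to_edge

model = torch.nn.Linear(4, 4)
inputs = (torch.randn(1, 4),)
edge_prog_manager = export_to_edge(
    model,
    inputs,
    core_aten_exceptions=[torch.ops.aten.linear.default],  # illustrative choice
)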

backends/cadence/aot/fuse_ops.py

Lines changed: 1 addition & 0 deletions

@@ -1127,6 +1127,7 @@ class CadenceFuseOpsInGraph:
     FuseCascadedTransposeOrPermuteOps,
     FuseCascadedViewOps,
     FuseQuantDequantToRequantizePass,
+    FuseMulTensorIntoQuantPass,
    FuseMulTensorIntoDequantPass,
     FuseMulScalarIntoDequantPass,
     FuseFullThenReshapePass,
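FuseMulTensorIntoQuantPass is registered here; its implementation is not shown in this hunk. Assuming it mirrors the existing FuseMulTensorIntoDequantPass, the arithmetic that makes the fusion sound is easy to check (a sketch, not the pass's actual code):

import torch

# quantize(x, scale=s) computes round(x / s) + zero_point, so a preceding
# multiply by a constant c can be folded by shrinking the scale to s / c:
# x * c / s == x / (s / c).
x, c, s = torch.randn(8), 0.5, 0.1
assert torch.allclose(x * c / s, x / (s / c))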

backends/cadence/aot/pass_utils.py

Lines changed: 37 additions & 14 deletions

@@ -174,30 +174,53 @@ def nodes_not_adjacent_in_gm(
 
 def get_arg(
     node: torch.fx.Node,
-    arg_index: int,
     kwarg_name: str,
-    *,
-    default: torch.fx.node.Argument = None,
 ) -> torch.fx.node.Argument:
     """
-    Get the arg at arg_index or kwarg with arg_name of the node. If neither is found
-    return default.
+    Get the arg with arg_name of the node, returns default value if not set.
     """
-    if arg_index < len(node.args):
-        return node.args[arg_index]
-    elif kwarg_name in node.kwargs:
+    # Try to get the arg from kwargs first since this is faster
+    if kwarg_name in node.kwargs:
         return node.kwargs[kwarg_name]
-    else:
-        return default
+
+    # If it's not found in kwargs, try to normalize the args
+    normalized_args = node.normalized_arguments(
+        node.graph.owning_module, normalize_to_only_use_kwargs=True
+    )
+    if not normalized_args:
+        raise RuntimeError(
+            f"get_arg: Node {node} does not support normalization of arguments"
+        )
+
+    return normalized_args.kwargs[kwarg_name]
 
 
 def set_arg(
-    node: torch.fx.Node, arg_index: int, kwarg_name: str, value: torch.fx.node.Argument
+    node: torch.fx.Node, kwarg_name: str, value: torch.fx.node.Argument
 ) -> None:
     """
-    Set the arg at arg_index if it exists, otherwise set the kwarg.
+    Set the node's arg with its name to the given value.
     """
-    if arg_index < len(node.args):
-        node.update_arg(arg_index, value)
+    # Try to set the arg if it is present in kwargs first since this is faster
+    if kwarg_name in node.kwargs:
+        node.update_kwarg(kwarg_name, value)
+        return
+
+    # If it's not found in kwargs, try to normalize the args and set the arg
+    normalized_args = node.normalized_arguments(
+        node.graph.owning_module, normalize_to_only_use_kwargs=True
+    )
+    if not normalized_args:
+        raise RuntimeError(
+            f"set_arg: Node {node} does not support normalization of arguments"
+        )
+
+    kwargs = normalized_args.kwargs
+    if kwarg_name not in kwargs:
+        raise ValueError(f"set_arg: invalid arg name {kwarg_name} for node {node} used")
+
+    idx = list(kwargs.keys()).index(kwarg_name)
+    if idx < len(node.args):
+        node.update_arg(idx, value)
     else:
         node.update_kwarg(kwarg_name, value)
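A short usage sketch of the new name-based accessors (assuming the standard executorch import path; torch.cat receives dim as a kwarg here, so both calls take the fast path):

import torch
from executorch.backends.cadence.aot.pass_utils import get_arg, set_arg

def f(x: torch.Tensor) -> torch.Tensor:
    return torch.cat([x, x], dim=1)

gm = torch.fx.symbolic_trace(f)
cat_node = next(n for n in gm.graph.nodes if n.target is torch.cat)

assert get_arg(cat_node, "dim") == 1  # found directly in node.kwargs
set_arg(cat_node, "dim", 0)           # updated in place by name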

backends/cadence/aot/passes.py

Lines changed: 36 additions & 8 deletions

@@ -6,7 +6,7 @@
 
 # pyre-strict
 
-from typing import Any, List, Optional
+from typing import Any, Callable, cast, List, Optional
 
 import torch
 import torch.fx
@@ -28,13 +28,18 @@
     RemoveRedundantOps,
 )
 from executorch.backends.cadence.aot.reorder_ops import CadenceReorderOpsInGraph
-from executorch.backends.cadence.aot.replace_ops import CadenceReplaceOpsInGraph
+from executorch.backends.cadence.aot.replace_ops import (
+    CadenceReplaceOpsInGraph,
+    ReplaceMulTensorWithMulAndFullOpsPass,
+)
 from executorch.backends.cadence.aot.simplify_ops import CadenceSimplifyOpsInGraph
+from executorch.exir import EdgeProgramManager
 from executorch.exir.pass_base import ExportPass, PassResult
 from executorch.exir.pass_manager import PassManager, PassType
 from executorch.exir.passes import dead_code_elimination_pass
 from executorch.exir.passes.scalar_to_tensor_pass import ScalarToTensorPass
 from executorch.exir.passes.spec_prop_pass import SpecPropPass
+from torch.export.exported_program import ExportedProgram
 
 
 @register_cadence_pass(CadencePassAttribute(opt_level=0))
@@ -89,14 +94,37 @@ def get_passes_in_default_order() -> List[ExportPass]:
     return pytree.tree_flatten(passes)[0]
 
 
-def get_cadence_passes(
+def apply_exir_ops_passes(
     opt_level: int,
-) -> List[Optional[PassResult]]:
+    edge_prog_manager: EdgeProgramManager,
+) -> EdgeProgramManager:
     passes = get_passes_in_default_order()
     pass_filter = create_cadence_pass_filter(opt_level)
-    filtered_passes = [
-        # pyre-ignore[20]: Expect argument graph_module
-        filtered_pass()
+    cadence_passes = [
+        (
+            lambda graph_module, filtered_pass=filtered_pass: filtered_pass()(
+                graph_module
+            )
+        )
         for filtered_pass in list(filter(pass_filter, passes))
     ]
-    return filtered_passes
+    cadence_prog_manager = edge_prog_manager.transform(
+        cast(
+            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
+        )
+    )
+    return cadence_prog_manager
+
+
+def apply_torch_ops_passes(expo_program: ExportedProgram) -> ExportedProgram:
+    """
+    Applies compiler passes on torch.ops IR, including torch.ops.aten, torch.ops.cadence, etc.
+    expo_program is expected to be the output of the torch.export.export().
+    """
+
+    aten_passes: List[Callable[[torch.fx.GraphModule], Optional[PassResult]]] = [
+        ReplaceMulTensorWithMulAndFullOpsPass()
+    ]
+    # TODO(T230417247): Use PassResult which is currently ignored.
+    PassManager(aten_passes)(expo_program.graph_module)
+    return expo_program
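Note the filtered_pass=filtered_pass default argument in the lambda above: it binds each pass class eagerly, sidestepping Python's late-binding closure pitfall. A sketch of how the two entry points divide the work, mirroring the compiler.py changes in this commit (model and inputs are illustrative):

import torch
from executorch.backends.cadence.aot.passes import apply_torch_ops_passes

# Torch-level passes run while the program is still in torch.ops IR.
ep = torch.export.export(torch.nn.Linear(4, 4), (torch.randn(1, 4),))
ep = apply_torch_ops_passes(ep)

# After lowering to edge IR, apply_exir_ops_passes(opt_level, edge_prog_manager)
# runs the opt-level-filtered Cadence passes via EdgeProgramManager.transform.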

backends/cadence/aot/remove_ops.py

Lines changed: 9 additions & 9 deletions

@@ -779,17 +779,17 @@ def _remove_unused_cat(self, graph_module: torch.fx.GraphModule) -> None:
         for slice_copy_node in graph_module.graph.find_nodes(
             op="call_function", target=exir_ops.edge.aten.slice_copy.Tensor
         ):
-            cat_node = cast(Node, get_arg(slice_copy_node, 0, "input"))
-            slice_dim = cast(int, get_arg(slice_copy_node, 1, "dim", default=0))
-            start_idx = cast(int, get_arg(slice_copy_node, 2, "start", default=None))
-            end_idx = cast(int, get_arg(slice_copy_node, 3, "end", default=None))
-            step = cast(int, get_arg(slice_copy_node, 4, "step", default=1))
+            cat_node = cast(Node, get_arg(slice_copy_node, "input"))
+            slice_dim = cast(int, get_arg(slice_copy_node, "dim"))
+            start_idx = cast(int, get_arg(slice_copy_node, "start"))
+            end_idx = cast(int, get_arg(slice_copy_node, "end"))
+            step = cast(int, get_arg(slice_copy_node, "step"))
 
             if cat_node.target != exir_ops.edge.aten.cat.default or step != 1:
                 continue
 
             # Make sure cat and slice happens on the same dimension.
-            cat_dim = cast(Node, get_arg(cat_node, 1, "dim", default=0))
+            cat_dim = cast(Node, get_arg(cat_node, "dim"))
             if cat_dim != slice_dim:
                 continue
 
@@ -805,14 +805,14 @@ def _remove_unused_cat(self, graph_module: torch.fx.GraphModule) -> None:
                 end_idx += cat_output_shape[cat_dim]
 
             offset = 0
-            for cat_input_node in cast(List[Node], get_arg(cat_node, 0, "tensors")):
+            for cat_input_node in cast(List[Node], get_arg(cat_node, "tensors")):
                 cat_input_shape = cat_input_node.meta["val"].shape
 
                 # Check if the slice range overlaps with the cat input range.
                 if offset <= start_idx and end_idx <= offset + cat_input_shape[cat_dim]:
                     slice_copy_node.replace_input_with(cat_node, cat_input_node)
-                    set_arg(slice_copy_node, 2, "start", start_idx - offset)
-                    set_arg(slice_copy_node, 3, "end", end_idx - offset)
+                    set_arg(slice_copy_node, "start", start_idx - offset)
+                    set_arg(slice_copy_node, "end", end_idx - offset)
                     break
 
                 offset += cat_input_shape[cat_dim]
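A worked instance of the rewrite above: when the slice window along the cat dimension falls entirely inside one cat input, the slice can read from that input directly with the offset subtracted:

import torch

# cat inputs of sizes 2 and 3 along dim 0; slicing rows 2:5 lands entirely in
# the second input (offset 2), so start/end are rewritten to 0 and 3.
a, b = torch.randn(2, 4), torch.randn(3, 4)
assert torch.equal(torch.cat([a, b])[2:5], b[0:3])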

backends/nxp/runtime/TARGETS

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+load("targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
