Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion optimum/commands/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from ..neuron.version import __sdk_version__ as neuron_sdk_version
from ..neuron.version import __version__ as optimum_neuron_version
from ..version import __version__ as optimum_version
from . import BaseOptimumCLICommand, CommandInfo
from .base import BaseOptimumCLICommand, CommandInfo


class EnvironmentCommand(BaseOptimumCLICommand):
Expand Down
2 changes: 1 addition & 1 deletion optimum/commands/export/neuron.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from argparse import ArgumentParser, Namespace, _SubParsersAction
from pathlib import Path

from ...exporters import TasksManager
from ...exporters.tasks import TasksManager
from ..base import BaseOptimumCLICommand, CommandInfo


Expand Down
2 changes: 1 addition & 1 deletion optimum/commands/export/neuronx.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from argparse import SUPPRESS, ArgumentParser, Namespace, _SubParsersAction
from pathlib import Path

from ...exporters import TasksManager
from ...exporters.tasks import TasksManager
from ...neuron.utils.instance import SUPPORTED_INSTANCE_TYPES, normalize_instance_type
from ...neuron.utils.system import get_neuron_major
from ..base import BaseOptimumCLICommand, CommandInfo
Expand Down
2 changes: 1 addition & 1 deletion optimum/commands/register/register_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"""Registers the export command for Neuron to the Optimum CLI."""

from ...neuron.utils import is_neuron_available, is_neuronx_available
from ..export import ExportCommand
from ..export.base import ExportCommand


_neuron_export_command_was_imported = False
Expand Down
2 changes: 1 addition & 1 deletion optimum/exporters/neuron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def get_input_shapes(task: str, args: argparse.Namespace) -> dict[str, int]:
def get_neuron_config_class(task: str, model_id: str) -> NeuronExportConfig:
config = AutoConfig.from_pretrained(model_id)

model_type = config.model_type.replace("_", "-")
model_type = config.model_type
if config.is_encoder_decoder:
model_type = model_type + "-encoder"

Expand Down
6 changes: 2 additions & 4 deletions optimum/exporters/neuron/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,11 +452,9 @@ class LevitNeuronConfig(ViTNeuronConfig):
pass


@register_in_tasks_manager(
"mobilenet-v2", *["feature-extraction", "image-classification", "semantic-segmentation", "image-segmentation"]
)
@register_in_tasks_manager("mobilenet_v2", *["feature-extraction", "image-classification", "semantic-segmentation"])
class MobileNetV2NeuronConfig(ViTNeuronConfig):
MODEL_TYPE = "mobilenet-v2"
MODEL_TYPE = "mobilenet_v2"
pass


Expand Down
8 changes: 0 additions & 8 deletions optimum/exporters/neuron/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,24 +38,16 @@
neuron_scaled_dot_product_attention,
)
from ...utils import (
DIFFUSERS_MINIMUM_VERSION,
check_if_diffusers_greater,
is_diffusers_available,
logging,
)
from ...utils.import_utils import _diffusers_version
from ..tasks import TasksManager


logger = logging.get_logger()


if is_diffusers_available():
if not check_if_diffusers_greater(DIFFUSERS_MINIMUM_VERSION.base_version):
raise ImportError(
f"We found an older version of diffusers {_diffusers_version} but we require diffusers to be >= {DIFFUSERS_MINIMUM_VERSION}. "
"Please update diffusers by running `pip install --upgrade diffusers`"
)
from diffusers import (
ControlNetModel,
DiffusionPipeline,
Expand Down
3 changes: 1 addition & 2 deletions optimum/neuron/cache/hub_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
from huggingface_hub import HfApi, get_token
from huggingface_hub.errors import EntryNotFoundError
from huggingface_hub.hf_api import RepoFile

from optimum.exporters import TasksManager
from optimum.exporters.tasks import TasksManager

from ..utils.argument_utils import DTYPE_MAPPER
from ..utils.cache_utils import get_hf_hub_cache_repo
Expand Down
6 changes: 0 additions & 6 deletions optimum/neuron/modeling_traced.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,6 @@ def _from_pretrained(
neuron_config=neuron_config,
)

@classmethod
def _from_transformers(cls, *args, **kwargs):
# Deprecate it when optimum uses `_export` as from_pretrained_method in a stable release.
return cls._export(*args, **kwargs)

@classmethod
def _export(
cls,
Expand Down Expand Up @@ -492,7 +487,6 @@ def _neuron_config_init(cls, config: "PretrainedConfig") -> "NeuronDefaultConfig
task = neuron_config.get("task", None) or TasksManager.infer_task_from_model(cls.auto_model_class)
task = TasksManager.map_from_synonym(task)
model_type = neuron_config.get("model_type", None) or config.model_type
model_type = model_type.replace("_", "-")
neuron_config_constructor = TasksManager.get_exporter_config_constructor(
model_type=model_type,
exporter="neuron",
Expand Down
3 changes: 2 additions & 1 deletion optimum/neuron/utils/argument_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class InputShapesArguments:
feature_size: int | None = None
nb_max_frames: int | None = None
audio_sequence_length: int | None = None
visual_seq_length: int | None = None
point_batch_size: int | None = None
nb_points_per_image: int | None = None
num_beams: int | None = None
Expand Down Expand Up @@ -301,7 +302,7 @@ def store_compilation_config(
original_model_type = getattr(config, "export_model_type", None) or getattr(
config, "model_type", None
) # prioritize sentence_transformers to transformers
neuron_model_type = str(model_type).replace("_", "-") if model_type is not None else model_type
neuron_model_type = str(model_type) if model_type is not None else model_type
if original_model_type is None:
update_func(
"model_type", neuron_model_type
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ classifiers = [
dependencies = [
"transformers ~= 4.57.1",
"accelerate == 1.8.1",
"optimum ~= 1.24.0",
"optimum ~= 2.0.0",
"huggingface_hub >= 0.31.4",
"numpy>=1.22.2, <=1.26.4",
"protobuf>=3.20.3, <4",
Expand Down
5 changes: 2 additions & 3 deletions tests/exporters/exporters_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
"hubert": "hf-internal-testing/tiny-random-HubertModel",
"levit": "hf-internal-testing/tiny-random-LevitModel",
"mobilebert": "hf-internal-testing/tiny-random-MobileBertModel",
"mobilenet-v2": "hf-internal-testing/tiny-random-MobileNetV2Model",
"mobilenet_v2": "hf-internal-testing/tiny-random-MobileNetV2Model",
# "mobilevit": "hf-internal-testing/tiny-random-mobilevit", # blocked since neuron sdk 2.23: timeout
"modernbert": "hf-internal-testing/tiny-random-ModernBertModel",
"mpnet": "hf-internal-testing/tiny-random-MPNetModel",
Expand Down Expand Up @@ -80,7 +80,7 @@
"clip": "sentence-transformers/clip-ViT-B-32",
}

WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH = ["camembert", "roberta", "mobilenet-v2"]
WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH = ["camembert", "roberta", "mobilenet_v2"]

# Diffusers

Expand Down Expand Up @@ -109,7 +109,6 @@ def get_models_to_test(
):
models_to_test = []
for model_type, model_names_tasks in export_models_dict.items():
model_type = model_type.replace("_", "-")
if exclude_model_types is None or (model_type not in exclude_model_types):
task_config_mapping = TasksManager.get_supported_tasks_for_model_type(
model_type, "neuron", library_name=library_name
Expand Down
6 changes: 0 additions & 6 deletions tests/exporters/test_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
from optimum.utils.testing_utils import require_sentence_transformers
from parameterized import parameterized
from transformers import AutoConfig, AutoModelForSeq2SeqLM, set_seed
from transformers import __version__ as transformers_version
from transformers.testing_utils import slow

from optimum.exporters.neuron import (
Expand Down Expand Up @@ -85,11 +84,6 @@ def _neuronx_export(
dynamic_batch_size: bool = False,
inline_weights_to_neff: bool = True,
):
# REMOVEME: convnextv2 contains a bug in the GRN layer, which is used in the convnextv2 model, but the bug has
# been fixed in the transformers library on newer versions. For more info see:
# https://github.com/huggingface/transformers/issues/38015
if model_type == "convnextv2" and transformers_version.startswith("4.51"):
self.skipTest("convnextv2 contains a bug in this version of transformers.")
library_name = TasksManager.infer_library_from_model(model_name)
if library_name == "sentence_transformers":
model_class = TasksManager.get_model_class_for_task(task, framework="pt", library=library_name)
Expand Down
4 changes: 3 additions & 1 deletion tests/inference/inference_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"gpt2": "hf-internal-testing/tiny-random-gpt2",
"levit": "hf-internal-testing/tiny-random-LevitModel",
"mobilebert": "hf-internal-testing/tiny-random-MobileBertModel",
"mobilenet-v2": "hf-internal-testing/tiny-random-MobileNetV2Model",
"mobilenet_v2": "hf-internal-testing/tiny-random-MobileNetV2Model",
"mobilevit": "hf-internal-testing/tiny-random-mobilevit",
"modernbert": "hf-internal-testing/tiny-random-ModernBertModel",
"mpnet": "hf-internal-testing/tiny-random-MPNetModel",
Expand Down Expand Up @@ -108,6 +108,7 @@ def tearDownClass(cls):

class NeuronModelTestMixin(unittest.TestCase):
ARCH_MODEL_MAP = {}
TASK = None
STATIC_INPUTS_SHAPES = {"batch_size": 1, "sequence_length": 32}

@classmethod
Expand Down Expand Up @@ -141,6 +142,7 @@ def _setup(self, model_args: Dict):
model_id,
**model_args,
export=True,
task=self.TASK,
torch_dtype=torch.float32,
dynamic_batch_size=dynamic_batch_size,
**self.STATIC_INPUTS_SHAPES,
Expand Down
49 changes: 2 additions & 47 deletions tests/inference/transformers/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@
PretrainedConfig,
set_seed,
)
from transformers import (
__version__ as transformers_version,
)

from optimum.neuron import (
NeuronModelForAudioClassification,
Expand Down Expand Up @@ -483,14 +480,6 @@ def _validate_outputs(self, model_arch, suffix, batch_size):
f"Inference results between pytorch model and neuron model of {model_arch} not close enough."
)

def test_load_vanilla_transformers_which_is_not_supported(self):
with self.assertRaises(Exception) as context:
_ = NeuronModelForMaskedLM.from_pretrained(
"hf-internal-testing/tiny-random-t5", from_transformers=True, **self.STATIC_INPUTS_SHAPES
)

self.assertIn("Unrecognized configuration class", str(context.exception))

@parameterized.expand(SUPPORTED_ARCHITECTURES, skip_on_empty=True)
def test_compare_to_transformers_non_dyn_bs(self, model_arch):
model_args = {
Expand Down Expand Up @@ -603,14 +592,6 @@ def _validate_outputs(self, model_arch, suffix, batch_size):
if not result_close_end_logits:
warnings.warn(f"End logits between pytorch model and neuron model of {model_arch} not close enough.")

def test_load_vanilla_transformers_which_is_not_supported(self):
with self.assertRaises(Exception) as context:
_ = NeuronModelForQuestionAnswering.from_pretrained(
"hf-internal-testing/tiny-random-t5", from_transformers=True, **self.STATIC_INPUTS_SHAPES
)

assert ("doesn't support" in str(context.exception)) or ("is not supported" in str(context.exception))

def test_compare_to_transformers_dyn_bs(self):
model_arch = "albert"
# Neuron model with dynamic batching
Expand Down Expand Up @@ -719,14 +700,6 @@ def _validate_outputs(self, model_arch, suffix, batch_size):
f"Inference results between pytorch model and neuron model of {model_arch} not close enough."
)

def test_load_vanilla_transformers_which_is_not_supported(self):
with self.assertRaises(Exception) as context:
_ = NeuronModelForSequenceClassification.from_pretrained(
"hf-internal-testing/tiny-random-t5", from_transformers=True, **self.STATIC_INPUTS_SHAPES
)

assert ("doesn't support" in str(context.exception)) or ("is not supported" in str(context.exception))

@parameterized.expand(SUPPORTED_ARCHITECTURES, skip_on_empty=True)
def test_compare_to_transformers_non_dyn_bs(self, model_arch):
model_args = {
Expand Down Expand Up @@ -831,14 +804,6 @@ def _validate_outputs(self, model_arch, suffix, batch_size):
f"Inference results between pytorch model and neuron model of {model_arch} not close enough."
)

def test_load_vanilla_transformers_which_is_not_supported(self):
with self.assertRaises(Exception) as context:
_ = NeuronModelForTokenClassification.from_pretrained(
"hf-internal-testing/tiny-random-t5", from_transformers=True, **self.STATIC_INPUTS_SHAPES
)

assert ("doesn't support" in str(context.exception)) or ("is not supported" in str(context.exception))

@parameterized.expand(SUPPORTED_ARCHITECTURES, skip_on_empty=True)
def test_compare_to_transformers_non_dyn_bs(self, model_arch):
model_args = {
Expand Down Expand Up @@ -987,7 +952,7 @@ class NeuronModelForImageClassificationIntegrationTest(NeuronModelTestMixin):
"cvt",
"deit",
"levit",
"mobilenet-v2",
"mobilenet_v2",
"mobilevit",
"swin",
"vit",
Expand Down Expand Up @@ -1046,11 +1011,6 @@ def test_compare_to_transformers_non_dyn_bs(self, model_arch):
"model_arch": model_arch,
"dynamic_batch_size": False,
}
# REMOVEME: convnextv2 contains a bug in the GRN layer, which is used in the convnextv2 model, but the bug has
# been fixed in the transformers library on newer versions. For more info see:
# https://github.com/huggingface/transformers/issues/38015
if model_arch == "convnextv2" and transformers_version.startswith("4.51"):
self.skipTest("convnextv2 contains a bug in this version of transformers.")
self._setup(model_args)
self._validate_outputs(model_arch, "_dyn_bs_false", batch_size=1)

Expand All @@ -1064,11 +1024,6 @@ def test_compare_to_transformers_dyn_bs(self):
"model_arch": model_arch,
"dynamic_batch_size": True,
}
# REMOVEME: convnextv2 contains a bug in the GRN layer, which is used in the convnextv2 model, but the bug has
# been fixed in the transformers library on newer versions. For more info see:
# https://github.com/huggingface/transformers/issues/38015
if model_arch == "convnextv2" and transformers_version.startswith("4.51"):
self.skipTest("convnextv2 contains a bug in this version of transformers.")
self._setup(model_args)
self._validate_outputs(model_arch, "_dyn_bs_true", batch_size=2)

Expand Down Expand Up @@ -1097,7 +1052,7 @@ class NeuronModelForSemanticSegmentationIntegrationTest(NeuronModelTestMixin):
TASK = "semantic-segmentation"
ATOL_FOR_VALIDATION = 1e-3
SUPPORTED_ARCHITECTURES = [
"mobilenet-v2",
"mobilenet_v2",
"mobilevit",
]

Expand Down
2 changes: 1 addition & 1 deletion tools/cache/auto_fill_diffusion_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import time

import requests
from optimum.exporters.tasks import TasksManager

from optimum.exporters import TasksManager
from optimum.neuron.utils.instance import SUPPORTED_INSTANCE_TYPES


Expand Down
2 changes: 1 addition & 1 deletion tools/cache/auto_fill_llm_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import time

import requests
from optimum.exporters.tasks import TasksManager

from optimum.exporters import TasksManager
from optimum.neuron.utils.instance import SUPPORTED_INSTANCE_TYPES


Expand Down