Skip to content

Commit ad54bc1

Browse files
authored
bump: try deepspeed >=0.14.1,<=0.15.0 (#21076)
* try `deepspeed >=0.14.1,<=0.15.0`
* drop from oldest
* pip uninstall -y deepspeed
* error::DeprecationWarning
1 parent 8ff43d4 commit ad54bc1

File tree

10 files changed

+31
-40
lines changed

10 files changed

+31
-40
lines changed

.actions/assistant.py

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -341,33 +341,6 @@ def create_mirror_package(source_dir: str, package_mapping: dict[str, str]) -> N
341341

342342

343343
class AssistantCLI:
344-
@staticmethod
345-
def requirements_prune_pkgs(packages: Sequence[str], req_files: Sequence[str] = REQUIREMENT_FILES_ALL) -> None:
346-
"""Remove some packages from given requirement files."""
347-
if isinstance(req_files, str):
348-
req_files = [req_files]
349-
for req in req_files:
350-
AssistantCLI._prune_packages(req, packages)
351-
352-
@staticmethod
353-
def _prune_packages(req_file: str, packages: Sequence[str]) -> None:
354-
"""Remove some packages from given requirement files."""
355-
path = Path(req_file)
356-
assert path.exists()
357-
text = path.read_text()
358-
lines = text.splitlines()
359-
final = []
360-
for line in lines:
361-
ln_ = line.strip()
362-
if not ln_ or ln_.startswith("#"):
363-
final.append(line)
364-
continue
365-
req = list(_parse_requirements([ln_]))[0]
366-
if req.name not in packages:
367-
final.append(line)
368-
print(final)
369-
path.write_text("\n".join(final) + "\n")
370-
371344
@staticmethod
372345
def copy_replace_imports(
373346
source_dir: str,

.azure/gpu-tests-fabric.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,16 @@ jobs:
9999
displayName: "Image info & NVIDIA"
100100
101101
- bash: |
102-
cd requirements/fabric
102+
set -ex
103+
pip install "cython<3.0" wheel # for compatibility
103104
pip install -U "lightning-utilities[cli]"
105+
cd requirements/fabric
106+
# replace version ranges with pinned minimal requirements
104107
python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'strategies.txt']"
105-
pip install "cython<3.0" wheel # for compatibility
108+
# drop deepspeed since it is not supported by our minimal Torch requirements
109+
python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files strategies.txt
110+
# uninstall deepspeed since some older docker images have it pre-installed
111+
pip uninstall -y deepspeed
106112
condition: contains(variables['Agent.JobName'], 'oldest')
107113
displayName: "setting oldest dependencies"
108114

.azure/gpu-tests-pytorch.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,16 @@ jobs:
103103
displayName: "Image info & NVIDIA"
104104
105105
- bash: |
106-
cd requirements/pytorch
106+
set -ex
107+
pip install "cython<3.0" wheel # for compatibility
107108
pip install -U "lightning-utilities[cli]"
109+
cd requirements/pytorch
110+
# replace version ranges with pinned minimal requirements
108111
python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'extra.txt', 'strategies.txt', 'examples.txt']"
109-
pip install "cython<3.0" wheel # for compatibility
112+
# drop deepspeed since it is not supported by our minimal Torch requirements
113+
python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files strategies.txt
114+
# uninstall deepspeed since some older docker images have it pre-installed
115+
pip uninstall -y deepspeed
110116
condition: contains(variables['Agent.JobName'], 'oldest')
111117
displayName: "setting oldest dependencies"
112118

dockers/release/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ FROM pytorchlightning/pytorch_lightning:base-cuda${CUDA_VERSION}-py${PYTHON_VERS
2121
LABEL maintainer="Lightning-AI <https://github.com/Lightning-AI>"
2222

2323
ARG LIGHTNING_VERSION=""
24+
ARG PYTORCH_VERSION
2425

2526
COPY ./ /home/pytorch-lightning/
2627

@@ -39,7 +40,14 @@ RUN \
3940
fi && \
4041
# otherwise there is collision with folder name and pkg name on Pypi
4142
cd pytorch-lightning && \
42-
pip install setuptools==75.6.0 && \
43+
# pip install setuptools==75.6.0 && \
44+
pip install -U "lightning-utilities[cli]" && \
45+
# drop deepspeed since it is not supported by our minimal Torch requirements \
46+
echo "PYTORCH_VERSION is: '$PYTORCH_VERSION'" && \
47+
if [[ "$PYTORCH_VERSION" =~ ^(2\.1|2\.2|2\.3|2\.4)$ ]]; then \
48+
python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files requirements/fabric/strategies.txt ; \
49+
python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files requirements/pytorch/strategies.txt ; \
50+
fi && \
4351
PACKAGE_NAME=lightning pip install '.[extra,loggers,strategies]' --no-cache-dir && \
4452
PACKAGE_NAME=pytorch pip install '.[extra,loggers,strategies]' --no-cache-dir && \
4553
cd .. && \

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ markers = [
179179
"cloud: Run the cloud tests for example",
180180
]
181181
filterwarnings = [
182+
# "error::DeprecationWarning",
182183
"error::FutureWarning",
183184
"ignore::FutureWarning:onnxscript", # Temporary ignore until onnxscript is updated
184185
]

requirements/fabric/strategies.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@
55

66
# note: there is a bug around 0.10 with `MPS_Accelerator must implement all abstract methods`
77
# shall be resolved by https://github.com/microsoft/DeepSpeed/issues/4372
8-
deepspeed >=0.9.3, <=0.9.3; platform_system != "Windows" and platform_system != "Darwin" # strict
8+
deepspeed >=0.14.1,<=0.15.0; platform_system != "Windows" and platform_system != "Darwin" # strict
99
bitsandbytes >=0.45.2,<0.47.0; platform_system != "Darwin"

requirements/pytorch/strategies.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33

44
# note: there is a bug around 0.10 with `MPS_Accelerator must implement all abstract methods`
55
# shall be resolved by https://github.com/microsoft/DeepSpeed/issues/4372
6-
deepspeed >=0.9.3, <=0.9.3; platform_system != "Windows" and platform_system != "Darwin" # strict
6+
deepspeed >=0.14.1,<=0.15.0; platform_system != "Windows" and platform_system != "Darwin" # strict

src/lightning/fabric/strategies/deepspeed.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747
from torch.optim.lr_scheduler import _LRScheduler
4848

4949
_DEEPSPEED_AVAILABLE = RequirementCache("deepspeed")
50-
_DEEPSPEED_GREATER_EQUAL_0_14_1 = RequirementCache("deepspeed>=0.14.1")
5150

5251

5352
# TODO(fabric): Links in the docstrings to PL-specific deepspeed user docs need to be replaced.
@@ -503,10 +502,7 @@ def load_checkpoint(
503502
)
504503
engine = engines[0]
505504

506-
if _DEEPSPEED_GREATER_EQUAL_0_14_1:
507-
from deepspeed.runtime.base_optimizer import DeepSpeedOptimizer
508-
else:
509-
from deepspeed.runtime import DeepSpeedOptimizer
505+
from deepspeed.runtime.base_optimizer import DeepSpeedOptimizer
510506

511507
optimzer_state_requested = any(isinstance(item, (Optimizer, DeepSpeedOptimizer)) for item in state.values())
512508

tests/tests_fabric/strategies/launchers/test_multiprocessing_integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def __init__(self):
3030

3131

3232
@RunIf(skip_windows=True)
33+
@pytest.mark.flaky(reruns=3)
3334
@pytest.mark.parametrize("strategy", ["ddp_spawn", "ddp_fork"])
3435
def test_memory_sharing_disabled(strategy):
3536
"""Test that the multiprocessing launcher disables memory sharing on model parameters and buffers to avoid race

tests/tests_pytorch/utilities/test_compile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
# https://github.com/pytorch/pytorch/issues/95708
3434
@pytest.mark.skipif(sys.platform == "darwin", reason="fatal error: 'omp.h' file not found")
35-
@RunIf(dynamo=True)
35+
@RunIf(dynamo=True, deepspeed=True)
3636
@mock.patch("lightning.pytorch.trainer.call._call_and_handle_interrupt")
3737
def test_trainer_compiled_model(_, tmp_path, monkeypatch, mps_count_0):
3838
trainer_kwargs = {

0 commit comments

Comments
 (0)