Commit 3fdeee8

Merge branch 'main' into kylesayrs/calib
2 parents 3003c83 + 4caf540; commit 3fdeee8

28 files changed: +193 -166 lines changed

.github/workflows/linkcheck.yml

Lines changed: 2 additions & 4 deletions
@@ -2,11 +2,9 @@ name: Check Markdown links

 on:
   push:
-    branches:
-      - main
+    branches: [ main, 'release/*' ]
   pull_request:
-    branches:
-      - main
+    branches: [ main, 'release/*' ]

   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:

.github/workflows/quality-check.yaml

Lines changed: 2 additions & 6 deletions
@@ -1,13 +1,9 @@
 name: Quality Checks
 on:
   push:
-    branches:
-      - main
-      - 'release/*'
+    branches: [ main , 'release/*' ]
   pull_request:
-    branches:
-      - main
-      - 'release/*'
+    branches: [ main, 'release/*' ]
 jobs:
   quality-check:
     runs-on: ubuntu-22.04

.github/workflows/set-comment.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 name: PR Reminder Comment Bot
 on:
   pull_request_target:
-    branches: [main]
+    branches: [ main, 'release/*' ]
     types: [opened]

 jobs:

.github/workflows/test-check-transformers.yaml

Lines changed: 2 additions & 2 deletions
@@ -1,10 +1,10 @@
 name: Test Checks (Transformers)
 on:
   pull_request:
-    branches: [ main ]
+    branches: [ main, 'release/*' ]
     types: [ labeled, synchronize ]
   push:
-    branches: [ main ]
+    branches: [ main, 'release/*' ]
   workflow_dispatch:
     inputs:
       code_coverage:

.github/workflows/test-check.yaml

Lines changed: 3 additions & 53 deletions
@@ -1,9 +1,9 @@
 name: Test Checks (Base/PyTorch)
 on:
   pull_request:
-    branches:
-      - main
+    branches: [ main, 'release/*' ]
   push:
+    branches: [ main, 'release/*' ]
   workflow_dispatch:
     inputs:
       code_coverage:
@@ -115,59 +115,9 @@ jobs:
         run: |
           coverage report --data-file="$COVERAGE_FILE" --skip-empty --format="markdown" > "$GITHUB_STEP_SUMMARY"

-  compat-pytorch-1_9-pytorch-tests:
-    runs-on: ubuntu-22.04
-    env:
-      COVERAGE_FILE: ".coverage.compat-pytorch-1.9"
-    steps:
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.10'
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-      - name: "⚙️ Install dependencies"
-        run: pip3 install -U pip setuptools && pip3 install .[dev]
-      - uses: actions/checkout@v4
-        with:
-          repository: "neuralmagic/compressed-tensors"
-          path: "compressed-tensors"
-          fetch-depth: 0
-          fetch-tags: true
-      - name: "⚙️ Install compressed-tensors dependencies"
-        run: |
-          pip3 uninstall -y compressed-tensors
-          export GIT_CEILING_DIRECTORIES="$(pwd)"
-          cd compressed-tensors
-          BUILD_TYPE=nightly pip3 install .
-      - name: "Clean compressed-tensors directory"
-        run: rm -r compressed-tensors/
-      - name: "⚙️ Prepare code coverage"
-        if: inputs.code_coverage
-        uses: ./.github/actions/prepare-code-coverage
-      - name: "🔬 Running pytorch tests"
-        run: |
-          pytest -v tests/llmcompressor/pytorch
-      - name: "Upload coverage report"
-        if: (success() || failure()) && inputs.code_coverage
-        uses: actions/upload-artifact@v4
-        with:
-          name: compat-pytorch-tests-coverage-results
-          path: |
-            .coverage*
-            coverage-html
-            coverage.json
-          include-hidden-files: true
-          retention-days: 5
-      - name: "Report coverage"
-        if: (success() || failure()) && inputs.code_coverage
-        run: |
-          coverage report --data-file="$COVERAGE_FILE" --skip-empty --format="markdown" > "$GITHUB_STEP_SUMMARY"
-
   combine-coverage:
     runs-on: ubuntu-22.04
-    needs: [base-tests, pytorch-tests, compat-pytorch-1_9-pytorch-tests]
+    needs: [base-tests, pytorch-tests]
     if: (success() || failure()) && inputs.code_coverage
     steps:
       - name: "Checkout llm-compressor"

Makefile

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ endif
 quality:
	@echo "Running python quality checks";
	ruff check $(CHECKDIRS);
+	ruff format --check $(CHECKDIRS);
	isort --check-only $(CHECKDIRS);
	flake8 $(CHECKDIRS) --max-line-length 88 --extend-ignore E203,W605;

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ skip = ["src/llmcompressor/transformers/tracing/", "src/llmcompressor/version.py
 files = "src/guidellm"

 [tool.ruff]
-exclude = ["build", "dist", "env", ".venv", "src/llmcompressor/transformers/tracing/"]
+exclude = ["build", "dist", "env", ".venv", "src/llmcompressor/transformers/tracing/", "src/llmcompressor/version.py"]
 lint.select = ["E", "F", "W"]
 lint.extend-ignore = ["E203", "W605"]

setup.py

Lines changed: 8 additions & 40 deletions
@@ -110,63 +110,31 @@ def localversion_func(version: ScmVersion) -> str:
         "src", include=["llmcompressor", "llmcompressor.*"], exclude=["*.__pycache__.*"]
     ),
     install_requires=[
-        (
-            "loguru>=0.7.2,<=0.7.3"
-            if BUILD_TYPE == "release"
-            else "loguru>=0.7.2"
-        ),
-        (
-            "pyyaml>=6.0.1,<=6.0.2"
-            if BUILD_TYPE == "release"
-            else "pyyaml>=6.0.1"
-        ),
+        ("loguru>=0.7.2,<=0.7.3" if BUILD_TYPE == "release" else "loguru>=0.7.2"),
+        ("pyyaml>=6.0.1,<=6.0.2" if BUILD_TYPE == "release" else "pyyaml>=6.0.1"),
         # librosa dependency numba is currently not compatible with numpy>=2.3
         # https://numba.readthedocs.io/en/stable/user/installing.html#version-support-information
-        (
-            "numpy>=2.0.0,<=2.3.2"
-            if BUILD_TYPE == "release"
-            else "numpy>=2.0.0"
-        ),
+        ("numpy>=2.0.0,<=2.3.2" if BUILD_TYPE == "release" else "numpy>=2.0.0"),
         (
             "requests>=2.32.2,<=2.32.5"
             if BUILD_TYPE == "release"
             else "requests>=2.32.2"
         ),
-        (
-            "tqdm>=4.66.3,<=4.67.1"
-            if BUILD_TYPE == "release"
-            else "tqdm>=4.66.3"
-        ),
-        (
-            "torch>=2.7.0,<=2.8.0"
-            if BUILD_TYPE == "release"
-            else "torch>=2.7.0"
-        ),
+        ("tqdm>=4.66.3,<=4.67.1" if BUILD_TYPE == "release" else "tqdm>=4.66.3"),
+        ("torch>=2.7.0,<=2.8.0" if BUILD_TYPE == "release" else "torch>=2.7.0"),
         (
             "transformers>=4.53.0,<=4.55.2"
             if BUILD_TYPE == "release"
             else "transformers>=4.53.0"
         ),
-        (
-            "datasets>=4.0.0,<=4.0.0"
-            if BUILD_TYPE == "release"
-            else "datasets>=4.0.0"
-        ),
+        ("datasets>=4.0.0,<=4.0.0" if BUILD_TYPE == "release" else "datasets>=4.0.0"),
         (
             "accelerate>=1.6.0,<=1.10.0"
             if BUILD_TYPE == "release"
             else "accelerate>=1.6.0"
         ),
-        (
-            "pynvml>=11.5.3,<=12.0.0"
-            if BUILD_TYPE == "release"
-            else "pynvml>=11.5.3"
-        ),
-        (
-            "pillow>=10.4.0,<=10.4.0"
-            if BUILD_TYPE == "release"
-            else "pillow>=10.4.0"
-        ),
+        ("pynvml>=11.5.3,<=12.0.0" if BUILD_TYPE == "release" else "pynvml>=11.5.3"),
+        ("pillow>=10.4.0,<=10.4.0" if BUILD_TYPE == "release" else "pillow>=10.4.0"),
         (
             "compressed-tensors==0.11.0"
             if BUILD_TYPE == "release"
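
The reformatted entries above all follow the same release-vs-nightly pinning pattern: release builds get an upper bound, every other build keeps only the open-ended lower bound. A minimal sketch of that pattern, assuming BUILD_TYPE is read from the environment as in this setup.py; the pinned() helper is hypothetical and only illustrates the inline conditionals, it is not part of the repository code:

import os

# Hypothetical helper illustrating the conditional-pin pattern; setup.py itself
# writes each conditional expression inline rather than using a function.
BUILD_TYPE = os.environ.get("BUILD_TYPE", "dev")

def pinned(lower_bound: str, release_cap: str) -> str:
    """Return an upper-bounded requirement for release builds,
    otherwise keep only the open-ended lower bound."""
    return f"{lower_bound},{release_cap}" if BUILD_TYPE == "release" else lower_bound

install_requires = [
    pinned("loguru>=0.7.2", "<=0.7.3"),  # mirrors the first entry above
    pinned("torch>=2.7.0", "<=2.8.0"),   # mirrors the torch entry above
]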

src/llmcompressor/modifiers/quantization/gptq/base.py

Lines changed: 14 additions & 14 deletions
@@ -72,8 +72,9 @@ class GPTQModifier(Modifier, QuantizationMixin):
     :param block_size: Used to determine number of columns to compress in one pass
     :param dampening_frac: Amount of dampening to apply to H, as a fraction of the
         diagonal norm
-    :param actorder: order in which weight columns are quantized. For more information,
-        on actorder options, see https://github.com/vllm-project/vllm/pull/8135
+    :param actorder: order in which weight columns are quantized. Defaults to "static"
+        activation ordering, which achieves best accuracy recovery with no runtime cost.
+        For more information, see https://github.com/vllm-project/vllm/pull/8135
     :param offload_hessians: Set to True for decreased memory usage but increased
         runtime.
@@ -106,7 +107,7 @@ class GPTQModifier(Modifier, QuantizationMixin):
     sequential_targets: Union[str, List[str], None] = None
     block_size: int = 128
     dampening_frac: Optional[float] = 0.01
-    actorder: Optional[Union[ActivationOrdering, Sentinel]] = None
+    actorder: Optional[Union[ActivationOrdering, Sentinel]] = Sentinel("static")
     offload_hessians: bool = False

     # private variables
@@ -134,18 +135,17 @@ def resolve_actorder(existing):
                 return ActivationOrdering.STATIC if existing is None else existing

             # user-provided value always attempts to override
-            if self.actorder is not None:
-                if existing is None or self.actorder == existing:
-                    return self.actorder
-                raise ValueError(
-                    "Cannot resolve activation ordering when both "
-                    "`GPTQModifier.actorder` and `QuantizationScheme.actorder` "
-                    "are provided and differ. Either set `GPTQModifier.actorder = "
-                    "None` or remove `actorder` from config groups."
-                )
+            if existing is None or self.actorder == existing:
+                return self.actorder

-            # setting `GPTQModifier.actorder = None` does nothing
-            return existing
+            # if existing provided and conflicts
+            raise ValueError(
+                "Cannot resolve activation ordering when both "
+                "`GPTQModifier.actorder` and `QuantizationScheme.actorder` "
+                f"are provided and differ ({self.actorder}, {existing}). "
+                "Either unset `GPTQModifier.actorder` or "
+                "remove `actorder` from config groups."
+            )

         for scheme in config.config_groups.values():
             assert isinstance(scheme, QuantizationScheme)
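
To make the new default concrete, here is a hedged, standalone sketch of the resolution behavior the hunk above implements: the Sentinel("static") default falls back to static ordering unless the scheme already sets one, an explicit user value wins when the scheme agrees or is unset, and a conflict raises. The function below is illustrative only, models the sentinel as the plain string "static", and is not the repository code:

from enum import Enum
from typing import Optional, Union

class ActivationOrdering(str, Enum):  # simplified stand-in for the real enum
    STATIC = "static"
    GROUP = "group"
    WEIGHT = "weight"

SENTINEL_STATIC = "static"  # models Sentinel("static") for illustration only

def resolve_actorder(
    modifier_actorder: Union[str, ActivationOrdering, None],
    existing: Optional[ActivationOrdering],
) -> Optional[ActivationOrdering]:
    # Default sentinel: use static ordering unless the scheme already chose one.
    if modifier_actorder == SENTINEL_STATIC:
        return ActivationOrdering.STATIC if existing is None else existing
    # A user-provided value (including None) wins when the scheme agrees or is unset.
    if existing is None or modifier_actorder == existing:
        return modifier_actorder
    # Conflicting values raise, mirroring the ValueError in the diff above.
    raise ValueError(
        f"Cannot resolve activation ordering: modifier={modifier_actorder!r}, "
        f"scheme={existing!r}"
    )

# e.g. resolve_actorder(SENTINEL_STATIC, None) -> ActivationOrdering.STATIC
# e.g. resolve_actorder(None, ActivationOrdering.GROUP) -> raises ValueError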

tests/e2e/e2e_utils.py

Lines changed: 4 additions & 1 deletion
@@ -70,7 +70,10 @@ def data_collator(batch):
     # a compatible preset sceme
     if quant_type == "GPTQ":
         oneshot_kwargs["recipe"] = GPTQModifier(
-            targets="Linear", scheme=scheme, ignore=["lm_head"]
+            targets="Linear",
+            scheme=scheme,
+            actorder=None,  # added for consistency with past testing configs
+            ignore=["lm_head"],
         )
     else:
         oneshot_kwargs["recipe"] = QuantizationModifier(
