
Commit a99d1bc

Merge branch 'main' into feature/refactor/main
2 parents 62b7317 + 7f2fbeb commit a99d1bc

File tree: 16 files changed, +4385 −115 lines

.github/workflows/development.yml

Lines changed: 0 additions & 53 deletions
```diff
@@ -180,59 +180,6 @@ jobs:
       - name: Run integration tests
         run: npm run test:integration
 
-  build:
-    # Only build if the PR branch is local
-    if: github.event.pull_request.head.repo.full_name == github.repository
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python: ["3.9"]
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python }}
-      - name: Install dependencies
-        run: pip install tox
-      - name: Build the package
-        run: |
-          export GUIDELLM_BUILD_TYPE=dev
-          export GUIDELLM_BUILD_ITERATION=${{ github.event.pull_request.number }}
-          tox -e build
-      - name: Upload build artifacts
-        id: artifact-upload
-        uses: actions/upload-artifact@v4
-        with:
-          name: build-artifacts
-          path: dist/*
-          compression-level: 6
-          if-no-files-found: error
-          retention-days: 30
-      - name: Generate GitHub App token
-        id: app-token
-        uses: actions/create-github-app-token@v1
-        with:
-          app-id: ${{ secrets.GH_NM_REDHAT_AUTOMATION_APP_ID }}
-          private-key: ${{ secrets.GH_NM_REDHAT_AUTOMATION_APP_PRIVATE_KEY }}
-      - name: Comment Install instructions
-        uses: actions/github-script@v7
-        with:
-          github-token: ${{ steps.app-token.outputs.token }}
-          script: |
-            github.rest.issues.createComment({
-              issue_number: context.issue.number,
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              body: `📦 **Build Artifacts Available**
-            The build artifacts (\`.whl\` and \`.tar.gz\`) have been successfully generated and are available for download: ${{ steps.artifact-upload.outputs.artifact-url }}.
-            They will be retained for **up to 30 days**.
-            `
-            })
-
   ui-pr-preview:
     needs: [ui-quality-checks, ui-precommit-checks, ui-unit-tests, ui-integration-tests]
     permissions:
```

.pre-commit-config.yaml

Lines changed: 6 additions & 33 deletions
```diff
@@ -1,11 +1,16 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v6.0.0
     hooks:
       - id: trailing-whitespace
         exclude: ^tests/?.*/assets/.+
       - id: end-of-file-fixer
         exclude: ^tests/?.*/assets/.+
+  - repo: https://github.com/pdm-project/pdm
+    rev: 2.25.6
+    hooks:
+      - id: pdm-lock-check
+        name: check lock file matches pyproject
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.11.7
     hooks:
@@ -14,35 +19,3 @@ repos:
         args: [ --fix, --show-fixes ]
       - id: ruff-format
         name: run formatter
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.15.0
-    hooks:
-      - id: mypy
-        args: [--check-untyped-defs]
-        additional_dependencies:
-          [
-            # main dependencies
-            click,
-            datasets,
-            ftfy,
-            loguru,
-            numpy,
-            pillow,
-            pydantic,
-            pydantic_settings,
-            pyyaml,
-            respx,
-            rich,
-            setuptools,
-            setuptools-git-versioning,
-            transformers,
-
-            # dev dependencies
-            pytest,
-            pydantic_settings,
-
-            # types
-            types-PyYAML,
-            types-requests,
-            types-toml,
-          ]
```
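Note: the new pdm-lock-check hook fails the commit whenever the lock file drifts out of sync with pyproject.toml; the usual remedy is to regenerate the lock (e.g. with `pdm lock`) and commit the result.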

docs/datasets.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -76,6 +76,7 @@ guidellm benchmark \
 - `output_tokens_stdev`: Standard deviation for output tokens. If not supplied and min/max are not specified, no deviation is applied. If not supplied and min/max are specified, a uniform distribution is used.
 - `output_tokens_min`: Minimum number of tokens in outputs. If unset and `output_tokens_stdev` is set, the minimum is 1.
 - `output_tokens_max`: Maximum number of tokens in outputs. If unset and `output_tokens_stdev` is set, the maximum is 5 times the standard deviation.
+- `prefix_tokens`: Number of tokens to share as a prefix across all prompts. This is additive to the prompt tokens distribution, so each request is `prefix_tokens + prompt_tokens_sample()`. If unset, defaults to 0.
 - `samples`: Number of samples to generate (default: 1000). More samples will increase the time taken to generate the dataset before benchmarking, but will also decrease the likelihood of caching requests.
 - `source`: Source text for generation (default: `data:prideandprejudice.txt.gz`). This can be any text file, URL containing a text file, or a compressed text file. The text is used to sample from at a word and punctuation granularity and then combined into a single string of the desired lengths.
 
```

pdm.toml

Lines changed: 2 additions & 0 deletions
```diff
@@ -0,0 +1,2 @@
+[lock]
+format = "pylock"
```
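pylock is the standardized lock-file format defined by PEP 751; with `format = "pylock"` set, PDM writes the pylock.toml added below rather than its native pdm.lock.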

pylock.toml

Lines changed: 3366 additions & 0 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 7 additions & 0 deletions
```diff
@@ -10,6 +10,9 @@ include = ["*"]
 [tool.setuptools.package-data]
 "guidellm.data" = ["*.gz"]
 
+[tool.pdm]
+distribution = true
+
 
 # ************************************************
 # ********** Project Metadata **********
@@ -101,6 +104,10 @@ dev = [
     "mkdocs-linkcheck~=1.0.6",
 ]
 
+# For PEP 735 compliant tools
+[dependency-groups]
+dev = [ "guidellm[dev]" ]
+
 [project.urls]
 homepage = "https://github.com/vllm-project/guidellm"
 source = "https://github.com/vllm-project/guidellm"
```
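`[dependency-groups]` is the PEP 735 table, so tools that understand dependency groups (such as PDM here) can resolve the dev group directly; pointing it back at the existing `guidellm[dev]` extra keeps the two definitions in sync.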

src/guidellm/benchmark/benchmark.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -817,7 +817,9 @@ def from_stats(
             ],
             iter_counts=[req.output_tokens for req in total_with_output_first],
             first_iter_counts=[
-                req.prompt_tokens for req in total_with_output_first
+                # prompt tokens + first token
+                req.prompt_tokens + 1
+                for req in total_with_output_first
             ],
         ),
     ),
```
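Per the inline comment, the `+ 1` reflects that a request's first iteration both processes the prompt and emits the first output token, so its token count is the prompt length plus one rather than the prompt length alone.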

src/guidellm/dataset/synthetic.py

Lines changed: 30 additions & 9 deletions
```diff
@@ -1,6 +1,7 @@
 import json
 import random
 from collections.abc import Iterable, Iterator
+from itertools import cycle
 from pathlib import Path
 from typing import Any, Literal, Optional, Union
 
@@ -25,6 +26,11 @@
 
 
 class SyntheticDatasetConfig(BaseModel):
+    prefix_tokens: int = Field(
+        description="The number of shared prefix tokens to prepend to each prompt.",
+        ge=0,
+        default=0,
+    )
     prompt_tokens: int = Field(
         description="The average number of text tokens generated for prompts.",
         gt=0,
@@ -163,39 +169,54 @@ def __iter__(
         )
         # ensure diff distribution from output tokens
         rand = random.Random(self.random_seed + 2)  # noqa: S311
+        unique_prefix_iter = cycle(self.processor.get_vocab().values())
+
+        prefix_index = rand.randint(0, len(self.text_creator.words))
+        prefix_tokens = self._create_prompt(self.config.prefix_tokens, prefix_index)
 
         for _, prompt_tokens, output_tokens in zip(
             range(self.config.samples),
             prompt_tokens_sampler,
             output_tokens_sampler,
         ):
             start_index = rand.randint(0, len(self.text_creator.words))
+            prompt_text = self.processor.decode(
+                prefix_tokens
+                + self._create_prompt(
+                    prompt_tokens, start_index, next(unique_prefix_iter)
+                ),
+                skip_special_tokens=True,
+            )
             yield {
-                "prompt": self._create_prompt(prompt_tokens, start_index),
-                "prompt_tokens_count": prompt_tokens,
+                "prompt": prompt_text,
+                "prompt_tokens_count": self.config.prefix_tokens + prompt_tokens,
                 "output_tokens_count": output_tokens,
             }
 
-    def _create_prompt(self, prompt_tokens: int, start_index: int) -> str:
+    def _create_prompt(
+        self, prompt_tokens: int, start_index: int, unique_prefix: Optional[int] = None
+    ) -> list[int]:
         if prompt_tokens <= 0:
-            return ""
+            return []
 
         left = start_index
         right = start_index + 4 * prompt_tokens
+        start_tokens = [unique_prefix] if unique_prefix else []
 
         while left < right:
             mid = (left + right) // 2
             test_prompt = self.text_creator.create_text(start_index, mid - start_index)
-            test_tokens = len(self.processor.tokenize(test_prompt))
+            test_tokens = start_tokens + self.processor.encode(test_prompt)
 
-            if test_tokens == prompt_tokens:
-                return test_prompt
-            elif test_tokens < prompt_tokens:
+            if len(test_tokens) == prompt_tokens:
+                return test_tokens
+            elif len(test_tokens) < prompt_tokens:
                 left = mid + 1
             else:
                 right = mid
 
-        return self.text_creator.create_text(start_index, left - start_index)
+        final_text = self.text_creator.create_text(start_index, left - start_index)
+        return start_tokens + self.processor.encode(final_text)
 
 
 class SyntheticDatasetCreator(DatasetCreator):
```
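The heart of `_create_prompt` is a binary search over how many source words to take so that the result tokenizes to exactly the requested count. Below is a self-contained sketch of that idea, with a generic `tokenize` callable and `words` list standing in for the project's processor and text creator (names here are illustrative, not the project's API):

```python
from collections.abc import Callable

def fit_token_count(
    words: list[str],
    start: int,
    target: int,
    tokenize: Callable[[str], list[int]],
) -> list[int]:
    """Binary-search a word span beginning at `start` whose tokenization
    has exactly `target` tokens (or as close as the search can land)."""
    if target <= 0:
        return []
    # Mirror the diff's generous upper bound of 4 * target words.
    left, right = start, start + 4 * target
    while left < right:
        mid = (left + right) // 2
        tokens = tokenize(" ".join(words[start:mid]))
        if len(tokens) == target:
            return tokens
        elif len(tokens) < target:
            left = mid + 1
        else:
            right = mid
    return tokenize(" ".join(words[start:left]))
```

Returning token IDs instead of text (the other half of this change) lets the caller concatenate the shared prefix, the per-prompt token, and the body before a single decode, so `prompt_tokens_count` matches what is actually sent; the per-prompt `unique_prefix` token drawn from the vocabulary appears intended to keep otherwise-identical prompts distinct.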

src/guidellm/objects/statistics.py

Lines changed: 13 additions & 12 deletions
```diff
@@ -243,18 +243,9 @@ def from_request_times(
         """
         if distribution_type == "concurrency":
             # convert to delta changes based on when requests were running
-            time_deltas: dict[float, int] = defaultdict(int)
-            for start, end in requests:
-                time_deltas[start] += 1
-                time_deltas[end] -= 1
-
-            # convert to the events over time measuring concurrency changes
-            events = []
-            active = 0
-
-            for time, delta in sorted(time_deltas.items()):
-                active += delta
-                events.append((time, active))
+            events = [(start, 1) for start, _ in requests] + [
+                (end, -1) for _, end in requests
+            ]
         elif distribution_type == "rate":
             # convert to events for when requests finished
             global_start = min(start for start, _ in requests) if requests else 0
@@ -281,6 +272,16 @@ def from_request_times(
             else:
                 flattened_events.append((time, val))
 
+        if distribution_type == "concurrency":
+            # convert to the events over time measuring concurrency changes
+            events_over_time: list[tuple[float, float]] = []
+            active = 0
+            for time, delta in flattened_events:
+                active += delta  # type: ignore [assignment]
+                events_over_time.append((time, active))
+
+            flattened_events = events_over_time
+
         # convert to value distribution function
         distribution: dict[float, float] = defaultdict(float)
 
```
src/guidellm/utils/cli.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -35,7 +35,7 @@ def __init__(self, *types: click.ParamType):
         self.types = types
         self.name = "".join(t.name for t in types)
 
-    def convert(self, value, param, ctx):
+    def convert(self, value, param, ctx):  # noqa: RET503
         fails = []
         for t in self.types:
             try:
```
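RET503 is Ruff's implicit-return rule, which flags functions that do not explicitly return on every path; the suppression accepts that `convert` only returns from inside the loop when one of the member types succeeds. For context, a hedged, self-contained sketch of this union-of-types pattern (illustrative names, not the project's exact code):

```python
import click

class UnionParamType(click.ParamType):
    """Try several click types in order; the first successful convert wins."""

    def __init__(self, *types: click.ParamType) -> None:
        self.types = types
        self.name = "".join(t.name for t in types)

    def convert(self, value, param, ctx):  # implicit return when all types fail
        fails = []
        for t in self.types:
            try:
                return t.convert(value, param, ctx)
            except click.UsageError as err:
                fails.append(str(err))
        # click's fail() raises, so control never falls off the end normally
        self.fail("; ".join(fails), param, ctx)
```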
