
Commit a99d1bc

Merge branch 'main' into feature/refactor/main
2 parents 62b7317 + 7f2fbeb commit a99d1bc

File tree: 16 files changed, +4385 −115 lines

.github/workflows/development.yml

Lines changed: 0 additions & 53 deletions
```diff
@@ -180,59 +180,6 @@ jobs:
       - name: Run integration tests
         run: npm run test:integration
 
-  build:
-    # Only build if the PR branch is local
-    if: github.event.pull_request.head.repo.full_name == github.repository
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python: ["3.9"]
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python }}
-      - name: Install dependencies
-        run: pip install tox
-      - name: Build the package
-        run: |
-          export GUIDELLM_BUILD_TYPE=dev
-          export GUIDELLM_BUILD_ITERATION=${{ github.event.pull_request.number }}
-          tox -e build
-      - name: Upload build artifacts
-        id: artifact-upload
-        uses: actions/upload-artifact@v4
-        with:
-          name: build-artifacts
-          path: dist/*
-          compression-level: 6
-          if-no-files-found: error
-          retention-days: 30
-      - name: Generate GitHub App token
-        id: app-token
-        uses: actions/create-github-app-token@v1
-        with:
-          app-id: ${{ secrets.GH_NM_REDHAT_AUTOMATION_APP_ID }}
-          private-key: ${{ secrets.GH_NM_REDHAT_AUTOMATION_APP_PRIVATE_KEY }}
-      - name: Comment Install instructions
-        uses: actions/github-script@v7
-        with:
-          github-token: ${{ steps.app-token.outputs.token }}
-          script: |
-            github.rest.issues.createComment({
-              issue_number: context.issue.number,
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              body: `📦 **Build Artifacts Available**
-            The build artifacts (\`.whl\` and \`.tar.gz\`) have been successfully generated and are available for download: ${{ steps.artifact-upload.outputs.artifact-url }}.
-            They will be retained for **up to 30 days**.
-            `
-            })
-
   ui-pr-preview:
     needs: [ui-quality-checks, ui-precommit-checks, ui-unit-tests, ui-integration-tests]
     permissions:
```

.pre-commit-config.yaml

Lines changed: 6 additions & 33 deletions
```diff
@@ -1,11 +1,16 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v6.0.0
     hooks:
       - id: trailing-whitespace
         exclude: ^tests/?.*/assets/.+
       - id: end-of-file-fixer
         exclude: ^tests/?.*/assets/.+
+  - repo: https://github.com/pdm-project/pdm
+    rev: 2.25.6
+    hooks:
+      - id: pdm-lock-check
+        name: check lock file matches pyproject
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.11.7
     hooks:
@@ -14,35 +19,3 @@ repos:
         args: [ --fix, --show-fixes ]
       - id: ruff-format
         name: run formatter
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.15.0
-    hooks:
-      - id: mypy
-        args: [--check-untyped-defs]
-        additional_dependencies:
-          [
-            # main dependencies
-            click,
-            datasets,
-            ftfy,
-            loguru,
-            numpy,
-            pillow,
-            pydantic,
-            pydantic_settings,
-            pyyaml,
-            respx,
-            rich,
-            setuptools,
-            setuptools-git-versioning,
-            transformers,
-
-            # dev dependencies
-            pytest,
-            pydantic_settings,
-
-            # types
-            types-PyYAML,
-            types-requests,
-            types-toml,
-          ]
```
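Note: the new pdm-lock-check hook fails the commit whenever the lock file drifts out of sync with pyproject.toml; the usual remedy is to regenerate the lock (e.g. with `pdm lock`) and commit the result.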

docs/datasets.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -76,6 +76,7 @@ guidellm benchmark \
 - `output_tokens_stdev`: Standard deviation for output tokens. If not supplied and min/max are not specified, no deviation is applied. If not supplied and min/max are specified, a uniform distribution is used.
 - `output_tokens_min`: Minimum number of tokens in outputs. If unset and `output_tokens_stdev` is set, the minimum is 1.
 - `output_tokens_max`: Maximum number of tokens in outputs. If unset and `output_tokens_stdev` is set, the maximum is 5 times the standard deviation.
+- `prefix_tokens`: Number of tokens to share as a prefix across all prompts. This is additive to the prompt tokens distribution, so each request is `prefix_tokens + prompt_tokens_sample()`. If unset, defaults to 0.
 - `samples`: Number of samples to generate (default: 1000). More samples will increase the time taken to generate the dataset before benchmarking, but will also decrease the likelihood of caching requests.
 - `source`: Source text for generation (default: `data:prideandprejudice.txt.gz`). This can be any text file, URL containing a text file, or a compressed text file. The text is used to sample from at a word and punctuation granularity and then combined into a single string of the desired lengths.
 
```

pdm.toml

Lines changed: 2 additions & 0 deletions
```diff
@@ -0,0 +1,2 @@
+[lock]
+format = "pylock"
```
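pylock is the standardized lock-file format defined by PEP 751; with `format = "pylock"` set, PDM writes the pylock.toml added below rather than its native pdm.lock.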

pylock.toml

Lines changed: 3366 additions & 0 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 7 additions & 0 deletions
```diff
@@ -10,6 +10,9 @@ include = ["*"]
 [tool.setuptools.package-data]
 "guidellm.data" = ["*.gz"]
 
+[tool.pdm]
+distribution = true
+
 
 # ************************************************
 # ********** Project Metadata **********
@@ -101,6 +104,10 @@ dev = [
     "mkdocs-linkcheck~=1.0.6",
 ]
 
+# For PEP 735 compliant tools
+[dependency-groups]
+dev = [ "guidellm[dev]" ]
+
 [project.urls]
 homepage = "https://github.com/vllm-project/guidellm"
 source = "https://github.com/vllm-project/guidellm"
```
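`[dependency-groups]` is the PEP 735 table, so tools that understand dependency groups (such as PDM here) can resolve the dev group directly; pointing it back at the existing `guidellm[dev]` extra keeps the two definitions in sync.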

src/guidellm/benchmark/benchmark.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -817,7 +817,9 @@ def from_stats(
             ],
             iter_counts=[req.output_tokens for req in total_with_output_first],
             first_iter_counts=[
-                req.prompt_tokens for req in total_with_output_first
+                # prompt tokens + first token
+                req.prompt_tokens + 1
+                for req in total_with_output_first
             ],
         ),
     ),
```
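Per the inline comment, the `+ 1` reflects that a request's first iteration both processes the prompt and emits the first output token, so its token count is the prompt length plus one rather than the prompt length alone.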

src/guidellm/dataset/synthetic.py

Lines changed: 30 additions & 9 deletions
```diff
@@ -1,6 +1,7 @@
 import json
 import random
 from collections.abc import Iterable, Iterator
+from itertools import cycle
 from pathlib import Path
 from typing import Any, Literal, Optional, Union
 
@@ -25,6 +26,11 @@
 
 
 class SyntheticDatasetConfig(BaseModel):
+    prefix_tokens: int = Field(
+        description="The number of shared prefix tokens to prepend to each prompt.",
+        ge=0,
+        default=0,
+    )
     prompt_tokens: int = Field(
         description="The average number of text tokens generated for prompts.",
         gt=0,
@@ -163,39 +169,54 @@ def __iter__(
         )
         # ensure diff distribution from output tokens
         rand = random.Random(self.random_seed + 2)  # noqa: S311
+        unique_prefix_iter = cycle(self.processor.get_vocab().values())
+
+        prefix_index = rand.randint(0, len(self.text_creator.words))
+        prefix_tokens = self._create_prompt(self.config.prefix_tokens, prefix_index)
 
         for _, prompt_tokens, output_tokens in zip(
             range(self.config.samples),
             prompt_tokens_sampler,
             output_tokens_sampler,
         ):
             start_index = rand.randint(0, len(self.text_creator.words))
+            prompt_text = self.processor.decode(
+                prefix_tokens
+                + self._create_prompt(
+                    prompt_tokens, start_index, next(unique_prefix_iter)
+                ),
+                skip_special_tokens=True,
+            )
             yield {
-                "prompt": self._create_prompt(prompt_tokens, start_index),
-                "prompt_tokens_count": prompt_tokens,
+                "prompt": prompt_text,
+                "prompt_tokens_count": self.config.prefix_tokens + prompt_tokens,
                 "output_tokens_count": output_tokens,
             }
 
-    def _create_prompt(self, prompt_tokens: int, start_index: int) -> str:
+    def _create_prompt(
+        self, prompt_tokens: int, start_index: int, unique_prefix: Optional[int] = None
+    ) -> list[int]:
         if prompt_tokens <= 0:
-            return ""
+            return []
 
         left = start_index
         right = start_index + 4 * prompt_tokens
+        start_tokens = [unique_prefix] if unique_prefix else []
 
         while left < right:
             mid = (left + right) // 2
             test_prompt = self.text_creator.create_text(start_index, mid - start_index)
-            test_tokens = len(self.processor.tokenize(test_prompt))
+            test_tokens = start_tokens + self.processor.encode(test_prompt)
 
-            if test_tokens == prompt_tokens:
-                return test_prompt
-            elif test_tokens < prompt_tokens:
+            if len(test_tokens) == prompt_tokens:
+                return test_tokens
+            elif len(test_tokens) < prompt_tokens:
                 left = mid + 1
             else:
                 right = mid
 
-        return self.text_creator.create_text(start_index, left - start_index)
+        final_text = self.text_creator.create_text(start_index, left - start_index)
+        return start_tokens + self.processor.encode(final_text)
 
 
 class SyntheticDatasetCreator(DatasetCreator):
```
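The heart of `_create_prompt` is a binary search over how many source words to take so that the result tokenizes to exactly the requested count. Below is a self-contained sketch of that idea, with a generic `tokenize` callable and `words` list standing in for the project's processor and text creator (names here are illustrative, not the project's API):

```python
from collections.abc import Callable

def fit_token_count(
    words: list[str],
    start: int,
    target: int,
    tokenize: Callable[[str], list[int]],
) -> list[int]:
    """Binary-search a word span beginning at `start` whose tokenization
    has exactly `target` tokens (or as close as the search can land)."""
    if target <= 0:
        return []
    # Mirror the diff's generous upper bound of 4 * target words.
    left, right = start, start + 4 * target
    while left < right:
        mid = (left + right) // 2
        tokens = tokenize(" ".join(words[start:mid]))
        if len(tokens) == target:
            return tokens
        elif len(tokens) < target:
            left = mid + 1
        else:
            right = mid
    return tokenize(" ".join(words[start:left]))
```

Returning token IDs instead of text (the other half of this change) lets the caller concatenate the shared prefix, the per-prompt token, and the body before a single decode, so `prompt_tokens_count` matches what is actually sent; the per-prompt `unique_prefix` token drawn from the vocabulary appears intended to keep otherwise-identical prompts distinct.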

src/guidellm/objects/statistics.py

Lines changed: 13 additions & 12 deletions
```diff
@@ -243,18 +243,9 @@ def from_request_times(
         """
         if distribution_type == "concurrency":
             # convert to delta changes based on when requests were running
-            time_deltas: dict[float, int] = defaultdict(int)
-            for start, end in requests:
-                time_deltas[start] += 1
-                time_deltas[end] -= 1
-
-            # convert to the events over time measuring concurrency changes
-            events = []
-            active = 0
-
-            for time, delta in sorted(time_deltas.items()):
-                active += delta
-                events.append((time, active))
+            events = [(start, 1) for start, _ in requests] + [
+                (end, -1) for _, end in requests
+            ]
         elif distribution_type == "rate":
             # convert to events for when requests finished
             global_start = min(start for start, _ in requests) if requests else 0
@@ -281,6 +272,16 @@ def from_request_times(
             else:
                 flattened_events.append((time, val))
 
+        if distribution_type == "concurrency":
+            # convert to the events over time measuring concurrency changes
+            events_over_time: list[tuple[float, float]] = []
+            active = 0
+            for time, delta in flattened_events:
+                active += delta  # type: ignore [assignment]
+                events_over_time.append((time, active))
+
+            flattened_events = events_over_time
+
         # convert to value distribution function
         distribution: dict[float, float] = defaultdict(float)
 
```
src/guidellm/utils/cli.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -35,7 +35,7 @@ def __init__(self, *types: click.ParamType):
         self.types = types
         self.name = "".join(t.name for t in types)
 
-    def convert(self, value, param, ctx):
+    def convert(self, value, param, ctx):  # noqa: RET503
         fails = []
         for t in self.types:
             try:
```
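RET503 is Ruff's implicit-return rule, which flags functions that do not explicitly return on every path; the suppression accepts that `convert` only returns from inside the loop when one of the member types succeeds. For context, a hedged, self-contained sketch of this union-of-types pattern (illustrative names, not the project's exact code):

```python
import click

class UnionParamType(click.ParamType):
    """Try several click types in order; the first successful convert wins."""

    def __init__(self, *types: click.ParamType) -> None:
        self.types = types
        self.name = "".join(t.name for t in types)

    def convert(self, value, param, ctx):  # implicit return when all types fail
        fails = []
        for t in self.types:
            try:
                return t.convert(value, param, ctx)
            except click.UsageError as err:
                fails.append(str(err))
        # click's fail() raises, so control never falls off the end normally
        self.fail("; ".join(fails), param, ctx)
```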
