From 855c7940fc40d6a9d1682c8710a8356b721ef8dd Mon Sep 17 00:00:00 2001 From: shen-shanshan <467638484@qq.com> Date: Tue, 19 Aug 2025 03:06:05 +0000 Subject: [PATCH 1/6] Add release note for v0.9.1rc3 Signed-off-by: shen-shanshan <467638484@qq.com> --- README.md | 2 +- README.zh.md | 2 +- docs/source/community/versioning_policy.md | 2 ++ docs/source/conf.py | 4 +-- docs/source/user_guide/release_notes.md | 40 ++++++++++++++++++++++ 5 files changed, 46 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4d4d10735b..7955dd559f 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ Please use the following recommended versions to get started quickly: | Version | Release type | Doc | |------------|--------------|--------------------------------------| |v0.9.2rc1|Latest release candidate|[QuickStart](https://vllm-ascend.readthedocs.io/en/latest/quick_start.html) and [Installation](https://vllm-ascend.readthedocs.io/en/latest/installation.html) for more details| -|v0.9.1rc2|Next stable release|[QuickStart](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/quick_start.html) and [Installation](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/installation.html) for more details| +|v0.9.1rc3|Next stable release|[QuickStart](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/quick_start.html) and [Installation](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/installation.html) for more details| |v0.7.3.post1|Latest stable version|[QuickStart](https://vllm-ascend.readthedocs.io/en/stable/quick_start.html) and [Installation](https://vllm-ascend.readthedocs.io/en/stable/installation.html) for more details| ## Contributing diff --git a/README.zh.md b/README.zh.md index 5feb0fc0d8..3a2aeb67f3 100644 --- a/README.zh.md +++ b/README.zh.md @@ -52,7 +52,7 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP | Version | Release type | Doc | |------------|--------------|--------------------------------------| |v0.9.2rc1| 最新RC版本 |请查看[快速开始](https://vllm-ascend.readthedocs.io/en/latest/quick_start.html)和[安装指南](https://vllm-ascend.readthedocs.io/en/latest/installation.html)了解更多| -|v0.9.1rc2| 下一个正式/稳定版 |[快速开始](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/quick_start.html) and [安装指南](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/installation.html)了解更多| +|v0.9.1rc3| 下一个正式/稳定版 |[快速开始](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/quick_start.html) and [安装指南](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/installation.html)了解更多| |v0.7.3.post1| 最新正式/稳定版本 |请查看[快速开始](https://vllm-ascend.readthedocs.io/en/stable/quick_start.html)和[安装指南](https://vllm-ascend.readthedocs.io/en/stable/installation.html)了解更多| ## 贡献 diff --git a/docs/source/community/versioning_policy.md b/docs/source/community/versioning_policy.md index 8d4a6c6dc4..62508c374e 100644 --- a/docs/source/community/versioning_policy.md +++ b/docs/source/community/versioning_policy.md @@ -23,6 +23,7 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | vLLM Ascend | vLLM | Python | Stable CANN | PyTorch/torch_npu | MindIE Turbo | |-------------|--------------|------------------|-------------|--------------------|--------------| | v0.9.2rc1 | v0.9.2 | >= 3.9, < 3.12 | 8.1.RC1 | 2.5.1 / 2.5.1.post1.dev20250619 | | +| v0.9.1rc3 | v0.9.1 | >= 3.9, < 3.12 | 8.1.RC1 | 2.5.1 / 2.5.1.post1 | | | v0.9.1rc2 | v0.9.1 | >= 3.9, < 3.12 | 8.1.RC1 | 2.5.1 / 2.5.1.post1| | | v0.9.1rc1 | v0.9.1 | >= 3.9, < 3.12 | 8.1.RC1 | 2.5.1 / 2.5.1.post1.dev20250528 | | | v0.9.0rc2 | v0.9.0 | >= 3.9, < 3.12 | 8.1.RC1 | 2.5.1 / 2.5.1 | | @@ -38,6 
+39,7 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | Date | Event | |------------|-------------------------------------------| +| 2025.08.20 | Release candidates, v0.9.1rc3 | | 2025.08.06 | Release candidates, v0.9.1rc2 | | 2025.07.11 | Release candidates, v0.9.2rc1 | | 2025.06.22 | Release candidates, v0.9.1rc1 | diff --git a/docs/source/conf.py b/docs/source/conf.py index 66aa63b563..d7f2ca3e15 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -69,10 +69,10 @@ # the branch of vllm-ascend, used in vllm-ascend clone and image tag # - main branch: 'main' # - vX.Y.Z branch: latest vllm-ascend release tag - 'vllm_ascend_version': 'v0.9.1rc2', + 'vllm_ascend_version': 'v0.9.1rc3', # the newest release version of vllm-ascend and matched vLLM, used in pip install. # This value should be updated when cut down release. - 'pip_vllm_ascend_version': "0.9.1rc2", + 'pip_vllm_ascend_version': "0.9.1rc3", 'pip_vllm_version': "0.9.1", # CANN image tag 'cann_image_tag': "8.2.rc1-910b-ubuntu22.04-py3.11", diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 7d9118fdb3..2c1898f097 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -1,5 +1,45 @@ # Release note +## v0.9.1rc3 - 2025.08.20 + +This is the 3rd release candidate of v0.9.1 for vLLM Ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/) to get started. + +### Highlights + +- MTP supports V1 scheduler [#2371](https://github.com/vllm-project/vllm-ascend/pull/2371) +- Add LMhead TP communication groups [#1956](https://github.com/vllm-project/vllm-ascend/pull/1956) + +### Graph mode improvement + +- Fix torchair runtime error caused by configuration mismatches and `.kv_cache_bytes` file missing [#2312](https://github.com/vllm-project/vllm-ascend/pull/2312) + +### Ops improvement + +- Move `with_prefill` allreduce from cpu to npu [#2230](https://github.com/vllm-project/vllm-ascend/pull/2230) + +### Core + +- Fix `grammar_bitmask` IndexError caused by outdated `apply_grammar_bitmask` method [#2314](https://github.com/vllm-project/vllm-ascend/pull/2314) +- Remove `chunked_prefill_for_mla` [#2177](https://github.com/vllm-project/vllm-ascend/pull/2177) +- Fix bugs and refactor cached mask generation logic [#2326](https://github.com/vllm-project/vllm-ascend/pull/2326) +- Fix configuration check logic about ascend scheduler [#2327](https://github.com/vllm-project/vllm-ascend/pull/2327) +- Cancel the verification between deepseek-mtp and non-ascend scheduler in disaggregated-prefill deployment [#2368](https://github.com/vllm-project/vllm-ascend/pull/2368) +- Fix protobuf version in Dockerfile [#2306](https://github.com/vllm-project/vllm-ascend/pull/2306) +- Fix incorrect req block length in ascend scheduler [#2394](https://github.com/vllm-project/vllm-ascend/pull/2394) +- Fix header include issue in rope [#2398](https://github.com/vllm-project/vllm-ascend/pull/2398) +- Fix mtp config bug [#2412](https://github.com/vllm-project/vllm-ascend/pull/2412) +- Fix error info and adapt `attn_metadata` refactor [#2402](https://github.com/vllm-project/vllm-ascend/pull/2402) +- Remove explicit ATB extension registration [#1921](https://github.com/vllm-project/vllm-ascend/pull/1921) + +### Docs + +- Add document for deepseek large EP [#2339](https://github.com/vllm-project/vllm-ascend/pull/2339) + +### Known Issues + +- `test_aclgraph.py` failed with `"full_cuda_graph": True` on A2 (910B1)
[#2182](https://github.com/vllm-project/vllm-ascend/issues/2182) +- Qwen3 MoE aclgraph mode with tp failed when enabling ep due to bincount error [#2226](https://github.com/vllm-project/vllm-ascend/issues/2226) + ## v0.9.1rc2 - 2025.08.06 This is the 2nd release candidate of v0.9.1 for vLLM Ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/) to get started. From 1e7953d2118aeea7a7a19eb4c4732a50ad0ef470 Mon Sep 17 00:00:00 2001 From: shen-shanshan <467638484@qq.com> Date: Thu, 21 Aug 2025 11:23:22 +0000 Subject: [PATCH 2/6] update Signed-off-by: shen-shanshan <467638484@qq.com> --- docs/source/user_guide/release_notes.md | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 2c1898f097..204ebaa26a 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -4,32 +4,22 @@ This is the 3rd release candidate of v0.9.1 for vLLM Ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/) to get started. -### Highlights +### Core - MTP supports V1 scheduler [#2371](https://github.com/vllm-project/vllm-ascend/pull/2371) - Add LMhead TP communication groups [#1956](https://github.com/vllm-project/vllm-ascend/pull/1956) - -### Graph mode improvement - -- Fix torchair runtime error caused by configuration mismatches and `.kv_cache_bytes` file missing [#2312](https://github.com/vllm-project/vllm-ascend/pull/2312) - -### Ops improvement - -- Move `with_prefill` allreduce from cpu to npu [#2230](https://github.com/vllm-project/vllm-ascend/pull/2230) - -### Core - - Fix `grammar_bitmask` IndexError caused by outdated `apply_grammar_bitmask` method [#2314](https://github.com/vllm-project/vllm-ascend/pull/2314) - Remove `chunked_prefill_for_mla` [#2177](https://github.com/vllm-project/vllm-ascend/pull/2177) - Fix bugs and refactor cached mask generation logic [#2326](https://github.com/vllm-project/vllm-ascend/pull/2326) - Fix configuration check logic about ascend scheduler [#2327](https://github.com/vllm-project/vllm-ascend/pull/2327) - Cancel the verification between deepseek-mtp and non-ascend scheduler in disaggregated-prefill deployment [#2368](https://github.com/vllm-project/vllm-ascend/pull/2368) -- Fix protobuf version in Dockerfile [#2306](https://github.com/vllm-project/vllm-ascend/pull/2306) +- Fix issue that caused failures with ray distributed backend [#2306](https://github.com/vllm-project/vllm-ascend/pull/2306) - Fix incorrect req block length in ascend scheduler [#2394](https://github.com/vllm-project/vllm-ascend/pull/2394) - Fix header include issue in rope [#2398](https://github.com/vllm-project/vllm-ascend/pull/2398) - Fix mtp config bug [#2412](https://github.com/vllm-project/vllm-ascend/pull/2412) - Fix error info and adapt `attn_metadata` refactor [#2402](https://github.com/vllm-project/vllm-ascend/pull/2402) -- Remove explicit ATB extension registration [#1921](https://github.com/vllm-project/vllm-ascend/pull/1921) +- Fix torchair runtime error caused by configuration mismatches and `.kv_cache_bytes` file missing [#2312](https://github.com/vllm-project/vllm-ascend/pull/2312) +- Move `with_prefill` allreduce from cpu to npu [#2230](https://github.com/vllm-project/vllm-ascend/pull/2230) ### Docs - Add document for deepseek large EP [#2339](https://github.com/vllm-project/vllm-ascend/pull/2339) @@ -38,7 +28,6 @@ This is the 3rd release candidate of v0.9.1 for vLLM Ascend.
Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/) to get started. ### Known Issues - `test_aclgraph.py` failed with `"full_cuda_graph": True` on A2 (910B1) [#2182](https://github.com/vllm-project/vllm-ascend/issues/2182) -- Qwen3 MoE aclgraph mode with tp failed when enabling ep due to bincount error [#2226](https://github.com/vllm-project/vllm-ascend/issues/2226) ## v0.9.1rc2 - 2025.08.06 This is the 2nd release candidate of v0.9.1 for vLLM Ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/) to get started. From 01916762cdbb256d4af89e297da6f141dc81355f Mon Sep 17 00:00:00 2001 From: shen-shanshan <467638484@qq.com> Date: Fri, 22 Aug 2025 06:27:16 +0000 Subject: [PATCH 3/6] update Signed-off-by: shen-shanshan <467638484@qq.com> --- docs/source/user_guide/release_notes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 204ebaa26a..679b51bb74 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -8,6 +8,7 @@ This is the 3rd release candidate of v0.9.1 for vLLM Ascend. Please follow the [ - MTP supports V1 scheduler [#2371](https://github.com/vllm-project/vllm-ascend/pull/2371) - Add LMhead TP communication groups [#1956](https://github.com/vllm-project/vllm-ascend/pull/1956) +- Fix the bug that qwen3 moe doesn't work with aclgraph [#2478](https://github.com/vllm-project/vllm-ascend/pull/2478) - Fix `grammar_bitmask` IndexError caused by outdated `apply_grammar_bitmask` method [#2314](https://github.com/vllm-project/vllm-ascend/pull/2314) - Remove `chunked_prefill_for_mla` [#2177](https://github.com/vllm-project/vllm-ascend/pull/2177) - Fix bugs and refactor cached mask generation logic [#2326](https://github.com/vllm-project/vllm-ascend/pull/2326) - Fix configuration check logic about ascend scheduler [#2327](https://github.com/vllm-project/vllm-ascend/pull/2327) - Cancel the verification between deepseek-mtp and non-ascend scheduler in disaggregated-prefill deployment [#2368](https://github.com/vllm-project/vllm-ascend/pull/2368) From a604bba4e9e5522dbb73bf024e9a53ab002caae0 Mon Sep 17 00:00:00 2001 From: shen-shanshan <467638484@qq.com> Date: Fri, 22 Aug 2025 06:48:41 +0000 Subject: [PATCH 4/6] update Signed-off-by: shen-shanshan <467638484@qq.com> --- docs/source/community/versioning_policy.md | 2 +- docs/source/faqs.md | 2 +- docs/source/user_guide/release_notes.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/community/versioning_policy.md b/docs/source/community/versioning_policy.md index 62508c374e..3c2d9a6edb 100644 --- a/docs/source/community/versioning_policy.md +++ b/docs/source/community/versioning_policy.md @@ -39,7 +39,7 @@ Following is the Release Compatibility Matrix for vLLM Ascend Plugin: | Date | Event | |------------|-------------------------------------------| -| 2025.08.20 | Release candidates, v0.9.1rc3 | +| 2025.08.22 | Release candidates, v0.9.1rc3 | | 2025.08.06 | Release candidates, v0.9.1rc2 | | 2025.07.11 | Release candidates, v0.9.2rc1 | | 2025.06.22 | Release candidates, v0.9.1rc1 | diff --git a/docs/source/faqs.md b/docs/source/faqs.md index 20e6d8570c..a6d4c40163 100644 --- a/docs/source/faqs.md +++ b/docs/source/faqs.md @@ -3,7 +3,7 @@ ## Version Specific FAQs - [[v0.7.3.post1] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/1007) -- [[v0.9.0rc2] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/1115) +- [[v0.9.1rc3] FAQ & Feedback](https://github.com/vllm-project/vllm-ascend/issues/2410) ## General FAQs diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 679b51bb74..88d8b7d0a0 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -1,6 +1,6 @@ #
Release note -## v0.9.1rc3 - 2025.08.20 +## v0.9.1rc3 - 2025.08.22 This is the 3rd release candidate of v0.9.1 for vLLM Ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/v0.9.1-dev/) to get started. From 67e89ae60022ed3456215f447f026edb2641c268 Mon Sep 17 00:00:00 2001 From: shen-shanshan <467638484@qq.com> Date: Fri, 22 Aug 2025 06:58:41 +0000 Subject: [PATCH 5/6] update Signed-off-by: shen-shanshan <467638484@qq.com> --- docs/source/user_guide/release_notes.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 88d8b7d0a0..0c3b9503cd 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -38,7 +38,7 @@ This is the 2nd release candidate of v0.9.1 for vLLM Ascend. Please follow the [ * Dynamic EPLB support in [#1943](https://github.com/vllm-project/vllm-ascend/pull/1943) * Disaggregated Prefilling support for V1 Engine and improvement, continued development and stabilization of the disaggregated prefill feature, including performance enhancements and bug fixes for single-machine setups:[#1953](https://github.com/vllm-project/vllm-ascend/pull/1953) [#1612](https://github.com/vllm-project/vllm-ascend/pull/1612) [#1361](https://github.com/vllm-project/vllm-ascend/pull/1361) [#1746](https://github.com/vllm-project/vllm-ascend/pull/1746) [#1552](https://github.com/vllm-project/vllm-ascend/pull/1552) [#1801](https://github.com/vllm-project/vllm-ascend/pull/1801) [#2083](https://github.com/vllm-project/vllm-ascend/pull/2083) [#1989](https://github.com/vllm-project/vllm-ascend/pull/1989) -### Models improvement: +### Models improvement * DeepSeek DeepSeek DBO support and improvement: [#1285](https://github.com/vllm-project/vllm-ascend/pull/1285) [#1291](https://github.com/vllm-project/vllm-ascend/pull/1291) [#1328](https://github.com/vllm-project/vllm-ascend/pull/1328) [#1420](https://github.com/vllm-project/vllm-ascend/pull/1420) [#1445](https://github.com/vllm-project/vllm-ascend/pull/1445) [#1589](https://github.com/vllm-project/vllm-ascend/pull/1589) [#1759](https://github.com/vllm-project/vllm-ascend/pull/1759) [#1827](https://github.com/vllm-project/vllm-ascend/pull/1827) [#2093](https://github.com/vllm-project/vllm-ascend/pull/2093) * DeepSeek MTP improvement and bugfix: [#1214](https://github.com/vllm-project/vllm-ascend/pull/1214) [#943](https://github.com/vllm-project/vllm-ascend/pull/943) [#1584](https://github.com/vllm-project/vllm-ascend/pull/1584) [#1473](https://github.com/vllm-project/vllm-ascend/pull/1473) [#1294](https://github.com/vllm-project/vllm-ascend/pull/1294) [#1632](https://github.com/vllm-project/vllm-ascend/pull/1632) [#1694](https://github.com/vllm-project/vllm-ascend/pull/1694) [#1840](https://github.com/vllm-project/vllm-ascend/pull/1840) [#2076](https://github.com/vllm-project/vllm-ascend/pull/2076) [#1990](https://github.com/vllm-project/vllm-ascend/pull/1990) [#2019](https://github.com/vllm-project/vllm-ascend/pull/2019) * Qwen3 MoE support improvement and bugfix around graph mode and DP: [#1940](https://github.com/vllm-project/vllm-ascend/pull/1940) [#2006](https://github.com/vllm-project/vllm-ascend/pull/2006) [#1832](https://github.com/vllm-project/vllm-ascend/pull/1832) @@ -47,7 +47,7 @@ This is the 2nd release candidate of v0.9.1 for vLLM Ascend. 
Please follow the [ * Qwen2.5 VL improvement via mrope/padding mechanism improvement: [#1261](https://github.com/vllm-project/vllm-ascend/pull/1261) [#1705](https://github.com/vllm-project/vllm-ascend/pull/1705) [#1929](https://github.com/vllm-project/vllm-ascend/pull/1929) [#2007](https://github.com/vllm-project/vllm-ascend/pull/2007) * Ray: Fix the device error when using ray and add initialize_cache and improve warning info: [#1234](https://github.com/vllm-project/vllm-ascend/pull/1234) [#1501](https://github.com/vllm-project/vllm-ascend/pull/1501) -### Graph mode improvement: +### Graph mode improvement * Fix DeepSeek with deepseek with mc2 in [#1269](https://github.com/vllm-project/vllm-ascend/pull/1269) * Fix accuracy problem for deepseek V3/R1 models with torchair graph in long sequence predictions in [#1332](https://github.com/vllm-project/vllm-ascend/pull/1332) * Fix torchair_graph_batch_sizes bug in [#1570](https://github.com/vllm-project/vllm-ascend/pull/1570) @@ -73,13 +73,13 @@ This is the 2nd release candidate of v0.9.1 for vLLM Ascend. Please follow the [ * Fix DeepSeek OOM issue in extreme `--gpu-memory-utilization` scenario in [#1829](https://github.com/vllm-project/vllm-ascend/pull/1829) * Turn off aclgraph when enabling TorchAir in [#2154](https://github.com/vllm-project/vllm-ascend/pull/2154) -### Ops improvement: +### Ops improvement * add custom ascendc kernel vocabparallelembedding [#796](https://github.com/vllm-project/vllm-ascend/pull/796) * fix rope sin/cos cache bug in [#1267](https://github.com/vllm-project/vllm-ascend/pull/1267) * Refactoring AscendFusedMoE (#1229) in [#1264](https://github.com/vllm-project/vllm-ascend/pull/1264) * Use fused ops npu_top_k_top_p in sampler [#1920](https://github.com/vllm-project/vllm-ascend/pull/1920) -### Core: +### Core * Upgrade CANN to 8.2.rc1 in [#2036](https://github.com/vllm-project/vllm-ascend/pull/2036) * Upgrade torch-npu to 2.5.1.post1 in [#2135](https://github.com/vllm-project/vllm-ascend/pull/2135) * Upgrade python to 3.11 in [#2136](https://github.com/vllm-project/vllm-ascend/pull/2136) @@ -121,7 +121,7 @@ This is the 2nd release candidate of v0.9.1 for vLLM Ascend. Please follow the [ * Add with_prefill cpu allreduce to handle D-node recomputatio in [#2129](https://github.com/vllm-project/vllm-ascend/pull/2129) * Add D2H & initRoutingQuantV2 to improve prefill perf in [#2038](https://github.com/vllm-project/vllm-ascend/pull/2038) -### Docs: +### Docs * Provide an e2e guide for execute duration profiling [#1113](https://github.com/vllm-project/vllm-ascend/pull/1113) * Add Referer header for CANN package download url. [#1192](https://github.com/vllm-project/vllm-ascend/pull/1192) * Add reinstall instructions doc [#1370](https://github.com/vllm-project/vllm-ascend/pull/1370) @@ -150,7 +150,7 @@ This is the 1st release candidate of v0.9.2 for vLLM Ascend. Please follow the [ - Fix the accuracy problem with deploy models with parallel parameters. [#1678](https://github.com/vllm-project/vllm-ascend/pull/1678) - The pre-built wheel package now requires lower version of glibc. Users can use it by `pip install vllm-ascend` directly. [#1582](https://github.com/vllm-project/vllm-ascend/pull/1582) -## Other +### Other - Official doc has been updated for better read experience. For example, more deployment tutorials are added, user/developer docs are updated. More guide will coming soon. - Fix accuracy problem for deepseek V3/R1 models with torchair graph in long sequence predictions. 
[#1331](https://github.com/vllm-project/vllm-ascend/pull/1331) - A new env variable `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` has been added. It enables the fused allgather-experts kernel for Deepseek V3/R1 models. The default value is `0`. [#1335](https://github.com/vllm-project/vllm-ascend/pull/1335) From 7a706e83efc713862699be714950cbbb1903664d Mon Sep 17 00:00:00 2001 From: shen-shanshan <467638484@qq.com> Date: Fri, 22 Aug 2025 07:02:08 +0000 Subject: [PATCH 6/6] update Signed-off-by: shen-shanshan <467638484@qq.com> --- docs/source/user_guide/release_notes.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 0c3b9503cd..a1bbbe93ba 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -38,7 +38,7 @@ This is the 2nd release candidate of v0.9.1 for vLLM Ascend. Please follow the [ * Dynamic EPLB support in [#1943](https://github.com/vllm-project/vllm-ascend/pull/1943) * Disaggregated Prefilling support for V1 Engine and improvement, continued development and stabilization of the disaggregated prefill feature, including performance enhancements and bug fixes for single-machine setups:[#1953](https://github.com/vllm-project/vllm-ascend/pull/1953) [#1612](https://github.com/vllm-project/vllm-ascend/pull/1612) [#1361](https://github.com/vllm-project/vllm-ascend/pull/1361) [#1746](https://github.com/vllm-project/vllm-ascend/pull/1746) [#1552](https://github.com/vllm-project/vllm-ascend/pull/1552) [#1801](https://github.com/vllm-project/vllm-ascend/pull/1801) [#2083](https://github.com/vllm-project/vllm-ascend/pull/2083) [#1989](https://github.com/vllm-project/vllm-ascend/pull/1989) -### Models improvement +### Models improvement: * DeepSeek DeepSeek DBO support and improvement: [#1285](https://github.com/vllm-project/vllm-ascend/pull/1285) [#1291](https://github.com/vllm-project/vllm-ascend/pull/1291) [#1328](https://github.com/vllm-project/vllm-ascend/pull/1328) [#1420](https://github.com/vllm-project/vllm-ascend/pull/1420) [#1445](https://github.com/vllm-project/vllm-ascend/pull/1445) [#1589](https://github.com/vllm-project/vllm-ascend/pull/1589) [#1759](https://github.com/vllm-project/vllm-ascend/pull/1759) [#1827](https://github.com/vllm-project/vllm-ascend/pull/1827) [#2093](https://github.com/vllm-project/vllm-ascend/pull/2093) * DeepSeek MTP improvement and bugfix: [#1214](https://github.com/vllm-project/vllm-ascend/pull/1214) [#943](https://github.com/vllm-project/vllm-ascend/pull/943) [#1584](https://github.com/vllm-project/vllm-ascend/pull/1584) [#1473](https://github.com/vllm-project/vllm-ascend/pull/1473) [#1294](https://github.com/vllm-project/vllm-ascend/pull/1294) [#1632](https://github.com/vllm-project/vllm-ascend/pull/1632) [#1694](https://github.com/vllm-project/vllm-ascend/pull/1694) [#1840](https://github.com/vllm-project/vllm-ascend/pull/1840) [#2076](https://github.com/vllm-project/vllm-ascend/pull/2076) [#1990](https://github.com/vllm-project/vllm-ascend/pull/1990) [#2019](https://github.com/vllm-project/vllm-ascend/pull/2019) * Qwen3 MoE support improvement and bugfix around graph mode and DP: [#1940](https://github.com/vllm-project/vllm-ascend/pull/1940) [#2006](https://github.com/vllm-project/vllm-ascend/pull/2006) [#1832](https://github.com/vllm-project/vllm-ascend/pull/1832) @@ -47,7 +47,7 @@ This is the 2nd release candidate of v0.9.1 for vLLM Ascend. 
Please follow the [ * Qwen2.5 VL improvement via mrope/padding mechanism improvement: [#1261](https://github.com/vllm-project/vllm-ascend/pull/1261) [#1705](https://github.com/vllm-project/vllm-ascend/pull/1705) [#1929](https://github.com/vllm-project/vllm-ascend/pull/1929) [#2007](https://github.com/vllm-project/vllm-ascend/pull/2007) * Ray: Fix the device error when using ray and add initialize_cache and improve warning info: [#1234](https://github.com/vllm-project/vllm-ascend/pull/1234) [#1501](https://github.com/vllm-project/vllm-ascend/pull/1501) -### Graph mode improvement +### Graph mode improvement: * Fix DeepSeek with deepseek with mc2 in [#1269](https://github.com/vllm-project/vllm-ascend/pull/1269) * Fix accuracy problem for deepseek V3/R1 models with torchair graph in long sequence predictions in [#1332](https://github.com/vllm-project/vllm-ascend/pull/1332) * Fix torchair_graph_batch_sizes bug in [#1570](https://github.com/vllm-project/vllm-ascend/pull/1570) @@ -73,13 +73,13 @@ This is the 2nd release candidate of v0.9.1 for vLLM Ascend. Please follow the [ * Fix DeepSeek OOM issue in extreme `--gpu-memory-utilization` scenario in [#1829](https://github.com/vllm-project/vllm-ascend/pull/1829) * Turn off aclgraph when enabling TorchAir in [#2154](https://github.com/vllm-project/vllm-ascend/pull/2154) -### Ops improvement +### Ops improvement: * add custom ascendc kernel vocabparallelembedding [#796](https://github.com/vllm-project/vllm-ascend/pull/796) * fix rope sin/cos cache bug in [#1267](https://github.com/vllm-project/vllm-ascend/pull/1267) * Refactoring AscendFusedMoE (#1229) in [#1264](https://github.com/vllm-project/vllm-ascend/pull/1264) * Use fused ops npu_top_k_top_p in sampler [#1920](https://github.com/vllm-project/vllm-ascend/pull/1920) -### Core +### Core: * Upgrade CANN to 8.2.rc1 in [#2036](https://github.com/vllm-project/vllm-ascend/pull/2036) * Upgrade torch-npu to 2.5.1.post1 in [#2135](https://github.com/vllm-project/vllm-ascend/pull/2135) * Upgrade python to 3.11 in [#2136](https://github.com/vllm-project/vllm-ascend/pull/2136) @@ -121,7 +121,7 @@ This is the 2nd release candidate of v0.9.1 for vLLM Ascend. Please follow the [ * Add with_prefill cpu allreduce to handle D-node recomputatio in [#2129](https://github.com/vllm-project/vllm-ascend/pull/2129) * Add D2H & initRoutingQuantV2 to improve prefill perf in [#2038](https://github.com/vllm-project/vllm-ascend/pull/2038) -### Docs +### Docs: * Provide an e2e guide for execute duration profiling [#1113](https://github.com/vllm-project/vllm-ascend/pull/1113) * Add Referer header for CANN package download url. [#1192](https://github.com/vllm-project/vllm-ascend/pull/1192) * Add reinstall instructions doc [#1370](https://github.com/vllm-project/vllm-ascend/pull/1370)
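The `docs/source/conf.py` hunk in PATCH 1/6 above keeps three related fields in step: the tag form (`vllm_ascend_version`), the pip form (`pip_vllm_ascend_version`), and the matching vLLM release (`pip_vllm_version`). A minimal sketch of the kind of consistency check a release manager might run when bumping these fields — the values are copied from the diff, but the script itself is illustrative and not part of the repository:

```python
# Illustrative sanity check for the version fields bumped in docs/source/conf.py.
# The values mirror the PATCH 1/6 hunk above; the script itself is hypothetical.
release_fields = {
    'vllm_ascend_version': 'v0.9.1rc3',     # branch/tag form, with a leading 'v'
    'pip_vllm_ascend_version': "0.9.1rc3",  # pip form, no leading 'v'
    'pip_vllm_version': "0.9.1",            # the vLLM release this RC is built against
}

# The tag and pip forms should differ only by the leading 'v'.
assert release_fields['vllm_ascend_version'] == 'v' + release_fields['pip_vllm_ascend_version']

# The vllm-ascend RC should be cut against the matching vLLM minor release.
assert release_fields['pip_vllm_ascend_version'].startswith(release_fields['pip_vllm_version'])

print("conf.py version fields are consistent")
```

Run against the values above, it simply prints the confirmation; the same tag/pip/vLLM relationship is what the README tables and the versioning policy matrix record by hand.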