Skip to content

Commit c56b2a7

Browse files
author
youxiao
committed
Merge remote-tracking branch 'upstream/main' into dev1
2 parents 8241e1e + b375c66 commit c56b2a7

File tree

40 files changed

+1760
-224
lines changed

40 files changed

+1760
-224
lines changed

.github/workflows/ci.yml

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,25 @@ on:
55
branches: [ "main" ]
66
pull_request:
77
branches: [ "main" ]
8+
types: [opened, synchronize, reopened, labeled]
89

910
jobs:
11+
# Adds the "run-ci" label to a pull request through the gh CLI. The job runs
# only when the triggering event's action is 'opened'; it is granted
# pull-requests: write and passes github.token to gh as GH_TOKEN.
init-label:
12+
if: github.event.action == 'opened'
13+
runs-on: ubuntu-latest
14+
permissions:
15+
pull-requests: write
16+
steps:
17+
- name: Add run-ci label
18+
env:
19+
GH_TOKEN: ${{ github.token }}
20+
run: gh pr edit ${{ github.event.pull_request.number }} --add-label "run-ci"
21+
1022
build:
23+
if: >-
24+
github.event_name == 'push' ||
25+
github.event.action == 'opened' ||
26+
contains(github.event.pull_request.labels.*.name, 'run-ci')
1127
runs-on: ubuntu-22.04
1228
strategy:
1329
matrix:
@@ -188,7 +204,71 @@ jobs:
188204
python scripts/test_tensor_api.py -n 1
189205
shell: bash
190206

207+
test-sglang-integration:
208+
needs: build-flags
209+
if: >-
210+
github.event_name == 'push' ||
211+
github.event.action == 'opened' ||
212+
contains(github.event.pull_request.labels.*.name, 'run-ci')
213+
runs-on: ubuntu-latest
214+
env:
215+
tone_user_name: ${{ secrets.TONE_USER_NAME }}
216+
steps:
217+
- name: trigger T-one test
218+
if: ${{ env.tone_user_name != '' }}
219+
run: |
220+
curl -L -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts > artifact.json
221+
cat artifact.json
222+
artifact_id=$(jq -r ".artifacts[] | select(.name | contains(\"py312\") ) | .id" artifact.json)
223+
signature="${{ secrets.TONE_USER_NAME }}|${{ secrets.TONE_USER_TOKEN }}|$(python3 -c "import time;print(time.time())")"
224+
signature="$(python3 -c "import base64;print(base64.b64encode(\"$signature\".encode('utf-8')).decode('utf-8'))")"
225+
curl -s -H 'Content-Type: application/json' -X POST -d "{\"workspace\":\"mooncake_test\",\"project\":\"mooncake-ci\",\"template\":\"mooncake-ci-test\",\"name\":\"mooncake-ci-${{ github.sha }}\",\"username\":\"${{ secrets.TONE_USER_NAME }}\",\"env_ifs\":\" \",\"env_info\":\"ARTIFACT_ID=${artifact_id} GIT_REPO=${{ github.repository }}\",\"signature\":\"$signature\"}" https://tone.openanolis.cn/api/job/create/ > job.json
226+
if [ "$(jq .code job.json)" == 200 ]; then
227+
echo "job created"
228+
else
229+
echo "job create failed"
230+
exit 1
231+
fi
232+
job_id=$(jq .data.id job.json)
233+
echo "check job status here and remember to cancel it before restart the job !"
234+
echo "job_url: https://tone.openanolis.cn/ws/gclfnh19/test_result/${job_id}"
235+
echo "job_id=${job_id}" >> $GITHUB_ENV
236+
shell: bash
237+
238+
- name: query job results
239+
if: ${{ env.tone_user_name != '' }}
240+
run: |
241+
time=0
242+
while true; do
243+
if [ $time -gt 720 ]; then
244+
echo "timeout"
245+
exit 1
246+
fi
247+
signature="${{ secrets.TONE_USER_NAME }}|${{ secrets.TONE_USER_TOKEN }}|$(python3 -c "import time;print(time.time())")"
248+
signature="$(python3 -c "import base64;print(base64.b64encode(\"$signature\".encode('utf-8')).decode('utf-8'))")"
249+
curl -s -H 'Content-Type: application/json' -X POST -d "{\"username\":\"${{ secrets.TONE_USER_NAME }}\", \"signature\":\"$signature\", \"job_id\": \"${job_id}\"}" https://tone.openanolis.cn/api/job/query/ > job_status.json
250+
if ! [ "$(jq .code job_status.json)" == 200 ]; then
251+
echo "job query failed"
252+
exit 1
253+
fi
254+
job_status=$(jq .data.job_second_state job_status.json)
255+
if [[ $job_status =~ "pass" ]]; then
256+
echo "job successful !"
257+
exit 0
258+
elif [[ $job_status =~ "fail" ]] ; then
259+
echo "job failed or stopped !"
260+
exit 1
261+
fi
262+
time=$(( time + 1))
263+
sleep 10
264+
done
265+
shell: bash
266+
191267
build-flags:
268+
if: >-
269+
github.event_name == 'push' ||
270+
github.event.action == 'opened' ||
271+
contains(github.event.pull_request.labels.*.name, 'run-ci')
192272
runs-on: ubuntu-22.04
193273
strategy:
194274
matrix:
@@ -331,6 +411,10 @@ jobs:
331411

332412
build-docker:
333413
name: Build Docker Image
414+
if: >-
415+
github.event_name == 'push' ||
416+
github.event.action == 'opened' ||
417+
contains(github.event.pull_request.labels.*.name, 'run-ci')
334418
runs-on: ubuntu-22.04
335419
steps:
336420
- uses: actions/checkout@v4
@@ -343,6 +427,10 @@ jobs:
343427

344428
spell-check:
345429
name: Spell Check with Typos
430+
if: >-
431+
github.event_name == 'push' ||
432+
github.event.action == 'opened' ||
433+
contains(github.event.pull_request.labels.*.name, 'run-ci')
346434
runs-on: ubuntu-22.04
347435
steps:
348436
- name: Checkout Actions Repository
@@ -352,6 +440,10 @@ jobs:
352440

353441
clang-format:
354442
name: Check code format
443+
if: >-
444+
github.event_name == 'push' ||
445+
github.event.action == 'opened' ||
446+
contains(github.event.pull_request.labels.*.name, 'run-ci')
355447
runs-on: ubuntu-22.04
356448
steps:
357449
- name: Checkout Actions Repository

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,14 @@ The above presents two samples from our trace dataset. The trace includes the ti
243243
Please kindly cite our paper if you find the paper or the traces are useful:
244244

245245
```bibtex
246+
@article{qin2024mooncake,
247+
title={Mooncake: A kvcache-centric disaggregated architecture for llm serving},
248+
author={Qin, Ruoyu and Li, Zheming and He, Weiran and Cui, Jialei and Tang, Heyi and Ren, Feng and Ma, Teng and Cai, Shangming and Zhang, Yineng and Zhang, Mingxing and others},
249+
journal={ACM Transactions on Storage},
250+
year={2024},
251+
publisher={ACM New York, NY}
252+
}
253+
246254
@article{qin2024mooncake,
247255
title = {Mooncake: A KVCache-centric Disaggregated Architecture for LLM Serving},
248256
author = {Ruoyu Qin, Zheming Li, Weiran He, Mingxing Zhang, Yongwei Wu, Weimin Zheng, and Xinran Xu},

doc/en/mooncake-store.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,3 +732,13 @@ We provide a reference example `distributed_object_store_provider.py`, located i
732732

733733
#### C++ Usage Example
734734
The C++ API of Mooncake Store provides more low-level control capabilities. We provide a reference example `client_integration_test`, located in the `mooncake-store/tests` directory. To check if the related components are properly installed, you can run etcd and Master Service (`mooncake_master`) on the same server, and then execute this C++ program (located in the `build/mooncake-store/tests` directory). It should output a successful test result.
735+
736+
## Version Management Policy
737+
738+
The current version of Mooncake Store is defined in [`CMakeLists.txt`](../../mooncake-store/CMakeLists.txt) as `project(MooncakeStore VERSION 2.0.0)`.
739+
740+
When to bump the version:
741+
742+
* **Major version (X.0.0)**: For breaking API changes, major architectural changes, or significant new features that affect backward compatibility
743+
* **Minor version (0.X.0)**: For new features, API additions, or notable improvements that maintain backward compatibility
744+
* **Patch version (0.0.X)**: For bug fixes, performance optimizations, or minor improvements that don't affect the API

doc/zh/mooncake-store.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,3 +738,13 @@ python -m mooncake.mooncake_store_service --config=[config_path] --port=8081
738738
#### C++ 使用示例
739739

740740
Mooncake Store 的 C++ API 提供了更底层的控制能力。我们提供一个参考样例 `client_integration_test`,其位于 `mooncake-store/tests` 目录下。为了检测相关组件是否正常安装,可在相同的服务器上运行 etcd、Master Service(`mooncake_master`),并执行该 C++ 程序(位于 `build/mooncake-store/tests` 目录下),应输出测试成功的结果。
741+
742+
## 版本管理策略
743+
744+
Mooncake Store 的当前版本定义在 [`CMakeLists.txt`](../../mooncake-store/CMakeLists.txt) 中,为 `project(MooncakeStore VERSION 2.0.0)`。
745+
746+
何时需要升级版本:
747+
748+
* **主版本号 (X.0.0)**:当存在破坏性的 API 变更、主要架构更改,或影响向后兼容性的重要新功能时
749+
* **次版本号 (0.X.0)**:当添加新功能、API 扩展,或保持向后兼容性的显著改进时
750+
* **修订版本号 (0.0.X)**:当进行错误修复、性能优化,或不改变 API 的细微改进时

docs/source/design/mooncake-store.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,3 +696,13 @@ We provide a reference example `distributed_object_store_provider.py`, located i
696696

697697
#### C++ Usage Example
698698
The C++ API of Mooncake Store provides more low-level control capabilities. We provide a reference example `client_integration_test`, located in the `mooncake-store/tests` directory. To check if the related components are properly installed, you can run etcd and Master Service (`mooncake_master`) on the same server, and then execute this C++ program (located in the `build/mooncake-store/tests` directory). It should output a successful test result.
699+
700+
## Version Management Policy
701+
702+
The current version of Mooncake Store is defined in [`CMakeLists.txt`](../../mooncake-store/CMakeLists.txt) as `project(MooncakeStore VERSION 2.0.0)`.
703+
704+
When to bump the version:
705+
706+
* **Major version (X.0.0)**: For breaking API changes, major architectural changes, or significant new features that affect backward compatibility
707+
* **Minor version (0.X.0)**: For new features, API additions, or notable improvements that maintain backward compatibility
708+
* **Patch version (0.0.X)**: For bug fixes, performance optimizations, or minor improvements that don't affect the API

mooncake-common/common.cmake

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,14 +160,15 @@ if (USE_ASCEND OR USE_ASCEND_DIRECT)
160160
file(GLOB ASCEND_TOOLKIT_ROOT "/usr/local/Ascend/ascend-toolkit/latest/*-linux")
161161
endif()
162162
set(ASCEND_LIB_DIR "${ASCEND_TOOLKIT_ROOT}/lib64")
163-
set(ASCEND_DEVLIB_DIR "${ASCEND_TOOLKIT_ROOT}/devlib")
164163
set(ASCEND_INCLUDE_DIR "${ASCEND_TOOLKIT_ROOT}/include")
165164
add_compile_options(-Wno-ignored-qualifiers)
166165
include_directories(/usr/local/include /usr/include ${ASCEND_INCLUDE_DIR})
167-
link_directories(${ASCEND_LIB_DIR} ${ASCEND_DEVLIB_DIR})
166+
link_directories(${ASCEND_LIB_DIR})
168167
endif()
169168

170169
if (USE_ASCEND)
170+
set(ASCEND_DEVLIB_DIR "${ASCEND_TOOLKIT_ROOT}/devlib")
171+
link_directories(${ASCEND_DEVLIB_DIR})
171172
add_compile_definitions(USE_ASCEND)
172173
endif()
173174

mooncake-ep/include/mooncake_backend.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ class MooncakeBackend final : public ::c10d::Backend {
6969
return matrix[location].preferred_hca[0];
7070
}
7171

72+
// Returns the activeRanksTensor held in this backend's meta_ state.
at::Tensor getActiveRanksTensor() { return meta_.activeRanksTensor; }
73+
7274
private:
7375
static TransferEngine engine_;
7476
static Transport* transport_;

mooncake-ep/include/mooncake_ep_buffer.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ struct MooncakeEpBuffer {
8181
void* qp_devctxs = nullptr;
8282
std::string device_name;
8383
bool is_roce_ = false;
84+
bool ibgda_disabled_ = false;
8485

8586
// Stream for communication
8687
at::cuda::CUDAStream comm_stream;
@@ -114,7 +115,9 @@ struct MooncakeEpBuffer {
114115
torch::Tensor get_next_combine_buffer(int num_max_dispatch_tokens_per_rank,
115116
int hidden, int num_experts);
116117

117-
void init_ibgda();
118+
// Initializes the IBGDA resources for this buffer. Returns 0 on success; the
// failure paths that used to call exit(1) now return -1 so the caller can
// decide how to proceed.
int init_ibgda();
119+
120+
// Reports the ibgda_disabled_ flag (false by default), which the constructor
// sets to true when init_ibgda() returns a non-zero value.
bool ibgda_disabled() { return ibgda_disabled_; }
118121

119122
bool is_roce() { return is_roce_; }
120123

mooncake-ep/src/mooncake_ep_buffer.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,13 @@ MooncakeEpBuffer::MooncakeEpBuffer(int rank, int num_ranks,
1919
CUDA_CHECK(cudaMalloc(&rkeys, num_ranks * sizeof(uint32_t)));
2020
CUDA_CHECK(
2121
cudaMalloc(&qp_devctxs, MAX_QP_COUNT * sizeof(mlx5gda_qp_devctx)));
22-
init_ibgda();
22+
int ret = init_ibgda();
23+
if (ret != 0) {
24+
LOG(WARNING) << "Failed to initialize IBGDA. "
25+
<< "Using fallback implementation. "
26+
<< "Performance will be degraded.";
27+
ibgda_disabled_ = true;
28+
}
2329

2430
// Create 32 MiB workspace
2531
CUDA_CHECK(cudaMalloc(&workspace, NUM_WORKSPACE_BYTES));
@@ -279,7 +285,7 @@ torch::Tensor MooncakeEpBuffer::get_next_combine_buffer(
279285
torch::TensorOptions().dtype(dtype).device(torch::kCUDA));
280286
}
281287

282-
void MooncakeEpBuffer::init_ibgda() {
288+
int MooncakeEpBuffer::init_ibgda() {
283289
int num_devices;
284290
ibv_device** dev_list = ibv_get_device_list(&num_devices);
285291
int nic_id = -1;
@@ -299,7 +305,7 @@ void MooncakeEpBuffer::init_ibgda() {
299305
ibv_context* ctx = ibv_open_device(dev_list[nic_id]);
300306
if (!ctx) {
301307
perror("Failed to open device");
302-
exit(1);
308+
return -1;
303309
}
304310
if (ibv_query_gid(ctx, 1, 3, &gid)) {
305311
perror("Failed to query gid");
@@ -309,7 +315,7 @@ void MooncakeEpBuffer::init_ibgda() {
309315
ibv_pd* pd = ibv_alloc_pd(ctx);
310316
if (!pd) {
311317
perror("Failed to allocate protection domain");
312-
exit(1);
318+
return -1;
313319
}
314320
mlx5dv_pd mpd;
315321
mlx5dv_obj dv_obj = {};
@@ -334,24 +340,24 @@ void MooncakeEpBuffer::init_ibgda() {
334340
fprintf(stderr,
335341
"If the error is `Bad address`, probably because your GPU "
336342
"does not support GPUDirect RDMA.\n");
337-
exit(1);
343+
return -1;
338344
}
339345
memheap* ctrl_buf_heap = memheap_create(CTRL_BUF_SIZE);
340346
if (!ctrl_buf_heap) {
341347
perror("Failed to create memory heap");
342-
exit(1);
348+
return -1;
343349
}
344350
for (int i = 0; i < MAX_QP_COUNT; ++i) {
345351
mlx5gda_qp* qp = mlx5gda_create_rc_qp(mpd, ctrl_buf, ctrl_buf_umem,
346352
ctrl_buf_heap, pd, 16384, 1);
347353
if (!qp) {
348354
perror("Failed to create QP");
349-
exit(1);
355+
return -1;
350356
}
351357
is_roce_ = qp->port_attr.link_layer == IBV_LINK_LAYER_ETHERNET;
352358
if (mlx5gda_modify_rc_qp_rst2init(qp, 0)) {
353359
perror("Failed to mlx5gda_modify_rc_qp_rst2init");
354-
exit(1);
360+
return -1;
355361
}
356362
mlx5gda_qp_devctx qp_devctx = {
357363
.qpn = qp->qpn,
@@ -365,6 +371,7 @@ void MooncakeEpBuffer::init_ibgda() {
365371
sizeof(mlx5gda_qp_devctx), cudaMemcpyHostToDevice);
366372
qps.push_back(qp);
367373
}
374+
return 0;
368375
}
369376

370377
void MooncakeEpBuffer::sync_ib(const std::vector<int64_t>& remote_addrs,

mooncake-integration/ep/ep_py.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,19 @@ std::string getPreferredHca(c10::intrusive_ptr<c10d::Backend> backend,
5757
return mooncakeBackend->getPreferredHca(location);
5858
}
5959

60+
// Helper exposed to Python as "get_active_ranks": casts the generic c10d
// backend handle to MooncakeBackend and returns getActiveRanksTensor().
// NOTE(review): the cast is a static (unchecked) pointer cast, so this
// presumably relies on callers passing a MooncakeBackend — confirm.
at::Tensor getActiveRanks(c10::intrusive_ptr<c10d::Backend> backend) {
61+
auto mooncakeBackend =
62+
c10::static_intrusive_pointer_cast<MooncakeBackend>(backend);
63+
return mooncakeBackend->getActiveRanksTensor();
64+
}
65+
6066
PYBIND11_MODULE(ep, m) {
6167
m.def("createMooncakeBackend", &createMooncakeBackend);
6268
m.def("createMooncakeCpuBackend", &createMooncakeCpuBackend);
6369
m.def("set_host_ip", &MooncakeBackend::setHostIp);
6470
m.def("set_device_filter", &MooncakeBackend::setDeviceFilter);
6571
m.def("get_preferred_hca", &getPreferredHca);
72+
m.def("get_active_ranks", &getActiveRanks);
6673

6774
py::class_<MooncakeBackend::MooncakeBackendOptions,
6875
c10::intrusive_ptr<MooncakeBackend::MooncakeBackendOptions>>(
@@ -79,6 +86,7 @@ PYBIND11_MODULE(ep, m) {
7986

8087
py::class_<MooncakeEpBuffer>(m, "Buffer")
8188
.def(py::init<int, int, int64_t, std::string>())
89+
.def("ibgda_disabled", &MooncakeEpBuffer::ibgda_disabled)
8290
.def("is_roce", &MooncakeEpBuffer::is_roce)
8391
.def("sync_ib", &MooncakeEpBuffer::sync_ib)
8492
.def("sync_roce", &MooncakeEpBuffer::sync_roce)

0 commit comments

Comments
 (0)