Commit db267e3: local changes

1 parent 6135f75

3 files changed: 17 additions & 3 deletions


docker/Makefile

Lines changed: 7 additions & 1 deletion
@@ -120,7 +120,7 @@ endef
 DOCKER_RUN_OPTS ?= --rm -it --ipc=host --ulimit stack=67108864 $(if $(filter 0,$(IS_ROOTLESS)),--ulimit memlock=-1)
 DOCKER_RUN_ARGS ?=
 # Check if NVIDIA_VISIBLE_DEVICES is set and not empty
-NVIDIA_VISIBLE_DEVICES_VAL = $(shell echo $$NVIDIA_VISIBLE_DEVICES)
+NVIDIA_VISIBLE_DEVICES_VAL = $(shell echo $$NV_GPU)
 ifeq ($(NVIDIA_VISIBLE_DEVICES_VAL),)
 # If empty or not set, use all GPUs
 GPU_OPTS ?= --gpus=all
@@ -149,17 +149,23 @@ ifeq ($(LOCAL_USER),1)
 	$(call add_local_user,$(IMAGE_WITH_TAG))
 endif
 	docker run $(DOCKER_RUN_OPTS) $(DOCKER_RUN_ARGS) \
+		--network=host \
 		$(GPU_OPTS) \
 		--volume $(SOURCE_DIR):$(CODE_DIR) \
 		$(if $(and $(filter 1,$(LOCAL_USER)),$(shell [ -w "$(USER_CACHE_DIR)" ] && echo 1)),--volume $(USER_CACHE_DIR):/home/$(USER_NAME)/.cache:rw) \
 		--env "CCACHE_DIR=$(CCACHE_DIR)" \
 		--env "CCACHE_BASEDIR=$(CODE_DIR)" \
 		--env "CONAN_HOME=$(CONAN_DIR)" \
+		--env "HF_HOME=/home/scratch.williamz_gpu/code/trtc/builder/hf_cache" \
+		--volume /home/scratch.trt_llm_data:/home/scratch.trt_llm_data \
+		--volume /home/scratch.williamz_gpu:/home/scratch.williamz_gpu \
 		--workdir $(WORK_DIR) \
 		--hostname $(shell hostname)-$* \
 		--name $(CONTAINER_NAME)-$*-$(USER_NAME) \
 		--tmpfs /tmp:exec \
 		$(IMAGE_WITH_TAG)$(IMAGE_TAG_SUFFIX) $(RUN_CMD)
+		# $(if $(filter 1,$(LOCAL_USER)),--volume ${HOME_DIR}/.cache:/home/${USER_NAME}/.cache:rw) \
+		# --env TLLM_LLMAPI_BUILD_CACHE_ROOT=/home/scratch.williamz_gpu/trtllm_llmapi_cache \

 devel_%: STAGE = devel
 tritondevel_%: STAGE = tritondevel
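
For context, the first hunk only changes which environment variable feeds GPU_OPTS: the value now comes from NV_GPU instead of NVIDIA_VISIBLE_DEVICES (the comment and the NVIDIA_VISIBLE_DEVICES_VAL variable name still refer to the old name). Below is a minimal Python sketch of that selection logic; the non-empty branch is an assumption, since the hunk only shows the fallback to --gpus=all.

import os

def gpu_opts() -> str:
    # After this commit the Makefile reads NV_GPU; when it is unset or empty,
    # the container is started with access to all GPUs.
    devices = os.environ.get("NV_GPU", "").strip()
    if not devices:
        return "--gpus=all"
    # Assumed behaviour of the branch outside the hunk: forward the requested
    # device list to `docker run`.
    return f"--gpus=device={devices}"

print(gpu_opts())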

tensorrt_llm/_torch/models/checkpoints/hf/weight_loader.py

Lines changed: 3 additions & 0 deletions
@@ -25,6 +25,9 @@ class HfWeightLoader(BaseWeightLoader):

     def load_weights(self, checkpoint_dir: str) -> dict[str, Any]:
         weight_files = glob.glob(f"{checkpoint_dir}/*.safetensors")
+        print(f"==== BEFORE: {len(weight_files)}")
+        weight_files = [x for x in weight_files if "consolidated" not in x]
+        print(f"==== AFTER: {len(weight_files)}")
         if weight_files:
             # Prefetch the weight files to CPU memory if the size is less than 90% of the available memory.
             # This is a heuristic to avoid prefetching files that are too large and causing file cache thrashing.
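
The hunk above is debug-style scratch work: it prints how many *.safetensors files were found, drops any whose name contains "consolidated" (some checkpoints, Mistral releases for example, ship a single consolidated.safetensors next to the sharded model-*.safetensors files, and loading both would read every weight twice), then prints the count again. A standalone sketch of the same filtering, without the prints, could look like this; the helper name is illustrative, and the real logic stays inline in HfWeightLoader.load_weights.

import glob
import os

def sharded_safetensors(checkpoint_dir: str) -> list[str]:
    # Collect every safetensors file in the checkpoint directory ...
    weight_files = glob.glob(os.path.join(checkpoint_dir, "*.safetensors"))
    # ... and skip consolidated dumps so each weight is only loaded once.
    return [f for f in weight_files if "consolidated" not in os.path.basename(f)]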

tensorrt_llm/builder.py

Lines changed: 7 additions & 2 deletions
@@ -685,8 +685,13 @@ def from_json_file(cls, config_file, plugin_config=None):
     def to_dict(self):
         output = copy.deepcopy(self.__dict__)
         # the enum KVCacheType cannot be converted automatically
-        if output.get('kv_cache_type', None) is not None:
-            output['kv_cache_type'] = str(output['kv_cache_type'].name)
+        if (kv_cache_type := output.get('kv_cache_type', None)) is not None:
+            if isinstance(kv_cache_type, KVCacheType):
+                output['kv_cache_type'] = str(kv_cache_type.name)
+            elif isinstance(kv_cache_type, str):
+                output['kv_cache_type'] = kv_cache_type
+            else:
+                raise TypeError(f"Unknown type: {type(kv_cache_type)}")
         output['plugin_config'] = output['plugin_config'].to_dict()
         output['lora_config'] = output['lora_config'].to_dict()
         output['auto_parallel_config'] = output['auto_parallel_config'].to_dict(
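
The previous code assumed kv_cache_type was always a KVCacheType enum and unconditionally called .name on it, which raises AttributeError if the field already holds a string (plausible when a config was loaded back from JSON, judging by the from_json_file method in the same class). The new branch accepts either form and rejects anything else. A self-contained sketch of the same normalization rule, using a stand-in enum in place of tensorrt_llm's KVCacheType:

from enum import Enum

class KVCacheType(Enum):  # stand-in for the real tensorrt_llm enum
    CONTINUOUS = 0
    PAGED = 1
    DISABLED = 2

def normalize_kv_cache_type(value) -> str:
    # Accept the enum or an already-serialized string; reject anything else,
    # mirroring the to_dict() change above.
    if isinstance(value, KVCacheType):
        return value.name
    if isinstance(value, str):
        return value
    raise TypeError(f"Unknown type: {type(value)}")

assert normalize_kv_cache_type(KVCacheType.PAGED) == "PAGED"
assert normalize_kv_cache_type("PAGED") == "PAGED"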
