7 files changed, +8 -8 lines changed.

@@ -20,7 +20,7 @@ default-members = [
 resolver = "2"

 [workspace.package]
-version = "2.3.1-dev0"
+version = "2.3.2-dev0"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
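Not part of the diff, but useful context: the member crates typically inherit this field via `version.workspace = true` in their own manifests, so this single bump is what moves the whole workspace to 2.3.2-dev0. A minimal sketch for checking that locally, assuming it is run from the repository root and `jq` is installed:

```bash
# Sketch (not from the PR): list the versions cargo reports for the
# workspace's own crates; with workspace inheritance they should all
# read 2.3.2-dev0 after this change.
cargo metadata --no-deps --format-version 1 \
  | jq -r '.packages[] | "\(.name) \(.version)"' \
  | sort
```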
@@ -83,7 +83,7 @@ model=HuggingFaceH4/zephyr-7b-beta
 volume=$PWD/data

 docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:2.3.0 --model-id $model
+    ghcr.io/huggingface/text-generation-inference:2.3.1 --model-id $model
 ```

 And then you can make requests like
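The request snippet itself falls outside this hunk's context window. For reference, a request against the container started above (which maps port 8080 to the server) can look like this; the prompt and parameters are illustrative, not taken from the diff:

```bash
# Illustrative only: a generate request against the server started above
# with `-p 8080:80`. Adjust the prompt and parameters as needed.
curl 127.0.0.1:8080/generate \
    -X POST \
    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
    -H 'Content-Type: application/json'
```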
@@ -10,7 +10,7 @@
       "name": "Apache 2.0",
       "url": "https://www.apache.org/licenses/LICENSE-2.0"
     },
-    "version": "2.3.1-dev0"
+    "version": "2.3.2-dev0"
   },
   "paths": {
     "/": {
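A quick way to confirm the spec and the workspace manifest agree after a bump like this one. The `docs/openapi.json` path is an assumption about where this file lives in the repository; adjust it if the spec sits elsewhere:

```bash
# Assumed path (the diff does not name the file): print the version the
# OpenAPI spec declares so it can be compared against
# [workspace.package].version in Cargo.toml.
jq -r '.info.version' docs/openapi.json
```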
@@ -11,7 +11,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
 docker run --rm -it --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
     --device=/dev/kfd --device=/dev/dri --group-add video \
     --ipc=host --shm-size 256g --net host -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:2.3.0-rocm \
+    ghcr.io/huggingface/text-generation-inference:2.3.1-rocm \
     --model-id $model
 ```

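Not part of the change, but a cheap sanity check whenever an image tag is bumped like this: confirm the new ROCm tag actually resolves on the registry and record its digest before pointing deployments at it.

```bash
# Sketch: pull the bumped ROCm tag and print its registry digest.
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
docker image inspect ghcr.io/huggingface/text-generation-inference:2.3.1-rocm \
    --format '{{index .RepoDigests 0}}'
```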
@@ -12,7 +12,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
 docker run --rm --privileged --cap-add=sys_nice \
     --device=/dev/dri \
     --ipc=host --shm-size 1g --net host -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:2.3.0-intel-xpu \
+    ghcr.io/huggingface/text-generation-inference:2.3.1-intel-xpu \
     --model-id $model --cuda-graphs 0
 ```

@@ -29,7 +29,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
 docker run --rm --privileged --cap-add=sys_nice \
     --device=/dev/dri \
     --ipc=host --shm-size 1g --net host -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:2.3.0-intel-cpu \
+    ghcr.io/huggingface/text-generation-inference:2.3.1-intel-cpu \
     --model-id $model --cuda-graphs 0
 ```

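Both Intel variants pass `--cuda-graphs 0`, which disables CUDA graph capture, a CUDA-only optimization that does not apply on these devices. If threading the flag through an orchestration layer is awkward, the launcher also reads most options from environment variables; a sketch assuming the matching variable is `CUDA_GRAPHS` (confirm with `text-generation-launcher --help` before relying on it):

```bash
# Sketch only: the same CPU command as above, with the flag supplied as an
# environment variable instead. The CUDA_GRAPHS variable name is an
# assumption; verify it with `text-generation-launcher --help`.
docker run --rm --privileged --cap-add=sys_nice \
    --device=/dev/dri \
    --ipc=host --shm-size 1g --net host -v $volume:/data \
    -e CUDA_GRAPHS=0 \
    ghcr.io/huggingface/text-generation-inference:2.3.1-intel-cpu \
    --model-id $model
```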
Original file line number Diff line number Diff line change @@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B
1111volume=$PWD /data # share a volume with the Docker container to avoid downloading weights every run
1212
1313docker run --gpus all --shm-size 64g -p 8080:80 -v $volume :/data \
14- ghcr.io/huggingface/text-generation-inference:2.3.0 \
14+ ghcr.io/huggingface/text-generation-inference:2.3.1 \
1515 --model-id $model
1616```
1717
@@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run

 docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:2.3.0 \
+    ghcr.io/huggingface/text-generation-inference:2.3.1 \
     --model-id $model
 ```

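The two hunks above both rely on `--gpus all`, i.e. the NVIDIA Container Toolkit. A preflight sketch, assuming the toolkit is installed, to verify that containers can actually see the GPUs before launching the bumped image:

```bash
# Preflight sketch: the NVIDIA Container Toolkit injects nvidia-smi into
# GPU-enabled containers, so this should list the same GPUs the TGI
# container will see.
docker run --rm --gpus all ubuntu nvidia-smi
```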