````diff
@@ -997,8 +997,8 @@ The default save path for the configuration file is `PaddleOCR-VL.yaml`. Modify
 VLRecognition:
   ...
   genai_config:
-    backend: vllm
-    server_url: http://127.0.0.1:8118
+    backend: vllm-server
+    server_url: http://127.0.0.1:8118/v1
 ```
 
 After that, the modified configuration file can be used for pipeline invocation. For example, invoke it through the CLI:
````
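The CLI example itself sits below this hunk. For reference, the same modified configuration can also be driven from the Python API; a minimal sketch, assuming the standard `paddlex.create_pipeline` entry point and a hypothetical input image `doc.png` (neither appears in this diff):

```python
# Minimal sketch: load the pipeline from the modified config file and run it.
# "PaddleOCR-VL.yaml" is the config saved above; "doc.png" is a hypothetical input.
from paddlex import create_pipeline

pipeline = create_pipeline(pipeline="PaddleOCR-VL.yaml")
for res in pipeline.predict("doc.png"):
    res.print()  # assumed result-printing helper on PaddleX result objects
```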
```diff
@@ -1290,7 +1290,7 @@ Below are the API references for basic service-based deployment and examples of
 </tr>
 <tr>
 <td><code>promptLabel</code></td>
-<td><code>string</code>|<code>object</code>|<code>null</code></td>
+<td><code>string</code>|<code>null</code></td>
 <td>Please refer to the description of the <code>prompt_label</code> parameter in the <code>predict</code> method of the PaddleOCR-VL object.</td>
 <td>No</td>
 </tr>
```
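With the type narrowed to `string` | `null`, a service request now passes `promptLabel` as a plain string. A hypothetical client sketch; the endpoint path and the `file` field are assumptions, and only `promptLabel` itself comes from the table above (the port matches the Uvicorn log quoted later in this diff):

```python
# Hypothetical request sketch against the service-based deployment.
# Endpoint path and input field name are assumptions, not from this diff.
import requests

payload = {
    "file": "https://example.com/sample.png",  # assumed input field
    "promptLabel": "ocr",  # plain string now; an object is no longer accepted
}
resp = requests.post("http://127.0.0.1:8080/layout-parsing", json=payload)
resp.raise_for_status()
print(resp.json())
```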
docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md (6 changes: 3 additions & 3 deletions)

````diff
@@ -1038,8 +1038,8 @@ paddlex --get_pipeline_config PaddleOCR-VL
 VLRecognition:
   ...
   genai_config:
-    backend: vllm
-    server_url: http://127.0.0.1:8118
+    backend: vllm-server
+    server_url: http://127.0.0.1:8118/v1
 ```
 
 After that, the modified configuration file can be used for pipeline invocation. For example, invoke it through the CLI:
````
```diff
@@ -1334,7 +1334,7 @@ INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
 </tr>
 <tr>
 <td><code>promptLabel</code></td>
-<td><code>string</code> | <code>object</code> | <code>null</code></td>
+<td><code>string</code> | <code>null</code></td>
 <td>Please refer to the description of the <code>prompt_label</code> parameter in the <code>predict</code> method of the PaddleOCR-VL object.</td>
 <td>No</td>
 </tr>
```
paddlex/inference/models/base/predictor/base_predictor.py (3 changes: 1 addition & 2 deletions)

```diff
@@ -35,7 +35,6 @@
 from ....utils.benchmark import ENTRY_POINT_NAME, benchmark
 from ....utils.hpi import HPIConfig, HPIInfo
 from ....utils.io import YAMLReader
-from ....utils.model_paths import get_model_paths
 from ....utils.pp_option import PaddlePredictorOption
 from ...common import HPInfer, PaddleInfer
 from ...common.genai import GenAIClient, GenAIConfig, need_local_model
```
```diff
@@ -156,7 +155,7 @@ def __init__(
 
         self.batch_sampler.batch_size = batch_size
 
-        if self.model_dir and get_model_paths(self.model_dir, self.MODEL_FILE_PREFIX):
+        if self._use_local_model:
             self._use_hpip = use_hpip
             if not use_hpip:
                 self._pp_option = self._prepare_pp_option(pp_option, device)
```
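The refactor replaces an inline filesystem probe with a precomputed `_use_local_model` flag; how that flag is set is outside this hunk. A hedged sketch of plausible wiring, using the `need_local_model` helper imported above (the exact logic is an assumption):

```python
# Hedged sketch of how _use_local_model might be derived; the real assignment
# is not shown in this diff. need_local_model is imported in the hunk above.
from paddlex.inference.models.common.genai import need_local_model


def compute_use_local_model(genai_config, model_dir):
    # A remote backend such as "vllm-server" serves the model over HTTP,
    # so no local model files are required in that case.
    if genai_config is not None and not need_local_model(genai_config):
        return False
    return model_dir is not None
```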
paddlex/inference/models/common/genai.py (12 changes: 4 additions & 8 deletions)

```diff
@@ -88,7 +88,7 @@ def stop(self):
         self.loop = None
         self.thread = None
 
-    def run_async(self, coro, return_future=False):
+    def run_async(self, coro):
         if not self.is_running():
             raise RuntimeError("Event loop is not running")
 
```
```diff
@@ -164,6 +164,8 @@ def __init__(
 
         self.backend = backend
         self._max_concurrency = max_concurrency
+        if model_name is None:
+            model_name = run_async(self._get_model_name(), timeout=10)
         self._model_name = model_name
 
         if "api_key" not in kwargs:
```
```diff
@@ -177,12 +179,6 @@ def openai_client(self):
         return self._client
 
     def create_chat_completion(self, messages, *, return_future=False, **kwargs):
-        if self._model_name is not None:
-            model_name = self._model_name
-        else:
-            model_name = run_async(self._get_model_name(), timeout=10)
-        self._model_name = model_name
-
         async def _create_chat_completion_with_semaphore(*args, **kwargs):
             async with self._semaphore:
                 return await self._client.chat.completions.create(
```
```diff
@@ -192,7 +188,7 @@ async def _create_chat_completion_with_semaphore(*args, **kwargs):
 
         return run_async(
             _create_chat_completion_with_semaphore(
-                model=model_name,
+                model=self._model_name,
                 messages=messages,
                 **kwargs,
             ),
```
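Net effect: the model name is resolved once, eagerly, in `__init__`, rather than lazily on the first `create_chat_completion` call, which also closes the window where concurrent first calls could each trigger a discovery request. A minimal self-contained sketch of the resolve-once-at-construction pattern (illustrative names only, not the PaddleX API):

```python
import asyncio


class ResolveOnceClient:
    """Resolve a remote model name once, at construction time."""

    def __init__(self, model_name=None):
        if model_name is None:
            # One discovery round-trip at construction instead of one per call.
            model_name = asyncio.run(self._get_model_name())
        self._model_name = model_name

    async def _get_model_name(self):
        # Stand-in for querying the server's model list endpoint.
        await asyncio.sleep(0)
        return "paddleocr-vl"

    def chat(self, messages):
        # Every call reuses the cached name; no per-call discovery.
        return {"model": self._model_name, "messages": messages}


client = ResolveOnceClient()
print(client.chat([{"role": "user", "content": "hi"}])["model"])
```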
paddlex/inference/utils/misc.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -31,4 +31,4 @@ def is_bfloat16_available(device):
     device_type, _ = parse_device(device)
     return (
         "npu" in get_device_type() or paddle.amp.is_bfloat16_supported()
-    ) and device_type in ("gpu", "npu", "xpu", "mlu", "dcu")
+    ) and device_type in ("gpu", "npu", "xpu", "mlu")
```
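With `dcu` dropped from the allow-list, `is_bfloat16_available` now reports `False` for DCU devices regardless of what the runtime claims. A hedged usage sketch; the import path is inferred from the file location and assumes a working PaddlePaddle install:

```python
# Import path inferred from paddlex/inference/utils/misc.py; requires paddlex
# and a PaddlePaddle build to be installed.
from paddlex.inference.utils.misc import is_bfloat16_available

for dev in ("gpu:0", "dcu:0", "cpu"):
    # After this change, "dcu:0" is always reported as unsupported.
    print(dev, is_bfloat16_available(dev))
```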