
Commit 8a5ce67

Remove some changes.
1 parent 90f5810 commit 8a5ce67

File tree: 3 files changed, +9 −44 lines

benchmark/profile_restful_api.py

Lines changed: 6 additions & 41 deletions
@@ -84,27 +84,14 @@ def __init__(self,
         self.model_name = model_name
         self.pbar = None

-    def _inference(self,
-                   req_queue: Queue,
-                   res_queue: Queue,
-                   session_id: int,
-                   stream_output: bool,
-                   image_url: str = None):
+    def _inference(self, req_queue: Queue, res_queue: Queue, session_id: int,
+                   stream_output: bool):

         stats = []
         client = APIClient(self.server_addr, api_key=self.api_key)

         for prompt, input_seqlen, output_seqlen in iter(
                 req_queue.get, [None, None, None]):
-            if image_url is not None:
-                prompt = [
-                    dict(role='user',
-                         content=[
-                             dict(type='text', text=prompt),
-                             dict(type='image_url',
-                                  image_url=dict(url=image_url))
-                         ])
-                ]
             timestamps = []
             timestamps.append(time.perf_counter())
             for output in client.chat_completions_v1(
@@ -136,8 +123,7 @@ def _inference(self,
     def process_request(self,
                         requests,
                         concurrency: int = 1,
-                        stream_output: bool = False,
-                        img_hw: str = None):
+                        stream_output: bool = False):
         res_queue = Queue()
         req_queue = Queue()
         threads = []
@@ -152,28 +138,10 @@ def process_request(self,

         start = time.time()

-        if img_hw is not None:
-            import PIL
-
-            from lmdeploy.vl.utils import encode_image_base64
-            h, w = [int(s) for s in img_hw.split('x')]
-            data = np.random.randint(low=0,
-                                     high=255,
-                                     size=h * w * 3,
-                                     dtype=np.uint8)
-            data = data.reshape(h, w, 3)
-            img = PIL.Image.fromarray(data, 'RGB')
-            encoded = encode_image_base64(img)
-            image_url = f'data:image/jpeg;base64,{encoded}'
-        else:
-            image_url = None
-
         # start threads
         for i in range(concurrency):
             t = Thread(target=self._inference,
-                       # args=(req_queue, res_queue, i, stream_output))
-                       args=(req_queue, res_queue, i, stream_output,
-                             image_url))
+                       args=(req_queue, res_queue, i, stream_output))
             t.start()
             threads.append(t)

@@ -254,8 +222,7 @@ def main(server_addr: str,
          temperature: float = 1.0,
          stream_output: bool = False,
          csv: str = './profile_api_server.csv',
-         seed: int = 0,
-         img_hw: str = None):
+         seed: int = 0):
     """Benchmark the request througput of api server.

     Args:
@@ -273,8 +240,6 @@ def main(server_addr: str,
         stream_output (bool, optional): Indicator for streaming output. Defaults to False.
         csv (str, optional): The path to save the result.
         seed (int, optional): Seed used in sampling prompts from dataset. Defaults to 0.
-        img_hw (str, optional): The image size to benchmark vl serving, such as '512x512'.
-            Default to None, which means to benchmark language model only.
     """ # noqa
     if not server_addr.startswith('http://'):
         print(f'[WARNING] server_addr of the api_server should '
@@ -293,7 +258,7 @@ def main(server_addr: str,

     requests = sample_requests(dataset, num_prompts, engine.tokenizer)

-    engine.process_request(requests, concurrency, stream_output, img_hw)
+    engine.process_request(requests, concurrency, stream_output)


 if __name__ == '__main__':
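
This commit strips the vision-language path from the benchmark: _inference loses its image_url parameter, and process_request/main lose img_hw, which synthesized a random RGB image and inlined it into the prompt as a base64 data URL. For reference, a minimal sketch of what the deleted branch did, assuming lmdeploy.vl.utils.encode_image_base64 is available (it was imported in the removed lines); the helper name make_image_prompt is hypothetical:

# Sketch of the removed img_hw/image_url path; make_image_prompt is a
# hypothetical name, not part of the original script.
import numpy as np
import PIL.Image

from lmdeploy.vl.utils import encode_image_base64


def make_image_prompt(text: str, img_hw: str = '512x512'):
    """Wrap text plus a random RGB image of size img_hw ('HxW') into an
    OpenAI-style multimodal user message, as the deleted branch did."""
    h, w = [int(s) for s in img_hw.split('x')]
    # Random pixels stand in for a real image, as in the removed code.
    data = np.random.randint(low=0, high=255, size=(h, w, 3), dtype=np.uint8)
    img = PIL.Image.fromarray(data, 'RGB')
    encoded = encode_image_base64(img)
    image_url = f'data:image/jpeg;base64,{encoded}'
    return [
        dict(role='user',
             content=[
                 dict(type='text', text=text),
                 dict(type='image_url', image_url=dict(url=image_url))
             ])
    ]

The returned message list is what the deleted branch assigned to prompt before calling client.chat_completions_v1.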

lmdeploy/messages.py

Lines changed: 2 additions & 2 deletions
@@ -204,14 +204,14 @@ class PytorchEngineConfig:
     cache_max_entry_count: float = 0.8
     eviction_type: str = 'recompute'
     prefill_interval: int = 16
-    block_size: int = 16
+    block_size: int = 64
     num_cpu_blocks: int = 0
     num_gpu_blocks: int = 0
     adapters: Dict[str, str] = None
     max_prefill_token_num: int = 4096
     thread_safe: bool = False
     enable_prefix_caching: bool = False
-    device_type: str = 'ascend'
+    device_type: str = 'cuda'
     download_dir: str = None
     revision: str = None
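
This hunk flips two PytorchEngineConfig defaults: block_size from 16 back to 64 and device_type from 'ascend' back to 'cuda'. A minimal sketch of the effect on callers, assuming the config is constructed with keyword arguments as an ordinary dataclass:

# Sketch: after this commit the implicit defaults are block_size=64
# and device_type='cuda'.
from lmdeploy.messages import PytorchEngineConfig

config = PytorchEngineConfig()
assert config.block_size == 64
assert config.device_type == 'cuda'

# A deployment that relied on the old implicit values must now opt in:
ascend_config = PytorchEngineConfig(block_size=16, device_type='ascend')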

lmdeploy/pytorch/devices/device_manager.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@

 @dataclass
 class DeviceContext:
-    device_type: str = 'ascend'
+    device_type: str = 'cuda'


 DefaultContext = DeviceContext()
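
The same default flip applies to DeviceContext; since DefaultContext is instantiated at module import time, it now reports 'cuda' unless a context is built with another device_type. A minimal usage sketch, assuming both names are importable from this module as the diff shows:

# Sketch: DeviceContext is a plain dataclass, so the module-level
# DefaultContext created at import time now defaults to 'cuda'.
from lmdeploy.pytorch.devices.device_manager import (DefaultContext,
                                                     DeviceContext)

assert DefaultContext.device_type == 'cuda'
ascend_ctx = DeviceContext(device_type='ascend')  # old behavior, explicit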
