Skip to content

Commit c05b525

Browse files
authored
better handling of tokenization (#708)
1 parent 937a766 commit c05b525

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

src/platform/tokenizer/node/tokenizer.ts

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,12 @@ class BPETokenizer extends Disposable implements ITokenizer {
145145
case Raw.ChatCompletionContentPartKind.Opaque:
146146
return text.tokenUsage || 0;
147147
case Raw.ChatCompletionContentPartKind.Image:
148-
if (text.imageUrl.url.startsWith('data')) {
149-
return calculateImageTokenCost(text.imageUrl.url, text.imageUrl.detail);
148+
if (text.imageUrl.url.startsWith('data:image/')) {
149+
try {
150+
return calculateImageTokenCost(text.imageUrl.url, text.imageUrl.detail);
151+
} catch {
152+
return this._textTokenLength(text.imageUrl.url);
153+
}
150154
}
151155
return this._textTokenLength(text.imageUrl.url);
152156
case Raw.ChatCompletionContentPartKind.CacheBreakpoint:
@@ -210,8 +214,12 @@ class BPETokenizer extends Disposable implements ITokenizer {
210214
if (casted.type === 'text') {
211215
numTokens += await this.tokenLength(casted.text);
212216
} else if (casted.type === 'image_url' && casted.image_url) {
213-
if (casted.image_url.url.startsWith('data')) {
214-
numTokens += calculateImageTokenCost(casted.image_url.url, casted.image_url.detail);
217+
if (casted.image_url.url.startsWith('data:image/')) {
218+
try {
219+
numTokens += calculateImageTokenCost(casted.image_url.url, casted.image_url.detail);
220+
} catch {
221+
numTokens += await this.tokenLength(casted.image_url.url);
222+
}
215223
} else {
216224
numTokens += await this.tokenLength(casted.image_url.url);
217225
}

0 commit comments

Comments
 (0)