@@ -221,7 +221,8 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
221221 SetMethodNoSideEffect (isolate, target, " decodeUTF8" , DecodeUTF8);
222222 SetMethodNoSideEffect (isolate, target, " toASCII" , ToASCII);
223223 SetMethodNoSideEffect (isolate, target, " toUnicode" , ToUnicode);
224- SetMethodNoSideEffect (isolate, target, " decodeLatin1" , DecodeLatin1);
224+ SetMethodNoSideEffect (
225+ isolate, target, " decodeWindows1252" , DecodeWindows1252);
225226}
226227
227228void BindingData::CreatePerContextProperties (Local<Object> target,
@@ -239,10 +240,10 @@ void BindingData::RegisterTimerExternalReferences(
239240 registry->Register (DecodeUTF8);
240241 registry->Register (ToASCII);
241242 registry->Register (ToUnicode);
242- registry->Register (DecodeLatin1 );
243+ registry->Register (DecodeWindows1252 );
243244}
244245
245- void BindingData::DecodeLatin1 (const FunctionCallbackInfo<Value>& args) {
246+ void BindingData::DecodeWindows1252 (const FunctionCallbackInfo<Value>& args) {
246247 Environment* env = Environment::GetCurrent (args);
247248
248249 CHECK_GE (args.Length (), 1 );
@@ -255,7 +256,6 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
255256 }
256257
257258 bool ignore_bom = args[1 ]->IsTrue ();
258- bool has_fatal = args[2 ]->IsTrue ();
259259
260260 ArrayBufferViewContents<uint8_t > buffer (args[0 ]);
261261 const uint8_t * data = buffer.data ();
@@ -270,20 +270,115 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
270270 return args.GetReturnValue ().SetEmptyString ();
271271 }
272272
273- std::string result (length * 2 , ' \0 ' );
273+ // Windows-1252 specific mapping for bytes 128-159
274+ // These differ from Latin-1/ISO-8859-1
275+ static const uint16_t windows1252_mapping[32 ] = {
276+ 0x20AC , 0x0081 , 0x201A , 0x0192 , 0x201E , 0x2026 , 0x2020 , 0x2021 , // 80-87
277+ 0x02C6 , 0x2030 , 0x0160 , 0x2039 , 0x0152 , 0x008D , 0x017D , 0x008F , // 88-8F
278+ 0x0090 , 0x2018 , 0x2019 , 0x201C , 0x201D , 0x2022 , 0x2013 , 0x2014 , // 90-97
279+ 0x02DC , 0x2122 , 0x0161 , 0x203A , 0x0153 , 0x009D , 0x017E , 0x0178 // 98-9F
280+ };
281+
282+ std::string result;
283+ result.reserve (length * 3 ); // Reserve space for UTF-8 output
284+
285+ for (size_t i = 0 ; i < length; i++) {
286+ uint8_t byte = data[i];
287+ uint32_t codepoint;
288+
289+ // Check if byte is in the special Windows-1252 range (128-159)
290+ if (byte >= 0x80 && byte <= 0x9F ) {
291+ codepoint = windows1252_mapping[byte - 0x80 ];
292+ } else {
293+ // For all other bytes, Windows-1252 is identical to Latin-1
294+ codepoint = byte;
295+ }
274296
275- size_t written = simdutf::convert_latin1_to_utf8 (
276- reinterpret_cast <const char *>(data), length, result.data ());
297+ // Convert codepoint to UTF-8
298+ if (codepoint < 0x80 ) {
299+ result.push_back (static_cast <char >(codepoint));
300+ } else if (codepoint < 0x800 ) {
301+ result.push_back (static_cast <char >(0xC0 | (codepoint >> 6 )));
302+ result.push_back (static_cast <char >(0x80 | (codepoint & 0x3F )));
303+ } else {
304+ result.push_back (static_cast <char >(0xE0 | (codepoint >> 12 )));
305+ result.push_back (static_cast <char >(0x80 | ((codepoint >> 6 ) & 0x3F )));
306+ result.push_back (static_cast <char >(0x80 | (codepoint & 0x3F )));
307+ }
308+ }
277309
278- if (has_fatal && written == 0 ) {
279- return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA (
280- env-> isolate (), " The encoded data was not valid for encoding latin1 " );
310+ Local<Value> ret;
311+ if ( ToV8Value (env-> context (), result, env-> isolate ()). ToLocal (&ret)) {
312+ args. GetReturnValue (). Set (ret );
281313 }
314+ }
282315
283- std::string_view view (result.c_str (), written);
316+ void BindingData::DecodeWindows1252 (const FunctionCallbackInfo<Value>& args) {
317+ Environment* env = Environment::GetCurrent (args);
318+
319+ CHECK_GE (args.Length (), 1 );
320+ if (!(args[0 ]->IsArrayBuffer () || args[0 ]->IsSharedArrayBuffer () ||
321+ args[0 ]->IsArrayBufferView ())) {
322+ return node::THROW_ERR_INVALID_ARG_TYPE (
323+ env->isolate (),
324+ " The \" input\" argument must be an instance of ArrayBuffer, "
325+ " SharedArrayBuffer, or ArrayBufferView." );
326+ }
327+
328+ bool ignore_bom = args[1 ]->IsTrue ();
329+
330+ ArrayBufferViewContents<uint8_t > buffer (args[0 ]);
331+ const uint8_t * data = buffer.data ();
332+ size_t length = buffer.length ();
333+
334+ if (ignore_bom && length > 0 && data[0 ] == 0xFF ) {
335+ data++;
336+ length--;
337+ }
338+
339+ if (length == 0 ) {
340+ return args.GetReturnValue ().SetEmptyString ();
341+ }
342+
343+ // Windows-1252 specific mapping for bytes 128-159
344+ // These differ from Latin-1/ISO-8859-1
345+ static const uint16_t windows1252_mapping[32 ] = {
346+ 0x20AC , 0x0081 , 0x201A , 0x0192 , 0x201E , 0x2026 , 0x2020 , 0x2021 , // 80-87
347+ 0x02C6 , 0x2030 , 0x0160 , 0x2039 , 0x0152 , 0x008D , 0x017D , 0x008F , // 88-8F
348+ 0x0090 , 0x2018 , 0x2019 , 0x201C , 0x201D , 0x2022 , 0x2013 , 0x2014 , // 90-97
349+ 0x02DC , 0x2122 , 0x0161 , 0x203A , 0x0153 , 0x009D , 0x017E , 0x0178 // 98-9F
350+ };
351+
352+ std::string result;
353+ result.reserve (length * 3 ); // Reserve space for UTF-8 output
354+
355+ for (size_t i = 0 ; i < length; i++) {
356+ uint8_t byte = data[i];
357+ uint32_t codepoint;
358+
359+ // Check if byte is in the special Windows-1252 range (128-159)
360+ if (byte >= 0x80 && byte <= 0x9F ) {
361+ codepoint = windows1252_mapping[byte - 0x80 ];
362+ } else {
363+ // For all other bytes, Windows-1252 is identical to Latin-1
364+ codepoint = byte;
365+ }
366+
367+ // Convert codepoint to UTF-8
368+ if (codepoint < 0x80 ) {
369+ result.push_back (static_cast <char >(codepoint));
370+ } else if (codepoint < 0x800 ) {
371+ result.push_back (static_cast <char >(0xC0 | (codepoint >> 6 )));
372+ result.push_back (static_cast <char >(0x80 | (codepoint & 0x3F )));
373+ } else {
374+ result.push_back (static_cast <char >(0xE0 | (codepoint >> 12 )));
375+ result.push_back (static_cast <char >(0x80 | ((codepoint >> 6 ) & 0x3F )));
376+ result.push_back (static_cast <char >(0x80 | (codepoint & 0x3F )));
377+ }
378+ }
284379
285380 Local<Value> ret;
286- if (ToV8Value (env->context (), view , env->isolate ()).ToLocal (&ret)) {
381+ if (ToV8Value (env->context (), result , env->isolate ()).ToLocal (&ret)) {
287382 args.GetReturnValue ().Set (ret);
288383 }
289384}
0 commit comments