Skip to content

Commit 367e5be

Browse files
authored
fix(utils): handle Unicode characters in serializeProperty (#6654)
Replace the direct btoa/atob calls with Unicode-safe base64 encoding/decoding helpers so that non-Latin1 characters (emoji, CJK, €, etc.) are handled properly during SSR hydration. The btoa() function only accepts Latin1 characters (code points 0-255) and throws InvalidCharacterError on any other Unicode content. This fix uses TextEncoder/TextDecoder to convert the string to and from UTF-8 bytes before and after the base64 step, so all Unicode characters are handled. Fixes #6643.
1 parent 074c620 commit 367e5be

File tree

2 files changed

+54
-2
lines changed

2 files changed

+54
-2
lines changed

src/utils/serialize.ts

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,40 @@ import { SERIALIZED_PREFIX } from './constants';
22
import { LocalValue } from './local-value';
33
import { RemoteValue } from './remote-value';
44

5+
/**
 * Base64-encode a string that may contain characters outside the Latin1 range.
 *
 * btoa() rejects any code unit above 255 (emoji, CJK characters, currency
 * symbols like €), so the input is converted to its UTF-8 bytes first and each
 * byte is mapped to a single one-byte character before btoa() sees it.
 * @param {string} str - The string to encode.
 * @returns {string} Base64 encoding of the UTF-8 bytes of `str`.
 */
function encodeBase64Unicode(str: string): string {
  // One character per UTF-8 byte, so every code unit is guaranteed to be <= 255.
  const byteChars = Array.from(new TextEncoder().encode(str), (byte) => String.fromCharCode(byte));
  return btoa(byteChars.join(''));
}
21+
22+
/**
 * Decode a base64 string whose payload is UTF-8 encoded text.
 *
 * atob() yields one character per decoded byte; those bytes are reassembled
 * into a Uint8Array and interpreted as UTF-8 so that characters outside the
 * Latin1 range (emoji, CJK characters, currency symbols like €) are restored.
 * Byte sequences that are not valid UTF-8 decode to U+FFFD rather than
 * throwing.
 * @param {string} base64 - The base64 string to decode.
 * @returns {string} Decoded string.
 */
function decodeBase64Unicode(base64: string): string {
  const binary = atob(base64);
  // atob() only produces code units 0-255, so each character is exactly one byte.
  const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
  // ignoreBOM: true keeps a leading U+FEFF in the output. The default decoder
  // silently strips it, which would make decoding lossy for text that
  // legitimately starts with that character.
  return new TextDecoder('utf-8', { ignoreBOM: true }).decode(bytes);
}
38+
539
/**
640
* Serialize a value to a string that can be deserialized later.
741
* @param {unknown} value - The value to serialize.
@@ -20,7 +54,7 @@ export function serializeProperty(value: unknown) {
2054
}
2155

2256
const arg = LocalValue.getArgument(value);
23-
return (SERIALIZED_PREFIX + btoa(JSON.stringify(arg))) as string;
57+
return (SERIALIZED_PREFIX + encodeBase64Unicode(JSON.stringify(arg))) as string;
2458
}
2559

2660
/**
@@ -33,5 +67,5 @@ export function deserializeProperty(value: string) {
3367
if (typeof value !== 'string' || !value.startsWith(SERIALIZED_PREFIX)) {
3468
return value;
3569
}
36-
return RemoteValue.fromLocalValue(JSON.parse(atob(value.slice(SERIALIZED_PREFIX.length))));
70+
return RemoteValue.fromLocalValue(JSON.parse(decodeBase64Unicode(value.slice(SERIALIZED_PREFIX.length))));
3771
}

src/utils/test/serialize.spec.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,22 @@ describe('serialize', () => {
110110

111111
expect(() => serializeProperty(circular)).toThrow();
112112
});
113+
114+
it('should serialize and deserialize Unicode characters including emoji, CJK, and currency symbols', () => {
115+
const toSerialize = {
116+
euroPrice: 'Price: 100€',
117+
japaneseText: '日本語',
118+
chineseText: '中文字符',
119+
emoji: '🎉🚀👋',
120+
accentedChars: 'café résumé naïve',
121+
mixed: {
122+
title: 'Welcome to 日本! 🎌',
123+
cost: '€50.00',
124+
description: 'CJK: 中文 한국어 日本語',
125+
},
126+
};
127+
128+
const deserialized = deserializeProperty(serializeProperty(toSerialize) as string);
129+
expect(deserialized).toEqual(toSerialize);
130+
});
113131
});

0 commit comments

Comments
 (0)