Skip to content

Commit

Permalink
fix: utf8-string encode/decode
Browse files Browse the repository at this point in the history
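There was a bug in which strings were stored as one `u8` per UTF-16 code
unit (the value of `charCodeAt`), with the character count as the length
prefix, instead of as UTF-8 bytes prefixed by their byte length. This was
incorrect because the specification clearly says that strings must be
encoded as UTF-8.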

This commit fixes the bug by using the TextEncoder / TextDecoder APIs,
which are widely supported by modern browsers and Node versions.
  • Loading branch information
gagdiez committed Nov 21, 2023
1 parent ccf0b00 commit 2d28b1a
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 24 deletions.
4 changes: 3 additions & 1 deletion borsh-ts/deserialize.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { ArrayType, DecodeTypes, MapType, IntegerType, OptionType, Schema, SetType, StructType, integers, EnumType } from './types.js';
import { DecodeBuffer } from './buffer.js';

import * as utfUtil from 'util';

export class BorshDeserializer {
buffer: DecodeBuffer;

Expand Down Expand Up @@ -54,7 +56,7 @@ export class BorshDeserializer {
decode_string(): string {
const len: number = this.decode_integer('u32') as number;
const buffer = new Uint8Array(this.buffer.consume_bytes(len));
return String.fromCharCode.apply(null, buffer);
return new utfUtil.TextDecoder().decode(buffer);
}

decode_boolean(): boolean {
Expand Down
14 changes: 7 additions & 7 deletions borsh-ts/serialize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { ArrayType, MapType, IntegerType, OptionType, Schema, SetType, StructTyp
import { EncodeBuffer } from './buffer.js';
import * as utils from './utils.js';

import * as utfUtil from 'util';

export class BorshSerializer {
encoded: EncodeBuffer;
fieldPath: string[];
Expand Down Expand Up @@ -61,15 +63,13 @@ export class BorshSerializer {

encode_string(value: unknown): void {
this.checkTypes && utils.expect_type(value, 'string', this.fieldPath);
const _value = value as string;

// 4 bytes for length
this.encoded.store_value(_value.length, 'u32');
// encode to utf8 bytes
const utf8Bytes = new utfUtil.TextEncoder().encode(value as string);

// string bytes
for (let i = 0; i < _value.length; i++) {
this.encoded.store_value(_value.charCodeAt(i), 'u8');
}
// 4 bytes for length + string bytes
this.encoded.store_value(utf8Bytes.length, 'u32');
this.encoded.store_bytes(utf8Bytes);
}

encode_boolean(value: unknown): void {
Expand Down
3 changes: 3 additions & 0 deletions borsh-ts/test/(de)serialize.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ test('serialize booleans', async () => {

test('serialize strings', async () => {
check_roundtrip('h"i', 'string', [3, 0, 0, 0, 104, 34, 105]);
check_roundtrip('Chévere', 'string', [8, 0, 0, 0, 67, 104, 195, 169, 118, 101, 114, 101]);
check_roundtrip('👍', 'string', [4, 0, 0, 0, 240, 159, 145, 141]);
check_roundtrip('óñ 漢', 'string', [8, 0, 0, 0, 195, 179, 195, 177, 32, 230, 188, 162]);
});

test('serialize floats', async () => {
Expand Down
26 changes: 25 additions & 1 deletion lib/cjs/deserialize.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,32 @@
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
exports.__esModule = true;
exports.BorshDeserializer = void 0;
var types_js_1 = require("./types.js");
var buffer_js_1 = require("./buffer.js");
var utfUtil = __importStar(require("util"));
var BorshDeserializer = /** @class */ (function () {
function BorshDeserializer(bufferArray) {
this.buffer = new buffer_js_1.DecodeBuffer(bufferArray);
Expand Down Expand Up @@ -55,7 +79,7 @@ var BorshDeserializer = /** @class */ (function () {
BorshDeserializer.prototype.decode_string = function () {
var len = this.decode_integer('u32');
var buffer = new Uint8Array(this.buffer.consume_bytes(len));
return String.fromCharCode.apply(null, buffer);
return new utfUtil.TextDecoder().decode(buffer);
};
BorshDeserializer.prototype.decode_boolean = function () {
return this.buffer.consume_value('u8') > 0;
Expand Down
13 changes: 6 additions & 7 deletions lib/cjs/serialize.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ exports.BorshSerializer = void 0;
var types_js_1 = require("./types.js");
var buffer_js_1 = require("./buffer.js");
var utils = __importStar(require("./utils.js"));
var utfUtil = __importStar(require("util"));
var BorshSerializer = /** @class */ (function () {
function BorshSerializer(checkTypes) {
this.encoded = new buffer_js_1.EncodeBuffer();
Expand Down Expand Up @@ -83,13 +84,11 @@ var BorshSerializer = /** @class */ (function () {
};
BorshSerializer.prototype.encode_string = function (value) {
this.checkTypes && utils.expect_type(value, 'string', this.fieldPath);
var _value = value;
// 4 bytes for length
this.encoded.store_value(_value.length, 'u32');
// string bytes
for (var i = 0; i < _value.length; i++) {
this.encoded.store_value(_value.charCodeAt(i), 'u8');
}
// encode to utf8 bytes
var utf8Bytes = new utfUtil.TextEncoder().encode(value);
// 4 bytes for length + string bytes
this.encoded.store_value(utf8Bytes.length, 'u32');
this.encoded.store_bytes(utf8Bytes);
};
BorshSerializer.prototype.encode_boolean = function (value) {
this.checkTypes && utils.expect_type(value, 'boolean', this.fieldPath);
Expand Down
3 changes: 2 additions & 1 deletion lib/esm/deserialize.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { integers } from './types.js';
import { DecodeBuffer } from './buffer.js';
import * as utfUtil from 'util';
var BorshDeserializer = /** @class */ (function () {
function BorshDeserializer(bufferArray) {
this.buffer = new DecodeBuffer(bufferArray);
Expand Down Expand Up @@ -52,7 +53,7 @@ var BorshDeserializer = /** @class */ (function () {
BorshDeserializer.prototype.decode_string = function () {
var len = this.decode_integer('u32');
var buffer = new Uint8Array(this.buffer.consume_bytes(len));
return String.fromCharCode.apply(null, buffer);
return new utfUtil.TextDecoder().decode(buffer);
};
BorshDeserializer.prototype.decode_boolean = function () {
return this.buffer.consume_value('u8') > 0;
Expand Down
13 changes: 6 additions & 7 deletions lib/esm/serialize.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { integers } from './types.js';
import { EncodeBuffer } from './buffer.js';
import * as utils from './utils.js';
import * as utfUtil from 'util';
var BorshSerializer = /** @class */ (function () {
function BorshSerializer(checkTypes) {
this.encoded = new EncodeBuffer();
Expand Down Expand Up @@ -57,13 +58,11 @@ var BorshSerializer = /** @class */ (function () {
};
BorshSerializer.prototype.encode_string = function (value) {
this.checkTypes && utils.expect_type(value, 'string', this.fieldPath);
var _value = value;
// 4 bytes for length
this.encoded.store_value(_value.length, 'u32');
// string bytes
for (var i = 0; i < _value.length; i++) {
this.encoded.store_value(_value.charCodeAt(i), 'u8');
}
// encode to utf8 bytes
var utf8Bytes = new utfUtil.TextEncoder().encode(value);
// 4 bytes for length + string bytes
this.encoded.store_value(utf8Bytes.length, 'u32');
this.encoded.store_bytes(utf8Bytes);
};
BorshSerializer.prototype.encode_boolean = function (value) {
this.checkTypes && utils.expect_type(value, 'boolean', this.fieldPath);
Expand Down

0 comments on commit 2d28b1a

Please sign in to comment.