From a08c6bae92da418fc1d4e99572a76aa9e68a5d4d Mon Sep 17 00:00:00 2001 From: Simon Warta Date: Mon, 24 Oct 2022 16:49:20 +0200 Subject: [PATCH] Add lossy parameter to fromUtf8 --- CHANGELOG.md | 2 ++ packages/encoding/src/utf8.spec.ts | 12 ++++++++++++ packages/encoding/src/utf8.ts | 11 +++++++++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ba34fd9..5e641671 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to - @cosmjs/tendermint-rpc: Add `HttpBatchClient`, which implements `RpcClient`, supporting batch RPC requests ([#1300]). +- @cosmjs/encoding: Add `lossy` parameter to `fromUtf8` allowing the use of a + replacement character instead of throwing. ## [0.29.2] - 2022-10-13 diff --git a/packages/encoding/src/utf8.spec.ts b/packages/encoding/src/utf8.spec.ts index be52ef9d..2f7af8bc 100644 --- a/packages/encoding/src/utf8.spec.ts +++ b/packages/encoding/src/utf8.spec.ts @@ -1,3 +1,4 @@ +import { toAscii } from "./ascii"; import { fromUtf8, toUtf8 } from "./utf8"; describe("utf8", () => { @@ -59,4 +60,15 @@ describe("utf8", () => { // Broken UTF8 example from https://github.com/nodejs/node/issues/16894 expect(() => fromUtf8(new Uint8Array([0xf0, 0x80, 0x80]))).toThrow(); }); + + describe("fromUtf8", () => { + it("replaces characters in lossy mode", () => { + expect(fromUtf8(new Uint8Array([]), true)).toEqual(""); + expect(fromUtf8(new Uint8Array([0x61, 0x62, 0x63]), true)).toEqual("abc"); + // Example from https://doc.rust-lang.org/stable/std/string/struct.String.html#method.from_utf8_lossy + expect( + fromUtf8(new Uint8Array([...toAscii("Hello "), 0xf0, 0x90, 0x80, ...toAscii("World")]), true), + ).toEqual("Hello �World"); + }); + }); }); diff --git a/packages/encoding/src/utf8.ts b/packages/encoding/src/utf8.ts index 79380bba..bc063001 100644 --- a/packages/encoding/src/utf8.ts +++ b/packages/encoding/src/utf8.ts @@ -12,6 +12,13 @@ export function toUtf8(str: string): 
Uint8Array { return new TextEncoder().encode(str); } -export function fromUtf8(data: Uint8Array): string { - return new TextDecoder("utf-8", { fatal: true }).decode(data); +/** + * Takes UTF-8 data and decodes it to a string. + * + * In lossy mode, the replacement character � is used to substitute invalid + * encodings. By default lossy mode is off and invalid data will lead to exceptions. + */ +export function fromUtf8(data: Uint8Array, lossy = false): string { + const fatal = !lossy; + return new TextDecoder("utf-8", { fatal }).decode(data); }