From 7945f41ccc63510a3fcb68d7c00e0c48143c4ed3 Mon Sep 17 00:00:00 2001 From: Alex Beregszaszi Date: Fri, 5 Aug 2016 10:56:59 +0100 Subject: [PATCH] Include UTF8 helpers in libdevcore --- libdevcore/UTF8.cpp | 86 +++++++++++++++++++++++++++++++++++++++++++++ libdevcore/UTF8.h | 40 +++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 libdevcore/UTF8.cpp create mode 100644 libdevcore/UTF8.h diff --git a/libdevcore/UTF8.cpp b/libdevcore/UTF8.cpp new file mode 100644 index 000000000..4bae75efa --- /dev/null +++ b/libdevcore/UTF8.cpp @@ -0,0 +1,86 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. 
namespace dev
{

namespace utf8
{

/// Check whether @a input is well-formed UTF-8 as defined by RFC 3629.
///
/// In addition to the lead-byte / continuation-byte structure, overlong
/// encodings, UTF-16 surrogate code points (U+D800..U+DFFF) and code points
/// above U+10FFFF are rejected.
///
/// @param input byte string to check; an empty string is valid.
/// @param invalidPosition receives the byte offset of the first offending byte
///        when validation fails and is left untouched when the input is valid.
///        For an illegal lead byte or a truncated sequence this is the offset
///        of the lead byte; for a bad trailing byte it is the offset of that byte.
/// @returns true if the whole input is valid UTF-8, false otherwise.
bool validate(std::string input, int &invalidPosition)
{
	// The parameter stays by-value so the definition keeps matching the
	// declaration published in the header.
	using size_type = std::string::size_type;

	const size_type length = input.length();
	size_type i = 0;

	while (i < length)
	{
		// Go through unsigned char: plain char may be signed, which would
		// turn every byte >= 0x80 into a negative value.
		const unsigned lead = static_cast<unsigned char>(input[i]);

		// Plain ASCII occupies a single byte.
		if (lead < 0x80)
		{
			++i;
			continue;
		}

		// Number of trailing bytes, and the range allowed for the first of
		// them. That range is narrower than 0x80..0xbf for a few lead bytes.
		size_type count = 0;
		unsigned secondMin = 0x80;
		unsigned secondMax = 0xbf;

		if (lead >= 0xc2 && lead <= 0xdf)
			count = 1;
		else if (lead >= 0xe0 && lead <= 0xef)
		{
			count = 2;
			if (lead == 0xe0)
				secondMin = 0xa0; // below this the encoding is overlong
			else if (lead == 0xed)
				secondMax = 0x9f; // above this lies the surrogate range
		}
		else if (lead >= 0xf0 && lead <= 0xf4)
		{
			count = 3;
			if (lead == 0xf0)
				secondMin = 0x90; // below this the encoding is overlong
			else if (lead == 0xf4)
				secondMax = 0x8f; // above this exceeds U+10FFFF
		}
		else
		{
			// Stray continuation byte (0x80..0xbf), always-overlong lead
			// (0xc0, 0xc1) or a lead byte that cannot occur (0xf5..0xff).
			invalidPosition = static_cast<int>(i);
			return false;
		}

		// The sequence needs count + 1 bytes starting at i.
		if (length - i <= count)
		{
			invalidPosition = static_cast<int>(i);
			return false;
		}

		for (size_type j = 1; j <= count; ++j)
		{
			const unsigned trail = static_cast<unsigned char>(input[i + j]);
			const unsigned lowest = (j == 1) ? secondMin : 0x80u;
			const unsigned highest = (j == 1) ? secondMax : 0xbfu;

			if (trail < lowest || trail > highest)
			{
				// Report the offending byte itself and stop right away.
				invalidPosition = static_cast<int>(i + j);
				return false;
			}
		}

		i += count + 1;
	}

	return true;
}


}

}
/** @file UTF8.h
 * @author Alex Beregszaszi
 * @date 2016
 *
 * UTF-8 related helpers
 */

#pragma once

#include <string>

namespace dev
{

namespace utf8
{

/// Validate an input for UTF-8 encoding.
/// @param input the byte string to check.
/// @param invalidPosition set to indicate where validation failed when the
///        input is rejected; not modified when the input is valid.
/// @returns true if the input is valid UTF-8, false otherwise.
bool validate(std::string input, int &invalidPosition);

}

}