Przeglądaj źródła

Fixed regression in UTF16 decoding (fixes #1173)

Benoit Blanchon 6 lat temu
rodzic
commit
09d4b2cd38

+ 5 - 0
CHANGELOG.md

@@ -1,6 +1,11 @@
 ArduinoJson: change log
 =======================
 
+HEAD
+----
+
+* Fixed regression in UTF16 decoding (issue #1173)
+
 v6.14.0 (2020-01-16)
 -------
 

+ 3 - 0
extras/tests/JsonDeserializer/string.cpp

@@ -21,6 +21,9 @@ TEST_CASE("Valid JSON strings value") {
       {"'\\u00E4'", "\xc3\xa4"},                 // ä
       {"'\\u3042'", "\xe3\x81\x82"},             // あ
       {"'\\ud83d\\udda4'", "\xf0\x9f\x96\xa4"},  // 🖤
+      {"'\\uF053'", "\xef\x81\x93"},             // issue #1173
+      {"'\\uF015'", "\xef\x80\x95"},             // issue #1173
+      {"'\\uF054'", "\xef\x81\x94"},             // issue #1173
   };
   const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
 

+ 1 - 0
extras/tests/Misc/CMakeLists.txt

@@ -11,6 +11,7 @@ add_executable(MiscTests
 	TypeTraits.cpp
 	unsigned_char.cpp
 	Utf8.cpp
+	Utf16.cpp
 	version.cpp
 )
 

+ 68 - 0
extras/tests/Misc/Utf16.cpp

@@ -0,0 +1,68 @@
+// ArduinoJson - arduinojson.org
+// Copyright Benoit Blanchon 2014-2020
+// MIT License
+
+#include <ArduinoJson/Json/Utf16.hpp>
+#include <catch.hpp>
+
+using namespace ARDUINOJSON_NAMESPACE;
+
+// Feeds a single code unit to a fresh Utf16::Codepoint and requires that
+// append() reports a complete code point whose value() equals
+// expectedCodepoint.
+static void testUtf16Codepoint(uint16_t codeunit, uint32_t expectedCodepoint) {
+  Utf16::Codepoint cp;
+  REQUIRE(cp.append(codeunit) == true);
+  REQUIRE(cp.value() == expectedCodepoint);
+}
+
+// Feeds two code units to a fresh Utf16::Codepoint. The first append() must
+// return false (code point not complete yet), the second must return true,
+// and value() must then equal expectedCodepoint.
+static void testUtf16Codepoint(uint16_t codeunit1, uint16_t codeunit2,
+                               uint32_t expectedCodepoint) {
+  Utf16::Codepoint cp;
+  REQUIRE(cp.append(codeunit1) == false);
+  REQUIRE(cp.append(codeunit2) == true);
+  REQUIRE(cp.value() == expectedCodepoint);
+}
+
+// Exercises Utf16::Codepoint on single code units and on surrogate pairs.
+// Each section is named after the code point it expects to decode.
+TEST_CASE("Utf16::Codepoint()") {
+  // Single code units: values on either side of the surrogate range
+  // (0xD800-0xDFFF) plus the extremes of the 16-bit range.
+  SECTION("U+0000") {
+    testUtf16Codepoint(0x0000, 0x000000);
+  }
+
+  SECTION("U+0001") {
+    testUtf16Codepoint(0x0001, 0x000001);
+  }
+
+  SECTION("U+D7FF") {
+    testUtf16Codepoint(0xD7FF, 0x00D7FF);
+  }
+
+  SECTION("U+E000") {
+    testUtf16Codepoint(0xE000, 0x00E000);
+  }
+
+  SECTION("U+FFFF") {
+    testUtf16Codepoint(0xFFFF, 0x00FFFF);
+  }
+
+  // Surrogate pairs: lowest and highest value of each half.
+  SECTION("U+010000") {
+    testUtf16Codepoint(0xD800, 0xDC00, 0x010000);
+  }
+
+  SECTION("U+010001") {
+    testUtf16Codepoint(0xD800, 0xDC01, 0x010001);
+  }
+
+  SECTION("U+0103FF") {
+    testUtf16Codepoint(0xD800, 0xDFFF, 0x0103FF);
+  }
+
+  SECTION("U+010400") {
+    testUtf16Codepoint(0xD801, 0xDC00, 0x010400);
+  }
+
+  // Highest high surrogate combined with the lowest low surrogate.
+  SECTION("U+10FC00") {
+    testUtf16Codepoint(0xDBFF, 0xDC00, 0x10FC00);
+  }
+
+  SECTION("U+10FFFF") {
+    testUtf16Codepoint(0xDBFF, 0xDFFF, 0x10FFFF);
+  }
+}

+ 4 - 12
src/ArduinoJson/Json/JsonDeserializer.hpp

@@ -6,6 +6,7 @@
 
 #include <ArduinoJson/Deserialization/deserialize.hpp>
 #include <ArduinoJson/Json/EscapeSequence.hpp>
+#include <ArduinoJson/Json/Utf16.hpp>
 #include <ArduinoJson/Json/Utf8.hpp>
 #include <ArduinoJson/Memory/MemoryPool.hpp>
 #include <ArduinoJson/Numbers/parseNumber.hpp>
@@ -190,7 +191,7 @@ class JsonDeserializer {
   DeserializationError parseQuotedString(const char *&result) {
     StringBuilder builder = _stringStorage.startString();
 #if ARDUINOJSON_DECODE_UNICODE
-    uint16_t surrogate1 = 0;
+    Utf16::Codepoint codepoint;
 #endif
     const char stopChar = current();
 
@@ -208,20 +209,11 @@ class JsonDeserializer {
         if (c == 'u') {
 #if ARDUINOJSON_DECODE_UNICODE
           move();
-          uint32_t codepoint;
           uint16_t codeunit;
           DeserializationError err = parseHex4(codeunit);
           if (err) return err;
-          if (codeunit >= 0xDC00) {
-            codepoint =
-                uint32_t(0x10000 | ((surrogate1 << 10) | (codeunit & 0x3FF)));
-          } else if (codeunit < 0xd800) {
-            codepoint = codeunit;
-          } else {
-            surrogate1 = codeunit & 0x3FF;
-            continue;
-          }
-          Utf8::encodeCodepoint(codepoint, builder);
+          if (codepoint.append(codeunit))
+            Utf8::encodeCodepoint(codepoint.value(), builder);
           continue;
 #else
           return DeserializationError::NotSupported;

+ 49 - 0
src/ArduinoJson/Json/Utf16.hpp

@@ -0,0 +1,49 @@
+// ArduinoJson - arduinojson.org
+// Copyright Benoit Blanchon 2014-2020
+// MIT License
+
+#pragma once
+
+#include <ArduinoJson/Namespace.hpp>
+
+#include <stdint.h>  // uint16_t, uint32_t
+
+namespace ARDUINOJSON_NAMESPACE {
+
+namespace Utf16 {
+// Returns true when codeunit lies in the high-surrogate range
+// [0xD800, 0xDC00).
+inline bool isHighSurrogate(uint16_t codeunit) {
+  return codeunit >= 0xD800 && codeunit < 0xDC00;
+}
+
+// Returns true when codeunit lies in the low-surrogate range
+// [0xDC00, 0xE000).
+inline bool isLowSurrogate(uint16_t codeunit) {
+  return codeunit >= 0xDC00 && codeunit < 0xE000;
+}
+
+// Accumulates UTF-16 code units into a single code point.
+//
+// Call append() once per code unit; when it returns true, value() holds the
+// decoded code point. A high surrogate is only stored (append() returns
+// false) and is combined with the low surrogate passed to a later call.
+class Codepoint {
+ public:
+  // Starts from a fully defined state so that value() before any append(),
+  // or a low surrogate with no preceding high surrogate, never reads an
+  // indeterminate member.
+  Codepoint() : _highSurrogate(0), _codepoint(0) {}
+
+  // Consumes one code unit.
+  // Returns true if a code point is now available through value(), false if
+  // the unit was a high surrogate and another unit is still needed.
+  bool append(uint16_t codeunit) {
+    if (isHighSurrogate(codeunit)) {
+      // Keep only the 10 payload bits of the high surrogate.
+      _highSurrogate = codeunit & 0x3FF;
+      return false;
+    }
+
+    if (isLowSurrogate(codeunit)) {
+      // Widen before shifting: the 10-bit payload shifted left by 10 needs
+      // 20 bits, which does not fit in an int narrower than 32 bits.
+      _codepoint = uint32_t(0x10000) + ((uint32_t(_highSurrogate) << 10) |
+                                        uint32_t(codeunit & 0x3FF));
+      return true;
+    }
+
+    // Not a surrogate: the code unit is the code point.
+    _codepoint = codeunit;
+    return true;
+  }
+
+  // Returns the code point produced by the last append() that returned true
+  // (0 if there has been none).
+  uint32_t value() const {
+    return _codepoint;
+  }
+
+ private:
+  uint16_t _highSurrogate;  // 10 payload bits of the pending high surrogate
+  uint32_t _codepoint;      // last decoded code point
+};
+}  // namespace Utf16
+}  // namespace ARDUINOJSON_NAMESPACE