Procházet zdrojové kódy

Decode escaped Unicode characters like \u00DE (issue #304, PR #791)

Benoit Blanchon před 7 roky
rodič
revize
7050ef675d

+ 7 - 0
CHANGELOG.md

@@ -1,6 +1,13 @@
 ArduinoJson: change log
 =======================
 
+HEAD
+----
+
+* Decode escaped Unicode characters like \u00DE (issue #304, PR #791)
+  Many thanks to Daniel Schulte (aka @trilader) who implemented this feature.
+* Add option ARDUINOJSON_DECODE_UNICODE to enable it
+
 v6.8.0-beta (2019-01-30)
 -----------
 

+ 5 - 0
src/ArduinoJson/Configuration.hpp

@@ -120,6 +120,11 @@
 #endif
 #endif
 
+// Convert unicode escape sequence (\u0123) to UTF-8
+#ifndef ARDUINOJSON_DECODE_UNICODE
+#define ARDUINOJSON_DECODE_UNICODE 0
+#endif
+
 // Control the exponentiation threshold for big numbers
 // CAUTION: cannot be more than 1e9 !!!!
 #ifndef ARDUINOJSON_POSITIVE_EXPONENTIATION_THRESHOLD

+ 32 - 1
src/ArduinoJson/Json/JsonDeserializer.hpp

@@ -11,6 +11,7 @@
 #include "../Polyfills/type_traits.hpp"
 #include "../Variant/VariantData.hpp"
 #include "EscapeSequence.hpp"
+#include "Utf8.hpp"
 
 namespace ARDUINOJSON_NAMESPACE {
 
@@ -192,7 +193,18 @@ class JsonDeserializer {
       if (c == '\\') {
         c = current();
         if (c == '\0') return DeserializationError::IncompleteInput;
-        if (c == 'u') return DeserializationError::NotSupported;
+        if (c == 'u') {
+#if ARDUINOJSON_DECODE_UNICODE
+          uint16_t codepoint;
+          move();
+          DeserializationError err = parseCodepoint(codepoint);
+          if (err) return err;
+          Utf8::encodeCodepoint(codepoint, builder);
+          continue;
+#else
+          return DeserializationError::NotSupported;
+#endif
+        }
         // replace char
         c = EscapeSequence::unescapeChar(c);
         if (c == '\0') return DeserializationError::InvalidInput;
@@ -256,6 +268,19 @@ class JsonDeserializer {
     return DeserializationError::Ok;
   }
 
+  // Consumes four hexadecimal digits from the input and stores the 16-bit
+  // value they spell, most significant digit first, in `codepoint`.
+  // Returns IncompleteInput when a null character appears where a digit is
+  // expected, InvalidInput when decodeHex() returns a value above 0x0F, and
+  // Ok once all four digits have been read.
+  DeserializationError parseCodepoint(uint16_t &codepoint) {
+    codepoint = 0;
+    uint8_t remaining = 4;
+    while (remaining != 0) {
+      const char c = current();
+      if (c == '\0') return DeserializationError::IncompleteInput;
+      const uint8_t nibble = decodeHex(c);
+      if (nibble > 0x0F) return DeserializationError::InvalidInput;
+      codepoint = uint16_t((codepoint << 4) | nibble);
+      move();  // advance only after the digit has been accepted
+      --remaining;
+    }
+    return DeserializationError::Ok;
+  }
+
   // Tells whether `c` lies in the inclusive range [min, max].
   static inline bool isBetween(char c, char min, char max) {
     if (c < min) return false;
     return c <= max;
   }
@@ -269,6 +294,12 @@ class JsonDeserializer {
     return c == '\'' || c == '\"';
   }
 
+  // Converts one hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F') to its
+  // numeric value (0-15). Any other character yields 0xFF, which callers
+  // comparing the result against 0x0F treat as invalid.
+  static inline uint8_t decodeHex(char c) {
+    if (c >= '0' && c <= '9') return uint8_t(c - '0');
+    // Clear the case bit so that 'a'..'f' fold onto 'A'..'F'.
+    c = char(c & ~0x20);
+    if (c >= 'A' && c <= 'F') return uint8_t(c - 'A' + 10);
+    // Explicitly reject everything else: plain arithmetic would map ':'..'?'
+    // to 10..15 and '`' to 9, letting them pass as hex digits.
+    return 0xFF;
+  }
+
   DeserializationError skipSpacesAndComments() {
     for (;;) {
       switch (current()) {

+ 26 - 0
src/ArduinoJson/Json/Utf8.hpp

@@ -0,0 +1,26 @@
+// ArduinoJson - arduinojson.org
+// Copyright Benoit Blanchon 2014-2018
+// MIT License
+
+#pragma once
+
+namespace ARDUINOJSON_NAMESPACE {
+
+namespace Utf8 {
+// Appends the UTF-8 encoding of `codepoint` to `str`, one byte at a time via
+// str.append(char): 1 byte below 0x80, 2 bytes below 0x800, 3 bytes otherwise.
+// NOTE(review): values in the surrogate range (0xD800-0xDFFF) are not treated
+// specially here; they take the 3-byte path like any other value.
+template <typename TStringBuilder>
+inline void encodeCodepoint(uint16_t codepoint, TStringBuilder &str) {
+  // Every multi-byte form ends with the low six bits in a 10xxxxxx byte.
+  const char tail = char((codepoint & 0x3f) | 0x80);
+  if (codepoint < 0x80) {
+    str.append(char(codepoint));
+  } else if (codepoint < 0x800) {
+    str.append(char(0xc0 | (codepoint >> 6)));  // 110xxxxx
+    str.append(tail);
+  } else {
+    str.append(char(0xe0 | (codepoint >> 12)));          // 1110xxxx
+    str.append(char(((codepoint >> 6) & 0x3f) | 0x80));  // 10xxxxxx
+    str.append(tail);
+  }
+}
+}  // namespace Utf8
+}  // namespace ARDUINOJSON_NAMESPACE

+ 2 - 1
src/ArduinoJson/Namespace.hpp

@@ -19,4 +19,5 @@
 #define ARDUINOJSON_NAMESPACE                                                  \
   ARDUINOJSON_CONCAT8(ArduinoJson, ARDUINOJSON_VERSION_MAJOR,                  \
                       ARDUINOJSON_VERSION_MINOR, ARDUINOJSON_VERSION_REVISION, \
-                      _, ARDUINOJSON_USE_LONG_LONG, _, ARDUINOJSON_USE_DOUBLE)
+                      _, ARDUINOJSON_USE_LONG_LONG, ARDUINOJSON_USE_DOUBLE,    \
+                      ARDUINOJSON_DECODE_UNICODE)

+ 1 - 0
test/JsonDeserializer/CMakeLists.txt

@@ -9,6 +9,7 @@ add_executable(JsonDeserializerTests
 	deserializeJsonObject.cpp
 	deserializeJsonObjectStatic.cpp
 	deserializeJsonValue.cpp
+	deserializeJsonString.cpp
 	input_types.cpp
 	nestingLimit.cpp
 )

+ 6 - 0
test/JsonDeserializer/deserializeJsonObject.cpp

@@ -272,6 +272,12 @@ TEST_CASE("deserialize JSON object") {
 
       REQUIRE(err == DeserializationError::Ok);
     }
+
+    SECTION("Repeated key") {
+      DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}");
+
+      REQUIRE(err == DeserializationError::Ok);
+    }
   }
 
   SECTION("Block comments") {

+ 66 - 0
test/JsonDeserializer/deserializeJsonString.cpp

@@ -0,0 +1,66 @@
+// ArduinoJson - arduinojson.org
+// Copyright Benoit Blanchon 2014-2018
+// MIT License
+
+#define ARDUINOJSON_DECODE_UNICODE 1
+#include <ArduinoJson.h>
+#include <catch.hpp>
+
+using namespace Catch::Matchers;
+
+TEST_CASE("Valid JSON strings value") {  // each input must parse to its paired string
+  struct TestCase {
+    const char* input;  // JSON text passed to deserializeJson()
+    const char* expectedOutput;  // compared with doc.as<std::string>()
+  };
+
+  TestCase testCases[] = {
+      {"\"hello world\"", "hello world"},
+      {"\'hello world\'", "hello world"},
+      {"\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"", "1\"2\\3/4\b5\f6\n7\r8\t9"},
+      {"'\\u0041'", "A"},  // escape that decodes to a single ASCII byte
+      {"'\\u00e4'", "\xc3\xa4"},      // ä
+      {"'\\u00E4'", "\xc3\xa4"},      // ä
+      {"'\\u3042'", "\xe3\x81\x82"},  // あ
+
+  };
+  const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
+
+  DynamicJsonDocument doc(4096);  // one document reused for every case
+
+  for (size_t i = 0; i < testCount; i++) {
+    const TestCase& testCase = testCases[i];
+    CAPTURE(testCase.input);
+    DeserializationError err = deserializeJson(doc, testCase.input);
+    REQUIRE(err == DeserializationError::Ok);
+    REQUIRE(doc.as<std::string>() == testCase.expectedOutput);
+  }
+}
+
+TEST_CASE("Truncated JSON string") {  // inputs that end before the closing quote
+  const char* testCases[] = {"\"hello", "\'hello", "'\\u", "'\\u00", "'\\u000"};
+  const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
+
+  DynamicJsonDocument doc(4096);  // one document reused for every case
+
+  for (size_t i = 0; i < testCount; i++) {
+    const char* input = testCases[i];
+    CAPTURE(input);
+    REQUIRE(deserializeJson(doc, input) ==
+            DeserializationError::IncompleteInput);
+  }
+}
+
+TEST_CASE("Invalid JSON string") {  // every input must be rejected as InvalidInput
+  const char* testCases[] = {"'\\u'",     "'\\u000g'", "'\\u000'",
+                             "'\\u000G'", "'\\u000/'", "\\x1234"};  // the last input has no quotes
+  const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
+
+  DynamicJsonDocument doc(4096);  // one document reused for every case
+
+  for (size_t i = 0; i < testCount; i++) {
+    const char* input = testCases[i];
+    CAPTURE(input);
+    REQUIRE(deserializeJson(doc, input) == DeserializationError::InvalidInput);
+  }
+}

+ 101 - 127
test/JsonDeserializer/deserializeJsonValue.cpp

@@ -15,161 +15,133 @@ using ARDUINOJSON_NAMESPACE::isnan;
 TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
   DynamicJsonDocument doc(4096);
 
-  SECTION("null char*") {
-    DeserializationError err = deserializeJson(doc, static_cast<char*>(0));
+  SECTION("Edge cases") {
+    SECTION("null char*") {
+      DeserializationError err = deserializeJson(doc, static_cast<char*>(0));
 
-    REQUIRE(err != DeserializationError::Ok);
-  }
-
-  SECTION("null const char*") {
-    DeserializationError err =
-        deserializeJson(doc, static_cast<const char*>(0));
-
-    REQUIRE(err != DeserializationError::Ok);
-  }
-
-  SECTION("Integer") {
-    DeserializationError err = deserializeJson(doc, "-42");
-
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<int>());
-    REQUIRE_FALSE(doc.is<bool>());
-    REQUIRE(doc.as<int>() == -42);
-  }
-
-  SECTION("Double") {
-    DeserializationError err = deserializeJson(doc, "-1.23e+4");
-
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE_FALSE(doc.is<int>());
-    REQUIRE(doc.is<double>());
-    REQUIRE(doc.as<double>() == Approx(-1.23e+4));
-  }
-
-  SECTION("Double quoted string") {
-    DeserializationError err = deserializeJson(doc, "\"hello world\"");
-
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<char*>());
-    REQUIRE_THAT(doc.as<char*>(), Equals("hello world"));
-  }
+      REQUIRE(err != DeserializationError::Ok);
+    }
 
-  SECTION("Single quoted string") {
-    DeserializationError err = deserializeJson(doc, "\'hello world\'");
+    SECTION("null const char*") {
+      DeserializationError err =
+          deserializeJson(doc, static_cast<const char*>(0));
 
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<char*>());
-    REQUIRE_THAT(doc.as<char*>(), Equals("hello world"));
-  }
-
-  SECTION("Escape sequences") {
-    DeserializationError err =
-        deserializeJson(doc, "\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"");
+      REQUIRE(err != DeserializationError::Ok);
+    }
 
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.as<std::string>() == "1\"2\\3/4\b5\f6\n7\r8\t9");
-  }
+    SECTION("Empty input") {
+      DeserializationError err = deserializeJson(doc, "");
 
-  SECTION("UTF-16 surrogate") {
-    DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\"");
+      REQUIRE(err == DeserializationError::IncompleteInput);
+    }
 
-    REQUIRE(err == DeserializationError::NotSupported);
-  }
+    SECTION("issue #628") {
+      DeserializationError err = deserializeJson(doc, "null");
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE(doc.is<float>() == false);
+    }
 
-  SECTION("True") {
-    DeserializationError err = deserializeJson(doc, "true");
+    SECTION("Garbage") {
+      DeserializationError err = deserializeJson(doc, "%*$£¤");
 
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<bool>());
-    REQUIRE(doc.as<bool>() == true);
+      REQUIRE(err == DeserializationError::InvalidInput);
+    }
   }
 
-  SECTION("False") {
-    DeserializationError err = deserializeJson(doc, "false");
+  SECTION("Integers") {
+    SECTION("0") {
+      DeserializationError err = deserializeJson(doc, "0");
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE(doc.is<int>() == true);
+      REQUIRE(doc.as<int>() == 0);
+      REQUIRE(doc.as<std::string>() == "0");  // issue #808
+    }
 
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<bool>());
-    REQUIRE(doc.as<bool>() == false);
-  }
+    SECTION("Negative") {
+      DeserializationError err = deserializeJson(doc, "-42");
 
-  SECTION("0") {
-    DeserializationError err = deserializeJson(doc, "0");
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<int>() == true);
-    REQUIRE(doc.as<int>() == 0);
-    REQUIRE(doc.as<std::string>() == "0");  // issue #808
-  }
-
-  SECTION("NaN") {
-    DeserializationError err = deserializeJson(doc, "NaN");
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<float>() == true);
-    REQUIRE(my::isnan(doc.as<float>()));
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE(doc.is<int>());
+      REQUIRE_FALSE(doc.is<bool>());
+      REQUIRE(doc.as<int>() == -42);
+    }
   }
 
-  SECTION("Infinity") {
-    DeserializationError err = deserializeJson(doc, "Infinity");
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<float>() == true);
-    REQUIRE(my::isinf(doc.as<float>()));
-  }
+  SECTION("Floats") {
+    SECTION("Double") {
+      DeserializationError err = deserializeJson(doc, "-1.23e+4");
 
-  SECTION("+Infinity") {
-    DeserializationError err = deserializeJson(doc, "+Infinity");
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<float>() == true);
-    REQUIRE(my::isinf(doc.as<float>()));
-  }
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE_FALSE(doc.is<int>());
+      REQUIRE(doc.is<double>());
+      REQUIRE(doc.as<double>() == Approx(-1.23e+4));
+    }
 
-  SECTION("-Infinity") {
-    DeserializationError err = deserializeJson(doc, "-Infinity");
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<float>() == true);
-    REQUIRE(my::isinf(doc.as<float>()));
-  }
+    SECTION("NaN") {
+      DeserializationError err = deserializeJson(doc, "NaN");
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE(doc.is<float>() == true);
+      REQUIRE(my::isnan(doc.as<float>()));
+    }
 
-  SECTION("issue #628") {
-    DeserializationError err = deserializeJson(doc, "null");
-    REQUIRE(err == DeserializationError::Ok);
-    REQUIRE(doc.is<float>() == false);
-  }
+    SECTION("Infinity") {
+      DeserializationError err = deserializeJson(doc, "Infinity");
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE(doc.is<float>() == true);
+      REQUIRE(my::isinf(doc.as<float>()));
+    }
 
-  SECTION("Should clear the JsonVariant") {
-    deserializeJson(doc, "[1,2,3]");
-    deserializeJson(doc, "{}");
+    SECTION("+Infinity") {
+      DeserializationError err = deserializeJson(doc, "+Infinity");
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE(doc.is<float>() == true);
+      REQUIRE(my::isinf(doc.as<float>()));
+    }
 
-    REQUIRE(doc.is<JsonObject>());
-    REQUIRE(doc.memoryUsage() == JSON_OBJECT_SIZE(0));
+    SECTION("-Infinity") {
+      DeserializationError err = deserializeJson(doc, "-Infinity");
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE(doc.is<float>() == true);
+      REQUIRE(my::isinf(doc.as<float>()));
+    }
   }
 
-  SECTION("Empty input") {
-    DeserializationError err = deserializeJson(doc, "");
+  SECTION("Booleans") {
+    SECTION("True") {
+      DeserializationError err = deserializeJson(doc, "true");
 
-    REQUIRE(err == DeserializationError::IncompleteInput);
-  }
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE(doc.is<bool>());
+      REQUIRE(doc.as<bool>() == true);
+    }
 
-  SECTION("Just a trailing comment") {
-    DeserializationError err = deserializeJson(doc, "// comment");
+    SECTION("False") {
+      DeserializationError err = deserializeJson(doc, "false");
 
-    REQUIRE(err == DeserializationError::IncompleteInput);
+      REQUIRE(err == DeserializationError::Ok);
+      REQUIRE(doc.is<bool>());
+      REQUIRE(doc.as<bool>() == false);
+    }
   }
 
-  SECTION("Just a block comment") {
-    DeserializationError err = deserializeJson(doc, "/*comment*/");
+  SECTION("Comments") {
+    SECTION("Just a trailing comment") {
+      DeserializationError err = deserializeJson(doc, "// comment");
 
-    REQUIRE(err == DeserializationError::IncompleteInput);
-  }
+      REQUIRE(err == DeserializationError::IncompleteInput);
+    }
 
-  SECTION("Just a slash") {
-    DeserializationError err = deserializeJson(doc, "/");
+    SECTION("Just a block comment") {
+      DeserializationError err = deserializeJson(doc, "/*comment*/");
 
-    REQUIRE(err == DeserializationError::InvalidInput);
-  }
+      REQUIRE(err == DeserializationError::IncompleteInput);
+    }
 
-  SECTION("Garbage") {
-    DeserializationError err = deserializeJson(doc, "%*$£¤");
+    SECTION("Just a slash") {
+      DeserializationError err = deserializeJson(doc, "/");
 
-    REQUIRE(err == DeserializationError::InvalidInput);
+      REQUIRE(err == DeserializationError::InvalidInput);
+    }
   }
 
   SECTION("Premature null-terminator") {
@@ -224,9 +196,11 @@ TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
     }
   }
 
-  SECTION("Repeated object key") {
-    DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}");
+  SECTION("Should clear the JsonVariant") {
+    deserializeJson(doc, "[1,2,3]");
+    deserializeJson(doc, "{}");
 
-    REQUIRE(err == DeserializationError::Ok);
+    REQUIRE(doc.is<JsonObject>());
+    REQUIRE(doc.memoryUsage() == JSON_OBJECT_SIZE(0));
   }
 }

+ 2 - 0
test/MixedConfiguration/CMakeLists.txt

@@ -6,6 +6,8 @@
 set(CMAKE_CXX_STANDARD 11)
 
 add_executable(MixedConfigurationTests
+	decode_unicode_0.cpp
+	decode_unicode_1.cpp
 	use_double_0.cpp
 	use_double_1.cpp
 	use_long_long_0.cpp

+ 11 - 0
test/MixedConfiguration/decode_unicode_0.cpp

@@ -0,0 +1,11 @@
+#define ARDUINOJSON_DECODE_UNICODE 0
+#include <ArduinoJson.h>
+
+#include <catch.hpp>
+
+TEST_CASE("ARDUINOJSON_DECODE_UNICODE == 0") {  // built with the option defined to 0 in this file
+  DynamicJsonDocument doc(2048);
+  DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\"");  // string made of two unicode escapes
+
+  REQUIRE(err == DeserializationError::NotSupported);  // escapes are refused in this configuration
+}

+ 11 - 0
test/MixedConfiguration/decode_unicode_1.cpp

@@ -0,0 +1,11 @@
+#define ARDUINOJSON_DECODE_UNICODE 1
+#include <ArduinoJson.h>
+
+#include <catch.hpp>
+
+TEST_CASE("ARDUINOJSON_DECODE_UNICODE == 1") {  // built with the option defined to 1 in this file
+  DynamicJsonDocument doc(2048);
+  DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\"");  // string made of two unicode escapes
+
+  REQUIRE(err == DeserializationError::Ok);  // only the status is checked, not the decoded bytes
+}