Просмотр исходного кода

Added referenced performance of UTF-8 validation

git-svn-id: https://rapidjson.googlecode.com/svn/trunk@36 c5894555-1306-4e8d-425f-1f6f381ee07c
miloyip@gmail.com 14 лет назад
Родитель
Сommit
66754fc5b0
3 измененных файлов с 66 добавлено и 4 удалено
  1. 61 0
      test/perftest/misctest.cpp
  2. 1 0
      test/perftest/perftest.h
  3. 4 4
      test/perftest/rapidjsontest.cpp

+ 61 - 0
test/perftest/misctest.cpp

@@ -0,0 +1,61 @@
+#include "perftest.h"
+
+#if TEST_MISC
+
+class Misc : public PerfTest { // Fixture for the miscellaneous reference benchmarks; adds no members of its own to PerfTest.
+};
+
+// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+
+#define UTF8_ACCEPT 0 // State value IsUTF8 starts from (state = 0) and compares the final state against.
+#define UTF8_REJECT 12 // Sink state of the table: row 12 maps every class back to 12. Not referenced by name in the code shown here.
+
+static const unsigned char utf8d[] = {
+	// The first part of the table (256 entries) maps each byte value to a
+	// character class (0..11), which keeps the transition table small and lets decode() build bitmasks.
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // bytes 0x00-0x1F
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // bytes 0x20-0x3F
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // bytes 0x40-0x5F
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // bytes 0x60-0x7F
+	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // bytes 0x80-0x9F
+	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // bytes 0xA0-0xBF
+	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // bytes 0xC0-0xDF
+	10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, // bytes 0xE0-0xFF
+
+	// The second part is a transition table that maps a combination
+	// of a state of the automaton and a character class to a state (12 entries per state, two states per line).
+	0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, // rows for states 0 and 12
+	12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, // rows for states 24 and 36
+	12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, // rows for states 48 and 60
+	12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, // rows for states 72 and 84
+	12,36,12,12,12,12,12,12,12,12,12,12, // row for state 96
+};
+
+static unsigned inline decode(unsigned* state, unsigned* codep, unsigned byte) { // Feeds one byte to the automaton: updates *codep and *state in place and returns the new *state.
+	unsigned type = utf8d[byte]; // character class of the byte; byte must be below 256 because it indexes the first part of the table
+
+	*codep = (*state != UTF8_ACCEPT) ? // uses the state from BEFORE this byte, so this must run ahead of the state update below
+		(byte & 0x3fu) | (*codep << 6) : // not in the accept state: shift the accumulated value and append the byte's low 6 bits
+	(0xff >> type) & (byte); // in the accept state: start a new value, masking the byte with 0xff shifted right by its class
+
+	*state = utf8d[256 + *state + type]; // state values are multiples of 12, i.e. already row offsets into the transition part
+	return *state;
+}
+
+static bool IsUTF8(unsigned char* s) { // Runs decode() over every byte of s up to the first zero byte; true when the automaton ends in UTF8_ACCEPT.
+	unsigned codepoint, state = 0; // codepoint starts uninitialised: decode() does not read it while state is UTF8_ACCEPT, and it is never read here
+
+	while (*s) // no early exit on rejection; the table's row 12 keeps the state at 12 for the rest of the input
+		decode(&state, &codepoint, *s++);
+
+	return state == UTF8_ACCEPT; // also true for an empty string, since state starts at 0
+}
+
+TEST_F(Misc, Hoehrmann_IsUTF8) { // Calls IsUTF8 on json_ kTrialCount times and expects true every time.
+	for (int i = 0; i < kTrialCount; i++) { // kTrialCount and json_ are defined outside this file (presumably by perftest.h / PerfTest; confirm)
+		EXPECT_TRUE(IsUTF8((unsigned char*)json_));
+	}
+}
+
+#endif // TEST_MISC

+ 1 - 0
test/perftest/perftest.h

@@ -6,6 +6,7 @@
 #define TEST_YAJL		0
 #define TEST_ULTRAJSON  0
 #define TEST_PLATFORM   0
+#define TEST_MISC		1
 
 #if TEST_RAPIDJSON
 //#define RAPIDJSON_SSE2

+ 4 - 4
test/perftest/rapidjsontest.cpp

@@ -233,14 +233,14 @@ TEST_F(RapidJson, SIMD_SUFFIX(Whitespace)) {
 }
 
 TEST_F(RapidJson, UTF8_Validate) {
-	StringBuffer os(0, length_ + 1);
+	NullStream os;
 
 	for (int i = 0; i < kTrialCount; i++) {
 		StringStream is(json_);
-		os.Clear();
+		bool result = true;
 		while (is.Peek() != '\0')
-			UTF8<>::Validate(is, os);
-		EXPECT_EQ(length_, os.GetSize());
+			result &= UTF8<>::Validate(is, os);
+		EXPECT_TRUE(result);
 	}
 }