diff --git a/CHANGELOG.md b/CHANGELOG.md
index 753de437..57f2582d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,28 @@
+v1.5 (15 April 2017)
++ added index flag to the filters, to help enable faster lookup during classification (win8 and above)
++ added app container loopback traffic permission (win8 and above)
++ added "allow listen connections for all" config
++ added ctrl+f4 hotkey to hide main window (request)
++ added loopback indication for dropped packets log
+- copy real path instead of display path on copy command in main window listview
+- do not show dropped packets notifications when filters are not installed
+- if boot-time filters are enabled, apply system rules at boot-time too
+- fixed custom rules for apps sometimes not being saved
+- removed running without admin rights feature
+- changed error notification logic
+- cosmetic fixes for tooltips
+- fixed disabling windows firewall on some systems
+- fixed settings tabstop not working
+- fixed incorrect listview icons for some apps
+- fixed some apps having no icons in process list
+- fixed possible duplicate filters
+- fixed purge unused apps
+- stability improvements
+- updated translations
+- updated system rules
+- updated pugixml
+- fixed bugs
+
v1.4.6 (5 April 2017)
+ added write error logs into a file feature
- updated translations
diff --git a/bin/History.txt b/bin/History.txt
index 59234204..6ef44e82 100644
--- a/bin/History.txt
+++ b/bin/History.txt
@@ -1,3 +1,28 @@
+v1.5 (15 April 2017)
++ added index flag to the filters, to help enable faster lookup during classification (win8 and above)
++ added app container loopback traffic permission (win8 and above)
++ added "allow listen connections for all" config
++ added ctrl+f4 hotkey to hide main window (request)
++ added loopback indication for dropped packets log
+- copy real path instead of display path on copy command in main window listview
+- do not show dropped packets notifications when filters are not installed
+- if boot-time filters are enabled, apply system rules at boot-time too
+- fixed custom rules for apps sometimes not being saved
+- removed running without admin rights feature
+- changed error notification logic
+- cosmetic fixes for tooltips
+- fixed disabling windows firewall on some systems
+- fixed settings tabstop not working
+- fixed incorrect listview icons for some apps
+- fixed some apps having no icons in process list
+- fixed possible duplicate filters
+- fixed purge unused apps
+- stability improvements
+- updated translations
+- updated system rules
+- updated pugixml
+- fixed bugs
+
v1.4.6 (5 April 2017)
+ added write error logs into a file feature
- fixed process list does not recognize pico applications on win10
diff --git a/bin/i18n/!example.txt b/bin/i18n/!example.txt
index e313150d..45a09bc5 100644
Binary files a/bin/i18n/!example.txt and b/bin/i18n/!example.txt differ
diff --git a/bin/i18n/Brazilian Portuguese.ini b/bin/i18n/Brazilian Portuguese.ini
index dcf83cd6..186dab58 100644
Binary files a/bin/i18n/Brazilian Portuguese.ini and b/bin/i18n/Brazilian Portuguese.ini differ
diff --git a/bin/i18n/Farsi.ini b/bin/i18n/Farsi.ini
index 5889ba9f..2e7ad4ce 100644
Binary files a/bin/i18n/Farsi.ini and b/bin/i18n/Farsi.ini differ
diff --git a/bin/i18n/French.ini b/bin/i18n/French.ini
index 99fc642e..4ea2ca0b 100644
Binary files a/bin/i18n/French.ini and b/bin/i18n/French.ini differ
diff --git a/bin/i18n/Korean.ini b/bin/i18n/Korean.ini
index 236c45de..7b66bd0a 100644
Binary files a/bin/i18n/Korean.ini and b/bin/i18n/Korean.ini differ
diff --git a/bin/i18n/Polish.ini b/bin/i18n/Polish.ini
index 19882761..507c8a55 100644
Binary files a/bin/i18n/Polish.ini and b/bin/i18n/Polish.ini differ
diff --git a/bin/i18n/Russian.ini b/bin/i18n/Russian.ini
index 5e6be270..d524018e 100644
Binary files a/bin/i18n/Russian.ini and b/bin/i18n/Russian.ini differ
diff --git a/bin/i18n/Simplified Chinese.ini b/bin/i18n/Simplified Chinese.ini
index 29a3ec23..b6ad796b 100644
Binary files a/bin/i18n/Simplified Chinese.ini and b/bin/i18n/Simplified Chinese.ini differ
diff --git a/bin/i18n/Spanish.ini b/bin/i18n/Spanish.ini
index 7fefd846..eb41c1cd 100644
Binary files a/bin/i18n/Spanish.ini and b/bin/i18n/Spanish.ini differ
diff --git a/bin/rules_custom.xml b/bin/rules_custom.xml
index e7763b71..913cfa57 100644
Binary files a/bin/rules_custom.xml and b/bin/rules_custom.xml differ
diff --git a/bin/rules_system.xml b/bin/rules_system.xml
index d2ff08ce..448dd4a3 100644
Binary files a/bin/rules_system.xml and b/bin/rules_system.xml differ
diff --git a/simplewall.vcxproj b/simplewall.vcxproj
index fd99f397..e7bd9aa8 100644
--- a/simplewall.vcxproj
+++ b/simplewall.vcxproj
@@ -29,28 +29,28 @@
Application
true
- v140_xp
+ v140
Unicode
true
Application
false
- v140_xp
+ v140
true
Unicode
Application
true
- v140_xp
+ v140
Unicode
true
Application
false
- v140_xp
+ v140
true
Unicode
@@ -121,12 +121,12 @@
Neither
true
FastCall
- MultiThreadedDebugDLL
+ MultiThreadedDebug
false
Sync
false
true
- _UNICODE;UNICODE;_APP_HAVE_AUTORUN;_APP_HAVE_SKIPUAC;_APP_HAVE_TRAY;%(PreprocessorDefinitions)
+ _UNICODE;UNICODE;_APP_NO_GUEST;_APP_HAVE_AUTORUN;_APP_HAVE_SKIPUAC;_APP_HAVE_TRAY;%(PreprocessorDefinitions)
true
@@ -160,12 +160,12 @@
Neither
true
FastCall
- MultiThreadedDebugDLL
+ MultiThreadedDebug
false
Sync
false
true
- _UNICODE;UNICODE;_APP_HAVE_AUTORUN;_APP_HAVE_SKIPUAC;_APP_HAVE_TRAY;%(PreprocessorDefinitions)
+ _UNICODE;UNICODE;_APP_NO_GUEST;_APP_HAVE_AUTORUN;_APP_HAVE_SKIPUAC;_APP_HAVE_TRAY;%(PreprocessorDefinitions)
true
@@ -210,7 +210,7 @@
None
- _UNICODE;UNICODE;_APP_HAVE_AUTORUN;_APP_HAVE_SKIPUAC;_APP_HAVE_TRAY;%(PreprocessorDefinitions)
+ _UNICODE;UNICODE;_APP_NO_GUEST;_APP_HAVE_AUTORUN;_APP_HAVE_SKIPUAC;_APP_HAVE_TRAY;%(PreprocessorDefinitions)
true
@@ -254,7 +254,7 @@
None
- _UNICODE;UNICODE;_APP_HAVE_AUTORUN;_APP_HAVE_SKIPUAC;_APP_HAVE_TRAY;%(PreprocessorDefinitions)
+ _UNICODE;UNICODE;_APP_NO_GUEST;_APP_HAVE_AUTORUN;_APP_HAVE_SKIPUAC;_APP_HAVE_TRAY;%(PreprocessorDefinitions)
false
diff --git a/src/include/pugixml/pugiconfig.hpp b/src/include/pugixml/pugiconfig.hpp
index b0794d8c..1382718e 100644
--- a/src/include/pugixml/pugiconfig.hpp
+++ b/src/include/pugixml/pugiconfig.hpp
@@ -18,7 +18,7 @@
#define PUGIXML_WCHAR_MODE
// Uncomment this to enable compact mode
-#define PUGIXML_COMPACT
+// #define PUGIXML_COMPACT
// Uncomment this to disable XPath
#define PUGIXML_NO_XPATH
diff --git a/src/include/pugixml/pugixml.cpp b/src/include/pugixml/pugixml.cpp
index 5b77a271..cac51a53 100644
--- a/src/include/pugixml/pugixml.cpp
+++ b/src/include/pugixml/pugixml.cpp
@@ -1,7 +1,7 @@
/**
- * pugixml parser - version 1.6
+ * pugixml parser - version 1.8
* --------------------------------------------------------
- * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at http://pugixml.org/
*
* This library is distributed under the MIT License. See notice at the end
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#ifdef PUGIXML_WCHAR_MODE
# include
@@ -53,7 +54,7 @@
#endif
#ifdef __INTEL_COMPILER
-# pragma warning(disable: 177) // function was declared but never referenced
+# pragma warning(disable: 177) // function was declared but never referenced
# pragma warning(disable: 279) // controlling expression is constant
# pragma warning(disable: 1478 1786) // function was declared "deprecated"
# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
@@ -81,7 +82,7 @@
#elif defined(__GNUC__)
# define PUGI__NO_INLINE __attribute__((noinline))
#else
-# define PUGI__NO_INLINE
+# define PUGI__NO_INLINE
#endif
// Branch weight controls
@@ -105,6 +106,14 @@
#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
using std::memcpy;
using std::memmove;
+using std::memset;
+#endif
+
+// Some MinGW versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions in strict ANSI mode
+#if defined(PUGIXML_HAS_LONG_LONG) && defined(__MINGW32__) && defined(__STRICT_ANSI__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
+# define LLONG_MAX 9223372036854775807LL
+# define LLONG_MIN (-LLONG_MAX-1)
+# define ULLONG_MAX (2ULL*LLONG_MAX+1)
#endif
// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
@@ -130,19 +139,19 @@ using std::memmove;
#endif
// uintptr_t
-#if !defined(_MSC_VER) || _MSC_VER >= 1600
-# include
-#else
+#if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
+namespace pugi
+{
# ifndef _UINTPTR_T_DEFINED
-// No native uintptr_t in MSVC6 and in some WinCE versions
-typedef size_t uintptr_t;
-#define _UINTPTR_T_DEFINED
+ typedef size_t uintptr_t;
# endif
-PUGI__NS_BEGIN
+
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
-PUGI__NS_END
+}
+#else
+# include
#endif
// Memory allocation
@@ -204,7 +213,7 @@ PUGI__NS_BEGIN
for (size_t i = 0; i < count; ++i)
if (lhs[i] != rhs[i])
return false;
-
+
return lhs[count] == 0;
}
@@ -221,63 +230,206 @@ PUGI__NS_BEGIN
return static_cast(end - s);
#endif
}
-
-#ifdef PUGIXML_WCHAR_MODE
- // Convert string to wide string, assuming all symbols are ASCII
- PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
- {
- for (const char* i = source; *i; ++i) *dest++ = *i;
- *dest = 0;
- }
-#endif
PUGI__NS_END
-#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
-// auto_ptr-like buffer holder for exception recovery
+// auto_ptr-like object for exception recovery
PUGI__NS_BEGIN
- struct buffer_holder
+ template struct auto_deleter
{
- void* data;
- void (*deleter)(void*);
+ typedef void (*D)(T*);
- buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
+ T* data;
+ D deleter;
+
+ auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
{
}
- ~buffer_holder()
+ ~auto_deleter()
{
if (data) deleter(data);
}
- void* release()
+ T* release()
{
- void* result = data;
+ T* result = data;
data = 0;
return result;
}
};
PUGI__NS_END
+
+#ifdef PUGIXML_COMPACT
+PUGI__NS_BEGIN
+ class compact_hash_table
+ {
+ public:
+ compact_hash_table(): _items(0), _capacity(0), _count(0)
+ {
+ }
+
+ void clear()
+ {
+ if (_items)
+ {
+ xml_memory::deallocate(_items);
+ _items = 0;
+ _capacity = 0;
+ _count = 0;
+ }
+ }
+
+ void** find(const void* key)
+ {
+ assert(key);
+
+ if (_capacity == 0) return 0;
+
+ size_t hashmod = _capacity - 1;
+ size_t bucket = hash(key) & hashmod;
+
+ for (size_t probe = 0; probe <= hashmod; ++probe)
+ {
+ item_t& probe_item = _items[bucket];
+
+ if (probe_item.key == key)
+ return &probe_item.value;
+
+ if (probe_item.key == 0)
+ return 0;
+
+ // hash collision, quadratic probing
+ bucket = (bucket + probe + 1) & hashmod;
+ }
+
+ assert(false && "Hash table is full");
+ return 0;
+ }
+
+ void** insert(const void* key)
+ {
+ assert(key);
+ assert(_capacity != 0 && _count < _capacity - _capacity / 4);
+
+ size_t hashmod = _capacity - 1;
+ size_t bucket = hash(key) & hashmod;
+
+ for (size_t probe = 0; probe <= hashmod; ++probe)
+ {
+ item_t& probe_item = _items[bucket];
+
+ if (probe_item.key == 0)
+ {
+ probe_item.key = key;
+ _count++;
+ return &probe_item.value;
+ }
+
+ if (probe_item.key == key)
+ return &probe_item.value;
+
+ // hash collision, quadratic probing
+ bucket = (bucket + probe + 1) & hashmod;
+ }
+
+ assert(false && "Hash table is full");
+ return 0;
+ }
+
+ bool reserve()
+ {
+ if (_count + 16 >= _capacity - _capacity / 4)
+ return rehash();
+
+ return true;
+ }
+
+ private:
+ struct item_t
+ {
+ const void* key;
+ void* value;
+ };
+
+ item_t* _items;
+ size_t _capacity;
+
+ size_t _count;
+
+ bool rehash();
+
+ static unsigned int hash(const void* key)
+ {
+ unsigned int h = static_cast(reinterpret_cast(key));
+
+ // MurmurHash3 32-bit finalizer
+ h ^= h >> 16;
+ h *= 0x85ebca6bu;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35u;
+ h ^= h >> 16;
+
+ return h;
+ }
+ };
+
+ PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
+ {
+ compact_hash_table rt;
+ rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
+ rt._items = static_cast(xml_memory::allocate(sizeof(item_t) * rt._capacity));
+
+ if (!rt._items)
+ return false;
+
+ memset(rt._items, 0, sizeof(item_t) * rt._capacity);
+
+ for (size_t i = 0; i < _capacity; ++i)
+ if (_items[i].key)
+ *rt.insert(_items[i].key) = _items[i].value;
+
+ if (_items)
+ xml_memory::deallocate(_items);
+
+ _capacity = rt._capacity;
+ _items = rt._items;
+
+ assert(_count == rt._count);
+
+ return true;
+ }
+
+PUGI__NS_END
#endif
PUGI__NS_BEGIN
- static const size_t xml_memory_page_size =
- #ifdef PUGIXML_MEMORY_PAGE_SIZE
- PUGIXML_MEMORY_PAGE_SIZE
- #else
- 32768
- #endif
- ;
+#ifdef PUGIXML_COMPACT
+ static const uintptr_t xml_memory_block_alignment = 4;
+#else
+ static const uintptr_t xml_memory_block_alignment = sizeof(void*);
+#endif
- static const uintptr_t xml_memory_page_alignment = 64;
- static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
- static const uintptr_t xml_memory_page_contents_shared_mask = 32;
- static const uintptr_t xml_memory_page_name_allocated_mask = 16;
- static const uintptr_t xml_memory_page_value_allocated_mask = 8;
- static const uintptr_t xml_memory_page_type_mask = 7;
+ // extra metadata bits
+ static const uintptr_t xml_memory_page_contents_shared_mask = 64;
+ static const uintptr_t xml_memory_page_name_allocated_mask = 32;
+ static const uintptr_t xml_memory_page_value_allocated_mask = 16;
+ static const uintptr_t xml_memory_page_type_mask = 15;
+
+ // combined masks for string uniqueness
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
- #define PUGI__NODETYPE(n) static_cast(((n)->header & impl::xml_memory_page_type_mask) + 1)
+#ifdef PUGIXML_COMPACT
+ #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
+ #define PUGI__GETPAGE_IMPL(header) (header).get_page()
+#else
+ #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast(object) - reinterpret_cast(page)) << 8) | (flags))
+ // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
+ #define PUGI__GETPAGE_IMPL(header) static_cast(const_cast(static_cast(reinterpret_cast(&header) - (header >> 8))))
+#endif
+
+ #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
+ #define PUGI__NODETYPE(n) static_cast((n)->header & impl::xml_memory_page_type_mask)
struct xml_allocator;
@@ -293,6 +445,12 @@ PUGI__NS_BEGIN
result->busy_size = 0;
result->freed_size = 0;
+ #ifdef PUGIXML_COMPACT
+ result->compact_string_base = 0;
+ result->compact_shared_parent = 0;
+ result->compact_page_marker = 0;
+ #endif
+
return result;
}
@@ -303,8 +461,22 @@ PUGI__NS_BEGIN
size_t busy_size;
size_t freed_size;
+
+ #ifdef PUGIXML_COMPACT
+ char_t* compact_string_base;
+ void* compact_shared_parent;
+ uint32_t* compact_page_marker;
+ #endif
};
+ static const size_t xml_memory_page_size =
+ #ifdef PUGIXML_MEMORY_PAGE_SIZE
+ (PUGIXML_MEMORY_PAGE_SIZE)
+ #else
+ 32768
+ #endif
+ - sizeof(xml_memory_page);
+
struct xml_memory_string_header
{
uint16_t page_offset; // offset from page->data
@@ -315,6 +487,9 @@ PUGI__NS_BEGIN
{
xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
{
+ #ifdef PUGIXML_COMPACT
+ _hash = 0;
+ #endif
}
xml_memory_page* allocate_page(size_t data_size)
@@ -322,37 +497,29 @@ PUGI__NS_BEGIN
size_t size = sizeof(xml_memory_page) + data_size;
// allocate block with some alignment, leaving memory for worst-case padding
- void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
+ void* memory = xml_memory::allocate(size);
if (!memory) return 0;
- // align to next page boundary (note: this guarantees at least 1 usable byte before the page)
- char* page_memory = reinterpret_cast((reinterpret_cast(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1));
-
// prepare page structure
- xml_memory_page* page = xml_memory_page::construct(page_memory);
+ xml_memory_page* page = xml_memory_page::construct(memory);
assert(page);
page->allocator = _root->allocator;
- // record the offset for freeing the memory block
- assert(page_memory > memory && page_memory - static_cast(memory) <= 127);
- page_memory[-1] = static_cast(page_memory - static_cast(memory));
-
return page;
}
static void deallocate_page(xml_memory_page* page)
{
- char* page_memory = reinterpret_cast(page);
-
- xml_memory::deallocate(page_memory - page_memory[-1]);
+ xml_memory::deallocate(page);
}
void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
void* allocate_memory(size_t size, xml_memory_page*& out_page)
{
- if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
+ if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
+ return allocate_memory_oob(size, out_page);
void* buf = reinterpret_cast(_root) + sizeof(xml_memory_page) + _busy_size;
@@ -363,6 +530,44 @@ PUGI__NS_BEGIN
return buf;
}
+ #ifdef PUGIXML_COMPACT
+ void* allocate_object(size_t size, xml_memory_page*& out_page)
+ {
+ void* result = allocate_memory(size + sizeof(uint32_t), out_page);
+ if (!result) return 0;
+
+ // adjust for marker
+ ptrdiff_t offset = static_cast(result) - reinterpret_cast(out_page->compact_page_marker);
+
+ if (PUGI__UNLIKELY(static_cast(offset) >= 256 * xml_memory_block_alignment))
+ {
+ // insert new marker
+ uint32_t* marker = static_cast(result);
+
+ *marker = static_cast(reinterpret_cast(marker) - reinterpret_cast(out_page));
+ out_page->compact_page_marker = marker;
+
+ // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
+ // this will make sure deallocate_memory correctly tracks the size
+ out_page->freed_size += sizeof(uint32_t);
+
+ return marker + 1;
+ }
+ else
+ {
+ // roll back uint32_t part
+ _busy_size -= sizeof(uint32_t);
+
+ return result;
+ }
+ }
+ #else
+ void* allocate_object(size_t size, xml_memory_page*& out_page)
+ {
+ return allocate_memory(size, out_page);
+ }
+ #endif
+
void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
{
if (page == _root) page->busy_size = _busy_size;
@@ -380,7 +585,16 @@ PUGI__NS_BEGIN
assert(_root == page);
// top page freed, just reset sizes
- page->busy_size = page->freed_size = 0;
+ page->busy_size = 0;
+ page->freed_size = 0;
+
+ #ifdef PUGIXML_COMPACT
+ // reset compact state to maximize efficiency
+ page->compact_string_base = 0;
+ page->compact_shared_parent = 0;
+ page->compact_page_marker = 0;
+ #endif
+
_busy_size = 0;
}
else
@@ -400,15 +614,15 @@ PUGI__NS_BEGIN
char_t* allocate_string(size_t length)
{
- static const size_t max_encoded_offset = (1 << 16) * sizeof(void*);
+ static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
// allocate memory for string and header block
size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
-
- // round size up to pointer alignment boundary
- size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
+
+ // round size up to block alignment boundary
+ size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
xml_memory_page* page;
xml_memory_string_header* header = static_cast(allocate_memory(full_size, page));
@@ -418,14 +632,14 @@ PUGI__NS_BEGIN
// setup header
ptrdiff_t page_offset = reinterpret_cast(header) - reinterpret_cast(page) - sizeof(xml_memory_page);
- assert(page_offset % sizeof(void*) == 0);
+ assert(page_offset % xml_memory_block_alignment == 0);
assert(page_offset >= 0 && static_cast(page_offset) < max_encoded_offset);
- header->page_offset = static_cast(static_cast(page_offset) / sizeof(void*));
+ header->page_offset = static_cast(static_cast(page_offset) / xml_memory_block_alignment);
// full_size == 0 for large strings that occupy the whole page
- assert(full_size % sizeof(void*) == 0);
+ assert(full_size % xml_memory_block_alignment == 0);
assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
- header->full_size = static_cast(full_size < max_encoded_offset ? full_size / sizeof(void*) : 0);
+ header->full_size = static_cast(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
// round-trip through void* to avoid 'cast increases required alignment of target type' warning
// header is guaranteed a pointer-sized alignment, which should be enough for char_t
@@ -441,103 +655,459 @@ PUGI__NS_BEGIN
xml_memory_string_header* header = static_cast(static_cast(string)) - 1;
assert(header);
- // deallocate
- size_t page_offset = sizeof(xml_memory_page) + header->page_offset * sizeof(void*);
- xml_memory_page* page = reinterpret_cast(static_cast(reinterpret_cast(header) - page_offset));
+ // deallocate
+ size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
+ xml_memory_page* page = reinterpret_cast(static_cast(reinterpret_cast(header) - page_offset));
+
+ // if full_size == 0 then this string occupies the whole page
+ size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
+
+ deallocate_memory(header, full_size, page);
+ }
+
+ bool reserve()
+ {
+ #ifdef PUGIXML_COMPACT
+ return _hash->reserve();
+ #else
+ return true;
+ #endif
+ }
+
+ xml_memory_page* _root;
+ size_t _busy_size;
+
+ #ifdef PUGIXML_COMPACT
+ compact_hash_table* _hash;
+ #endif
+ };
+
+ PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
+ {
+ const size_t large_allocation_threshold = xml_memory_page_size / 4;
+
+ xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
+ out_page = page;
+
+ if (!page) return 0;
+
+ if (size <= large_allocation_threshold)
+ {
+ _root->busy_size = _busy_size;
+
+ // insert page at the end of linked list
+ page->prev = _root;
+ _root->next = page;
+ _root = page;
+
+ _busy_size = size;
+ }
+ else
+ {
+ // insert page before the end of linked list, so that it is deleted as soon as possible
+ // the last page is not deleted even if it's empty (see deallocate_memory)
+ assert(_root->prev);
+
+ page->prev = _root->prev;
+ page->next = _root;
+
+ _root->prev->next = page;
+ _root->prev = page;
+
+ page->busy_size = size;
+ }
+
+ return reinterpret_cast(page) + sizeof(xml_memory_page);
+ }
+PUGI__NS_END
+
+#ifdef PUGIXML_COMPACT
+PUGI__NS_BEGIN
+ static const uintptr_t compact_alignment_log2 = 2;
+ static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
+
+ class compact_header
+ {
+ public:
+ compact_header(xml_memory_page* page, unsigned int flags)
+ {
+ PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
+
+ ptrdiff_t offset = (reinterpret_cast(this) - reinterpret_cast(page->compact_page_marker));
+ assert(offset % compact_alignment == 0 && static_cast(offset) < 256 * compact_alignment);
+
+ _page = static_cast(offset >> compact_alignment_log2);
+ _flags = static_cast(flags);
+ }
+
+ void operator&=(uintptr_t mod)
+ {
+ _flags &= static_cast(mod);
+ }
+
+ void operator|=(uintptr_t mod)
+ {
+ _flags |= static_cast(mod);
+ }
+
+ uintptr_t operator&(uintptr_t mod) const
+ {
+ return _flags & mod;
+ }
+
+ xml_memory_page* get_page() const
+ {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ const char* page_marker = reinterpret_cast(this) - (_page << compact_alignment_log2);
+ const char* page = page_marker - *reinterpret_cast(static_cast(page_marker));
+
+ return const_cast(reinterpret_cast(static_cast(page)));
+ }
+
+ private:
+ unsigned char _page;
+ unsigned char _flags;
+ };
+
+ PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
+ {
+ const compact_header* header = reinterpret_cast(static_cast(object) - header_offset);
+
+ return header->get_page();
+ }
+
+ template PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
+ {
+ return static_cast(*compact_get_page(object, header_offset)->allocator->_hash->find(object));
+ }
+
+ template PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
+ {
+ *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value;
+ }
+
+ template class compact_pointer
+ {
+ public:
+ compact_pointer(): _data(0)
+ {
+ }
+
+ void operator=(const compact_pointer& rhs)
+ {
+ *this = rhs + 0;
+ }
+
+ void operator=(T* value)
+ {
+ if (value)
+ {
+ // value is guaranteed to be compact-aligned; 'this' is not
+ // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
+ // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
+ // compensate for arithmetic shift rounding for negative values
+ ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this);
+ ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
+
+ if (static_cast(offset) <= 253)
+ _data = static_cast(offset + 1);
+ else
+ {
+ compact_set_value(this, value);
+
+ _data = 255;
+ }
+ }
+ else
+ _data = 0;
+ }
+
+ operator T*() const
+ {
+ if (_data)
+ {
+ if (_data < 255)
+ {
+ uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1);
+
+ return reinterpret_cast(base + ((_data - 1 + start) << compact_alignment_log2));
+ }
+ else
+ return compact_get_value(this);
+ }
+ else
+ return 0;
+ }
+
+ T* operator->() const
+ {
+ return *this;
+ }
+
+ private:
+ unsigned char _data;
+ };
+
+ template class compact_pointer_parent
+ {
+ public:
+ compact_pointer_parent(): _data(0)
+ {
+ }
+
+ void operator=(const compact_pointer_parent& rhs)
+ {
+ *this = rhs + 0;
+ }
+
+ void operator=(T* value)
+ {
+ if (value)
+ {
+ // value is guaranteed to be compact-aligned; 'this' is not
+ // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
+ // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
+ // compensate for arithmetic shift behavior for negative values
+ ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this);
+ ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
+
+ if (static_cast(offset) <= 65533)
+ {
+ _data = static_cast(offset + 1);
+ }
+ else
+ {
+ xml_memory_page* page = compact_get_page(this, header_offset);
+
+ if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
+ page->compact_shared_parent = value;
+
+ if (page->compact_shared_parent == value)
+ {
+ _data = 65534;
+ }
+ else
+ {
+ compact_set_value(this, value);
+
+ _data = 65535;
+ }
+ }
+ }
+ else
+ {
+ _data = 0;
+ }
+ }
+
+ operator T*() const
+ {
+ if (_data)
+ {
+ if (_data < 65534)
+ {
+ uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1);
+
+ return reinterpret_cast(base + ((_data - 1 - 65533) << compact_alignment_log2));
+ }
+ else if (_data == 65534)
+ return static_cast(compact_get_page(this, header_offset)->compact_shared_parent);
+ else
+ return compact_get_value(this);
+ }
+ else
+ return 0;
+ }
+
+ T* operator->() const
+ {
+ return *this;
+ }
+
+ private:
+ uint16_t _data;
+ };
+
+ template class compact_string
+ {
+ public:
+ compact_string(): _data(0)
+ {
+ }
+
+ void operator=(const compact_string& rhs)
+ {
+ *this = rhs + 0;
+ }
+
+ void operator=(char_t* value)
+ {
+ if (value)
+ {
+ xml_memory_page* page = compact_get_page(this, header_offset);
+
+ if (PUGI__UNLIKELY(page->compact_string_base == 0))
+ page->compact_string_base = value;
+
+ ptrdiff_t offset = value - page->compact_string_base;
+
+ if (static_cast(offset) < (65535 << 7))
+ {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset));
+
+ if (*base == 0)
+ {
+ *base = static_cast((offset >> 7) + 1);
+ _data = static_cast((offset & 127) + 1);
+ }
+ else
+ {
+ ptrdiff_t remainder = offset - ((*base - 1) << 7);
+
+ if (static_cast(remainder) <= 253)
+ {
+ _data = static_cast(remainder + 1);
+ }
+ else
+ {
+ compact_set_value(this, value);
+
+ _data = 255;
+ }
+ }
+ }
+ else
+ {
+ compact_set_value(this, value);
+
+ _data = 255;
+ }
+ }
+ else
+ {
+ _data = 0;
+ }
+ }
+
+ operator char_t*() const
+ {
+ if (_data)
+ {
+ if (_data < 255)
+ {
+ xml_memory_page* page = compact_get_page(this, header_offset);
+
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ const uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset));
+ assert(*base);
- // if full_size == 0 then this string occupies the whole page
- size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * sizeof(void*);
+ ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
- deallocate_memory(header, full_size, page);
+ return page->compact_string_base + offset;
+ }
+ else
+ {
+ return compact_get_value(this);
+ }
+ }
+ else
+ return 0;
}
- xml_memory_page* _root;
- size_t _busy_size;
+ private:
+ unsigned char _data;
};
+PUGI__NS_END
+#endif
- PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
+#ifdef PUGIXML_COMPACT
+namespace pugi
+{
+ struct xml_attribute_struct
{
- const size_t large_allocation_threshold = xml_memory_page_size / 4;
+ xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
+ {
+ PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
+ }
- xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
- out_page = page;
+ impl::compact_header header;
- if (!page) return 0;
+ uint16_t namevalue_base;
- if (size <= large_allocation_threshold)
- {
- _root->busy_size = _busy_size;
+ impl::compact_string<4, 2> name;
+ impl::compact_string<5, 3> value;
- // insert page at the end of linked list
- page->prev = _root;
- _root->next = page;
- _root = page;
+ impl::compact_pointer prev_attribute_c;
+ impl::compact_pointer next_attribute;
+ };
- _busy_size = size;
- }
- else
+ struct xml_node_struct
+ {
+ xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
{
- // insert page before the end of linked list, so that it is deleted as soon as possible
- // the last page is not deleted even if it's empty (see deallocate_memory)
- assert(_root->prev);
+ PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
+ }
- page->prev = _root->prev;
- page->next = _root;
+ impl::compact_header header;
- _root->prev->next = page;
- _root->prev = page;
- }
+ uint16_t namevalue_base;
- // allocate inside page
- page->busy_size = size;
+ impl::compact_string<4, 2> name;
+ impl::compact_string<5, 3> value;
- return reinterpret_cast(page) + sizeof(xml_memory_page);
- }
-PUGI__NS_END
+ impl::compact_pointer_parent parent;
+
+ impl::compact_pointer first_child;
+ impl::compact_pointer prev_sibling_c;
+ impl::compact_pointer next_sibling;
+
+ impl::compact_pointer first_attribute;
+ };
+}
+#else
namespace pugi
{
- /// A 'name=value' XML attribute structure.
struct xml_attribute_struct
{
- /// Default ctor
- xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
+ xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
{
+ header = PUGI__GETHEADER_IMPL(this, page, 0);
}
uintptr_t header;
- char_t* name; ///< Pointer to attribute name.
- char_t* value; ///< Pointer to attribute value.
+ char_t* name;
+ char_t* value;
- xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
- xml_attribute_struct* next_attribute; ///< Next attribute
+ xml_attribute_struct* prev_attribute_c;
+ xml_attribute_struct* next_attribute;
};
- /// An XML document tree node.
struct xml_node_struct
{
- /// Default ctor
- /// \param type - node type
- xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
+ xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
{
+ header = PUGI__GETHEADER_IMPL(this, page, type);
}
uintptr_t header;
- xml_node_struct* parent; ///< Pointer to parent
+ char_t* name;
+ char_t* value;
+
+ xml_node_struct* parent;
- char_t* name; ///< Pointer to element name.
- char_t* value; ///< Pointer to any associated string data.
+ xml_node_struct* first_child;
- xml_node_struct* first_child; ///< First child
-
- xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
- xml_node_struct* next_sibling; ///< Right brother
-
- xml_attribute_struct* first_attribute; ///< First attribute
+ xml_node_struct* prev_sibling_c;
+ xml_node_struct* next_sibling;
+
+ xml_attribute_struct* first_attribute;
};
}
+#endif
PUGI__NS_BEGIN
struct xml_extra_buffer
@@ -555,20 +1125,24 @@ PUGI__NS_BEGIN
const char_t* buffer;
xml_extra_buffer* extra_buffers;
+
+ #ifdef PUGIXML_COMPACT
+ compact_hash_table hash;
+ #endif
};
- inline xml_allocator& get_allocator(const xml_node_struct* node)
+ template inline xml_allocator& get_allocator(const Object* object)
{
- assert(node);
+ assert(object);
- return *reinterpret_cast(node->header & xml_memory_page_pointer_mask)->allocator;
+ return *PUGI__GETPAGE(object)->allocator;
}
template inline xml_document_struct& get_document(const Object* object)
{
assert(object);
- return *static_cast(reinterpret_cast(object->header & xml_memory_page_pointer_mask)->allocator);
+ return *static_cast(PUGI__GETPAGE(object)->allocator);
}
PUGI__NS_END
@@ -577,7 +1151,8 @@ PUGI__NS_BEGIN
inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
{
xml_memory_page* page;
- void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
+ void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
+ if (!memory) return 0;
return new (memory) xml_attribute_struct(page);
}
@@ -585,27 +1160,30 @@ PUGI__NS_BEGIN
inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
{
xml_memory_page* page;
- void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
+ void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
+ if (!memory) return 0;
return new (memory) xml_node_struct(page, type);
}
inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
{
- uintptr_t header = a->header;
+ if (a->header & impl::xml_memory_page_name_allocated_mask)
+ alloc.deallocate_string(a->name);
- if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
- if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
+ if (a->header & impl::xml_memory_page_value_allocated_mask)
+ alloc.deallocate_string(a->value);
- alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast(header & xml_memory_page_pointer_mask));
+ alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
}
inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
{
- uintptr_t header = n->header;
+ if (n->header & impl::xml_memory_page_name_allocated_mask)
+ alloc.deallocate_string(n->name);
- if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
- if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
+ if (n->header & impl::xml_memory_page_value_allocated_mask)
+ alloc.deallocate_string(n->value);
for (xml_attribute_struct* attr = n->first_attribute; attr; )
{
@@ -625,7 +1203,7 @@ PUGI__NS_BEGIN
child = next;
}
- alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast(header & xml_memory_page_pointer_mask));
+ alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
}
inline void append_node(xml_node_struct* child, xml_node_struct* node)
@@ -797,6 +1375,8 @@ PUGI__NS_BEGIN
PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
{
+ if (!alloc.reserve()) return 0;
+
xml_node_struct* child = allocate_node(alloc, type);
if (!child) return 0;
@@ -807,6 +1387,8 @@ PUGI__NS_BEGIN
PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
{
+ if (!alloc.reserve()) return 0;
+
xml_attribute_struct* attr = allocate_attribute(alloc);
if (!attr) return 0;
@@ -1012,28 +1594,11 @@ PUGI__NS_BEGIN
}
};
- template struct wchar_selector;
-
- template <> struct wchar_selector<2>
- {
- typedef uint16_t type;
- typedef utf16_counter counter;
- typedef utf16_writer writer;
- };
-
- template <> struct wchar_selector<4>
+ struct utf8_decoder
{
- typedef uint32_t type;
- typedef utf32_counter counter;
- typedef utf32_writer writer;
- };
-
- typedef wchar_selector::counter wchar_counter;
- typedef wchar_selector::writer wchar_writer;
+ typedef uint8_t type;
- template struct utf_decoder
- {
- static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
+ template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
{
const uint8_t utf8_byte_mask = 0x3f;
@@ -1094,29 +1659,34 @@ PUGI__NS_BEGIN
return result;
}
+ };
- static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
- {
- const uint16_t* end = data + size;
+ template struct utf16_decoder
+ {
+ typedef uint16_t type;
- while (data < end)
+ template static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
+ {
+ while (size)
{
- unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;
+ uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
// U+0000..U+D7FF
if (lead < 0xD800)
{
result = Traits::low(result, lead);
data += 1;
+ size -= 1;
}
// U+E000..U+FFFF
else if (static_cast(lead - 0xE000) < 0x2000)
{
result = Traits::low(result, lead);
data += 1;
+ size -= 1;
}
// surrogate pair lead
- else if (static_cast(lead - 0xD800) < 0x400 && data + 1 < end)
+ else if (static_cast(lead - 0xD800) < 0x400 && size >= 2)
{
uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
@@ -1124,26 +1694,32 @@ PUGI__NS_BEGIN
{
result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
data += 2;
+ size -= 2;
}
else
{
data += 1;
+ size -= 1;
}
}
else
{
data += 1;
+ size -= 1;
}
}
return result;
}
+ };
- static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
- {
- const uint32_t* end = data + size;
+ template struct utf32_decoder
+ {
+ typedef uint32_t type;
- while (data < end)
+ template static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
+ {
+ while (size)
{
uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
@@ -1152,53 +1728,76 @@ PUGI__NS_BEGIN
{
result = Traits::low(result, lead);
data += 1;
+ size -= 1;
}
// U+10000..U+10FFFF
else
{
result = Traits::high(result, lead);
data += 1;
+ size -= 1;
}
}
return result;
}
+ };
+
+ struct latin1_decoder
+ {
+ typedef uint8_t type;
- static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
+ template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
{
- for (size_t i = 0; i < size; ++i)
+ while (size)
{
- result = Traits::low(result, data[i]);
+ result = Traits::low(result, *data);
+ data += 1;
+ size -= 1;
}
return result;
}
+ };
- static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
- {
- return decode_utf16_block(data, size, result);
- }
+ template struct wchar_selector;
- static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
- {
- return decode_utf32_block(data, size, result);
- }
+ template <> struct wchar_selector<2>
+ {
+ typedef uint16_t type;
+ typedef utf16_counter counter;
+ typedef utf16_writer writer;
+ typedef utf16_decoder decoder;
+ };
- static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
- {
- return decode_wchar_block_impl(reinterpret_cast::type*>(data), size, result);
- }
+ template <> struct wchar_selector<4>
+ {
+ typedef uint32_t type;
+ typedef utf32_counter counter;
+ typedef utf32_writer writer;
+ typedef utf32_decoder decoder;
};
- template PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
+ typedef wchar_selector::counter wchar_counter;
+ typedef wchar_selector::writer wchar_writer;
+
+ struct wchar_decoder
{
- for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
- }
+ typedef wchar_t type;
+
+ template static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
+ {
+ typedef wchar_selector::decoder decoder;
+
+ return decoder::process(reinterpret_cast(data), size, result, traits);
+ }
+ };
#ifdef PUGIXML_WCHAR_MODE
PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
{
- for (size_t i = 0; i < length; ++i) result[i] = static_cast(endian_swap(static_cast::type>(data[i])));
+ for (size_t i = 0; i < length; ++i)
+ result[i] = static_cast(endian_swap(static_cast::type>(data[i])));
}
#endif
PUGI__NS_END
@@ -1245,7 +1844,7 @@ PUGI__NS_BEGIN
ctx_digit = 8, // 0-9
ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
-
+
static const unsigned char chartypex_table[256] =
{
3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
@@ -1267,7 +1866,7 @@ PUGI__NS_BEGIN
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
};
-
+
#ifdef PUGIXML_WCHAR_MODE
#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast(c) < 128 ? table[static_cast(c)] : table[128]) & (ct))
#else
@@ -1290,12 +1889,71 @@ PUGI__NS_BEGIN
if (sizeof(wchar_t) == 2)
return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- else
+ else
return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
}
- PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
+ PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
+ {
+ #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
+ #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
+
+ // check if we have a non-empty XML declaration
+ if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
+ return false;
+
+ // scan XML declaration until the encoding field
+ for (size_t i = 6; i + 1 < size; ++i)
+ {
+ // declaration cannot contain ? in quoted values
+ if (data[i] == '?')
+ return false;
+
+ if (data[i] == 'e' && data[i + 1] == 'n')
+ {
+ size_t offset = i;
+
+ // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
+ PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
+ PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
+
+ // S? = S?
+ PUGI__SCANCHARTYPE(ct_space);
+ PUGI__SCANCHAR('=');
+ PUGI__SCANCHARTYPE(ct_space);
+
+ // the only two valid delimiters are ' and "
+ uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
+
+ PUGI__SCANCHAR(delimiter);
+
+ size_t start = offset;
+
+ out_encoding = data + offset;
+
+ PUGI__SCANCHARTYPE(ct_symbol);
+
+ out_length = offset - start;
+
+ PUGI__SCANCHAR(delimiter);
+
+ return true;
+ }
+ }
+
+ return false;
+
+ #undef PUGI__SCANCHAR
+ #undef PUGI__SCANCHARTYPE
+ }
+
+ PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
{
+ // skip encoding autodetection if input buffer is too small
+ if (size < 4) return encoding_utf8;
+
+ uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
+
// look for BOM in first few bytes
if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
@@ -1308,13 +1966,32 @@ PUGI__NS_BEGIN
if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
- if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
// look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
- // no known BOM detected, assume utf8
+ // no known BOM detected; parse declaration
+ const uint8_t* enc = 0;
+ size_t enc_length = 0;
+
+ if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
+ {
+ // iso-8859-1 (case-insensitive)
+ if (enc_length == 10
+ && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
+ && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
+ && enc[8] == '-' && enc[9] == '1')
+ return encoding_latin1;
+
+ // latin1 (case-insensitive)
+ if (enc_length == 6
+ && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
+ && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
+ && enc[5] == '1')
+ return encoding_latin1;
+ }
+
return encoding_utf8;
}
@@ -1332,15 +2009,10 @@ PUGI__NS_BEGIN
// only do autodetection if no explicit encoding is requested
if (encoding != encoding_auto) return encoding;
- // skip encoding autodetection if input buffer is too small
- if (size < 4) return encoding_utf8;
-
// try to guess encoding (based on XML specification, Appendix F.1)
const uint8_t* data = static_cast(contents);
- PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
-
- return guess_buffer_encoding(d0, d1, d2, d3);
+ return guess_buffer_encoding(data, size);
}
PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
@@ -1364,139 +2036,64 @@ PUGI__NS_BEGIN
buffer[length] = 0;
- out_buffer = buffer;
- out_length = length + 1;
- }
-
- return true;
- }
-
-#ifdef PUGIXML_WCHAR_MODE
- PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
- {
- return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
- (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
- }
-
- PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
- {
- const char_t* data = static_cast(contents);
- size_t length = size / sizeof(char_t);
-
- if (is_mutable)
- {
- char_t* buffer = const_cast(data);
-
- convert_wchar_endian_swap(buffer, data, length);
-
- out_buffer = buffer;
- out_length = length;
- }
- else
- {
- char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- convert_wchar_endian_swap(buffer, data, length);
- buffer[length] = 0;
-
- out_buffer = buffer;
- out_length = length + 1;
- }
-
- return true;
- }
-
- PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
- {
- const uint8_t* data = static_cast(contents);
- size_t data_length = size;
-
- // first pass: get length in wchar_t units
- size_t length = utf_decoder::decode_utf8_block(data, data_length, 0);
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // second pass: convert utf8 input to wchar_t
- wchar_writer::value_type obegin = reinterpret_cast(buffer);
- wchar_writer::value_type oend = utf_decoder::decode_utf8_block(data, data_length, obegin);
-
- assert(oend == obegin + length);
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1;
-
- return true;
- }
-
- template PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
- {
- const uint16_t* data = static_cast(contents);
- size_t data_length = size / sizeof(uint16_t);
-
- // first pass: get length in wchar_t units
- size_t length = utf_decoder::decode_utf16_block(data, data_length, 0);
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // second pass: convert utf16 input to wchar_t
- wchar_writer::value_type obegin = reinterpret_cast(buffer);
- wchar_writer::value_type oend = utf_decoder::decode_utf16_block(data, data_length, obegin);
-
- assert(oend == obegin + length);
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1;
+ out_buffer = buffer;
+ out_length = length + 1;
+ }
return true;
}
- template PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+#ifdef PUGIXML_WCHAR_MODE
+ PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
{
- const uint32_t* data = static_cast(contents);
- size_t data_length = size / sizeof(uint32_t);
+ return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
+ (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
+ }
- // first pass: get length in wchar_t units
- size_t length = utf_decoder::decode_utf32_block(data, data_length, 0);
+ PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ {
+ const char_t* data = static_cast(contents);
+ size_t length = size / sizeof(char_t);
- // allocate buffer of suitable length
- char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
+ if (is_mutable)
+ {
+ char_t* buffer = const_cast(data);
- // second pass: convert utf32 input to wchar_t
- wchar_writer::value_type obegin = reinterpret_cast(buffer);
- wchar_writer::value_type oend = utf_decoder::decode_utf32_block(data, data_length, obegin);
+ convert_wchar_endian_swap(buffer, data, length);
- assert(oend == obegin + length);
- *oend = 0;
+ out_buffer = buffer;
+ out_length = length;
+ }
+ else
+ {
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
- out_buffer = buffer;
- out_length = length + 1;
+ convert_wchar_endian_swap(buffer, data, length);
+ buffer[length] = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1;
+ }
return true;
}
- PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+ template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
{
- const uint8_t* data = static_cast(contents);
- size_t data_length = size;
+ const typename D::type* data = static_cast(contents);
+ size_t data_length = size / sizeof(typename D::type);
- // get length in wchar_t units
- size_t length = data_length;
+ // first pass: get length in wchar_t units
+ size_t length = D::process(data, data_length, 0, wchar_counter());
// allocate buffer of suitable length
char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
if (!buffer) return false;
- // convert latin1 input to wchar_t
+ // second pass: convert input (in the encoding handled by decoder D) to wchar_t
wchar_writer::value_type obegin = reinterpret_cast(buffer);
- wchar_writer::value_type oend = utf_decoder::decode_latin1_block(data, data_length, obegin);
+ wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
assert(oend == obegin + length);
*oend = 0;
@@ -1513,13 +2110,16 @@ PUGI__NS_BEGIN
xml_encoding wchar_encoding = get_wchar_encoding();
// fast path: no conversion required
- if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+ if (encoding == wchar_encoding)
+ return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
// only endian-swapping is required
- if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+ if (need_endian_swap_utf(encoding, wchar_encoding))
+ return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
// source encoding is utf8
- if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
+ if (encoding == encoding_utf8)
+ return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
// source encoding is utf16
if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
@@ -1527,8 +2127,8 @@ PUGI__NS_BEGIN
xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
return (native_encoding == encoding) ?
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder());
}
// source encoding is utf32
@@ -1537,24 +2137,25 @@ PUGI__NS_BEGIN
xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
return (native_encoding == encoding) ?
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder());
}
// source encoding is latin1
- if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
+ if (encoding == encoding_latin1)
+ return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
- assert(!"Invalid encoding");
+ assert(false && "Invalid encoding");
return false;
}
#else
- template PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
{
- const uint16_t* data = static_cast(contents);
- size_t data_length = size / sizeof(uint16_t);
+ const typename D::type* data = static_cast(contents);
+ size_t data_length = size / sizeof(typename D::type);
// first pass: get length in utf8 units
- size_t length = utf_decoder::decode_utf16_block(data, data_length, 0);
+ size_t length = D::process(data, data_length, 0, utf8_counter());
// allocate buffer of suitable length
char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
@@ -1562,32 +2163,7 @@ PUGI__NS_BEGIN
// second pass: convert utf16 input to utf8
uint8_t* obegin = reinterpret_cast(buffer);
- uint8_t* oend = utf_decoder::decode_utf16_block(data, data_length, obegin);
-
- assert(oend == obegin + length);
- *oend = 0;
-
- out_buffer = buffer;
- out_length = length + 1;
-
- return true;
- }
-
- template PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
- {
- const uint32_t* data = static_cast(contents);
- size_t data_length = size / sizeof(uint32_t);
-
- // first pass: get length in utf8 units
- size_t length = utf_decoder::decode_utf32_block(data, data_length, 0);
-
- // allocate buffer of suitable length
- char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
- if (!buffer) return false;
-
- // second pass: convert utf32 input to utf8
- uint8_t* obegin = reinterpret_cast(buffer);
- uint8_t* oend = utf_decoder::decode_utf32_block(data, data_length, obegin);
+ uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
assert(oend == obegin + length);
*oend = 0;
@@ -1623,7 +2199,7 @@ PUGI__NS_BEGIN
if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
// first pass: get length in utf8 units
- size_t length = prefix_length + utf_decoder::decode_latin1_block(postfix, postfix_length, 0);
+ size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
// allocate buffer of suitable length
char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
@@ -1633,7 +2209,7 @@ PUGI__NS_BEGIN
memcpy(buffer, data, prefix_length);
uint8_t* obegin = reinterpret_cast(buffer);
- uint8_t* oend = utf_decoder::decode_latin1_block(postfix, postfix_length, obegin + prefix_length);
+ uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
assert(oend == obegin + length);
*oend = 0;
@@ -1647,7 +2223,8 @@ PUGI__NS_BEGIN
PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
{
// fast path: no conversion required
- if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+ if (encoding == encoding_utf8)
+ return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
// source encoding is utf16
if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
@@ -1655,8 +2232,8 @@ PUGI__NS_BEGIN
xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
return (native_encoding == encoding) ?
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder());
}
// source encoding is utf32
@@ -1665,14 +2242,15 @@ PUGI__NS_BEGIN
xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
return (native_encoding == encoding) ?
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
- convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder());
}
// source encoding is latin1
- if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
+ if (encoding == encoding_latin1)
+ return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
- assert(!"Invalid encoding");
+ assert(false && "Invalid encoding");
return false;
}
#endif
@@ -1680,22 +2258,20 @@ PUGI__NS_BEGIN
PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
{
// get length in utf8 characters
- return utf_decoder::decode_wchar_block(str, length, 0);
+ return wchar_decoder::process(str, length, 0, utf8_counter());
}
PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
{
// convert to utf8
uint8_t* begin = reinterpret_cast(buffer);
- uint8_t* end = utf_decoder::decode_wchar_block(str, length, begin);
-
+ uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
+
assert(begin + size == end);
(void)!end;
-
- // zero-terminate
- buffer[size] = 0;
+ (void)!size;
}
-
+
#ifndef PUGIXML_NO_STL
PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
{
@@ -1717,7 +2293,7 @@ PUGI__NS_BEGIN
const uint8_t* data = reinterpret_cast(str);
// first pass: get length in wchar_t units
- size_t length = utf_decoder::decode_utf8_block(data, size, 0);
+ size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
// allocate resulting string
std::basic_string result;
@@ -1727,7 +2303,7 @@ PUGI__NS_BEGIN
if (length > 0)
{
wchar_writer::value_type begin = reinterpret_cast(&result[0]);
- wchar_writer::value_type end = utf_decoder::decode_utf8_block(data, size, begin);
+ wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
assert(begin + length == end);
(void)!end;
@@ -1737,7 +2313,8 @@ PUGI__NS_BEGIN
}
#endif
- inline bool strcpy_insitu_allow(size_t length, uintptr_t header, uintptr_t header_mask, char_t* target)
+ template
+ inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
{
// never reuse shared memory
if (header & xml_memory_page_contents_shared_mask) return false;
@@ -1753,19 +2330,16 @@ PUGI__NS_BEGIN
return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
}
- PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
+ template
+ PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
{
- assert(header);
-
- size_t source_length = strlength(source);
-
if (source_length == 0)
{
// empty string and null pointer are equivalent, so just deallocate old memory
- xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator;
+ xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
if (header & header_mask) alloc->deallocate_string(dest);
-
+
// mark the string as not allocated
dest = 0;
header &= ~header_mask;
@@ -1775,24 +2349,28 @@ PUGI__NS_BEGIN
else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
{
// we can reuse old buffer, so just copy the new data (including zero terminator)
- memcpy(dest, source, (source_length + 1) * sizeof(char_t));
-
+ memcpy(dest, source, source_length * sizeof(char_t));
+ dest[source_length] = 0;
+
return true;
}
else
{
- xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator;
+ xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
+
+ if (!alloc->reserve()) return false;
// allocate new buffer
char_t* buf = alloc->allocate_string(source_length + 1);
if (!buf) return false;
// copy the string (including zero terminator)
- memcpy(buf, source, (source_length + 1) * sizeof(char_t));
+ memcpy(buf, source, source_length * sizeof(char_t));
+ buf[source_length] = 0;
// deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
if (header & header_mask) alloc->deallocate_string(dest);
-
+
// the string is now allocated, so set the flag
dest = buf;
header |= header_mask;
@@ -1805,11 +2383,11 @@ PUGI__NS_BEGIN
{
char_t* end;
size_t size;
-
+
gap(): end(0), size(0)
{
}
-
+
// Push new gap, move s count bytes further (skipping the gap).
// Collapse previous gap.
void push(char_t*& s, size_t count)
@@ -1820,14 +2398,14 @@ PUGI__NS_BEGIN
assert(s >= end);
memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end));
}
-
+
s += count; // end of current gap
-
+
// "merge" two gaps
end = s;
size += count;
}
-
+
// Collapse all gaps, return past-the-end pointer
char_t* flush(char_t* s)
{
@@ -1842,7 +2420,7 @@ PUGI__NS_BEGIN
else return s;
}
};
-
+
PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
{
char_t* stre = s + 1;
@@ -1874,7 +2452,7 @@ PUGI__NS_BEGIN
ch = *++stre;
}
-
+
++stre;
}
else // ... (dec code)
@@ -1894,7 +2472,7 @@ PUGI__NS_BEGIN
ch = *++stre;
}
-
+
++stre;
}
@@ -1903,7 +2481,7 @@ PUGI__NS_BEGIN
#else
s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc));
#endif
-
+
g.push(s, stre - s);
return stre;
}
@@ -1918,7 +2496,7 @@ PUGI__NS_BEGIN
{
*s++ = '&';
++stre;
-
+
g.push(s, stre - s);
return stre;
}
@@ -1943,7 +2521,7 @@ PUGI__NS_BEGIN
{
*s++ = '>';
++stre;
-
+
g.push(s, stre - s);
return stre;
}
@@ -1956,7 +2534,7 @@ PUGI__NS_BEGIN
{
*s++ = '<';
++stre;
-
+
g.push(s, stre - s);
return stre;
}
@@ -1969,7 +2547,7 @@ PUGI__NS_BEGIN
{
*s++ = '"';
++stre;
-
+
g.push(s, stre - s);
return stre;
}
@@ -1979,7 +2557,7 @@ PUGI__NS_BEGIN
default:
break;
}
-
+
return stre;
}
@@ -1987,7 +2565,7 @@ PUGI__NS_BEGIN
#define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
#define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
#define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
- #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
+ #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
#define PUGI__POPNODE() { cursor = cursor->parent; }
#define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
#define PUGI__SCANWHILE(X) { while (X) ++s; }
@@ -1999,21 +2577,21 @@ PUGI__NS_BEGIN
PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
{
gap g;
-
+
while (true)
{
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
-
+
if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
{
*s++ = '\n'; // replace first one with 0x0a
-
+
if (*s == '\n') g.push(s, 1);
}
else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
{
*g.flush(s) = 0;
-
+
return s + (s[2] == '>' ? 3 : 2);
}
else if (*s == 0)
@@ -2027,21 +2605,21 @@ PUGI__NS_BEGIN
PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
{
gap g;
-
+
while (true)
{
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
-
+
if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
{
*s++ = '\n'; // replace first one with 0x0a
-
+
if (*s == '\n') g.push(s, 1);
}
else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
{
*g.flush(s) = 0;
-
+
return s + 1;
}
else if (*s == 0)
@@ -2051,9 +2629,9 @@ PUGI__NS_BEGIN
else ++s;
}
}
-
+
typedef char_t* (*strconv_pcdata_t)(char_t*);
-
+
template struct strconv_pcdata_impl
{
static char_t* parse(char_t* s)
@@ -2075,13 +2653,13 @@ PUGI__NS_BEGIN
--end;
*end = 0;
-
+
return s + 1;
}
else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
{
*s++ = '\n'; // replace first one with 0x0a
-
+
if (*s == '\n') g.push(s, 1);
}
else if (opt_escape::value && *s == '&')
@@ -2104,7 +2682,7 @@ PUGI__NS_BEGIN
}
}
};
-
+
PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
{
PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
@@ -2124,7 +2702,7 @@ PUGI__NS_BEGIN
}
typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
-
+
template struct strconv_attribute_impl
{
static char_t* parse_wnorm(char_t* s, char_t end_quote)
@@ -2135,35 +2713,35 @@ PUGI__NS_BEGIN
if (PUGI__IS_CHARTYPE(*s, ct_space))
{
char_t* str = s;
-
+
do ++str;
while (PUGI__IS_CHARTYPE(*str, ct_space));
-
+
g.push(s, str - s);
}
while (true)
{
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
-
+
if (*s == end_quote)
{
char_t* str = g.flush(s);
-
+
do *str-- = 0;
while (PUGI__IS_CHARTYPE(*str, ct_space));
-
+
return s + 1;
}
else if (PUGI__IS_CHARTYPE(*s, ct_space))
{
*s++ = ' ';
-
+
if (PUGI__IS_CHARTYPE(*s, ct_space))
{
char_t* str = s + 1;
while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
-
+
g.push(s, str - s);
}
}
@@ -2186,11 +2764,11 @@ PUGI__NS_BEGIN
while (true)
{
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
-
+
if (*s == end_quote)
{
*g.flush(s) = 0;
-
+
return s + 1;
}
else if (PUGI__IS_CHARTYPE(*s, ct_space))
@@ -2198,7 +2776,7 @@ PUGI__NS_BEGIN
if (*s == '\r')
{
*s++ = ' ';
-
+
if (*s == '\n') g.push(s, 1);
}
else *s++ = ' ';
@@ -2222,17 +2800,17 @@ PUGI__NS_BEGIN
while (true)
{
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
-
+
if (*s == end_quote)
{
*g.flush(s) = 0;
-
+
return s + 1;
}
else if (*s == '\r')
{
*s++ = '\n';
-
+
if (*s == '\n') g.push(s, 1);
}
else if (opt_escape::value && *s == '&')
@@ -2254,11 +2832,11 @@ PUGI__NS_BEGIN
while (true)
{
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
-
+
if (*s == end_quote)
{
*g.flush(s) = 0;
-
+
return s + 1;
}
else if (opt_escape::value && *s == '&')
@@ -2277,7 +2855,7 @@ PUGI__NS_BEGIN
PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
{
PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
-
+
switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
{
case 0: return strconv_attribute_impl::parse_simple;
@@ -2311,11 +2889,11 @@ PUGI__NS_BEGIN
struct xml_parser
{
- xml_allocator alloc;
+ xml_allocator* alloc;
char_t* error_offset;
xml_parse_status error_status;
-
- xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
+
+ xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
{
}
@@ -2614,6 +3192,7 @@ PUGI__NS_BEGIN
{
// store value and step over >
cursor->value = value;
+
PUGI__POPNODE();
PUGI__ENDSEG();
@@ -2642,7 +3221,7 @@ PUGI__NS_BEGIN
{
strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
-
+
char_t ch = 0;
xml_node_struct* cursor = root;
char_t* mark = s;
@@ -2673,10 +3252,10 @@ PUGI__NS_BEGIN
while (true)
{
PUGI__SKIPWS(); // Eat any whitespace.
-
+
if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
{
- xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
+ xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
a->name = s; // Save the offset.
@@ -2691,7 +3270,7 @@ PUGI__NS_BEGIN
ch = *s;
++s;
}
-
+
if (ch == '=') // '<... #=...'
{
PUGI__SKIPWS(); // Eat any whitespace.
@@ -2703,7 +3282,7 @@ PUGI__NS_BEGIN
a->value = s; // Save the offset.
s = strconv_attribute(s, ch);
-
+
if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
// After this line the loop continues from the start;
@@ -2718,7 +3297,7 @@ PUGI__NS_BEGIN
else if (*s == '/')
{
++s;
-
+
if (*s == '>')
{
PUGI__POPNODE();
@@ -2759,7 +3338,7 @@ PUGI__NS_BEGIN
{
// we stepped over null terminator, backtrack & handle closing tag
--s;
-
+
if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
}
else PUGI__THROW_ERROR(status_bad_start_element, s);
@@ -2768,20 +3347,22 @@ PUGI__NS_BEGIN
{
++s;
+ mark = s;
+
char_t* name = cursor->name;
- if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
-
+ if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
+
while (PUGI__IS_CHARTYPE(*s, ct_symbol))
{
- if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+ if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
}
if (*name)
{
if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
- else PUGI__THROW_ERROR(status_end_element_mismatch, s);
+ else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
}
-
+
PUGI__POPNODE(); // Pop.
PUGI__SKIPWS();
@@ -2835,23 +3416,31 @@ PUGI__NS_BEGIN
if (!PUGI__OPTSET(parse_trim_pcdata))
s = mark;
-
+
if (cursor->parent || PUGI__OPTSET(parse_fragment))
{
- PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
- cursor->value = s; // Save the offset.
+ if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
+ {
+ cursor->value = s; // Save the offset.
+ }
+ else
+ {
+ PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+
+ cursor->value = s; // Save the offset.
+
+ PUGI__POPNODE(); // Pop since this is a standalone.
+ }
s = strconv_pcdata(s);
-
- PUGI__POPNODE(); // Pop since this is a standalone.
-
+
if (!*s) break;
}
else
{
PUGI__SCANFOR(*s == '<'); // '...<'
if (!*s) break;
-
+
++s;
}
@@ -2893,32 +3482,26 @@ PUGI__NS_BEGIN
static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
{
- // allocator object is a part of document object
- xml_allocator& alloc_ = *static_cast(xmldoc);
-
// early-out for empty documents
if (length == 0)
return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
// get last child of the root before parsing
- xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0;
-
+ xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
+
// create parser on stack
- xml_parser parser(alloc_);
+ xml_parser parser(static_cast(xmldoc));
// save last character and make buffer zero-terminated (speeds up parsing)
char_t endch = buffer[length - 1];
buffer[length - 1] = 0;
-
+
// skip BOM to make sure it does not end up as part of parse output
char_t* buffer_data = parse_skip_bom(buffer);
// perform actual parsing
parser.parse_tree(buffer_data, root, optmsk, endch);
- // update allocator state
- alloc_ = parser.alloc;
-
xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
assert(result.offset >= 0 && static_cast(result.offset) <= length);
@@ -2929,7 +3512,7 @@ PUGI__NS_BEGIN
return make_parse_result(status_unrecognized_tag, length - 1);
// check if there are any element nodes parsed
- xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child;
+ xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
return make_parse_result(status_no_document_element, length - 1);
@@ -2973,12 +3556,36 @@ PUGI__NS_BEGIN
return encoding_utf8;
}
+ template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
+ {
+ PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
+
+ typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T());
+
+ return static_cast(end - dest) * sizeof(*dest);
+ }
+
+ template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
+ {
+ PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
+
+ typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T());
+
+ if (opt_swap)
+ {
+ for (typename T::value_type i = dest; i != end; ++i)
+ *i = endian_swap(*i);
+ }
+
+ return static_cast(end - dest) * sizeof(*dest);
+ }
+
#ifdef PUGIXML_WCHAR_MODE
PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
{
if (length < 1) return 0;
- // discard last character if it's the lead of a surrogate pair
+ // discard last character if it's the lead of a surrogate pair
return (sizeof(wchar_t) == 2 && static_cast(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
}
@@ -2991,58 +3598,32 @@ PUGI__NS_BEGIN
return length * sizeof(char_t);
}
-
+
// convert to utf8
if (encoding == encoding_utf8)
- {
- uint8_t* dest = r_u8;
- uint8_t* end = utf_decoder::decode_wchar_block(data, length, dest);
-
- return static_cast(end - dest);
- }
+ return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
// convert to utf16
if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
{
- uint16_t* dest = r_u16;
-
- // convert to native utf16
- uint16_t* end = utf_decoder::decode_wchar_block(data, length, dest);
-
- // swap if necessary
xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest));
-
- return static_cast(end - dest) * sizeof(uint16_t);
+ return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
}
// convert to utf32
if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
{
- uint32_t* dest = r_u32;
-
- // convert to native utf32
- uint32_t* end = utf_decoder::decode_wchar_block(data, length, dest);
-
- // swap if necessary
xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest));
-
- return static_cast(end - dest) * sizeof(uint32_t);
+ return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
}
// convert to latin1
if (encoding == encoding_latin1)
- {
- uint8_t* dest = r_u8;
- uint8_t* end = utf_decoder::decode_wchar_block(data, length, dest);
-
- return static_cast(end - dest);
- }
+ return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
- assert(!"Invalid encoding");
+ assert(false && "Invalid encoding");
return 0;
}
#else
@@ -3066,43 +3647,22 @@ PUGI__NS_BEGIN
{
if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
{
- uint16_t* dest = r_u16;
-
- // convert to native utf16
- uint16_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest);
-
- // swap if necessary
xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
- if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest));
-
- return static_cast(end - dest) * sizeof(uint16_t);
+ return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
}
if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
{
- uint32_t* dest = r_u32;
-
- // convert to native utf32
- uint32_t* end = utf_decoder::decode_utf8_block(reinterpret_cast(data), length, dest);
-
- // swap if necessary
xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
- if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast