From 6b2dce7ebdd6a43bdea1a35e42e0bae45801d254 Mon Sep 17 00:00:00 2001 From: Pieter Colpaert Date: Tue, 18 Apr 2017 17:18:38 +0200 Subject: [PATCH 1/6] Test added for #7 --- src/N3Lexer.php | 3 ++- test/TriGParserTest.php | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/N3Lexer.php b/src/N3Lexer.php index 664b4c2..d83595f 100644 --- a/src/N3Lexer.php +++ b/src/N3Lexer.php @@ -66,7 +66,8 @@ public function __construct($options = []) { private $prefix = '/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)?:(?=[#\\s<])/'; private $prefixed = "/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)?:((?:(?:[0-:A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])(?:(?:[\\.\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])*(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~]))?)?)(?:[ \\t]+|(?=\.?[,;!\\^\\s#()\\[\\]\\{\\}\"'<]))/"; - //OLD VERSION private $prefixed = "/^((?:[A-Za-z\xc0-\xd6\xd8-\xf6])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:((?:(?:[0-:A-Z_a-z\xc0-\xd6\xd8-\xf6]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])(?:(?:[\.\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])*(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~]))?)?)(?:[ \t]+|(?=\.?[,;!\^\s#()\[\]\{\}\"'<]))/"; + //OLD VERSION + //private $prefixed = "/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff\\uf900-\\ufdcf\\ufdf0-\\ufffd]|[\\ud800-\\udb7f][\\udc00-\\udfff])*)?:((?:(?:[0-:A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])(?:(?:[\\.\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff\\uf900-\\ufdcf\\ufdf0-\\ufffd]|[\\ud800-\\udb7f][\\udc00-\\udfff]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])*(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff\\uf900-\\ufdcf\\ufdf0-\\ufffd]|[\\ud800-\\udb7f][\\udc00-\\udfff]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~]))?)?)(?:[ \\t]+|(?=\\.?[,;!\\^\\s#()\\[\\]\\{\\}\"'<]))/"; private $variable = '/^\\?(?:(?:[A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6])(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)(?=[.,;!\\^\\s#()\\[\\]\\{\\}"\'<])/'; private $blank = '/^_:((?:[0-9A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)(?:[ \\t]+|(?=\\.?[,;:\\s#()\\[\\]\\{\\}"\'<]))/'; diff --git a/test/TriGParserTest.php b/test/TriGParserTest.php index bc08f1d..5f69490 100644 --- a/test/TriGParserTest.php +++ b/test/TriGParserTest.php @@ -638,6 +638,14 @@ public function testUnicodeSequences () // ### should parse a graph with 8-bit unicode escape sequences $this->shouldParse('<\\U0001d400> {'."\n".'<\\U0001d400> <\\U0001d400> "\\U0001d400"^^<\\U0001d400>'."\n".'}' . "\n", ['๐€', '๐€', '"๐€"^^๐€', '๐€']); + $this->shouldParse('@prefix c: . + @prefix c: . +c:test a .' , ['http://example.org/test','b','http://example.org/ใƒ†ใ‚นใƒˆ','']); + + // ### should parse unicode after prefix + $this->shouldParse('@prefix c: . +c:test c:ใƒ†ใ‚นใƒˆ .', ['http://example.org/test','b','http://example.org/ใƒ†ใ‚นใƒˆ','']); + } public function testParseErrors () From e0e5d841da94e4ce795d8efac2fa9aa6c8437bc5 Mon Sep 17 00:00:00 2001 From: Pieter Colpaert Date: Tue, 18 Apr 2017 17:32:42 +0200 Subject: [PATCH 2/6] Fixed small check in test --- test/TriGParserTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/TriGParserTest.php b/test/TriGParserTest.php index 5f69490..b57d270 100644 --- a/test/TriGParserTest.php +++ b/test/TriGParserTest.php @@ -640,7 +640,7 @@ public function testUnicodeSequences () ['๐€', '๐€', '"๐€"^^๐€', '๐€']); $this->shouldParse('@prefix c: . @prefix c: . -c:test a .' , ['http://example.org/test','b','http://example.org/ใƒ†ใ‚นใƒˆ','']); +c:test .' , ['http://example.org/test','b','http://example.org/ใƒ†ใ‚นใƒˆ','']); // ### should parse unicode after prefix $this->shouldParse('@prefix c: . From 0d612d15d2332a4cb079b0a09c71b2a2b22621e0 Mon Sep 17 00:00:00 2001 From: Pieter Colpaert Date: Tue, 18 Apr 2017 17:46:16 +0200 Subject: [PATCH 3/6] Test original prefixed version instead --- src/N3Lexer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/N3Lexer.php b/src/N3Lexer.php index d83595f..445427c 100644 --- a/src/N3Lexer.php +++ b/src/N3Lexer.php @@ -65,7 +65,7 @@ public function __construct($options = []) { private $langcode = '/^@([a-z]+(?:-[a-z0-9]+)*)(?=[^a-z0-9\\-])/i'; private $prefix = '/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)?:(?=[#\\s<])/'; - private $prefixed = "/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)?:((?:(?:[0-:A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])(?:(?:[\\.\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])*(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~]))?)?)(?:[ \\t]+|(?=\.?[,;!\\^\\s#()\\[\\]\\{\\}\"'<]))/"; + private $prefixed = "/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|[\\x{d800}-\\x{db7f}][\\x{dc00}-\\x{dfff}])*)?:((?:(?:[0-:A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])(?:(?:[\\.\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|[\\x{d800}-\\x{db7f}][\\x{dc00}-\\x{dfff}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])*(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|[\\x{d800}-\\x{db7f}][\\x{dc00}-\\x{dfff}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~]))?)?)(?:[ \\t]+|(?=\\.?[,;!\\^\\s#()\\[\\]\\{\\}\"'<]))/u"; //OLD VERSION //private $prefixed = "/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff\\uf900-\\ufdcf\\ufdf0-\\ufffd]|[\\ud800-\\udb7f][\\udc00-\\udfff])*)?:((?:(?:[0-:A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])(?:(?:[\\.\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff\\uf900-\\ufdcf\\ufdf0-\\ufffd]|[\\ud800-\\udb7f][\\udc00-\\udfff]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])*(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff\\uf900-\\ufdcf\\ufdf0-\\ufffd]|[\\ud800-\\udb7f][\\udc00-\\udfff]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~]))?)?)(?:[ \\t]+|(?=\\.?[,;!\\^\\s#()\\[\\]\\{\\}\"'<]))/"; private $variable = '/^\\?(?:(?:[A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6])(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)(?=[.,;!\\^\\s#()\\[\\]\\{\\}"\'<])/'; From 66c192fbbf71581eca95c3131398186901e547ac Mon Sep 17 00:00:00 2001 From: Pieter Colpaert Date: Wed, 19 Apr 2017 13:27:27 +0200 Subject: [PATCH 4/6] Added performance of ARC2 to the table in README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ef7bdaf..4ce5344 100644 --- a/README.md +++ b/README.md @@ -208,11 +208,12 @@ We compared the performance on two turtle files, and parsed it with the EasyRDF |1,866 | __Hardf__ with opcache | 24.5 | 0.380 | |1,866 | [EasyRDF](https://github.com/njh/easyrdf) without opcache | 5,166.5 | 2.772 | |1,866 | [EasyRDF](https://github.com/njh/easyrdf) with opcache | 5,176.2 | 2.421 | +|1,866 | [ARC2](https://github.com/semsol/arc2) with opcache | 71.9 | 1.966 | | 1,866 | [N3.js](https://github.com/RubenVerborgh/N3.js) | 24.0 | 28.xxx | | 3,896,560 | __Hardf__ without opcache | 40,017.7 | 0.722 | | 3,896,560 | __Hardf__ with opcache | 33,155.3 | 0.380 | | 3,896,560 | [N3.js](https://github.com/RubenVerborgh/N3.js) | 7,004.0 | 59.xxx | - +| 3,896,560 | [ARC2](https://github.com/semsol/arc2) with opcache | 203,152.6 | 3,570.808 | ## License, status and contributions The N3.js library is copyrighted by [Ruben Verborgh](http://ruben.verborgh.org/) and [Pieter Colpaert](https://pietercolpaert.be) From 50f136ad6dd995bf8f82b58558d8605b97fcfe6f Mon Sep 17 00:00:00 2001 From: Pieter Colpaert Date: Thu, 20 Apr 2017 10:12:33 +0200 Subject: [PATCH 5/6] Fixed #7 --- src/N3Lexer.php | 6 ++---- src/TriGParser.php | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/N3Lexer.php b/src/N3Lexer.php index 445427c..6a64946 100644 --- a/src/N3Lexer.php +++ b/src/N3Lexer.php @@ -64,10 +64,8 @@ public function __construct($options = []) { private $tripleQuotedString = '/^""("[^\\\\"]*(?:(?:\\\\.|"(?!""))[^\\\\"]*)*")""|^\'\'(\'[^\\\\\']*(?:(?:\\\\.|\'(?!\'\'))[^\\\\\']*)*\')\'\'/'; private $langcode = '/^@([a-z]+(?:-[a-z0-9]+)*)(?=[^a-z0-9\\-])/i'; private $prefix = '/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)?:(?=[#\\s<])/'; - - private $prefixed = "/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|[\\x{d800}-\\x{db7f}][\\x{dc00}-\\x{dfff}])*)?:((?:(?:[0-:A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])(?:(?:[\\.\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|[\\x{d800}-\\x{db7f}][\\x{dc00}-\\x{dfff}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])*(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|[\\x{d800}-\\x{db7f}][\\x{dc00}-\\x{dfff}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~]))?)?)(?:[ \\t]+|(?=\\.?[,;!\\^\\s#()\\[\\]\\{\\}\"'<]))/u"; - //OLD VERSION - //private $prefixed = "/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff\\uf900-\\ufdcf\\ufdf0-\\ufffd]|[\\ud800-\\udb7f][\\udc00-\\udfff])*)?:((?:(?:[0-:A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])(?:(?:[\\.\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff\\uf900-\\ufdcf\\ufdf0-\\ufffd]|[\\ud800-\\udb7f][\\udc00-\\udfff]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])*(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff\\uf900-\\ufdcf\\ufdf0-\\ufffd]|[\\ud800-\\udb7f][\\udc00-\\udfff]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~]))?)?)(?:[ \\t]+|(?=\\.?[,;!\\^\\s#()\\[\\]\\{\\}\"'<]))/"; + private $prefixed = "/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{02ff}\\x{0370}-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}])*)?:((?:(?:[0-:A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{02ff}\\x{0370}-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])(?:(?:[\\.\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])*(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~]))?)?)(?:[ \\t]+|(?=\\.?[,;!\\^\\s#()\\[\\]\\{\\}\"'<]))/u"; + private $variable = '/^\\?(?:(?:[A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6])(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)(?=[.,;!\\^\\s#()\\[\\]\\{\\}"\'<])/'; private $blank = '/^_:((?:[0-9A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)(?:[ \\t]+|(?=\\.?[,;:\\s#()\\[\\]\\{\\}"\'<]))/'; diff --git a/src/TriGParser.php b/src/TriGParser.php index ccafcea..5e4ca75 100644 --- a/src/TriGParser.php +++ b/src/TriGParser.php @@ -130,7 +130,7 @@ private function saveContext($type, $graph, $subject, $predicate, $object) { $this->quantified = $this->quantified; } } - + // ### `_restoreContext` restores the parent context // when leaving a scope (list, blank node, formula) private function restoreContext() { From 58f7132c76765b702693618cb49af07e3d7861af Mon Sep 17 00:00:00 2001 From: Pieter Colpaert Date: Thu, 20 Apr 2017 10:23:20 +0200 Subject: [PATCH 6/6] Added unicode tests --- test/TriGParserTest.php | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/TriGParserTest.php b/test/TriGParserTest.php index b57d270..42335dc 100644 --- a/test/TriGParserTest.php +++ b/test/TriGParserTest.php @@ -633,7 +633,7 @@ public function testLiterals () ['a', 'b', '" abc' . "\n" . 'abc "']); } - public function testUnicodeSequences () + public function testUnicode () { // ### should parse a graph with 8-bit unicode escape sequences $this->shouldParse('<\\U0001d400> {'."\n".'<\\U0001d400> <\\U0001d400> "\\U0001d400"^^<\\U0001d400>'."\n".'}' . "\n", @@ -645,6 +645,15 @@ public function testUnicodeSequences () // ### should parse unicode after prefix $this->shouldParse('@prefix c: . c:test c:ใƒ†ใ‚นใƒˆ .', ['http://example.org/test','b','http://example.org/ใƒ†ใ‚นใƒˆ','']); + + // ### should parse unicode in literal + $this->shouldParse('@prefix c: . +c:test "c:ใƒ†ใ‚นใƒˆ" .', ['http://example.org/test','b','"c:ใƒ†ใ‚นใƒˆ"','']); + + // ### should parse unicode in prefixname + $this->shouldParse('@prefix c: . + c:ใ‚นใƒˆ .', ['http://example.org/testprefixname','b','http://example.org/ใƒ†ใ‚นใƒˆ','']); + }