diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..dadf56c --- /dev/null +++ b/COPYING @@ -0,0 +1,2 @@ +This module is licensed under the same terms as perl itself. + diff --git a/Changes b/Changes new file mode 100644 index 0000000..3e7be33 --- /dev/null +++ b/Changes @@ -0,0 +1,440 @@ +Revision history for Perl extension JSON::XS + +TODO: maybe detetc and croak on more invalid inputs (e.g. +-inf/nan) +TODO: maybe avoid the reblessing and better support readonly objects. +TODO: http://stevehanov.ca/blog/index.php?id=104 compression +TODO: how to cope with tagged values and standard json decoders +TODO: investigate magic (Eric Brine) +TODO: rfc7464 +TODO: Victor Efimov +TODO: move json_xs to types::serialiser + +3.04 Thu Aug 17 04:30:47 CEST 2017 + - change exponential realloc algorithm on encoding and string decoding to be + really exponential (this helps slow/debugging allocators such as libumem) + (reported by Matthew Horsfall). + - string encoding would needlessly overallocate output space + (testcase by Matthew Horsfall). + - be very paranoid about extending buffer lengths and croak if buffers get too large, + which might (or might not) improve security. + - add cbor-packed type to json_xs. + - switch from YAML to YAML::XS in json_xs, as YAML is way too buggy and outdated. + +3.03 Wed Nov 16 20:20:59 CET 2016 + - fix a bug introduced by a perl bug workaround that would cause + incremental parsing to fail with a sv_chop panic. + - json_xs: toformat failure error message fixed. + - json_xs: allow cyclic data structures in CBOR. + +3.02 Fri Feb 26 22:45:20 CET 2016 + - allow_nonref now affects booleans (\1, $Types::Serialiser::Boolean) + as well (reported by Alex Efros). + - allow literal tabs in strings in relaxed mode (patch by + lubo.rintel@gooddata.com). + - support "cbor" format in json_xs tool. + - support (and fix) calling encode and decode in list context + (reported by Вадим Власов). + - work around a bug in older perls crashing when presented + with shared hash keys (Reini Urban). + - use stability canary. + +3.01 Tue Oct 29 16:55:15 CET 2013 + - backport to perls < 5.18 (reported by Paul Howarth). + +3.0 Tue Oct 29 01:35:37 CET 2013 + - implemented an object tagging extension (using the + Types::Serialiser serialisation protocol). + - reworked the documentation regarding object serialisation, + add a new OBJECT SERIALISATION section that explains th + whole process. + - new setting: allow_tags. + - switch to Types::Serialiser booleans. + - remove to_json/from_json. + - other minor improvements to the documentation. + +2.34 Thu May 23 11:30:34 CEST 2013 + - work around bugs in perl 5.18 breaking more than 100 + widely used modules, without a fix in sight because + p5pers don't care about CPAN anymore. + - when canonicalising, only allocate up to 64 hash key + pointers on the stack. for larger hashes, use the heap, + to avoid using too much stackspace. + - discuss the problem with setlocale (reported by a few victims). + +2.33 Wed Aug 1 21:03:52 CEST 2012 + - internal encode/decode XS wrappers did not expect stack + moves caused by callbacks (analyzed and testcase by Jesse Luehrs). + - add bencode as to/from option in bin/json_xs. + - add -e option to json_xs, and none and string in/out formats. + +2.32 Thu Aug 11 19:06:38 CEST 2011 + - fix a bug in the initial whitespace accumulation. 
+ +2.31 Wed Jul 27 17:53:05 CEST 2011 + - don't accumulate initial whitespace in the incremental buffer + (this can be useful to allow whitespace-keepalive on a tcp + connection without triggering the max_size limit). + - properly croak on some invalid inputs that are not strings + (e.g. undef) when trying to decode a json text (reported + and analyzed by Goro Fuji). + +2.3 Wed Aug 18 01:26:47 CEST 2010 + - make sure decoder doesn't change the decoding in the incremental + parser (testcase provided by Hendrik Schumacher). + - applied patch by DaTa for Data::Dumper support in json_xs. + - added -t dump support to json_xs, using Data::Dump. + - added -f eval support to json_xs. + +2.29 Wed Mar 17 02:39:12 CET 2010 + - fix a memory leak when callbacks set using filter_json_object + or filter_json_single_key_object were called (great testcase + by Eric Wilhelm). + +2.28 Thu Mar 11 20:30:46 CET 2010 + - implement our own atof function - perl's can be orders of + magnitudes slower than even the system one. on the positive + side, ours seems to be more exact in general than perl's. + (testcase provided by Tim Meadowcroft). + - clarify floating point conversion issues a bit. + - update jpsykes csrf article url. + - updated benchmark section - JSON::PP became much faster! + +2.27 Thu Jan 7 07:35:08 CET 2010 + - support relaxed option inside the incremental parser + (testcase provided by IKEGAMI via Makamaka). + +2.26 Sat Oct 10 03:26:19 CEST 2009 + - big integers could become truncated (based on patch + by Strobl Anton). + - output format change: indent now adds a final newline, which is + more expected and more true to the documentation. + +2.25 Sat Aug 8 12:04:41 CEST 2009 + - the perl debugger completely breaks lvalue subs - try to work + around the issue. + - ignore RMAGICAL hashes w.r.t. CANONICAL. + - try to work around a possible char signedness issue on aix. + - require common sense. + +2.24 Sat May 30 08:25:45 CEST 2009 + - the incremental parser did not update its parse offset + pointer correctly when parsing utf8-strings (nicely + debugged by Martin Evans). + - appending a non-utf8-string to the incremental parser + in utf8 mode failed to upgrade the string. + - wording of parse error messages has been improved. + +2.232 Sun Feb 22 11:12:25 CET 2009 + - use an exponential algorithm to extend strings, to + help platforms with bad or abysmal==windows memory + allocater performance, at the expense of some memory + wastage (use shrink to recover this extra memory). + (nicely analysed by Dmitry Karasik). + +2.2311 Thu Feb 19 02:12:54 CET 2009 + - add a section "JSON and ECMAscript" to explain some + incompatibilities between the two (problem was noted by + various people). + - add t/20_faihu.t. + +2.231 Thu Nov 20 04:59:08 CET 2008 + - work around 5.10.0 magic bugs where manipulating magic values + (such as $1) would permanently damage them as perl would + ignore the magicalness, by making a full copy of the string, + reported by Dmitry Karasik. + - work around spurious warnings under older perl 5.8's. + +2.23 Mon Sep 29 05:08:29 CEST 2008 + - fix a compilation problem when perl is not using char * as, well, + char *. + - use PL_hexdigit in favour of rolling our own. + +2.2222 Sun Jul 20 18:49:00 CEST 2008 + - same game again, broken 5.10 finds yet another assertion + failure, and the workaround causes additional runtime warnings. + Work around the next assertion AND the warning. 5.10 seriously + needs to adjust it's attitude against working code. 
+ +2.222 Sat Jul 19 06:15:34 CEST 2008 + - you work around one -DDEBUGGING assertion bug in perl 5.10 + just to hit the next one. work around this one, too. + +2.22 Tue Jul 15 13:26:51 CEST 2008 + - allow higher nesting levels in incremental parser. + - error out earlier in some cases in the incremental parser + (as suggested by Yuval Kogman). + - improve incr-parser test (Yuval Kogman). + +2.21 Tue Jun 3 08:43:23 CEST 2008 + - (hopefully) work around a perl 5.10 bug with -DDEBUGGING. + - remove the experimental status of the incremental parser interface. + - move =encoding around again, to avoid bugs with search.cpan.org. + when can we finally have utf-8 in pod??? + - add ->incr_reset method. + +2.2 Wed Apr 16 20:37:25 CEST 2008 + - lifted the log2 rounding restriction of max_depth and max_size. + - make booleans mutable by creating a copy instead of handing out + the same scalar (reported by pasha sadri). + - added support for incremental json parsing (still EXPERIMENTAL). + - implemented and added a json_xs command line utility that can convert + from/to a number of serialisation formats - tell me if you need more. + - implement allow_unknown/get_allow_unknown methods. + - fixed documentation of max_depth w.r.t. higher and equal. + - moved down =encoding directive a bit, too much breaks if it's the first + pod directive :/. + - removed documentation section on other modules, it became somewhat + outdated and is nowadays mostly of historical interest. + +2.1 Wed Mar 19 23:23:18 CET 2008 + - update documentation here and there: add a large section + about utf8/latin1/ascii flags, add a security consideration + and extend and clarify the JSON and YAML section. + - medium speed enhancements when encoding/decoding non-ascii chars. + - minor speedup in number encoding case. + - extend and clarify the section on incompatibilities + between YAML and JSON. + - switch to static inline from just inline when using gcc. + - add =encoding utf-8 to the manpage, now that perl 5.10 supports it. + - fix some issues with UV to JSON conversion of unknown impact. + - published the yahoo locals search result used in benchmarks as the + original url changes so comparison is impossible. + +2.01 Wed Dec 5 11:40:28 CET 2007 + - INCOMPATIBLE API CHANGE: to_json and from_json have been + renamed to encode_json/decode_json for JSON.pm compatibility. + The old functions croak and might be replaced by JSON.pm + comaptible versions in some later release. + +2.0 Tue Dec 4 11:30:46 CET 2007 + - this is supposed to be the first version of JSON::XS + compatible with version 2.0+ of the JSON module. + Using the JSON module as frontend to JSON::XS should be + as fast as using JSON::XS directly, so consider using it + instead. + - added get_* methods for all "simple" options. + - make JSON::XS subclassable. + +1.53 Tue Nov 13 23:58:33 CET 2007 + - minor doc clarifications. + - fixed many doc typos (patch by Thomas L. Shinnick). + +1.52 Mon Oct 15 03:22:06 CEST 2007 + - remove =encoding pod directive again, it confuses too many pod + parsers :/. + +1.51 Sat Oct 13 03:55:56 CEST 2007 + - encode empty arrays/hashes in a compact way when pretty is enabled. + - apparently JSON::XS was used to find some bugs in the + JSON_checker testsuite, so add (the corrected) JSON_checker tests to + the testsuite. + - quite a bit of doc updates/extension. + - require 5.8.2, as this seems to be the first unicode-stable version. 
+ +1.5 Tue Aug 28 04:05:38 CEST 2007 + - add support for tied hashes, based on ideas and testcase by + Marcus Holland-Moritz. + - implemented relaxed parsing mode where some extensions are being + accepted. generation is still JSON-only. + +1.44 Wed Aug 22 01:02:44 CEST 2007 + - very experimental process-emulation support, slowing everything down. + the horribly broken perl threads are still not supported - YMMV. + +1.43 Thu Jul 26 13:26:37 CEST 2007 + - convert big json numbers exclusively consisting of digits to NV + only when there is no loss of precision, otherwise to string. + +1.42 Tue Jul 24 00:51:18 CEST 2007 + - fix a crash caused by not handling missing array elements + (report and testcase by Jay Kuri). + +1.41 Tue Jul 10 18:21:44 CEST 2007 + - fix compilation with NDEBUG (assert side-effect), + affects convert_blessed only. + - fix a bug in decode filters calling ENTER; SAVETMPS; + one time too often. + - catch a typical error in TO_JSON methods. + - antique-ised XS.xs again to work with outdated + C compilers (windows...). + +1.4 Mon Jul 2 10:06:30 CEST 2007 + - add convert_blessed setting. + - encode did not catch all blessed objects, encoding their + contents in most cases. This has been fixed by introducing + the allow_blessed setting. + - added filter_json_object and filter_json_single_key_object + settings that specify a callback to be called when + all/specific json objects are encountered. + - assume that most object keys are simple ascii words and + optimise this case, penalising the general case. This can + speed up decoding by 30% in typical cases and gives + a smaller and faster perl hash. + - implemented simpleminded, optional resource size checking + in decode_json. + - remove objToJson/jsonToObj aliases, as the next version + of JSON will not have them either. + - bit the bullet and converted the very simple json object + into a more complex one. + - work around a bug where perl wrongly claims an integer + is not an integer. + - unbundle JSON::XS::Boolean into own pm file so Storable + and similar modules can resolve the overloading when thawing. + +1.3 Sun Jun 24 01:55:02 CEST 2007 + - make JSON::XS::true and false special overloaded objects + and return those instead of 1 and 0 for those json atoms + (JSON::PP compatibility is NOT achieved yet). + - add JSON::XS::is_bool predicate to test for those special + values. + - add a reference to + http://jpsykes.com/47/practical-csrf-and-json-security. + - removed require 5.8.8 again, it is just not very expert-friendly. + Also try to be more compatible with slightly older versions, + which are not recommended (because they are buggy). + +1.24 Mon Jun 11 05:40:49 CEST 2007 + - added informative section on JSON-as-YAML. + - get rid of some c99-isms again. + - localise dec->cur in decode_str, speeding up + string decoding considerably (>15% on my amd64 + gcc). + - increased SHORT_STRING_LEN to 16kb: stack space is + usually plenty, and this actually saves memory + when !shrinking as short strings will fit perfectly. + +1.23 Wed Jun 6 20:13:06 CEST 2007 + - greatly improved small integer encoding and decoding speed. + - implement a number of µ-optimisations. + - updated benchmarks. + +1.22 Thu May 24 00:07:25 CEST 2007 + - require 5.8.8 explicitly as older perls do not seem to offer + the required macros. + - possibly made it compile on so-called C compilers by microsoft. + +1.21 Wed May 9 18:40:32 CEST 2007 + - character offset reported for trailing garbage was random. 
+ +1.2 Wed May 9 18:35:01 CEST 2007 + - decode did not work with magical scalars (doh!). + - added latin1 flag to produce JSON texts in the latin1 subset + of unicode. + - flag trailing garbage as error. + - new decode_prefix method that returns the number + of characters consumed by a decode. + - max octets/char in perls UTF-X is actually 13, not 11, + as pointed out by Glenn Linderman. + - fixed typoe reported by YAMASHINA Hio. + +1.11 Mon Apr 9 07:05:49 CEST 2007 + - properly 0-terminate sv's returned by encode to help + C libraries that expect that 0 to be there. + - partially "port" JSON from C to microsofts fucking broken + pseudo-C. They should be burned to the ground for pissing + on standards. And I should be stoned for even trying to + support this filthy excuse for a c compiler. + +1.1 Wed Apr 4 01:45:00 CEST 2007 + - clarify documentation (pointed out by Quinn Weaver). + - decode_utf8 sometimes did not correctly flag errors, + leading to segfaults. + - further reduced default nesting depth to 512 due to the test + failure by that anonymous "chris" whose e-mail address seems + to be impossible to get. Tests on other freebsd systems indicate + that this is likely a problem in his/her configuration and not this + module. + - renamed json => JSON in error messages. + - corrected the character offset in some error messages. + +1.01 Sat Mar 31 16:15:40 CEST 2007 + - do not segfault when from_json/decode gets passed + a non-string object (reported by Florian Ragwitz). + This has no effect on normal operation. + +1.0 Thu Mar 29 04:43:34 CEST 2007 + - the long awaited (by me) 1.0 version. + - add \0 (JSON::XS::false) and \1 (JSON::XS::true) mappings to JSON + true and false. + - add some more notes to shrink, as suggested by Alex Efros. + - improve testsuite. + - halve the default nesting depth limit, to hopefully make it + work on Freebsd (unfortunately, the cpan tester did not + send me his report, so I cannot ask about the stack limit on fbsd). + +0.8 Mon Mar 26 00:10:48 CEST 2007 + - fix a memleak when decoding hashes. + - export jsonToBj and objToJson as aliases + to to_json and from_json, to reduce incompatibilities + between JSON/JSON::PC and JSON::XS. (experimental). + - implement a maximum nesting depth for both en- and de-coding. + - added a security considerations sections. + +0.7 Sun Mar 25 01:46:30 CET 2007 + - code cleanup. + - fix a memory overflow bug when indenting. + - pretty-printing now up to 15% faster. + - improve decoding speed of strings by + up to 50% by specialcasing short strings. + - further decoding speedups for strings using + lots of \u escapes. + - improve utf8 decoding speed for U+80 .. U+7FF. + +0.5 Sat Mar 24 20:41:51 CET 2007 + - added the UTF-16 encoding example hinted at in previous + versions. + - minor documentation fixes. + - fix a bug in and optimise canonicalising fastpath + (reported by Craig Manley). + - remove a subtest that breaks with bleadperl (reported + by Andreas König). + +0.31 Sat Mar 24 02:14:34 CET 2007 + - documentation updates. + - do some casting to hopefully fix Andreas' problem. + - nuke bogus json rpc stuff. + +0.3 Fri Mar 23 19:33:21 CET 2007 + - remove spurious PApp::Util reference (John McNamara). + - adapted lots of tests from other json modules + (idea by Chris Carline). + - documented mapping from json to perl and vice versa. + - improved the documentation by adding more examples. + - added short escaping forms, reducing the created + json texts a bit. + - added shrink flag. 
+ - when flag methods are called without enable argument + they will by default enable their flag. + - considerably improved string encoding speed (at least + with gcc 4). + - added a test that covers lots of different characters. + - clarified some error messages. + - error messages now use correct character offset + with F_UTF8. + - improve the "no bytes" and "no warnings" hacks in + case the called functions do... stuff. + - croak when encoding to ascii and an out-of-range + (non-unicode) codepoint is encountered. + +0.2 Fri Mar 23 00:23:34 CET 2007 + - the "could not sleep without debugging release". + it should basically work now, with many bugs as + no production tests have been run yet. + - added more testcases. + - the expected shitload of bugfixes. + - handle utf8 flag correctly in decode. + - fix segfault in decoder. + - utf8n_to_uvuni sets retlen to -1, but retlen is an + unsigned types (argh). + - fix decoding of utf-8 strings. + - improved error diagnostics. + - fix decoding of 'null'. + - fix parsing of empty array/hashes + - silence warnings when we prepare the croak message. + +0.1 Thu Mar 22 22:13:43 CET 2007 + - first release, very untested, basically just to claim + the namespace. + +0.01 Thu Mar 22 06:08:12 CET 2007 + - original version; cloned from Convert-Scalar + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..aca65d0 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,38 @@ +README +Changes +MANIFEST +COPYING +Makefile.PL +XS.pm +XS.xs +XS/Boolean.pm +bin/json_xs +eg/bench +t/00_load.t +t/01_utf8.t +t/02_error.t +t/03_types.t +t/04_dwiw_encode.t +t/05_dwiw_decode.t +t/06_pc_pretty.t +t/07_pc_esc.t +t/08_pc_base.t +t/09_pc_extra_number.t +t/10_pc_keysort.t +t/11_pc_expo.t +t/12_blessed.t +t/13_limit.t +t/14_latin1.t +t/15_prefix.t +t/16_tied.t +t/17_relaxed.t +t/18_json_checker.t +t/19_incr.t +t/20_faihu.t +t/21_evans.t +t/22_comment_at_eof.t +t/52_object.t +t/99_binary.t +typemap +META.yml Module YAML meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff --git a/META.json b/META.json new file mode 100644 index 0000000..f100dc0 --- /dev/null +++ b/META.json @@ -0,0 +1,44 @@ +{ + "abstract" : "unknown", + "author" : [ + "unknown" + ], + "dynamic_config" : 1, + "generated_by" : "ExtUtils::MakeMaker version 7.3, CPAN::Meta::Converter version 2.150010", + "license" : [ + "unknown" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "JSON-XS", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "configure" : { + "requires" : { + "Canary::Stability" : "0", + "ExtUtils::MakeMaker" : "6.52" + } + }, + "runtime" : { + "requires" : { + "Types::Serialiser" : "0", + "common::sense" : "0" + } + } + }, + "release_status" : "stable", + "version" : 3.04, + "x_serialization_backend" : "JSON::PP version 2.27300" +} diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..55a079c --- /dev/null +++ b/META.yml @@ -0,0 +1,25 @@ +--- +abstract: unknown +author: + - unknown +build_requires: + ExtUtils::MakeMaker: '0' +configure_requires: + Canary::Stability: '0' + ExtUtils::MakeMaker: '6.52' +dynamic_config: 1 +generated_by: 'ExtUtils::MakeMaker version 7.3, CPAN::Meta::Converter version 2.150010' +license: unknown +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: JSON-XS +no_index: + directory: + - t + - inc +requires: + 
Types::Serialiser: '0' + common::sense: '0' +version: 3.04 +x_serialization_backend: 'CPAN::Meta::YAML version 0.012' diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..ef913cf --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,21 @@ +use 5.008003; +use ExtUtils::MakeMaker; + +use Canary::Stability JSON::XS => 1, 5.008003; + +WriteMakefile( + dist => { + PREOP => 'pod2text XS.pm | tee README >$(DISTVNAME)/README; chmod -R u=rwX,go=rX . ;', + COMPRESS => 'gzip -9v', + SUFFIX => '.gz', + }, + EXE_FILES => [ "bin/json_xs" ], + VERSION_FROM => "XS.pm", + NAME => "JSON::XS", + PREREQ_PM => { + common::sense => 0, + Types::Serialiser => 0, + }, + CONFIGURE_REQUIRES => { ExtUtils::MakeMaker => 6.52, Canary::Stability => 0 }, +); + diff --git a/README b/README new file mode 100644 index 0000000..bc6ef97 --- /dev/null +++ b/README @@ -0,0 +1,1604 @@ +NAME + JSON::XS - JSON serialising/deserialising, done correctly and fast + + JSON::XS - 正しくて高速な JSON シリアライザ/デシリアライザ + (http://fleur.hio.jp/perldoc/mix/lib/JSON/XS.html) + +SYNOPSIS + use JSON::XS; + + # exported functions, they croak on error + # and expect/generate UTF-8 + + $utf8_encoded_json_text = encode_json $perl_hash_or_arrayref; + $perl_hash_or_arrayref = decode_json $utf8_encoded_json_text; + + # OO-interface + + $coder = JSON::XS->new->ascii->pretty->allow_nonref; + $pretty_printed_unencoded = $coder->encode ($perl_scalar); + $perl_scalar = $coder->decode ($unicode_json_text); + + # Note that JSON version 2.0 and above will automatically use JSON::XS + # if available, at virtually no speed overhead either, so you should + # be able to just: + + use JSON; + + # and do the same things, except that you have a pure-perl fallback now. + +DESCRIPTION + This module converts Perl data structures to JSON and vice versa. Its + primary goal is to be *correct* and its secondary goal is to be *fast*. + To reach the latter goal it was written in C. + + Beginning with version 2.0 of the JSON module, when both JSON and + JSON::XS are installed, then JSON will fall back on JSON::XS (this can + be overridden) with no overhead due to emulation (by inheriting + constructor and methods). If JSON::XS is not available, it will fall + back to the compatible JSON::PP module as backend, so using JSON instead + of JSON::XS gives you a portable JSON API that can be fast when you need + it and doesn't require a C compiler when that is a problem. + + As this is the n-th-something JSON module on CPAN, what was the reason + to write yet another JSON module? While it seems there are many JSON + modules, none of them correctly handle all corner cases, and in most + cases their maintainers are unresponsive, gone missing, or not listening + to bug reports for other reasons. + + See MAPPING, below, on how JSON::XS maps perl values to JSON values and + vice versa. + + FEATURES + * correct Unicode handling + + This module knows how to handle Unicode, documents how and when it + does so, and even documents what "correct" means. + + * round-trip integrity + + When you serialise a perl data structure using only data types + supported by JSON and Perl, the deserialised data structure is + identical on the Perl level. (e.g. the string "2.0" doesn't suddenly + become "2" just because it looks like a number). There *are* minor + exceptions to this, read the MAPPING section below to learn about + those. 
+ + * strict checking of JSON correctness + + There is no guessing, no generating of illegal JSON texts by + default, and only JSON is accepted as input by default (the latter + is a security feature). + + * fast + + Compared to other JSON modules and other serialisers such as + Storable, this module usually compares favourably in terms of speed, + too. + + * simple to use + + This module has both a simple functional interface as well as an + object oriented interface. + + * reasonably versatile output formats + + You can choose between the most compact guaranteed-single-line + format possible (nice for simple line-based protocols), a pure-ASCII + format (for when your transport is not 8-bit clean, still supports + the whole Unicode range), or a pretty-printed format (for when you + want to read that stuff). Or you can combine those features in + whatever way you like. + +FUNCTIONAL INTERFACE + The following convenience methods are provided by this module. They are + exported by default: + + $json_text = encode_json $perl_scalar + Converts the given Perl data structure to a UTF-8 encoded, binary + string (that is, the string contains octets only). Croaks on error. + + This function call is functionally identical to: + + $json_text = JSON::XS->new->utf8->encode ($perl_scalar) + + Except being faster. + + $perl_scalar = decode_json $json_text + The opposite of "encode_json": expects an UTF-8 (binary) string and + tries to parse that as an UTF-8 encoded JSON text, returning the + resulting reference. Croaks on error. + + This function call is functionally identical to: + + $perl_scalar = JSON::XS->new->utf8->decode ($json_text) + + Except being faster. + +A FEW NOTES ON UNICODE AND PERL + Since this often leads to confusion, here are a few very clear words on + how Unicode works in Perl, modulo bugs. + + 1. Perl strings can store characters with ordinal values > 255. + This enables you to store Unicode characters as single characters in + a Perl string - very natural. + + 2. Perl does *not* associate an encoding with your strings. + ... until you force it to, e.g. when matching it against a regex, or + printing the scalar to a file, in which case Perl either interprets + your string as locale-encoded text, octets/binary, or as Unicode, + depending on various settings. In no case is an encoding stored + together with your data, it is *use* that decides encoding, not any + magical meta data. + + 3. The internal utf-8 flag has no meaning with regards to the encoding + of your string. + Just ignore that flag unless you debug a Perl bug, a module written + in XS or want to dive into the internals of perl. Otherwise it will + only confuse you, as, despite the name, it says nothing about how + your string is encoded. You can have Unicode strings with that flag + set, with that flag clear, and you can have binary data with that + flag set and that flag clear. Other possibilities exist, too. + + If you didn't know about that flag, just the better, pretend it + doesn't exist. + + 4. A "Unicode String" is simply a string where each character can be + validly interpreted as a Unicode code point. + If you have UTF-8 encoded data, it is no longer a Unicode string, + but a Unicode string encoded in UTF-8, giving you a binary string. + + 5. A string containing "high" (> 255) character values is *not* a UTF-8 + string. + It's a fact. Learn to live with it. 
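+
+    To make the distinction in the points above a bit more tangible, here
+    is a small illustration (a sketch only - it uses the core Encode
+    module, and the variable names are arbitrary):
+
+       use Encode qw(encode);
+
+       my $chars  = "\x{20ac}";              # one character, the Euro sign
+       my $octets = encode "UTF-8", $chars;  # the same text as three octets
+
+       print length $chars;   # 1 - a Unicode string of one character
+       print length $octets;  # 3 - a binary string, no longer a "Unicode string"
+
+       # decode_json expects such UTF-8 octets as input and encode_json
+       # produces them; the OO interface without ->utf8 deals in character
+       # strings like $chars instead.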
+ + I hope this helps :) + +OBJECT-ORIENTED INTERFACE + The object oriented interface lets you configure your own encoding or + decoding style, within the limits of supported formats. + + $json = new JSON::XS + Creates a new JSON::XS object that can be used to de/encode JSON + strings. All boolean flags described below are by default + *disabled*. + + The mutators for flags all return the JSON object again and thus + calls can be chained: + + my $json = JSON::XS->new->utf8->space_after->encode ({a => [1,2]}) + => {"a": [1, 2]} + + $json = $json->ascii ([$enable]) + $enabled = $json->get_ascii + If $enable is true (or missing), then the "encode" method will not + generate characters outside the code range 0..127 (which is ASCII). + Any Unicode characters outside that range will be escaped using + either a single \uXXXX (BMP characters) or a double \uHHHH\uLLLLL + escape sequence, as per RFC4627. The resulting encoded JSON text can + be treated as a native Unicode string, an ascii-encoded, + latin1-encoded or UTF-8 encoded string, or any other superset of + ASCII. + + If $enable is false, then the "encode" method will not escape + Unicode characters unless required by the JSON syntax or other + flags. This results in a faster and more compact format. + + See also the section *ENCODING/CODESET FLAG NOTES* later in this + document. + + The main use for this flag is to produce JSON texts that can be + transmitted over a 7-bit channel, as the encoded JSON texts will not + contain any 8 bit characters. + + JSON::XS->new->ascii (1)->encode ([chr 0x10401]) + => ["\ud801\udc01"] + + $json = $json->latin1 ([$enable]) + $enabled = $json->get_latin1 + If $enable is true (or missing), then the "encode" method will + encode the resulting JSON text as latin1 (or iso-8859-1), escaping + any characters outside the code range 0..255. The resulting string + can be treated as a latin1-encoded JSON text or a native Unicode + string. The "decode" method will not be affected in any way by this + flag, as "decode" by default expects Unicode, which is a strict + superset of latin1. + + If $enable is false, then the "encode" method will not escape + Unicode characters unless required by the JSON syntax or other + flags. + + See also the section *ENCODING/CODESET FLAG NOTES* later in this + document. + + The main use for this flag is efficiently encoding binary data as + JSON text, as most octets will not be escaped, resulting in a + smaller encoded size. The disadvantage is that the resulting JSON + text is encoded in latin1 (and must correctly be treated as such + when storing and transferring), a rare encoding for JSON. It is + therefore most useful when you want to store data structures known + to contain binary data efficiently in files or databases, not when + talking to other JSON encoders/decoders. + + JSON::XS->new->latin1->encode (["\x{89}\x{abc}"] + => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not) + + $json = $json->utf8 ([$enable]) + $enabled = $json->get_utf8 + If $enable is true (or missing), then the "encode" method will + encode the JSON result into UTF-8, as required by many protocols, + while the "decode" method expects to be handled an UTF-8-encoded + string. Please note that UTF-8-encoded strings do not contain any + characters outside the range 0..255, they are thus useful for + bytewise/binary I/O. In future versions, enabling this option might + enable autodetection of the UTF-16 and UTF-32 encoding families, as + described in RFC4627. 
+ + If $enable is false, then the "encode" method will return the JSON + string as a (non-encoded) Unicode string, while "decode" expects + thus a Unicode string. Any decoding or encoding (e.g. to UTF-8 or + UTF-16) needs to be done yourself, e.g. using the Encode module. + + See also the section *ENCODING/CODESET FLAG NOTES* later in this + document. + + Example, output UTF-16BE-encoded JSON: + + use Encode; + $jsontext = encode "UTF-16BE", JSON::XS->new->encode ($object); + + Example, decode UTF-32LE-encoded JSON: + + use Encode; + $object = JSON::XS->new->decode (decode "UTF-32LE", $jsontext); + + $json = $json->pretty ([$enable]) + This enables (or disables) all of the "indent", "space_before" and + "space_after" (and in the future possibly more) flags in one call to + generate the most readable (or most compact) form possible. + + Example, pretty-print some simple structure: + + my $json = JSON::XS->new->pretty(1)->encode ({a => [1,2]}) + => + { + "a" : [ + 1, + 2 + ] + } + + $json = $json->indent ([$enable]) + $enabled = $json->get_indent + If $enable is true (or missing), then the "encode" method will use a + multiline format as output, putting every array member or + object/hash key-value pair into its own line, indenting them + properly. + + If $enable is false, no newlines or indenting will be produced, and + the resulting JSON text is guaranteed not to contain any "newlines". + + This setting has no effect when decoding JSON texts. + + $json = $json->space_before ([$enable]) + $enabled = $json->get_space_before + If $enable is true (or missing), then the "encode" method will add + an extra optional space before the ":" separating keys from values + in JSON objects. + + If $enable is false, then the "encode" method will not add any extra + space at those places. + + This setting has no effect when decoding JSON texts. You will also + most likely combine this setting with "space_after". + + Example, space_before enabled, space_after and indent disabled: + + {"key" :"value"} + + $json = $json->space_after ([$enable]) + $enabled = $json->get_space_after + If $enable is true (or missing), then the "encode" method will add + an extra optional space after the ":" separating keys from values in + JSON objects and extra whitespace after the "," separating key-value + pairs and array members. + + If $enable is false, then the "encode" method will not add any extra + space at those places. + + This setting has no effect when decoding JSON texts. + + Example, space_before and indent disabled, space_after enabled: + + {"key": "value"} + + $json = $json->relaxed ([$enable]) + $enabled = $json->get_relaxed + If $enable is true (or missing), then "decode" will accept some + extensions to normal JSON syntax (see below). "encode" will not be + affected in anyway. *Be aware that this option makes you accept + invalid JSON texts as if they were valid!*. I suggest only to use + this option to parse application-specific files written by humans + (configuration files, resource files etc.) + + If $enable is false (the default), then "decode" will only accept + valid JSON texts. + + Currently accepted extensions are: + + * list items can have an end-comma + + JSON *separates* array elements and key-value pairs with commas. 
+ This can be annoying if you write JSON texts manually and want + to be able to quickly append elements, so this extension accepts + comma at the end of such items not just between them: + + [ + 1, + 2, <- this comma not normally allowed + ] + { + "k1": "v1", + "k2": "v2", <- this comma not normally allowed + } + + * shell-style '#'-comments + + Whenever JSON allows whitespace, shell-style comments are + additionally allowed. They are terminated by the first + carriage-return or line-feed character, after which more + white-space and comments are allowed. + + [ + 1, # this comment not allowed in JSON + # neither this one... + ] + + * literal ASCII TAB characters in strings + + Literal ASCII TAB characters are now allowed in strings (and + treated as "\t"). + + [ + "Hello\tWorld", + "HelloWorld", # literal would not normally be allowed + ] + + $json = $json->canonical ([$enable]) + $enabled = $json->get_canonical + If $enable is true (or missing), then the "encode" method will + output JSON objects by sorting their keys. This is adding a + comparatively high overhead. + + If $enable is false, then the "encode" method will output key-value + pairs in the order Perl stores them (which will likely change + between runs of the same script, and can change even within the same + run from 5.18 onwards). + + This option is useful if you want the same data structure to be + encoded as the same JSON text (given the same overall settings). If + it is disabled, the same hash might be encoded differently even if + contains the same data, as key-value pairs have no inherent ordering + in Perl. + + This setting has no effect when decoding JSON texts. + + This setting has currently no effect on tied hashes. + + $json = $json->allow_nonref ([$enable]) + $enabled = $json->get_allow_nonref + If $enable is true (or missing), then the "encode" method can + convert a non-reference into its corresponding string, number or + null JSON value, which is an extension to RFC4627. Likewise, + "decode" will accept those JSON values instead of croaking. + + If $enable is false, then the "encode" method will croak if it isn't + passed an arrayref or hashref, as JSON texts must either be an + object or array. Likewise, "decode" will croak if given something + that is not a JSON object or array. + + Example, encode a Perl scalar as JSON value with enabled + "allow_nonref", resulting in an invalid JSON text: + + JSON::XS->new->allow_nonref->encode ("Hello, World!") + => "Hello, World!" + + $json = $json->allow_unknown ([$enable]) + $enabled = $json->get_allow_unknown + If $enable is true (or missing), then "encode" will *not* throw an + exception when it encounters values it cannot represent in JSON (for + example, filehandles) but instead will encode a JSON "null" value. + Note that blessed objects are not included here and are handled + separately by c. + + If $enable is false (the default), then "encode" will throw an + exception when it encounters anything it cannot encode as JSON. + + This option does not affect "decode" in any way, and it is + recommended to leave it off unless you know your communications + partner. + + $json = $json->allow_blessed ([$enable]) + $enabled = $json->get_allow_blessed + See "OBJECT SERIALISATION" for details. + + If $enable is true (or missing), then the "encode" method will not + barf when it encounters a blessed reference that it cannot convert + otherwise. Instead, a JSON "null" value is encoded instead of the + object. 
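+
+    Example (for illustration only - "My::Class" stands for an arbitrary
+    class without any JSON support):
+
+       my $obj = bless { id => 1 }, "My::Class";
+       JSON::XS->new->allow_blessed->encode ([$obj])
+       => [null]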
+ + If $enable is false (the default), then "encode" will throw an + exception when it encounters a blessed object that it cannot convert + otherwise. + + This setting has no effect on "decode". + + $json = $json->convert_blessed ([$enable]) + $enabled = $json->get_convert_blessed + See "OBJECT SERIALISATION" for details. + + If $enable is true (or missing), then "encode", upon encountering a + blessed object, will check for the availability of the "TO_JSON" + method on the object's class. If found, it will be called in scalar + context and the resulting scalar will be encoded instead of the + object. + + The "TO_JSON" method may safely call die if it wants. If "TO_JSON" + returns other blessed objects, those will be handled in the same + way. "TO_JSON" must take care of not causing an endless recursion + cycle (== crash) in this case. The name of "TO_JSON" was chosen + because other methods called by the Perl core (== not by the user of + the object) are usually in upper case letters and to avoid + collisions with any "to_json" function or method. + + If $enable is false (the default), then "encode" will not consider + this type of conversion. + + This setting has no effect on "decode". + + $json = $json->allow_tags ([$enable]) + $enabled = $json->allow_tags + See "OBJECT SERIALISATION" for details. + + If $enable is true (or missing), then "encode", upon encountering a + blessed object, will check for the availability of the "FREEZE" + method on the object's class. If found, it will be used to serialise + the object into a nonstandard tagged JSON value (that JSON decoders + cannot decode). + + It also causes "decode" to parse such tagged JSON values and + deserialise them via a call to the "THAW" method. + + If $enable is false (the default), then "encode" will not consider + this type of conversion, and tagged JSON values will cause a parse + error in "decode", as if tags were not part of the grammar. + + $json = $json->filter_json_object ([$coderef->($hashref)]) + When $coderef is specified, it will be called from "decode" each + time it decodes a JSON object. The only argument is a reference to + the newly-created hash. If the code references returns a single + scalar (which need not be a reference), this value (i.e. a copy of + that scalar to avoid aliasing) is inserted into the deserialised + data structure. If it returns an empty list (NOTE: *not* "undef", + which is a valid scalar), the original deserialised hash will be + inserted. This setting can slow down decoding considerably. + + When $coderef is omitted or undefined, any existing callback will be + removed and "decode" will not change the deserialised hash in any + way. + + Example, convert all JSON objects into the integer 5: + + my $js = JSON::XS->new->filter_json_object (sub { 5 }); + # returns [5] + $js->decode ('[{}]') + # throw an exception because allow_nonref is not enabled + # so a lone 5 is not allowed. + $js->decode ('{"a":1, "b":2}'); + + $json = $json->filter_json_single_key_object ($key [=> + $coderef->($value)]) + Works remotely similar to "filter_json_object", but is only called + for JSON objects having a single key named $key. + + This $coderef is called before the one specified via + "filter_json_object", if any. It gets passed the single value in the + JSON object. If it returns a single value, it will be inserted into + the data structure. 
If it returns nothing (not even "undef" but the + empty list), the callback from "filter_json_object" will be called + next, as if no single-key callback were specified. + + If $coderef is omitted or undefined, the corresponding callback will + be disabled. There can only ever be one callback for a given key. + + As this callback gets called less often then the + "filter_json_object" one, decoding speed will not usually suffer as + much. Therefore, single-key objects make excellent targets to + serialise Perl objects into, especially as single-key JSON objects + are as close to the type-tagged value concept as JSON gets (it's + basically an ID/VALUE tuple). Of course, JSON does not support this + in any way, so you need to make sure your data never looks like a + serialised Perl hash. + + Typical names for the single object key are "__class_whatever__", or + "$__dollars_are_rarely_used__$" or "}ugly_brace_placement", or even + things like "__class_md5sum(classname)__", to reduce the risk of + clashing with real hashes. + + Example, decode JSON objects of the form "{ "__widget__" => }" + into the corresponding $WIDGET{} object: + + # return whatever is in $WIDGET{5}: + JSON::XS + ->new + ->filter_json_single_key_object (__widget__ => sub { + $WIDGET{ $_[0] } + }) + ->decode ('{"__widget__": 5') + + # this can be used with a TO_JSON method in some "widget" class + # for serialisation to json: + sub WidgetBase::TO_JSON { + my ($self) = @_; + + unless ($self->{id}) { + $self->{id} = ..get..some..id..; + $WIDGET{$self->{id}} = $self; + } + + { __widget__ => $self->{id} } + } + + $json = $json->shrink ([$enable]) + $enabled = $json->get_shrink + Perl usually over-allocates memory a bit when allocating space for + strings. This flag optionally resizes strings generated by either + "encode" or "decode" to their minimum size possible. This can save + memory when your JSON texts are either very very long or you have + many short strings. It will also try to downgrade any strings to + octet-form if possible: perl stores strings internally either in an + encoding called UTF-X or in octet-form. The latter cannot store + everything but uses less space in general (and some buggy Perl or C + code might even rely on that internal representation being used). + + The actual definition of what shrink does might change in future + versions, but it will always try to save space at the expense of + time. + + If $enable is true (or missing), the string returned by "encode" + will be shrunk-to-fit, while all strings generated by "decode" will + also be shrunk-to-fit. + + If $enable is false, then the normal perl allocation algorithms are + used. If you work with your data, then this is likely to be faster. + + In the future, this setting might control other things, such as + converting strings that look like integers or floats into integers + or floats internally (there is no difference on the Perl level), + saving space. + + $json = $json->max_depth ([$maximum_nesting_depth]) + $max_depth = $json->get_max_depth + Sets the maximum nesting level (default 512) accepted while encoding + or decoding. If a higher nesting level is detected in JSON text or a + Perl data structure, then the encoder and decoder will stop and + croak at that point. + + Nesting level is defined by number of hash- or arrayrefs that the + encoder needs to traverse to reach a given point or the number of + "{" or "[" characters without their matching closing parenthesis + crossed to reach a given character in a string. 
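+
+    For example (illustrative only):
+
+       JSON::XS->new->max_depth (1)->decode ('[1, 2, 3]');   # ok, nesting level 1
+       JSON::XS->new->max_depth (1)->decode ('[[1], 2]');    # croaks, nesting level 2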
+ + Setting the maximum depth to one disallows any nesting, so that + ensures that the object is only a single hash/object or array. + + If no argument is given, the highest possible setting will be used, + which is rarely useful. + + Note that nesting is implemented by recursion in C. The default + value has been chosen to be as large as typical operating systems + allow without crashing. + + See SECURITY CONSIDERATIONS, below, for more info on why this is + useful. + + $json = $json->max_size ([$maximum_string_size]) + $max_size = $json->get_max_size + Set the maximum length a JSON text may have (in bytes) where + decoding is being attempted. The default is 0, meaning no limit. + When "decode" is called on a string that is longer then this many + bytes, it will not attempt to decode the string but throw an + exception. This setting has no effect on "encode" (yet). + + If no argument is given, the limit check will be deactivated (same + as when 0 is specified). + + See SECURITY CONSIDERATIONS, below, for more info on why this is + useful. + + $json_text = $json->encode ($perl_scalar) + Converts the given Perl value or data structure to its JSON + representation. Croaks on error. + + $perl_scalar = $json->decode ($json_text) + The opposite of "encode": expects a JSON text and tries to parse it, + returning the resulting simple scalar or reference. Croaks on error. + + ($perl_scalar, $characters) = $json->decode_prefix ($json_text) + This works like the "decode" method, but instead of raising an + exception when there is trailing garbage after the first JSON + object, it will silently stop parsing there and return the number of + characters consumed so far. + + This is useful if your JSON texts are not delimited by an outer + protocol and you need to know where the JSON text ends. + + JSON::XS->new->decode_prefix ("[1] the tail") + => ([1], 3) + +INCREMENTAL PARSING + In some cases, there is the need for incremental parsing of JSON texts. + While this module always has to keep both JSON text and resulting Perl + data structure in memory at one time, it does allow you to parse a JSON + stream incrementally. It does so by accumulating text until it has a + full JSON object, which it then can decode. This process is similar to + using "decode_prefix" to see if a full JSON object is available, but is + much more efficient (and can be implemented with a minimum of method + calls). + + JSON::XS will only attempt to parse the JSON text once it is sure it has + enough text to get a decisive result, using a very simple but truly + incremental parser. This means that it sometimes won't stop as early as + the full parser, for example, it doesn't detect mismatched parentheses. + The only thing it guarantees is that it starts decoding as soon as a + syntactically valid JSON text has been seen. This means you need to set + resource limits (e.g. "max_size") to ensure the parser will stop parsing + in the presence if syntax errors. + + The following methods implement this incremental parser. + + [void, scalar or list context] = $json->incr_parse ([$string]) + This is the central parsing function. It can both append new text + and extract objects from the stream accumulated so far (both of + these functions are optional). + + If $string is given, then this string is appended to the already + existing JSON fragment stored in the $json object. + + After that, if the function is called in void context, it will + simply return without doing anything further. 
This can be used to + add more text in as many chunks as you want. + + If the method is called in scalar context, then it will try to + extract exactly *one* JSON object. If that is successful, it will + return this object, otherwise it will return "undef". If there is a + parse error, this method will croak just as "decode" would do (one + can then use "incr_skip" to skip the erroneous part). This is the + most common way of using the method. + + And finally, in list context, it will try to extract as many objects + from the stream as it can find and return them, or the empty list + otherwise. For this to work, there must be no separators (other than + whitespace) between the JSON objects or arrays, instead they must be + concatenated back-to-back. If an error occurs, an exception will be + raised as in the scalar context case. Note that in this case, any + previously-parsed JSON texts will be lost. + + Example: Parse some JSON arrays/objects in a given string and return + them. + + my @objs = JSON::XS->new->incr_parse ("[5][7][1,2]"); + + $lvalue_string = $json->incr_text + This method returns the currently stored JSON fragment as an lvalue, + that is, you can manipulate it. This *only* works when a preceding + call to "incr_parse" in *scalar context* successfully returned an + object. Under all other circumstances you must not call this + function (I mean it. although in simple tests it might actually + work, it *will* fail under real world conditions). As a special + exception, you can also call this method before having parsed + anything. + + That means you can only use this function to look at or manipulate + text before or after complete JSON objects, not while the parser is + in the middle of parsing a JSON object. + + This function is useful in two cases: a) finding the trailing text + after a JSON object or b) parsing multiple JSON objects separated by + non-JSON text (such as commas). + + $json->incr_skip + This will reset the state of the incremental parser and will remove + the parsed text from the input buffer so far. This is useful after + "incr_parse" died, in which case the input buffer and incremental + parser state is left unchanged, to skip the text parsed so far and + to reset the parse state. + + The difference to "incr_reset" is that only text until the parse + error occurred is removed. + + $json->incr_reset + This completely resets the incremental parser, that is, after this + call, it will be as if the parser had never parsed anything. + + This is useful if you want to repeatedly parse JSON objects and want + to ignore any trailing data, which means you have to reset the + parser after each successful decode. + + LIMITATIONS + All options that affect decoding are supported, except "allow_nonref". + The reason for this is that it cannot be made to work sensibly: JSON + objects and arrays are self-delimited, i.e. you can concatenate them + back to back and still decode them perfectly. This does not hold true + for JSON numbers, however. + + For example, is the string 1 a single JSON number, or is it simply the + start of 12? Or is 12 a single JSON number, or the concatenation of 1 + and 2? In neither case you can tell, and this is why JSON::XS takes the + conservative route and disallows this case. + + EXAMPLES + Some examples will make all this clearer. 
First, a simple example that
+    works similarly to "decode_prefix": We want to decode the JSON object at
+    the start of a string and identify the portion after the JSON object:
+
+       my $text = "[1,2,3] hello";
+
+       my $json = new JSON::XS;
+
+       my $obj = $json->incr_parse ($text)
+          or die "expected JSON object or array at beginning of string";
+
+       my $tail = $json->incr_text;
+       # $tail now contains " hello"
+
+    Easy, isn't it?
+
+    Now for a more complicated example: Imagine a hypothetical protocol
+    where you read some requests from a TCP stream, and each request is a
+    JSON array, without any separation between them (in fact, it is often
+    useful to use newlines as "separators", as these get interpreted as
+    whitespace at the start of the JSON text, which makes it possible to
+    test said protocol with "telnet"...).
+
+    Here is how you'd do it (it is trivial to write this in an event-based
+    manner):
+
+       my $json = new JSON::XS;
+
+       # read some data from the socket
+       while (sysread $socket, my $buf, 4096) {
+
+          # split and decode as many requests as possible
+          for my $request ($json->incr_parse ($buf)) {
+             # act on the $request
+          }
+       }
+
+    Another complicated example: Assume you have a string with JSON objects
+    or arrays, all separated by (optional) comma characters (e.g. "[1],[2],
+    [3]"). To parse them, we have to skip the commas between the JSON texts,
+    and here is where the lvalue-ness of "incr_text" comes in useful:
+
+       my $text = "[1],[2], [3]";
+       my $json = new JSON::XS;
+
+       # void context, so no parsing done
+       $json->incr_parse ($text);
+
+       # now extract as many objects as possible. note the
+       # use of scalar context so incr_text can be called.
+       while (my $obj = $json->incr_parse) {
+          # do something with $obj
+
+          # now skip the optional comma
+          $json->incr_text =~ s/^ \s* , //x;
+       }
+
+    Now let's go for a very complex example: Assume that you have a gigantic
+    JSON array-of-objects, many gigabytes in size, and you want to parse it,
+    but you cannot load it into memory fully (this has actually happened in
+    the real world :).
+
+    Well, you lost, you have to implement your own JSON parser. But JSON::XS
+    can still help you: You implement a (very simple) array parser and let
+    JSON decode the array elements, which are all full JSON objects on their
+    own (this wouldn't work if the array elements could be JSON numbers, for
+    example):
+
+       my $json = new JSON::XS;
+
+       # open the monster
+       open my $fh, "<monster.json"
+          or die "monster.json: $!";
+
+       # first parse the initial "["
+       for (;;) {
+          sysread $fh, my $buf, 65536
+             or die "read error: $!";
+          $json->incr_parse ($buf); # void context, so no parsing
+
+          # Exit the loop once we found and removed(!) the initial "[".
+          # In essence, we are (ab-)using the $json object as a simple scalar
+          # we append data to.
+          last if $json->incr_text =~ s/^ \s* \[ //x;
+       }
+
+       # now we have skipped the initial "[", so continue
+       # parsing all the elements.
+ for (;;) { + # in this loop we read data until we got a single JSON object + for (;;) { + if (my $obj = $json->incr_parse) { + # do something with $obj + last; + } + + # add more data + sysread $fh, my $buf, 65536 + or die "read error: $!"; + $json->incr_parse ($buf); # void context, so no parsing + } + + # in this loop we read data until we either found and parsed the + # separating "," between elements, or the final "]" + for (;;) { + # first skip whitespace + $json->incr_text =~ s/^\s*//; + + # if we find "]", we are done + if ($json->incr_text =~ s/^\]//) { + print "finished.\n"; + exit; + } + + # if we find ",", we can continue with the next element + if ($json->incr_text =~ s/^,//) { + last; + } + + # if we find anything else, we have a parse error! + if (length $json->incr_text) { + die "parse error near ", $json->incr_text; + } + + # else add more data + sysread $fh, my $buf, 65536 + or die "read error: $!"; + $json->incr_parse ($buf); # void context, so no parsing + } + + This is a complex example, but most of the complexity comes from the + fact that we are trying to be correct (bear with me if I am wrong, I + never ran the above example :). + +MAPPING + This section describes how JSON::XS maps Perl values to JSON values and + vice versa. These mappings are designed to "do the right thing" in most + circumstances automatically, preserving round-tripping characteristics + (what you put in comes out as something equivalent). + + For the more enlightened: note that in the following descriptions, + lowercase *perl* refers to the Perl interpreter, while uppercase *Perl* + refers to the abstract Perl language itself. + + JSON -> PERL + object + A JSON object becomes a reference to a hash in Perl. No ordering of + object keys is preserved (JSON does not preserve object key ordering + itself). + + array + A JSON array becomes a reference to an array in Perl. + + string + A JSON string becomes a string scalar in Perl - Unicode codepoints + in JSON are represented by the same codepoints in the Perl string, + so no manual decoding is necessary. + + number + A JSON number becomes either an integer, numeric (floating point) or + string scalar in perl, depending on its range and any fractional + parts. On the Perl level, there is no difference between those as + Perl handles all the conversion details, but an integer may take + slightly less memory and might represent more values exactly than + floating point numbers. + + If the number consists of digits only, JSON::XS will try to + represent it as an integer value. If that fails, it will try to + represent it as a numeric (floating point) value if that is possible + without loss of precision. Otherwise it will preserve the number as + a string value (in which case you lose roundtripping ability, as the + JSON number will be re-encoded to a JSON string). + + Numbers containing a fractional or exponential part will always be + represented as numeric (floating point) values, possibly at a loss + of precision (in which case you might lose perfect roundtripping + ability, but the JSON number will still be re-encoded as a JSON + number). + + Note that precision is not accuracy - binary floating point values + cannot represent most decimal fractions exactly, and when converting + from and to floating point, JSON::XS only guarantees precision up to + but not including the least significant bit. + + true, false + These JSON atoms become "Types::Serialiser::true" and + "Types::Serialiser::false", respectively. 
They are overloaded to act + almost exactly like the numbers 1 and 0. You can check whether a + scalar is a JSON boolean by using the "Types::Serialiser::is_bool" + function (after "use Types::Serialier", of course). + + null + A JSON null atom becomes "undef" in Perl. + + shell-style comments ("# *text*") + As a nonstandard extension to the JSON syntax that is enabled by the + "relaxed" setting, shell-style comments are allowed. They can start + anywhere outside strings and go till the end of the line. + + tagged values ("(*tag*)*value*"). + Another nonstandard extension to the JSON syntax, enabled with the + "allow_tags" setting, are tagged values. In this implementation, the + *tag* must be a perl package/class name encoded as a JSON string, + and the *value* must be a JSON array encoding optional constructor + arguments. + + See "OBJECT SERIALISATION", below, for details. + + PERL -> JSON + The mapping from Perl to JSON is slightly more difficult, as Perl is a + truly typeless language, so we can only guess which JSON type is meant + by a Perl value. + + hash references + Perl hash references become JSON objects. As there is no inherent + ordering in hash keys (or JSON objects), they will usually be + encoded in a pseudo-random order. JSON::XS can optionally sort the + hash keys (determined by the *canonical* flag), so the same + datastructure will serialise to the same JSON text (given same + settings and version of JSON::XS), but this incurs a runtime + overhead and is only rarely useful, e.g. when you want to compare + some JSON text against another for equality. + + array references + Perl array references become JSON arrays. + + other references + Other unblessed references are generally not allowed and will cause + an exception to be thrown, except for references to the integers 0 + and 1, which get turned into "false" and "true" atoms in JSON. + + Since "JSON::XS" uses the boolean model from Types::Serialiser, you + can also "use Types::Serialiser" and then use + "Types::Serialiser::false" and "Types::Serialiser::true" to improve + readability. + + use Types::Serialiser; + encode_json [\0, Types::Serialiser::true] # yields [false,true] + + Types::Serialiser::true, Types::Serialiser::false + These special values from the Types::Serialiser module become JSON + true and JSON false values, respectively. You can also use "\1" and + "\0" directly if you want. + + blessed objects + Blessed objects are not directly representable in JSON, but + "JSON::XS" allows various ways of handling objects. See "OBJECT + SERIALISATION", below, for details. 
+ + simple scalars + Simple Perl scalars (any scalar that is not a reference) are the + most difficult objects to encode: JSON::XS will encode undefined + scalars as JSON "null" values, scalars that have last been used in a + string context before encoding as JSON strings, and anything else as + number value: + + # dump as number + encode_json [2] # yields [2] + encode_json [-3.0e17] # yields [-3e+17] + my $value = 5; encode_json [$value] # yields [5] + + # used as string, so dump as string + print $value; + encode_json [$value] # yields ["5"] + + # undef becomes null + encode_json [undef] # yields [null] + + You can force the type to be a JSON string by stringifying it: + + my $x = 3.1; # some variable containing a number + "$x"; # stringified + $x .= ""; # another, more awkward way to stringify + print $x; # perl does it for you, too, quite often + + You can force the type to be a JSON number by numifying it: + + my $x = "3"; # some variable containing a string + $x += 0; # numify it, ensuring it will be dumped as a number + $x *= 1; # same thing, the choice is yours. + + You can not currently force the type in other, less obscure, ways. + Tell me if you need this capability (but don't forget to explain why + it's needed :). + + Note that numerical precision has the same meaning as under Perl (so + binary to decimal conversion follows the same rules as in Perl, + which can differ to other languages). Also, your perl interpreter + might expose extensions to the floating point numbers of your + platform, such as infinities or NaN's - these cannot be represented + in JSON, and it is an error to pass those in. + + OBJECT SERIALISATION + As JSON cannot directly represent Perl objects, you have to choose + between a pure JSON representation (without the ability to deserialise + the object automatically again), and a nonstandard extension to the JSON + syntax, tagged values. + + SERIALISATION + What happens when "JSON::XS" encounters a Perl object depends on the + "allow_blessed", "convert_blessed" and "allow_tags" settings, which are + used in this order: + + 1. "allow_tags" is enabled and the object has a "FREEZE" method. + In this case, "JSON::XS" uses the Types::Serialiser object + serialisation protocol to create a tagged JSON value, using a + nonstandard extension to the JSON syntax. + + This works by invoking the "FREEZE" method on the object, with the + first argument being the object to serialise, and the second + argument being the constant string "JSON" to distinguish it from + other serialisers. + + The "FREEZE" method can return any number of values (i.e. zero or + more). These values and the paclkage/classname of the object will + then be encoded as a tagged JSON value in the following format: + + ("classname")[FREEZE return values...] + + e.g.: + + ("URI")["http://www.google.com/"] + ("MyDate")[2013,10,29] + ("ImageData::JPEG")["Z3...VlCg=="] + + For example, the hypothetical "My::Object" "FREEZE" method might use + the objects "type" and "id" members to encode the object: + + sub My::Object::FREEZE { + my ($self, $serialiser) = @_; + + ($self->{type}, $self->{id}) + } + + 2. "convert_blessed" is enabled and the object has a "TO_JSON" method. + In this case, the "TO_JSON" method of the object is invoked in + scalar context. It must return a single scalar that can be directly + encoded into JSON. This scalar replaces the object in the JSON text. + + For example, the following "TO_JSON" method will convert all URI + objects to JSON strings when serialised. 
The fact that these values
+    originally were URI objects is lost.
+
+        sub URI::TO_JSON {
+           my ($uri) = @_;
+           $uri->as_string
+        }
+
+    3. "allow_blessed" is enabled.
+       The object will be serialised as a JSON null value.
+
+    4. none of the above
+       If none of the settings are enabled or the respective methods are
+       missing, "JSON::XS" throws an exception.
+
+  DESERIALISATION
+    For deserialisation there are only two cases to consider: either
+    nonstandard tagging was used, in which case "allow_tags" decides, or
+    objects cannot automatically be deserialised, in which case you can
+    use postprocessing or the "filter_json_object" or
+    "filter_json_single_key_object" callbacks to get some real objects out
+    of your JSON.
+
+    This section only considers the tagged value case: If a tagged JSON
+    object is encountered during decoding and "allow_tags" is disabled, a
+    parse error will result (as if tagged values were not part of the
+    grammar).
+
+    If "allow_tags" is enabled, "JSON::XS" will look up the "THAW" method
+    of the package/classname used during serialisation (it will not
+    attempt to load the package as a Perl module). If there is no such
+    method, the decoding will fail with an error.
+
+    Otherwise, the "THAW" method is invoked with the classname as first
+    argument, the constant string "JSON" as second argument, and all the
+    values from the JSON array (the values originally returned by the
+    "FREEZE" method) as remaining arguments.
+
+    The method must then return the object. While technically you can
+    return any Perl scalar, you might have to enable the "allow_nonref"
+    setting to make that work in all cases, so better return an actual
+    blessed reference.
+
+    As an example, let's implement a "THAW" function that regenerates the
+    "My::Object" from the "FREEZE" example earlier:
+
+        sub My::Object::THAW {
+           my ($class, $serialiser, $type, $id) = @_;
+
+           $class->new (type => $type, id => $id)
+        }
+
+ENCODING/CODESET FLAG NOTES
+    The interested reader might have seen a number of flags that signify
+    encodings or codesets - "utf8", "latin1" and "ascii". There seems to
+    be some confusion on what these do, so here is a short comparison:
+
+    "utf8" controls whether the JSON text created by "encode" (and
+    expected by "decode") is UTF-8 encoded or not, while "latin1" and
+    "ascii" only control whether "encode" escapes character values outside
+    their respective codeset range. Neither of these flags conflict with
+    each other, although some combinations make less sense than others.
+
+    Care has been taken to make all flags symmetrical with respect to
+    "encode" and "decode", that is, texts encoded with any combination of
+    these flag values will be correctly decoded when the same flags are
+    used - in general, if you use different flag settings while encoding
+    vs. when decoding you likely have a bug somewhere.
+
+    Below comes a verbose discussion of these flags. Note that a "codeset"
+    is simply an abstract set of character-codepoint pairs, while an
+    encoding takes those codepoint numbers and *encodes* them, in our case
+    into octets. Unicode is (among other things) a codeset, UTF-8 is an
+    encoding, and ISO-8859-1 (= latin 1) and ASCII are both codesets *and*
+    encodings at the same time, which can be confusing.
+ + "utf8" flag disabled + When "utf8" is disabled (the default), then "encode"/"decode" + generate and expect Unicode strings, that is, characters with high + ordinal Unicode values (> 255) will be encoded as such characters, + and likewise such characters are decoded as-is, no changes to them + will be done, except "(re-)interpreting" them as Unicode codepoints + or Unicode characters, respectively (to Perl, these are the same + thing in strings unless you do funny/weird/dumb stuff). + + This is useful when you want to do the encoding yourself (e.g. when + you want to have UTF-16 encoded JSON texts) or when some other layer + does the encoding for you (for example, when printing to a terminal + using a filehandle that transparently encodes to UTF-8 you certainly + do NOT want to UTF-8 encode your data first and have Perl encode it + another time). + + "utf8" flag enabled + If the "utf8"-flag is enabled, "encode"/"decode" will encode all + characters using the corresponding UTF-8 multi-byte sequence, and + will expect your input strings to be encoded as UTF-8, that is, no + "character" of the input string must have any value > 255, as UTF-8 + does not allow that. + + The "utf8" flag therefore switches between two modes: disabled means + you will get a Unicode string in Perl, enabled means you get an + UTF-8 encoded octet/binary string in Perl. + + "latin1" or "ascii" flags enabled + With "latin1" (or "ascii") enabled, "encode" will escape characters + with ordinal values > 255 (> 127 with "ascii") and encode the + remaining characters as specified by the "utf8" flag. + + If "utf8" is disabled, then the result is also correctly encoded in + those character sets (as both are proper subsets of Unicode, meaning + that a Unicode string with all character values < 256 is the same + thing as a ISO-8859-1 string, and a Unicode string with all + character values < 128 is the same thing as an ASCII string in + Perl). + + If "utf8" is enabled, you still get a correct UTF-8-encoded string, + regardless of these flags, just some more characters will be escaped + using "\uXXXX" then before. + + Note that ISO-8859-1-*encoded* strings are not compatible with UTF-8 + encoding, while ASCII-encoded strings are. That is because the + ISO-8859-1 encoding is NOT a subset of UTF-8 (despite the ISO-8859-1 + *codeset* being a subset of Unicode), while ASCII is. + + Surprisingly, "decode" will ignore these flags and so treat all + input values as governed by the "utf8" flag. If it is disabled, this + allows you to decode ISO-8859-1- and ASCII-encoded strings, as both + strict subsets of Unicode. If it is enabled, you can correctly + decode UTF-8 encoded strings. + + So neither "latin1" nor "ascii" are incompatible with the "utf8" + flag - they only govern when the JSON output engine escapes a + character or not. + + The main use for "latin1" is to relatively efficiently store binary + data as JSON, at the expense of breaking compatibility with most + JSON decoders. + + The main use for "ascii" is to force the output to not contain + characters with values > 127, which means you can interpret the + resulting string as UTF-8, ISO-8859-1, ASCII, KOI8-R or most about + any character set and 8-bit-encoding, and still get the same data + structure back. This is useful when your channel for JSON transfer + is not 8-bit clean or the encoding might be mangled in between (e.g. + in mail), and works because ASCII is a proper subset of most 8-bit + and multibyte encodings in use in the world. 
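+
+    To make the interplay of these flags more tangible, here is a small,
+    hypothetical comparison script; the sample string and the escape
+    sequences shown in the comments are illustrative only:
+
+        use JSON::XS;
+
+        my $data = ["K\x{fc}he", chr 0x2028];   # U+00FC and U+2028
+
+        # ascii: 7-bit output, everything above 127 gets \u-escaped
+        print JSON::XS->new->ascii->encode ($data), "\n";
+        # e.g. ["K\u00fche","\u2028"]
+
+        # latin1: U+00FC stays a single octet, U+2028 still gets escaped
+        print JSON::XS->new->latin1->encode ($data), "\n";
+
+        # utf8: both characters become multi-byte UTF-8 sequences
+        print JSON::XS->new->utf8->encode ($data), "\n";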
+ + JSON and ECMAscript + JSON syntax is based on how literals are represented in javascript (the + not-standardised predecessor of ECMAscript) which is presumably why it + is called "JavaScript Object Notation". + + However, JSON is not a subset (and also not a superset of course) of + ECMAscript (the standard) or javascript (whatever browsers actually + implement). + + If you want to use javascript's "eval" function to "parse" JSON, you + might run into parse errors for valid JSON texts, or the resulting data + structure might not be queryable: + + One of the problems is that U+2028 and U+2029 are valid characters + inside JSON strings, but are not allowed in ECMAscript string literals, + so the following Perl fragment will not output something that can be + guaranteed to be parsable by javascript's "eval": + + use JSON::XS; + + print encode_json [chr 0x2028]; + + The right fix for this is to use a proper JSON parser in your javascript + programs, and not rely on "eval" (see for example Douglas Crockford's + json2.js parser). + + If this is not an option, you can, as a stop-gap measure, simply encode + to ASCII-only JSON: + + use JSON::XS; + + print JSON::XS->new->ascii->encode ([chr 0x2028]); + + Note that this will enlarge the resulting JSON text quite a bit if you + have many non-ASCII characters. You might be tempted to run some regexes + to only escape U+2028 and U+2029, e.g.: + + # DO NOT USE THIS! + my $json = JSON::XS->new->utf8->encode ([chr 0x2028]); + $json =~ s/\xe2\x80\xa8/\\u2028/g; # escape U+2028 + $json =~ s/\xe2\x80\xa9/\\u2029/g; # escape U+2029 + print $json; + + Note that *this is a bad idea*: the above only works for U+2028 and + U+2029 and thus only for fully ECMAscript-compliant parsers. Many + existing javascript implementations, however, have issues with other + characters as well - using "eval" naively simply *will* cause problems. + + Another problem is that some javascript implementations reserve some + property names for their own purposes (which probably makes them + non-ECMAscript-compliant). For example, Iceweasel reserves the + "__proto__" property name for its own purposes. + + If that is a problem, you could parse try to filter the resulting JSON + output for these property strings, e.g.: + + $json =~ s/"__proto__"\s*:/"__proto__renamed":/g; + + This works because "__proto__" is not valid outside of strings, so every + occurrence of ""__proto__"\s*:" must be a string used as property name. + + If you know of other incompatibilities, please let me know. + + JSON and YAML + You often hear that JSON is a subset of YAML. This is, however, a mass + hysteria(*) and very far from the truth (as of the time of this + writing), so let me state it clearly: *in general, there is no way to + configure JSON::XS to output a data structure as valid YAML* that works + in all cases. + + If you really must use JSON::XS to generate YAML, you should use this + algorithm (subject to change in future versions): + + my $to_yaml = JSON::XS->new->utf8->space_after (1); + my $yaml = $to_yaml->encode ($ref) . "\n"; + + This will *usually* generate JSON texts that also parse as valid YAML. 
+ Please note that YAML has hardcoded limits on (simple) object key + lengths that JSON doesn't have and also has different and incompatible + unicode character escape syntax, so you should make sure that your hash + keys are noticeably shorter than the 1024 "stream characters" YAML + allows and that you do not have characters with codepoint values outside + the Unicode BMP (basic multilingual page). YAML also does not allow "\/" + sequences in strings (which JSON::XS does not *currently* generate, but + other JSON generators might). + + There might be other incompatibilities that I am not aware of (or the + YAML specification has been changed yet again - it does so quite often). + In general you should not try to generate YAML with a JSON generator or + vice versa, or try to parse JSON with a YAML parser or vice versa: + chances are high that you will run into severe interoperability problems + when you least expect it. + + (*) I have been pressured multiple times by Brian Ingerson (one of the + authors of the YAML specification) to remove this paragraph, despite + him acknowledging that the actual incompatibilities exist. As I was + personally bitten by this "JSON is YAML" lie, I refused and said I + will continue to educate people about these issues, so others do not + run into the same problem again and again. After this, Brian called + me a (quote)*complete and worthless idiot*(unquote). + + In my opinion, instead of pressuring and insulting people who + actually clarify issues with YAML and the wrong statements of some + of its proponents, I would kindly suggest reading the JSON spec + (which is not that difficult or long) and finally make YAML + compatible to it, and educating users about the changes, instead of + spreading lies about the real compatibility for many *years* and + trying to silence people who point out that it isn't true. + + Addendum/2009: the YAML 1.2 spec is still incompatible with JSON, + even though the incompatibilities have been documented (and are + known to Brian) for many years and the spec makes explicit claims + that YAML is a superset of JSON. It would be so easy to fix, but + apparently, bullying people and corrupting userdata is so much + easier. + + SPEED + It seems that JSON::XS is surprisingly fast, as shown in the following + tables. They have been generated with the help of the "eg/bench" program + in the JSON::XS distribution, to make it easy to compare on your own + system. + + First comes a comparison between various modules using a very short + single-line JSON string (also available at + ). + + {"method": "handleMessage", "params": ["user1", + "we were just talking"], "id": null, "array":[1,11,234,-5,1e5,1e7, + 1, 0]} + + It shows the number of encodes/decodes per second (JSON::XS uses the + functional interface, while JSON::XS/2 uses the OO interface with + pretty-printing and hashkey sorting enabled, JSON::XS/3 enables shrink. + JSON::DWIW/DS uses the deserialise function, while JSON::DWIW::FJ uses + the from_json method). 
Higher is better: + + module | encode | decode | + --------------|------------|------------| + JSON::DWIW/DS | 86302.551 | 102300.098 | + JSON::DWIW/FJ | 86302.551 | 75983.768 | + JSON::PP | 15827.562 | 6638.658 | + JSON::Syck | 63358.066 | 47662.545 | + JSON::XS | 511500.488 | 511500.488 | + JSON::XS/2 | 291271.111 | 388361.481 | + JSON::XS/3 | 361577.931 | 361577.931 | + Storable | 66788.280 | 265462.278 | + --------------+------------+------------+ + + That is, JSON::XS is almost six times faster than JSON::DWIW on + encoding, about five times faster on decoding, and over thirty to + seventy times faster than JSON's pure perl implementation. It also + compares favourably to Storable for small amounts of data. + + Using a longer test string (roughly 18KB, generated from Yahoo! Locals + search API (). + + module | encode | decode | + --------------|------------|------------| + JSON::DWIW/DS | 1647.927 | 2673.916 | + JSON::DWIW/FJ | 1630.249 | 2596.128 | + JSON::PP | 400.640 | 62.311 | + JSON::Syck | 1481.040 | 1524.869 | + JSON::XS | 20661.596 | 9541.183 | + JSON::XS/2 | 10683.403 | 9416.938 | + JSON::XS/3 | 20661.596 | 9400.054 | + Storable | 19765.806 | 10000.725 | + --------------+------------+------------+ + + Again, JSON::XS leads by far (except for Storable which non-surprisingly + decodes a bit faster). + + On large strings containing lots of high Unicode characters, some + modules (such as JSON::PC) seem to decode faster than JSON::XS, but the + result will be broken due to missing (or wrong) Unicode handling. Others + refuse to decode or encode properly, so it was impossible to prepare a + fair comparison table for that case. + +SECURITY CONSIDERATIONS + When you are using JSON in a protocol, talking to untrusted potentially + hostile creatures requires relatively few measures. + + First of all, your JSON decoder should be secure, that is, should not + have any buffer overflows. Obviously, this module should ensure that and + I am trying hard on making that true, but you never know. + + Second, you need to avoid resource-starving attacks. That means you + should limit the size of JSON texts you accept, or make sure then when + your resources run out, that's just fine (e.g. by using a separate + process that can crash safely). The size of a JSON text in octets or + characters is usually a good indication of the size of the resources + required to decode it into a Perl structure. While JSON::XS can check + the size of the JSON text, it might be too late when you already have it + in memory, so you might want to check the size before you accept the + string. + + Third, JSON::XS recurses using the C stack when decoding objects and + arrays. The C stack is a limited resource: for instance, on my amd64 + machine with 8MB of stack size I can decode around 180k nested arrays + but only 14k nested JSON objects (due to perl itself recursing deeply on + croak to free the temporary). If that is exceeded, the program crashes. + To be conservative, the default nesting limit is set to 512. If your + process has a smaller stack, you should adjust this setting accordingly + with the "max_depth" method. + + Something else could bomb you, too, that I forgot to think of. In that + case, you get to keep the pieces. I am always open for hints, though... 
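+
+    Putting the size and nesting advice above into practice, a decoder for
+    untrusted input might be configured roughly like this (the 64KiB and
+    64-level limits are arbitrary example values, not recommendations, and
+    $untrusted_octets is just a placeholder for your input):
+
+        my $json = JSON::XS->new
+           ->utf8
+           ->max_size (64 * 1024)   # refuse JSON texts larger than 64KiB
+           ->max_depth (64);        # refuse deeply nested structures early
+
+        my $data = eval { $json->decode ($untrusted_octets) }
+           or die "refusing input: $@";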
+
+    Also keep in mind that JSON::XS might leak contents of your Perl data
+    structures in its error messages, so when you serialise sensitive
+    information you might want to make sure that exceptions thrown by
+    JSON::XS will not end up in front of untrusted eyes.
+
+    If you are using JSON::XS to return packets for consumption by
+    JavaScript scripts in a browser you should have a look at
+
+    to see whether you are vulnerable to some common attack vectors (which
+    really are browser design bugs, but it is still you who will have to
+    deal with it, as major browser developers care only for features, not
+    about getting security right).
+
+"OLD" VS. "NEW" JSON (RFC 4627 VS. RFC 7159)
+    TL;DR: Due to security concerns, JSON::XS will not allow scalar data
+    in JSON texts by default - you need to create your own JSON::XS object
+    and enable "allow_nonref":
+
+        my $json = JSON::XS->new->allow_nonref;
+
+        $text = $json->encode ($data);
+        $data = $json->decode ($text);
+
+    The long version: JSON being an important and supposedly stable
+    format, the IETF standardised it as RFC 4627 in 2006. Unfortunately,
+    the inventor of JSON, Douglas Crockford, unilaterally changed the
+    definition of JSON in javascript. Rather than create a fork, the IETF
+    decided to standardise the new syntax (apparently, so I was told,
+    without finding it very amusing).
+
+    The biggest difference between the original JSON and the new JSON is
+    that the new JSON supports scalars (anything other than arrays and
+    objects) at the toplevel of a JSON text. While this is strictly
+    backwards compatible to older versions, it breaks a number of
+    protocols that relied on sending JSON back-to-back, and is a minor
+    security concern.
+
+    For example, imagine you have two banks communicating, and on one
+    side, the JSON coder gets upgraded. Two messages, such as 10 and 1000
+    might then be confused to mean 101000, something that couldn't happen
+    in the original JSON, because neither of these messages would be valid
+    JSON.
+
+    If one side accepts these messages, then an upgrade in the coder on
+    either side could result in this becoming exploitable.
+
+    This module has always allowed these messages as an optional
+    extension, by default disabled. The security concerns are the reason
+    why the default is still disabled, but future versions might/will
+    likely upgrade to the newer RFC as default format, so you are advised
+    to check your implementation and/or override the default with
+    "->allow_nonref (0)" to ensure that future versions are safe.
+
+INTEROPERABILITY WITH OTHER MODULES
+    "JSON::XS" uses the Types::Serialiser module to provide boolean
+    constants. That means that the JSON true and false values will be
+    compatible with true and false values of other modules that do the
+    same, such as JSON::PP and CBOR::XS.
+
+INTEROPERABILITY WITH OTHER JSON DECODERS
+    As long as you only serialise data that can be directly expressed in
+    JSON, "JSON::XS" is incapable of generating invalid JSON output
+    (modulo bugs, but "JSON::XS" has found more bugs in the official JSON
+    testsuite (1) than the official JSON testsuite has found in "JSON::XS"
+    (0)).
+
+    When you have trouble decoding JSON generated by this module using
+    other decoders, then it is very likely that you have an encoding
+    mismatch or the other decoder is broken.
+
+    When decoding, "JSON::XS" is strict by default and will likely catch
+    all errors. There are currently two settings that change this:
+    "relaxed" makes "JSON::XS" accept (but not generate) some non-standard
+    extensions, and "allow_tags" will allow you to encode and decode Perl
+    objects, at the cost of not outputting valid JSON anymore.
+
+  TAGGED VALUE SYNTAX AND STANDARD JSON EN/DECODERS
+    When you use "allow_tags" to use the extended (and also nonstandard
+    and invalid) JSON syntax for serialised objects, and you still want to
+    decode the generated JSON text with a standard JSON decoder, you can
+    run a regex to replace the tagged syntax by standard JSON arrays (it
+    only works for "normal" package names without comma, newlines or
+    single colons). First, the readable Perl version:
+
+        # if your FREEZE methods return no values, you need this replace first:
+        $json =~ s/\( \s* (" (?: [^\\":,]+|\\.|::)* ") \s* \) \s* \[\s*\]/[$1]/gx;
+
+        # this works for non-empty constructor arg lists:
+        $json =~ s/\( \s* (" (?: [^\\":,]+|\\.|::)* ") \s* \) \s* \[/[$1,/gx;
+
+    And here is a less readable version that is easy to adapt to other
+    languages:
+
+        $json =~ s/\(\s*("([^\\":,]+|\\.|::)*")\s*\)\s*\[/[$1,/g;
+
+    Here is an ECMAScript version (same regex):
+
+        json = json.replace (/\(\s*("([^\\":,]+|\\.|::)*")\s*\)\s*\[/g, "[$1,");
+
+    Since this syntax converts to standard JSON arrays, it might be hard
+    to distinguish serialised objects from normal arrays. You can prepend
+    a "magic number" as first array element to reduce chances of a
+    collision:
+
+        $json =~ s/\(\s*("([^\\":,]+|\\.|::)*")\s*\)\s*\[/["XU1peReLzT4ggEllLanBYq4G9VzliwKF",$1,/g;
+
+    And after decoding the JSON text, you could walk the data structure
+    looking for arrays with a first element of
+    "XU1peReLzT4ggEllLanBYq4G9VzliwKF".
+
+    The same approach can be used to create the tagged format with another
+    encoder. First, you create an array with the magic string as first
+    member, the classname as second, and constructor arguments last,
+    encode it as part of your JSON structure, and then:
+
+        $json =~ s/\[\s*"XU1peReLzT4ggEllLanBYq4G9VzliwKF"\s*,\s*("([^\\":,]+|\\.|::)*")\s*,/($1)[/g;
+
+    Again, this has some limitations - the magic string must not be
+    encoded with character escapes, and the constructor arguments must be
+    non-empty.
+
+RFC7159
+    Since this module was written, Google has written a new JSON RFC, RFC
+    7159 (and RFC7158). Unfortunately, this RFC breaks compatibility with
+    both the original JSON specification on www.json.org and RFC4627.
+
+    As far as I can see, you can get partial compatibility when parsing by
+    using "->allow_nonref". However, consider the security implications of
+    doing so.
+
+    I haven't decided yet when to break compatibility with RFC4627 by
+    default (and potentially leave applications insecure) and change the
+    default to follow RFC7159, but application authors are well advised to
+    call "->allow_nonref(0)" even if this is the current default, if they
+    cannot handle non-reference values, in preparation for the day when
+    the default will change.
+
+(I-)THREADS
+    This module is *not* guaranteed to be ithread (or MULTIPLICITY-) safe
+    and there are no plans to change this. Note that perl's builtin
+    so-called threads/ithreads are officially deprecated and should not be
+    used.
+
+THE PERILS OF SETLOCALE
+    Sometimes people avoid the Perl locale support and directly call the
+    system's setlocale function with "LC_ALL".
+
+    This breaks both perl and modules such as JSON::XS, as stringification
+    of numbers no longer works correctly (e.g.
"$x = 0.1; print "$x"+1" + might print 1, and JSON::XS might output illegal JSON as JSON::XS relies + on perl to stringify numbers). + + The solution is simple: don't call "setlocale", or use it for only those + categories you need, such as "LC_MESSAGES" or "LC_CTYPE". + + If you need "LC_NUMERIC", you should enable it only around the code that + actually needs it (avoiding stringification of numbers), and restore it + afterwards. + +BUGS + While the goal of this module is to be correct, that unfortunately does + not mean it's bug-free, only that I think its design is bug-free. If you + keep reporting bugs they will be fixed swiftly, though. + + Please refrain from using rt.cpan.org or any other bug reporting + service. I put the contact address into my modules for a reason. + +SEE ALSO + The json_xs command line utility for quick experiments. + +AUTHOR + Marc Lehmann + http://home.schmorp.de/ + diff --git a/XS.pm b/XS.pm new file mode 100644 index 0000000..674937f --- /dev/null +++ b/XS.pm @@ -0,0 +1,1761 @@ +=head1 NAME + +JSON::XS - JSON serialising/deserialising, done correctly and fast + +=encoding utf-8 + +JSON::XS - 正しくて高速な JSON シリアライザ/デシリアライザ + (http://fleur.hio.jp/perldoc/mix/lib/JSON/XS.html) + +=head1 SYNOPSIS + + use JSON::XS; + + # exported functions, they croak on error + # and expect/generate UTF-8 + + $utf8_encoded_json_text = encode_json $perl_hash_or_arrayref; + $perl_hash_or_arrayref = decode_json $utf8_encoded_json_text; + + # OO-interface + + $coder = JSON::XS->new->ascii->pretty->allow_nonref; + $pretty_printed_unencoded = $coder->encode ($perl_scalar); + $perl_scalar = $coder->decode ($unicode_json_text); + + # Note that JSON version 2.0 and above will automatically use JSON::XS + # if available, at virtually no speed overhead either, so you should + # be able to just: + + use JSON; + + # and do the same things, except that you have a pure-perl fallback now. + +=head1 DESCRIPTION + +This module converts Perl data structures to JSON and vice versa. Its +primary goal is to be I and its secondary goal is to be +I. To reach the latter goal it was written in C. + +Beginning with version 2.0 of the JSON module, when both JSON and +JSON::XS are installed, then JSON will fall back on JSON::XS (this can be +overridden) with no overhead due to emulation (by inheriting constructor +and methods). If JSON::XS is not available, it will fall back to the +compatible JSON::PP module as backend, so using JSON instead of JSON::XS +gives you a portable JSON API that can be fast when you need it and +doesn't require a C compiler when that is a problem. + +As this is the n-th-something JSON module on CPAN, what was the reason +to write yet another JSON module? While it seems there are many JSON +modules, none of them correctly handle all corner cases, and in most cases +their maintainers are unresponsive, gone missing, or not listening to bug +reports for other reasons. + +See MAPPING, below, on how JSON::XS maps perl values to JSON values and +vice versa. + +=head2 FEATURES + +=over 4 + +=item * correct Unicode handling + +This module knows how to handle Unicode, documents how and when it does +so, and even documents what "correct" means. + +=item * round-trip integrity + +When you serialise a perl data structure using only data types supported +by JSON and Perl, the deserialised data structure is identical on the Perl +level. (e.g. the string "2.0" doesn't suddenly become "2" just because +it looks like a number). 
There I minor exceptions to this, read the +MAPPING section below to learn about those. + +=item * strict checking of JSON correctness + +There is no guessing, no generating of illegal JSON texts by default, +and only JSON is accepted as input by default (the latter is a security +feature). + +=item * fast + +Compared to other JSON modules and other serialisers such as Storable, +this module usually compares favourably in terms of speed, too. + +=item * simple to use + +This module has both a simple functional interface as well as an object +oriented interface. + +=item * reasonably versatile output formats + +You can choose between the most compact guaranteed-single-line format +possible (nice for simple line-based protocols), a pure-ASCII format +(for when your transport is not 8-bit clean, still supports the whole +Unicode range), or a pretty-printed format (for when you want to read that +stuff). Or you can combine those features in whatever way you like. + +=back + +=cut + +package JSON::XS; + +use common::sense; + +our $VERSION = 3.04; +our @ISA = qw(Exporter); + +our @EXPORT = qw(encode_json decode_json); + +use Exporter; +use XSLoader; + +use Types::Serialiser (); + +=head1 FUNCTIONAL INTERFACE + +The following convenience methods are provided by this module. They are +exported by default: + +=over 4 + +=item $json_text = encode_json $perl_scalar + +Converts the given Perl data structure to a UTF-8 encoded, binary string +(that is, the string contains octets only). Croaks on error. + +This function call is functionally identical to: + + $json_text = JSON::XS->new->utf8->encode ($perl_scalar) + +Except being faster. + +=item $perl_scalar = decode_json $json_text + +The opposite of C: expects an UTF-8 (binary) string and tries +to parse that as an UTF-8 encoded JSON text, returning the resulting +reference. Croaks on error. + +This function call is functionally identical to: + + $perl_scalar = JSON::XS->new->utf8->decode ($json_text) + +Except being faster. + +=back + + +=head1 A FEW NOTES ON UNICODE AND PERL + +Since this often leads to confusion, here are a few very clear words on +how Unicode works in Perl, modulo bugs. + +=over 4 + +=item 1. Perl strings can store characters with ordinal values > 255. + +This enables you to store Unicode characters as single characters in a +Perl string - very natural. + +=item 2. Perl does I associate an encoding with your strings. + +... until you force it to, e.g. when matching it against a regex, or +printing the scalar to a file, in which case Perl either interprets your +string as locale-encoded text, octets/binary, or as Unicode, depending +on various settings. In no case is an encoding stored together with your +data, it is I that decides encoding, not any magical meta data. + +=item 3. The internal utf-8 flag has no meaning with regards to the +encoding of your string. + +Just ignore that flag unless you debug a Perl bug, a module written in +XS or want to dive into the internals of perl. Otherwise it will only +confuse you, as, despite the name, it says nothing about how your string +is encoded. You can have Unicode strings with that flag set, with that +flag clear, and you can have binary data with that flag set and that flag +clear. Other possibilities exist, too. + +If you didn't know about that flag, just the better, pretend it doesn't +exist. + +=item 4. A "Unicode String" is simply a string where each character can be +validly interpreted as a Unicode code point. 
+ +If you have UTF-8 encoded data, it is no longer a Unicode string, but a +Unicode string encoded in UTF-8, giving you a binary string. + +=item 5. A string containing "high" (> 255) character values is I a UTF-8 string. + +It's a fact. Learn to live with it. + +=back + +I hope this helps :) + + +=head1 OBJECT-ORIENTED INTERFACE + +The object oriented interface lets you configure your own encoding or +decoding style, within the limits of supported formats. + +=over 4 + +=item $json = new JSON::XS + +Creates a new JSON::XS object that can be used to de/encode JSON +strings. All boolean flags described below are by default I. + +The mutators for flags all return the JSON object again and thus calls can +be chained: + + my $json = JSON::XS->new->utf8->space_after->encode ({a => [1,2]}) + => {"a": [1, 2]} + +=item $json = $json->ascii ([$enable]) + +=item $enabled = $json->get_ascii + +If C<$enable> is true (or missing), then the C method will not +generate characters outside the code range C<0..127> (which is ASCII). Any +Unicode characters outside that range will be escaped using either a +single \uXXXX (BMP characters) or a double \uHHHH\uLLLLL escape sequence, +as per RFC4627. The resulting encoded JSON text can be treated as a native +Unicode string, an ascii-encoded, latin1-encoded or UTF-8 encoded string, +or any other superset of ASCII. + +If C<$enable> is false, then the C method will not escape Unicode +characters unless required by the JSON syntax or other flags. This results +in a faster and more compact format. + +See also the section I later in this +document. + +The main use for this flag is to produce JSON texts that can be +transmitted over a 7-bit channel, as the encoded JSON texts will not +contain any 8 bit characters. + + JSON::XS->new->ascii (1)->encode ([chr 0x10401]) + => ["\ud801\udc01"] + +=item $json = $json->latin1 ([$enable]) + +=item $enabled = $json->get_latin1 + +If C<$enable> is true (or missing), then the C method will encode +the resulting JSON text as latin1 (or iso-8859-1), escaping any characters +outside the code range C<0..255>. The resulting string can be treated as a +latin1-encoded JSON text or a native Unicode string. The C method +will not be affected in any way by this flag, as C by default +expects Unicode, which is a strict superset of latin1. + +If C<$enable> is false, then the C method will not escape Unicode +characters unless required by the JSON syntax or other flags. + +See also the section I later in this +document. + +The main use for this flag is efficiently encoding binary data as JSON +text, as most octets will not be escaped, resulting in a smaller encoded +size. The disadvantage is that the resulting JSON text is encoded +in latin1 (and must correctly be treated as such when storing and +transferring), a rare encoding for JSON. It is therefore most useful when +you want to store data structures known to contain binary data efficiently +in files or databases, not when talking to other JSON encoders/decoders. + + JSON::XS->new->latin1->encode (["\x{89}\x{abc}"] + => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not) + +=item $json = $json->utf8 ([$enable]) + +=item $enabled = $json->get_utf8 + +If C<$enable> is true (or missing), then the C method will encode +the JSON result into UTF-8, as required by many protocols, while the +C method expects to be handled an UTF-8-encoded string. Please +note that UTF-8-encoded strings do not contain any characters outside the +range C<0..255>, they are thus useful for bytewise/binary I/O. 
In future +versions, enabling this option might enable autodetection of the UTF-16 +and UTF-32 encoding families, as described in RFC4627. + +If C<$enable> is false, then the C method will return the JSON +string as a (non-encoded) Unicode string, while C expects thus a +Unicode string. Any decoding or encoding (e.g. to UTF-8 or UTF-16) needs +to be done yourself, e.g. using the Encode module. + +See also the section I later in this +document. + +Example, output UTF-16BE-encoded JSON: + + use Encode; + $jsontext = encode "UTF-16BE", JSON::XS->new->encode ($object); + +Example, decode UTF-32LE-encoded JSON: + + use Encode; + $object = JSON::XS->new->decode (decode "UTF-32LE", $jsontext); + +=item $json = $json->pretty ([$enable]) + +This enables (or disables) all of the C, C and +C (and in the future possibly more) flags in one call to +generate the most readable (or most compact) form possible. + +Example, pretty-print some simple structure: + + my $json = JSON::XS->new->pretty(1)->encode ({a => [1,2]}) + => + { + "a" : [ + 1, + 2 + ] + } + +=item $json = $json->indent ([$enable]) + +=item $enabled = $json->get_indent + +If C<$enable> is true (or missing), then the C method will use a multiline +format as output, putting every array member or object/hash key-value pair +into its own line, indenting them properly. + +If C<$enable> is false, no newlines or indenting will be produced, and the +resulting JSON text is guaranteed not to contain any C. + +This setting has no effect when decoding JSON texts. + +=item $json = $json->space_before ([$enable]) + +=item $enabled = $json->get_space_before + +If C<$enable> is true (or missing), then the C method will add an extra +optional space before the C<:> separating keys from values in JSON objects. + +If C<$enable> is false, then the C method will not add any extra +space at those places. + +This setting has no effect when decoding JSON texts. You will also +most likely combine this setting with C. + +Example, space_before enabled, space_after and indent disabled: + + {"key" :"value"} + +=item $json = $json->space_after ([$enable]) + +=item $enabled = $json->get_space_after + +If C<$enable> is true (or missing), then the C method will add an extra +optional space after the C<:> separating keys from values in JSON objects +and extra whitespace after the C<,> separating key-value pairs and array +members. + +If C<$enable> is false, then the C method will not add any extra +space at those places. + +This setting has no effect when decoding JSON texts. + +Example, space_before and indent disabled, space_after enabled: + + {"key": "value"} + +=item $json = $json->relaxed ([$enable]) + +=item $enabled = $json->get_relaxed + +If C<$enable> is true (or missing), then C will accept some +extensions to normal JSON syntax (see below). C will not be +affected in anyway. I. I suggest only to use this option to +parse application-specific files written by humans (configuration files, +resource files etc.) + +If C<$enable> is false (the default), then C will only accept +valid JSON texts. + +Currently accepted extensions are: + +=over 4 + +=item * list items can have an end-comma + +JSON I array elements and key-value pairs with commas. 
This +can be annoying if you write JSON texts manually and want to be able to +quickly append elements, so this extension accepts comma at the end of +such items not just between them: + + [ + 1, + 2, <- this comma not normally allowed + ] + { + "k1": "v1", + "k2": "v2", <- this comma not normally allowed + } + +=item * shell-style '#'-comments + +Whenever JSON allows whitespace, shell-style comments are additionally +allowed. They are terminated by the first carriage-return or line-feed +character, after which more white-space and comments are allowed. + + [ + 1, # this comment not allowed in JSON + # neither this one... + ] + +=item * literal ASCII TAB characters in strings + +Literal ASCII TAB characters are now allowed in strings (and treated as +C<\t>). + + [ + "Hello\tWorld", + "HelloWorld", # literal would not normally be allowed + ] + +=back + +=item $json = $json->canonical ([$enable]) + +=item $enabled = $json->get_canonical + +If C<$enable> is true (or missing), then the C method will output JSON objects +by sorting their keys. This is adding a comparatively high overhead. + +If C<$enable> is false, then the C method will output key-value +pairs in the order Perl stores them (which will likely change between runs +of the same script, and can change even within the same run from 5.18 +onwards). + +This option is useful if you want the same data structure to be encoded as +the same JSON text (given the same overall settings). If it is disabled, +the same hash might be encoded differently even if contains the same data, +as key-value pairs have no inherent ordering in Perl. + +This setting has no effect when decoding JSON texts. + +This setting has currently no effect on tied hashes. + +=item $json = $json->allow_nonref ([$enable]) + +=item $enabled = $json->get_allow_nonref + +If C<$enable> is true (or missing), then the C method can convert a +non-reference into its corresponding string, number or null JSON value, +which is an extension to RFC4627. Likewise, C will accept those JSON +values instead of croaking. + +If C<$enable> is false, then the C method will croak if it isn't +passed an arrayref or hashref, as JSON texts must either be an object +or array. Likewise, C will croak if given something that is not a +JSON object or array. + +Example, encode a Perl scalar as JSON value with enabled C, +resulting in an invalid JSON text: + + JSON::XS->new->allow_nonref->encode ("Hello, World!") + => "Hello, World!" + +=item $json = $json->allow_unknown ([$enable]) + +=item $enabled = $json->get_allow_unknown + +If C<$enable> is true (or missing), then C will I throw an +exception when it encounters values it cannot represent in JSON (for +example, filehandles) but instead will encode a JSON C value. Note +that blessed objects are not included here and are handled separately by +c. + +If C<$enable> is false (the default), then C will throw an +exception when it encounters anything it cannot encode as JSON. + +This option does not affect C in any way, and it is recommended to +leave it off unless you know your communications partner. + +=item $json = $json->allow_blessed ([$enable]) + +=item $enabled = $json->get_allow_blessed + +See L for details. + +If C<$enable> is true (or missing), then the C method will not +barf when it encounters a blessed reference that it cannot convert +otherwise. Instead, a JSON C value is encoded instead of the object. 
+ +If C<$enable> is false (the default), then C will throw an +exception when it encounters a blessed object that it cannot convert +otherwise. + +This setting has no effect on C. + +=item $json = $json->convert_blessed ([$enable]) + +=item $enabled = $json->get_convert_blessed + +See L for details. + +If C<$enable> is true (or missing), then C, upon encountering a +blessed object, will check for the availability of the C method +on the object's class. If found, it will be called in scalar context and +the resulting scalar will be encoded instead of the object. + +The C method may safely call die if it wants. If C +returns other blessed objects, those will be handled in the same +way. C must take care of not causing an endless recursion cycle +(== crash) in this case. The name of C was chosen because other +methods called by the Perl core (== not by the user of the object) are +usually in upper case letters and to avoid collisions with any C +function or method. + +If C<$enable> is false (the default), then C will not consider +this type of conversion. + +This setting has no effect on C. + +=item $json = $json->allow_tags ([$enable]) + +=item $enabled = $json->allow_tags + +See L for details. + +If C<$enable> is true (or missing), then C, upon encountering a +blessed object, will check for the availability of the C method on +the object's class. If found, it will be used to serialise the object into +a nonstandard tagged JSON value (that JSON decoders cannot decode). + +It also causes C to parse such tagged JSON values and deserialise +them via a call to the C method. + +If C<$enable> is false (the default), then C will not consider +this type of conversion, and tagged JSON values will cause a parse error +in C, as if tags were not part of the grammar. + +=item $json = $json->filter_json_object ([$coderef->($hashref)]) + +When C<$coderef> is specified, it will be called from C each +time it decodes a JSON object. The only argument is a reference to the +newly-created hash. If the code references returns a single scalar (which +need not be a reference), this value (i.e. a copy of that scalar to avoid +aliasing) is inserted into the deserialised data structure. If it returns +an empty list (NOTE: I C, which is a valid scalar), the +original deserialised hash will be inserted. This setting can slow down +decoding considerably. + +When C<$coderef> is omitted or undefined, any existing callback will +be removed and C will not change the deserialised hash in any +way. + +Example, convert all JSON objects into the integer 5: + + my $js = JSON::XS->new->filter_json_object (sub { 5 }); + # returns [5] + $js->decode ('[{}]') + # throw an exception because allow_nonref is not enabled + # so a lone 5 is not allowed. + $js->decode ('{"a":1, "b":2}'); + +=item $json = $json->filter_json_single_key_object ($key [=> $coderef->($value)]) + +Works remotely similar to C, but is only called for +JSON objects having a single key named C<$key>. + +This C<$coderef> is called before the one specified via +C, if any. It gets passed the single value in the JSON +object. If it returns a single value, it will be inserted into the data +structure. If it returns nothing (not even C but the empty list), +the callback from C will be called next, as if no +single-key callback were specified. + +If C<$coderef> is omitted or undefined, the corresponding callback will be +disabled. There can only ever be one callback for a given key. 
+ +As this callback gets called less often then the C +one, decoding speed will not usually suffer as much. Therefore, single-key +objects make excellent targets to serialise Perl objects into, especially +as single-key JSON objects are as close to the type-tagged value concept +as JSON gets (it's basically an ID/VALUE tuple). Of course, JSON does not +support this in any way, so you need to make sure your data never looks +like a serialised Perl hash. + +Typical names for the single object key are C<__class_whatever__>, or +C<$__dollars_are_rarely_used__$> or C<}ugly_brace_placement>, or even +things like C<__class_md5sum(classname)__>, to reduce the risk of clashing +with real hashes. + +Example, decode JSON objects of the form C<< { "__widget__" => } >> +into the corresponding C<< $WIDGET{} >> object: + + # return whatever is in $WIDGET{5}: + JSON::XS + ->new + ->filter_json_single_key_object (__widget__ => sub { + $WIDGET{ $_[0] } + }) + ->decode ('{"__widget__": 5') + + # this can be used with a TO_JSON method in some "widget" class + # for serialisation to json: + sub WidgetBase::TO_JSON { + my ($self) = @_; + + unless ($self->{id}) { + $self->{id} = ..get..some..id..; + $WIDGET{$self->{id}} = $self; + } + + { __widget__ => $self->{id} } + } + +=item $json = $json->shrink ([$enable]) + +=item $enabled = $json->get_shrink + +Perl usually over-allocates memory a bit when allocating space for +strings. This flag optionally resizes strings generated by either +C or C to their minimum size possible. This can save +memory when your JSON texts are either very very long or you have many +short strings. It will also try to downgrade any strings to octet-form +if possible: perl stores strings internally either in an encoding called +UTF-X or in octet-form. The latter cannot store everything but uses less +space in general (and some buggy Perl or C code might even rely on that +internal representation being used). + +The actual definition of what shrink does might change in future versions, +but it will always try to save space at the expense of time. + +If C<$enable> is true (or missing), the string returned by C will +be shrunk-to-fit, while all strings generated by C will also be +shrunk-to-fit. + +If C<$enable> is false, then the normal perl allocation algorithms are used. +If you work with your data, then this is likely to be faster. + +In the future, this setting might control other things, such as converting +strings that look like integers or floats into integers or floats +internally (there is no difference on the Perl level), saving space. + +=item $json = $json->max_depth ([$maximum_nesting_depth]) + +=item $max_depth = $json->get_max_depth + +Sets the maximum nesting level (default C<512>) accepted while encoding +or decoding. If a higher nesting level is detected in JSON text or a Perl +data structure, then the encoder and decoder will stop and croak at that +point. + +Nesting level is defined by number of hash- or arrayrefs that the encoder +needs to traverse to reach a given point or the number of C<{> or C<[> +characters without their matching closing parenthesis crossed to reach a +given character in a string. + +Setting the maximum depth to one disallows any nesting, so that ensures +that the object is only a single hash/object or array. + +If no argument is given, the highest possible setting will be used, which +is rarely useful. + +Note that nesting is implemented by recursion in C. 
The default value has +been chosen to be as large as typical operating systems allow without +crashing. + +See SECURITY CONSIDERATIONS, below, for more info on why this is useful. + +=item $json = $json->max_size ([$maximum_string_size]) + +=item $max_size = $json->get_max_size + +Set the maximum length a JSON text may have (in bytes) where decoding is +being attempted. The default is C<0>, meaning no limit. When C +is called on a string that is longer then this many bytes, it will not +attempt to decode the string but throw an exception. This setting has no +effect on C (yet). + +If no argument is given, the limit check will be deactivated (same as when +C<0> is specified). + +See SECURITY CONSIDERATIONS, below, for more info on why this is useful. + +=item $json_text = $json->encode ($perl_scalar) + +Converts the given Perl value or data structure to its JSON +representation. Croaks on error. + +=item $perl_scalar = $json->decode ($json_text) + +The opposite of C: expects a JSON text and tries to parse it, +returning the resulting simple scalar or reference. Croaks on error. + +=item ($perl_scalar, $characters) = $json->decode_prefix ($json_text) + +This works like the C method, but instead of raising an exception +when there is trailing garbage after the first JSON object, it will +silently stop parsing there and return the number of characters consumed +so far. + +This is useful if your JSON texts are not delimited by an outer protocol +and you need to know where the JSON text ends. + + JSON::XS->new->decode_prefix ("[1] the tail") + => ([1], 3) + +=back + + +=head1 INCREMENTAL PARSING + +In some cases, there is the need for incremental parsing of JSON +texts. While this module always has to keep both JSON text and resulting +Perl data structure in memory at one time, it does allow you to parse a +JSON stream incrementally. It does so by accumulating text until it has +a full JSON object, which it then can decode. This process is similar to +using C to see if a full JSON object is available, but +is much more efficient (and can be implemented with a minimum of method +calls). + +JSON::XS will only attempt to parse the JSON text once it is sure it +has enough text to get a decisive result, using a very simple but +truly incremental parser. This means that it sometimes won't stop as +early as the full parser, for example, it doesn't detect mismatched +parentheses. The only thing it guarantees is that it starts decoding as +soon as a syntactically valid JSON text has been seen. This means you need +to set resource limits (e.g. C) to ensure the parser will stop +parsing in the presence if syntax errors. + +The following methods implement this incremental parser. + +=over 4 + +=item [void, scalar or list context] = $json->incr_parse ([$string]) + +This is the central parsing function. It can both append new text and +extract objects from the stream accumulated so far (both of these +functions are optional). + +If C<$string> is given, then this string is appended to the already +existing JSON fragment stored in the C<$json> object. + +After that, if the function is called in void context, it will simply +return without doing anything further. This can be used to add more text +in as many chunks as you want. + +If the method is called in scalar context, then it will try to extract +exactly I JSON object. If that is successful, it will return this +object, otherwise it will return C. 
If there is a parse error, +this method will croak just as C would do (one can then use +C to skip the erroneous part). This is the most common way of +using the method. + +And finally, in list context, it will try to extract as many objects +from the stream as it can find and return them, or the empty list +otherwise. For this to work, there must be no separators (other than +whitespace) between the JSON objects or arrays, instead they must be +concatenated back-to-back. If an error occurs, an exception will be +raised as in the scalar context case. Note that in this case, any +previously-parsed JSON texts will be lost. + +Example: Parse some JSON arrays/objects in a given string and return +them. + + my @objs = JSON::XS->new->incr_parse ("[5][7][1,2]"); + +=item $lvalue_string = $json->incr_text + +This method returns the currently stored JSON fragment as an lvalue, that +is, you can manipulate it. This I works when a preceding call to +C in I successfully returned an object. Under +all other circumstances you must not call this function (I mean it. +although in simple tests it might actually work, it I fail under +real world conditions). As a special exception, you can also call this +method before having parsed anything. + +That means you can only use this function to look at or manipulate text +before or after complete JSON objects, not while the parser is in the +middle of parsing a JSON object. + +This function is useful in two cases: a) finding the trailing text after a +JSON object or b) parsing multiple JSON objects separated by non-JSON text +(such as commas). + +=item $json->incr_skip + +This will reset the state of the incremental parser and will remove +the parsed text from the input buffer so far. This is useful after +C died, in which case the input buffer and incremental parser +state is left unchanged, to skip the text parsed so far and to reset the +parse state. + +The difference to C is that only text until the parse error +occurred is removed. + +=item $json->incr_reset + +This completely resets the incremental parser, that is, after this call, +it will be as if the parser had never parsed anything. + +This is useful if you want to repeatedly parse JSON objects and want to +ignore any trailing data, which means you have to reset the parser after +each successful decode. + +=back + +=head2 LIMITATIONS + +All options that affect decoding are supported, except +C. The reason for this is that it cannot be made to work +sensibly: JSON objects and arrays are self-delimited, i.e. you can +concatenate them back to back and still decode them perfectly. This does +not hold true for JSON numbers, however. + +For example, is the string C<1> a single JSON number, or is it simply the +start of C<12>? Or is C<12> a single JSON number, or the concatenation +of C<1> and C<2>? In neither case you can tell, and this is why JSON::XS +takes the conservative route and disallows this case. + +=head2 EXAMPLES + +Some examples will make all this clearer. First, a simple example that +works similarly to C: We want to decode the JSON object at +the start of a string and identify the portion after the JSON object: + + my $text = "[1,2,3] hello"; + + my $json = new JSON::XS; + + my $obj = $json->incr_parse ($text) + or die "expected JSON object or array at beginning of string"; + + my $tail = $json->incr_text; + # $tail now contains " hello" + +Easy, isn't it? 
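+
+For comparison, here is a roughly equivalent one-shot version using
+C<decode_prefix> (a sketch that assumes the whole text is already in
+memory, so no incremental parsing is needed):
+
+   my ($obj, $offset) = JSON::XS->new->decode_prefix ("[1,2,3] hello");
+   my $tail = substr "[1,2,3] hello", $offset;   # " hello"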
+
+Now for a more complicated example: Imagine a hypothetical protocol where
+you read some requests from a TCP stream, and each request is a JSON
+array, without any separation between them (in fact, it is often useful to
+use newlines as "separators", as these get interpreted as whitespace at
+the start of the JSON text, which makes it possible to test said protocol
+with C<telnet>...).
+
+Here is how you'd do it (it is trivial to write this in an event-based
+manner):
+
+   my $json = new JSON::XS;
+
+   # read some data from the socket
+   while (sysread $socket, my $buf, 4096) {
+
+      # split and decode as many requests as possible
+      for my $request ($json->incr_parse ($buf)) {
+         # act on the $request
+      }
+   }
+
+Another complicated example: Assume you have a string with JSON objects
+or arrays, all separated by (optional) comma characters (e.g. C<[1],[2],
+[3]>). To parse them, we have to skip the commas between the JSON texts,
+and here is where the lvalue-ness of C<incr_text> comes in useful:
+
+   my $text = "[1],[2], [3]";
+   my $json = new JSON::XS;
+
+   # void context, so no parsing done
+   $json->incr_parse ($text);
+
+   # now extract as many objects as possible. note the
+   # use of scalar context so incr_text can be called.
+   while (my $obj = $json->incr_parse) {
+      # do something with $obj
+
+      # now skip the optional comma
+      $json->incr_text =~ s/^ \s* , //x;
+   }
+
+Now let's go for a very complex example: Assume that you have a gigantic
+JSON array-of-objects, many gigabytes in size, and you want to parse it,
+but you cannot load it into memory fully (this has actually happened in
+the real world :).
+
+Well, you lost, you have to implement your own JSON parser. But JSON::XS
+can still help you: You implement a (very simple) array parser and let
+JSON decode the array elements, which are all full JSON objects on their
+own (this wouldn't work if the array elements could be JSON numbers, for
+example):
+
+   my $json = new JSON::XS;
+
+   # open the monster
+   open my $fh, "<monster.json"
+      or die "monster.json: $!";
+
+   # first parse the initial "["
+   for (;;) {
+      sysread $fh, my $buf, 65536
+         or die "read error: $!";
+      $json->incr_parse ($buf); # void context, so no parsing
+
+      # Exit the loop once we found and removed(!) the initial "[".
+      # In essence, we are (ab-)using the $json object as a simple scalar
+      # we append data to.
+      last if $json->incr_text =~ s/^ \s* \[ //x;
+   }
+
+   # now we have skipped the initial "[", so continue
+   # parsing all the elements.
+   for (;;) {
+      # in this loop we read data until we got a single JSON object
+      for (;;) {
+         if (my $obj = $json->incr_parse) {
+            # do something with $obj
+            last;
+         }
+
+         # add more data
+         sysread $fh, my $buf, 65536
+            or die "read error: $!";
+         $json->incr_parse ($buf); # void context, so no parsing
+      }
+
+      # in this loop we read data until we either found and parsed the
+      # separating "," between elements, or the final "]"
+      for (;;) {
+         # first skip whitespace
+         $json->incr_text =~ s/^\s*//;
+
+         # if we find "]", we are done
+         if ($json->incr_text =~ s/^\]//) {
+            print "finished.\n";
+            exit;
+         }
+
+         # if we find ",", we can continue with the next element
+         if ($json->incr_text =~ s/^,//) {
+            last;
+         }
+
+         # if we find anything else, we have a parse error!
+         if (length $json->incr_text) {
+            die "parse error near ", $json->incr_text;
+         }
+
+         # else add more data
+         sysread $fh, my $buf, 65536
+            or die "read error: $!";
+         $json->incr_parse ($buf); # void context, so no parsing
+      }
+   }
+
+This is a complex example, but most of the complexity comes from the fact
+that we are trying to be correct (bear with me if I am wrong, I never ran
+the above example :).
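+
+As one final, much smaller sketch (my own addition, not part of the
+original distribution's examples), here is how C<incr_reset> could be used
+for the "parse one JSON text per request and ignore any trailing data"
+case mentioned in its description; C<handle> is a hypothetical callback:
+
+   my $json = JSON::XS->new;
+
+   for my $request (@raw_requests) {
+      $json->incr_reset;                      # forget leftovers from the previous request
+      my $obj = $json->incr_parse ($request)  # scalar context: extract one object
+         or die "request did not contain a complete JSON text";
+      handle ($obj);                          # hypothetical handler
+   }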
+
+
+=head1 MAPPING
+
+This section describes how JSON::XS maps Perl values to JSON values and
+vice versa. These mappings are designed to "do the right thing" in most
+circumstances automatically, preserving round-tripping characteristics
+(what you put in comes out as something equivalent).
+
+For the more enlightened: note that in the following descriptions,
+lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
+refers to the abstract Perl language itself.
+
+
+=head2 JSON -> PERL
+
+=over 4
+
+=item object
+
+A JSON object becomes a reference to a hash in Perl. No ordering of object
+keys is preserved (JSON does not preserve object key ordering itself).
+
+=item array
+
+A JSON array becomes a reference to an array in Perl.
+
+=item string
+
+A JSON string becomes a string scalar in Perl - Unicode codepoints in JSON
+are represented by the same codepoints in the Perl string, so no manual
+decoding is necessary.
+
+=item number
+
+A JSON number becomes either an integer, numeric (floating point) or
+string scalar in perl, depending on its range and any fractional parts. On
+the Perl level, there is no difference between those as Perl handles all
+the conversion details, but an integer may take slightly less memory and
+might represent more values exactly than floating point numbers.
+
+If the number consists of digits only, JSON::XS will try to represent
+it as an integer value. If that fails, it will try to represent it as
+a numeric (floating point) value if that is possible without loss of
+precision. Otherwise it will preserve the number as a string value (in
+which case you lose roundtripping ability, as the JSON number will be
+re-encoded to a JSON string).
+
+Numbers containing a fractional or exponential part will always be
+represented as numeric (floating point) values, possibly at a loss of
+precision (in which case you might lose perfect roundtripping ability, but
+the JSON number will still be re-encoded as a JSON number).
+
+Note that precision is not accuracy - binary floating point values cannot
+represent most decimal fractions exactly, and when converting from and to
+floating point, JSON::XS only guarantees precision up to but not including
+the least significant bit.
+
+=item true, false
+
+These JSON atoms become C<Types::Serialiser::true> and
+C<Types::Serialiser::false>, respectively. They are overloaded to act
+almost exactly like the numbers C<1> and C<0>. You can check whether
+a scalar is a JSON boolean by using the C<Types::Serialiser::is_bool>
+function (after C<use Types::Serialiser>, of course).
+
+=item null
+
+A JSON null atom becomes C<undef> in Perl.
+
+=item shell-style comments (C<< # I<text> >>)
+
+As a nonstandard extension to the JSON syntax that is enabled by the
+C<relaxed> setting, shell-style comments are allowed. They can start
+anywhere outside strings and go till the end of the line.
+
+=item tagged values (C<< (I<tag>)I<value> >>).
+
+Another nonstandard extension to the JSON syntax, enabled with the
+C<allow_tags> setting, are tagged values. In this implementation, the
+I<tag> must be a perl package/class name encoded as a JSON string, and the
+I<value> must be a JSON array encoding optional constructor arguments.
+
+See L<OBJECT SERIALISATION>, below, for details.
+
+=back
+
+
+=head2 PERL -> JSON
+
+The mapping from Perl to JSON is slightly more difficult, as Perl is a
+truly typeless language, so we can only guess which JSON type is meant by
+a Perl value.
+
+=over 4
+
+=item hash references
+
+Perl hash references become JSON objects. As there is no inherent
+ordering in hash keys (or JSON objects), they will usually be encoded
+in a pseudo-random order.
+JSON::XS can optionally sort the hash keys
+(determined by the I<canonical> flag), so the same data structure will
+serialise to the same JSON text (given the same settings and version of
+JSON::XS), but this incurs a runtime overhead and is only rarely useful,
+e.g. when you want to compare some JSON text against another for equality.
+
+=item array references
+
+Perl array references become JSON arrays.
+
+=item other references
+
+Other unblessed references are generally not allowed and will cause an
+exception to be thrown, except for references to the integers C<0> and
+C<1>, which get turned into C<false> and C<true> atoms in JSON.
+
+Since C<JSON::XS> uses the boolean model from L<Types::Serialiser>, you
+can also C<use Types::Serialiser> and then use C<Types::Serialiser::false>
+and C<Types::Serialiser::true> to improve readability.
+
+   use Types::Serialiser;
+   encode_json [\0, Types::Serialiser::true]   # yields [false,true]
+
+=item Types::Serialiser::true, Types::Serialiser::false
+
+These special values from the L<Types::Serialiser> module become JSON true
+and JSON false values, respectively. You can also use C<\1> and C<\0>
+directly if you want.
+
+=item blessed objects
+
+Blessed objects are not directly representable in JSON, but C<JSON::XS>
+allows various ways of handling objects. See L<OBJECT SERIALISATION>,
+below, for details.
+
+=item simple scalars
+
+Simple Perl scalars (any scalar that is not a reference) are the most
+difficult objects to encode: JSON::XS will encode undefined scalars as
+JSON C<null> values, scalars that have last been used in a string context
+before encoding as JSON strings, and anything else as number values:
+
+   # dump as number
+   encode_json [2]                      # yields [2]
+   encode_json [-3.0e17]                # yields [-3e+17]
+   my $value = 5; encode_json [$value]  # yields [5]
+
+   # used as string, so dump as string
+   print $value;
+   encode_json [$value]                 # yields ["5"]
+
+   # undef becomes null
+   encode_json [undef]                  # yields [null]
+
+You can force the type to be a JSON string by stringifying it:
+
+   my $x = 3.1; # some variable containing a number
+   "$x";        # stringified
+   $x .= "";    # another, more awkward way to stringify
+   print $x;    # perl does it for you, too, quite often
+
+You can force the type to be a JSON number by numifying it:
+
+   my $x = "3"; # some variable containing a string
+   $x += 0;     # numify it, ensuring it will be dumped as a number
+   $x *= 1;     # same thing, the choice is yours.
+
+You cannot currently force the type in other, less obscure, ways. Tell me
+if you need this capability (but don't forget to explain why it's needed
+:).
+
+Note that numerical precision has the same meaning as under Perl (so
+binary to decimal conversion follows the same rules as in Perl, which
+can differ from other languages). Also, your perl interpreter might expose
+extensions to the floating point numbers of your platform, such as
+infinities or NaNs - these cannot be represented in JSON, and it is an
+error to pass those in.
+
+=back
+
+=head2 OBJECT SERIALISATION
+
+As JSON cannot directly represent Perl objects, you have to choose between
+a pure JSON representation (without the ability to deserialise the object
+automatically again), and a nonstandard extension to the JSON syntax,
+tagged values.
+
+=head3 SERIALISATION
+
+What happens when C<encode> encounters a Perl object depends on the
+C<allow_tags>, C<convert_blessed> and C<allow_blessed> settings, which are
+used in this order:
+
+=over 4
+
+=item 1. C<allow_tags> is enabled and the object has a C<FREEZE> method.
+
+In this case, C<encode> uses the L<Types::Serialiser> object
+serialisation protocol to create a tagged JSON value, using a nonstandard
+extension to the JSON syntax.
+
+This works by invoking the C<FREEZE> method on the object, with the first
+argument being the object to serialise, and the second argument being the
+constant string C<JSON> to distinguish it from other serialisers.
+
+The C<FREEZE> method can return any number of values (i.e. zero or
+more). These values and the package/classname of the object will then be
+encoded as a tagged JSON value in the following format:
+
+   ("classname")[FREEZE return values...]
+
+e.g.:
+
+   ("URI")["http://www.google.com/"]
+   ("MyDate")[2013,10,29]
+   ("ImageData::JPEG")["Z3...VlCg=="]
+
+For example, the hypothetical C<My::Object> C<FREEZE> method might use the
+object's C<type> and C<id> members to encode the object:
+
+   sub My::Object::FREEZE {
+      my ($self, $serialiser) = @_;
+
+      ($self->{type}, $self->{id})
+   }
+
+=item 2. C<convert_blessed> is enabled and the object has a C<TO_JSON> method.
+
+In this case, the C<TO_JSON> method of the object is invoked in scalar
+context. It must return a single scalar that can be directly encoded into
+JSON. This scalar replaces the object in the JSON text.
+
+For example, the following C<TO_JSON> method will convert all L<URI>
+objects to JSON strings when serialised. The fact that these values
+originally were L<URI> objects is lost.
+
+   sub URI::TO_JSON {
+      my ($uri) = @_;
+      $uri->as_string
+   }
+
+=item 3. C<allow_blessed> is enabled.
+
+The object will be serialised as a JSON null value.
+
+=item 4. none of the above
+
+If none of the settings are enabled or the respective methods are missing,
+C<encode> throws an exception.
+
+=back
+
+=head3 DESERIALISATION
+
+For deserialisation there are only two cases to consider: either
+nonstandard tagging was used, in which case C<allow_tags> decides,
+or objects cannot automatically be deserialised, in which
+case you can use postprocessing or the C<filter_json_object> or
+C<filter_json_single_key_object> callbacks to get some real objects out of
+your JSON.
+
+This section only considers the tagged value case: if a tagged JSON object
+is encountered during decoding and C<allow_tags> is disabled, a parse
+error will result (as if tagged values were not part of the grammar).
+
+If C<allow_tags> is enabled, C<decode> will look up the C<THAW> method
+of the package/classname used during serialisation (it will not attempt
+to load the package as a Perl module). If there is no such method, the
+decoding will fail with an error.
+
+Otherwise, the C<THAW> method is invoked with the classname as first
+argument, the constant string C<JSON> as second argument, and all the
+values from the JSON array (the values originally returned by the
+C<FREEZE> method) as remaining arguments.
+
+The method must then return the object. While technically you can return
+any Perl scalar, you might have to enable the C<allow_nonref> setting to
+make that work in all cases, so better return an actual blessed reference.
+
+As an example, let's implement a C<THAW> function that regenerates the
+C<My::Object> from the C<FREEZE> example earlier:
+
+   sub My::Object::THAW {
+      my ($class, $serialiser, $type, $id) = @_;
+
+      $class->new (type => $type, id => $id)
+   }
+
+
+=head1 ENCODING/CODESET FLAG NOTES
+
+The interested reader might have seen a number of flags that signify
+encodings or codesets - C<utf8>, C<latin1> and C<ascii>. There seems to be
+some confusion on what these do, so here is a short comparison:
+
+C<utf8> controls whether the JSON text created by C<encode> (and expected
+by C<decode>) is UTF-8 encoded or not, while C<latin1> and C<ascii> only
+control whether C<encode> escapes character values outside their respective
+codeset range. None of these flags conflict with each other, although
+some combinations make less sense than others.
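+
+To make the difference concrete, here is a small sketch of my own (not
+part of the original text; the expected results are noted in comments and
+follow from the flag descriptions below):
+
+   use utf8;
+   use JSON::XS;
+
+   my $data = ["é"];   # one string containing U+00E9
+
+   # utf8 enabled: the result is an octet string, "é" becomes the bytes C3 A9
+   my $octets = JSON::XS->new->utf8->encode ($data);
+
+   # ascii enabled: the result stays ASCII, "é" is escaped as \u00e9
+   my $escaped = JSON::XS->new->ascii->encode ($data);
+
+   # neither: the result is a Unicode character string containing "é" itself
+   my $chars = JSON::XS->new->encode ($data);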
+ +Care has been taken to make all flags symmetrical with respect to +C and C, that is, texts encoded with any combination of +these flag values will be correctly decoded when the same flags are used +- in general, if you use different flag settings while encoding vs. when +decoding you likely have a bug somewhere. + +Below comes a verbose discussion of these flags. Note that a "codeset" is +simply an abstract set of character-codepoint pairs, while an encoding +takes those codepoint numbers and I them, in our case into +octets. Unicode is (among other things) a codeset, UTF-8 is an encoding, +and ISO-8859-1 (= latin 1) and ASCII are both codesets I encodings at +the same time, which can be confusing. + +=over 4 + +=item C flag disabled + +When C is disabled (the default), then C/C generate +and expect Unicode strings, that is, characters with high ordinal Unicode +values (> 255) will be encoded as such characters, and likewise such +characters are decoded as-is, no changes to them will be done, except +"(re-)interpreting" them as Unicode codepoints or Unicode characters, +respectively (to Perl, these are the same thing in strings unless you do +funny/weird/dumb stuff). + +This is useful when you want to do the encoding yourself (e.g. when you +want to have UTF-16 encoded JSON texts) or when some other layer does +the encoding for you (for example, when printing to a terminal using a +filehandle that transparently encodes to UTF-8 you certainly do NOT want +to UTF-8 encode your data first and have Perl encode it another time). + +=item C flag enabled + +If the C-flag is enabled, C/C will encode all +characters using the corresponding UTF-8 multi-byte sequence, and will +expect your input strings to be encoded as UTF-8, that is, no "character" +of the input string must have any value > 255, as UTF-8 does not allow +that. + +The C flag therefore switches between two modes: disabled means you +will get a Unicode string in Perl, enabled means you get an UTF-8 encoded +octet/binary string in Perl. + +=item C or C flags enabled + +With C (or C) enabled, C will escape characters +with ordinal values > 255 (> 127 with C) and encode the remaining +characters as specified by the C flag. + +If C is disabled, then the result is also correctly encoded in those +character sets (as both are proper subsets of Unicode, meaning that a +Unicode string with all character values < 256 is the same thing as a +ISO-8859-1 string, and a Unicode string with all character values < 128 is +the same thing as an ASCII string in Perl). + +If C is enabled, you still get a correct UTF-8-encoded string, +regardless of these flags, just some more characters will be escaped using +C<\uXXXX> then before. + +Note that ISO-8859-1-I strings are not compatible with UTF-8 +encoding, while ASCII-encoded strings are. That is because the ISO-8859-1 +encoding is NOT a subset of UTF-8 (despite the ISO-8859-1 I being +a subset of Unicode), while ASCII is. + +Surprisingly, C will ignore these flags and so treat all input +values as governed by the C flag. If it is disabled, this allows you +to decode ISO-8859-1- and ASCII-encoded strings, as both strict subsets of +Unicode. If it is enabled, you can correctly decode UTF-8 encoded strings. + +So neither C nor C are incompatible with the C flag - +they only govern when the JSON output engine escapes a character or not. + +The main use for C is to relatively efficiently store binary data +as JSON, at the expense of breaking compatibility with most JSON decoders. 
+ +The main use for C is to force the output to not contain characters +with values > 127, which means you can interpret the resulting string +as UTF-8, ISO-8859-1, ASCII, KOI8-R or most about any character set and +8-bit-encoding, and still get the same data structure back. This is useful +when your channel for JSON transfer is not 8-bit clean or the encoding +might be mangled in between (e.g. in mail), and works because ASCII is a +proper subset of most 8-bit and multibyte encodings in use in the world. + +=back + + +=head2 JSON and ECMAscript + +JSON syntax is based on how literals are represented in javascript (the +not-standardised predecessor of ECMAscript) which is presumably why it is +called "JavaScript Object Notation". + +However, JSON is not a subset (and also not a superset of course) of +ECMAscript (the standard) or javascript (whatever browsers actually +implement). + +If you want to use javascript's C function to "parse" JSON, you +might run into parse errors for valid JSON texts, or the resulting data +structure might not be queryable: + +One of the problems is that U+2028 and U+2029 are valid characters inside +JSON strings, but are not allowed in ECMAscript string literals, so the +following Perl fragment will not output something that can be guaranteed +to be parsable by javascript's C: + + use JSON::XS; + + print encode_json [chr 0x2028]; + +The right fix for this is to use a proper JSON parser in your javascript +programs, and not rely on C (see for example Douglas Crockford's +F parser). + +If this is not an option, you can, as a stop-gap measure, simply encode to +ASCII-only JSON: + + use JSON::XS; + + print JSON::XS->new->ascii->encode ([chr 0x2028]); + +Note that this will enlarge the resulting JSON text quite a bit if you +have many non-ASCII characters. You might be tempted to run some regexes +to only escape U+2028 and U+2029, e.g.: + + # DO NOT USE THIS! + my $json = JSON::XS->new->utf8->encode ([chr 0x2028]); + $json =~ s/\xe2\x80\xa8/\\u2028/g; # escape U+2028 + $json =~ s/\xe2\x80\xa9/\\u2029/g; # escape U+2029 + print $json; + +Note that I: the above only works for U+2028 and +U+2029 and thus only for fully ECMAscript-compliant parsers. Many existing +javascript implementations, however, have issues with other characters as +well - using C naively simply I cause problems. + +Another problem is that some javascript implementations reserve +some property names for their own purposes (which probably makes +them non-ECMAscript-compliant). For example, Iceweasel reserves the +C<__proto__> property name for its own purposes. + +If that is a problem, you could parse try to filter the resulting JSON +output for these property strings, e.g.: + + $json =~ s/"__proto__"\s*:/"__proto__renamed":/g; + +This works because C<__proto__> is not valid outside of strings, so every +occurrence of C<"__proto__"\s*:> must be a string used as property name. + +If you know of other incompatibilities, please let me know. + + +=head2 JSON and YAML + +You often hear that JSON is a subset of YAML. This is, however, a mass +hysteria(*) and very far from the truth (as of the time of this writing), +so let me state it clearly: I that works in all +cases. + +If you really must use JSON::XS to generate YAML, you should use this +algorithm (subject to change in future versions): + + my $to_yaml = JSON::XS->new->utf8->space_after (1); + my $yaml = $to_yaml->encode ($ref) . "\n"; + +This will I generate JSON texts that also parse as valid +YAML. 
Please note that YAML has hardcoded limits on (simple) object key
+lengths that JSON doesn't have and also has different and incompatible
+unicode character escape syntax, so you should make sure that your hash
+keys are noticeably shorter than the 1024 "stream characters" YAML allows
+and that you do not have characters with codepoint values outside the
+Unicode BMP (basic multilingual plane). YAML also does not allow C<\/>
+sequences in strings (which JSON::XS does not I<need> to generate, but
+other JSON generators might).
+
+There might be other incompatibilities that I am not aware of (or the YAML
+specification has been changed yet again - it does so quite often). In
+general you should not try to generate YAML with a JSON generator or vice
+versa, or try to parse JSON with a YAML parser or vice versa: chances are
+high that you will run into severe interoperability problems when you
+least expect it.
+
+=over 4
+
+=item (*)
+
+I have been pressured multiple times by Brian Ingerson (one of the
+authors of the YAML specification) to remove this paragraph, despite him
+acknowledging that the actual incompatibilities exist. As I was personally
+bitten by this "JSON is YAML" lie, I refused and said I will continue to
+educate people about these issues, so others do not run into the same
+problem again and again. After this, Brian called me a (quote)I(unquote).
+
+In my opinion, instead of pressuring and insulting people who actually
+clarify issues with YAML and the wrong statements of some of its
+proponents, I would kindly suggest reading the JSON spec (which is not
+that difficult or long) and finally making YAML compatible with it, and
+educating users about the changes, instead of spreading lies about the
+real compatibility for many I<years> and trying to silence people who
+point out that it isn't true.
+
+Addendum/2009: the YAML 1.2 spec is still incompatible with JSON, even
+though the incompatibilities have been documented (and are known to Brian)
+for many years and the spec makes explicit claims that YAML is a superset
+of JSON. It would be so easy to fix, but apparently, bullying people and
+corrupting userdata is so much easier.
+
+=back
+
+
+=head2 SPEED
+
+It seems that JSON::XS is surprisingly fast, as shown in the following
+tables. They have been generated with the help of the benchmark program
+in the JSON::XS distribution, to make it easy to compare on your own
+system.
+
+First comes a comparison between various modules using
+a very short single-line JSON string (also available at
+L).
+
+   {"method": "handleMessage", "params": ["user1",
+   "we were just talking"], "id": null, "array":[1,11,234,-5,1e5,1e7,
+   1, 0]}
+
+It shows the number of encodes/decodes per second (JSON::XS uses
+the functional interface, while JSON::XS/2 uses the OO interface
+with pretty-printing and hashkey sorting enabled, JSON::XS/3 enables
+shrink. JSON::DWIW/DS uses the deserialise function, while JSON::DWIW::FJ
+uses the from_json method).
Higher is better: + + module | encode | decode | + --------------|------------|------------| + JSON::DWIW/DS | 86302.551 | 102300.098 | + JSON::DWIW/FJ | 86302.551 | 75983.768 | + JSON::PP | 15827.562 | 6638.658 | + JSON::Syck | 63358.066 | 47662.545 | + JSON::XS | 511500.488 | 511500.488 | + JSON::XS/2 | 291271.111 | 388361.481 | + JSON::XS/3 | 361577.931 | 361577.931 | + Storable | 66788.280 | 265462.278 | + --------------+------------+------------+ + +That is, JSON::XS is almost six times faster than JSON::DWIW on encoding, +about five times faster on decoding, and over thirty to seventy times +faster than JSON's pure perl implementation. It also compares favourably +to Storable for small amounts of data. + +Using a longer test string (roughly 18KB, generated from Yahoo! Locals +search API (L). + + module | encode | decode | + --------------|------------|------------| + JSON::DWIW/DS | 1647.927 | 2673.916 | + JSON::DWIW/FJ | 1630.249 | 2596.128 | + JSON::PP | 400.640 | 62.311 | + JSON::Syck | 1481.040 | 1524.869 | + JSON::XS | 20661.596 | 9541.183 | + JSON::XS/2 | 10683.403 | 9416.938 | + JSON::XS/3 | 20661.596 | 9400.054 | + Storable | 19765.806 | 10000.725 | + --------------+------------+------------+ + +Again, JSON::XS leads by far (except for Storable which non-surprisingly +decodes a bit faster). + +On large strings containing lots of high Unicode characters, some modules +(such as JSON::PC) seem to decode faster than JSON::XS, but the result +will be broken due to missing (or wrong) Unicode handling. Others refuse +to decode or encode properly, so it was impossible to prepare a fair +comparison table for that case. + + +=head1 SECURITY CONSIDERATIONS + +When you are using JSON in a protocol, talking to untrusted potentially +hostile creatures requires relatively few measures. + +First of all, your JSON decoder should be secure, that is, should not have +any buffer overflows. Obviously, this module should ensure that and I am +trying hard on making that true, but you never know. + +Second, you need to avoid resource-starving attacks. That means you should +limit the size of JSON texts you accept, or make sure then when your +resources run out, that's just fine (e.g. by using a separate process that +can crash safely). The size of a JSON text in octets or characters is +usually a good indication of the size of the resources required to decode +it into a Perl structure. While JSON::XS can check the size of the JSON +text, it might be too late when you already have it in memory, so you +might want to check the size before you accept the string. + +Third, JSON::XS recurses using the C stack when decoding objects and +arrays. The C stack is a limited resource: for instance, on my amd64 +machine with 8MB of stack size I can decode around 180k nested arrays but +only 14k nested JSON objects (due to perl itself recursing deeply on croak +to free the temporary). If that is exceeded, the program crashes. To be +conservative, the default nesting limit is set to 512. If your process +has a smaller stack, you should adjust this setting accordingly with the +C method. + +Something else could bomb you, too, that I forgot to think of. In that +case, you get to keep the pieces. I am always open for hints, though... + +Also keep in mind that JSON::XS might leak contents of your Perl data +structures in its error messages, so when you serialise sensitive +information you might want to make sure that exceptions thrown by JSON::XS +will not end up in front of untrusted eyes. 
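+
+Putting the above together, a defensively configured decoder for untrusted
+input could look like the following sketch (my own illustration, not from
+the original text; the concrete limits and the C<$untrusted_octets>
+variable are arbitrary assumptions - pick values that fit your protocol):
+
+   use JSON::XS;
+
+   my $safe_json = JSON::XS->new
+      ->utf8
+      ->max_depth (32)         # well below the default of 512
+      ->max_size  (1 << 20);   # refuse JSON texts larger than 1 MiB
+
+   my $data = eval { $safe_json->decode ($untrusted_octets) };
+   defined $data
+      or die "rejected untrusted input: $@";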
+
+If you are using JSON::XS to return packets for consumption
+by JavaScript scripts in a browser you should have a look at
+L to
+see whether you are vulnerable to some common attack vectors (which really
+are browser design bugs, but it is still you who will have to deal with
+it, as major browser developers care only for features, not about getting
+security right).
+
+
+=head1 "OLD" VS. "NEW" JSON (RFC 4627 VS. RFC 7159)
+
+TL;DR: Due to security concerns, JSON::XS will not allow scalar data in
+JSON texts by default - you need to create your own JSON::XS object and
+enable C<allow_nonref>:
+
+   my $json = JSON::XS->new->allow_nonref;
+
+   $text = $json->encode ($data);
+   $data = $json->decode ($text);
+
+The long version: JSON being an important and supposedly stable format,
+the IETF standardised it as RFC 4627 in 2006. Unfortunately, the inventor
+of JSON, Douglas Crockford, unilaterally changed the definition of JSON in
+javascript. Rather than create a fork, the IETF decided to standardise the
+new syntax (apparently, so I was told, without finding it very amusing).
+
+The biggest difference between the original JSON and the new JSON is that
+the new JSON supports scalars (anything other than arrays and objects) at
+the toplevel of a JSON text. While this is strictly backwards compatible
+with older versions, it breaks a number of protocols that relied on sending
+JSON back-to-back, and is a minor security concern.
+
+For example, imagine you have two banks communicating, and on one side,
+the JSON coder gets upgraded. Two messages, such as C<10> and C<1000>
+might then be confused to mean C<101000>, something that couldn't happen
+in the original JSON, because neither of these messages would be valid
+JSON.
+
+If one side accepts these messages, then an upgrade in the coder on either
+side could result in this becoming exploitable.
+
+This module has always allowed these messages as an optional extension, by
+default disabled. The security concerns are the reason why the default is
+still disabled, but future versions might/will likely upgrade to the newer
+RFC as default format, so you are advised to check your implementation
+and/or override the default with C<< ->allow_nonref (0) >> to ensure that
+future versions are safe.
+
+
+=head1 INTEROPERABILITY WITH OTHER MODULES
+
+C<JSON::XS> uses the L<Types::Serialiser> module to provide boolean
+constants. That means that the JSON true and false values will be
+compatible with true and false values of other modules that do the same,
+such as L and L.
+
+
+=head1 INTEROPERABILITY WITH OTHER JSON DECODERS
+
+As long as you only serialise data that can be directly expressed in JSON,
+C<JSON::XS> is incapable of generating invalid JSON output (modulo bugs,
+but C<JSON::XS> has found more bugs in the official JSON testsuite (1)
+than the official JSON testsuite has found in C<JSON::XS> (0)).
+
+When you have trouble decoding JSON generated by this module using other
+decoders, then it is very likely that you have an encoding mismatch or the
+other decoder is broken.
+
+When decoding, C<JSON::XS> is strict by default and will likely catch all
+errors. There are currently two settings that change this: C<relaxed>
+makes C<JSON::XS> accept (but not generate) some non-standard extensions,
+and C<allow_tags> will allow you to encode and decode Perl objects, at the
+cost of not outputting valid JSON anymore.
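+
+Since C<relaxed> was just mentioned, here is a quick sketch of my own (not
+from the original text) showing the kind of input it accepts:
+
+   use JSON::XS;
+
+   my $json = JSON::XS->new->relaxed;
+
+   # shell-style comments and the trailing comma are fine in relaxed
+   # mode, but would be parse errors with the default settings
+   my $cfg = $json->decode (q{
+      {
+         "host" : "localhost",   # where to connect to
+         "port" : 8080,
+      }
+   });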
+
+=head2 TAGGED VALUE SYNTAX AND STANDARD JSON EN/DECODERS
+
+When you use C<allow_tags> to use the extended (and also nonstandard and
+invalid) JSON syntax for serialised objects, and you still want to decode
+the generated JSON texts with a standard JSON decoder, you can run a regex
+to replace the tagged syntax by standard JSON arrays (it only works for
+"normal" package names without comma, newlines or single colons). First,
+the readable Perl version:
+
+   # if your FREEZE methods return no values, you need this replace first:
+   $json =~ s/\( \s* (" (?: [^\\":,]+|\\.|::)* ") \s* \) \s* \[\s*\]/[$1]/gx;
+
+   # this works for non-empty constructor arg lists:
+   $json =~ s/\( \s* (" (?: [^\\":,]+|\\.|::)* ") \s* \) \s* \[/[$1,/gx;
+
+And here is a less readable version that is easy to adapt to other
+languages:
+
+   $json =~ s/\(\s*("([^\\":,]+|\\.|::)*")\s*\)\s*\[/[$1,/g;
+
+Here is an ECMAScript version (same regex):
+
+   json = json.replace (/\(\s*("([^\\":,]+|\\.|::)*")\s*\)\s*\[/g, "[$1,");
+
+Since this syntax converts to standard JSON arrays, it might be hard to
+distinguish serialised objects from normal arrays. You can prepend a
+"magic number" as first array element to reduce chances of a collision:
+
+   $json =~ s/\(\s*("([^\\":,]+|\\.|::)*")\s*\)\s*\[/["XU1peReLzT4ggEllLanBYq4G9VzliwKF",$1,/g;
+
+And after decoding the JSON text, you could walk the data
+structure looking for arrays with a first element of
+C<XU1peReLzT4ggEllLanBYq4G9VzliwKF>.
+
+The same approach can be used to create the tagged format with another
+encoder. First, you create an array with the magic string as first member,
+the classname as second, and constructor arguments last, encode it as part
+of your JSON structure, and then:
+
+   $json =~ s/\[\s*"XU1peReLzT4ggEllLanBYq4G9VzliwKF"\s*,\s*("([^\\":,]+|\\.|::)*")\s*,/($1)[/g;
+
+Again, this has some limitations - the magic string must not be encoded
+with character escapes, and the constructor arguments must be non-empty.
+
+
+=head1 RFC7159
+
+Since this module was written, Google has written a new JSON RFC, RFC 7159
+(and RFC 7158). Unfortunately, this RFC breaks compatibility with both the
+original JSON specification on www.json.org and RFC 4627.
+
+As far as I can see, you can get partial compatibility when parsing by
+using C<< ->allow_nonref >>. However, consider the security implications
+of doing so.
+
+I haven't decided yet when to break compatibility with RFC 4627 by default
+(and potentially leave applications insecure) and change the default to
+follow RFC 7159, but application authors are well advised to call C<<
+->allow_nonref(0) >> even if this is the current default, if they cannot
+handle non-reference values, in preparation for the day when the default
+will change.
+
+
+=head1 (I-)THREADS
+
+This module is I<not> guaranteed to be ithread (or MULTIPLICITY-) safe
+and there are no plans to change this. Note that perl's builtin so-called
+threads/ithreads are officially deprecated and should not be used.
+
+
+=head1 THE PERILS OF SETLOCALE
+
+Sometimes people avoid the Perl locale support and directly call the
+system's setlocale function with C<LC_ALL>.
+
+This breaks both perl and modules such as JSON::XS, as stringification of
+numbers no longer works correctly (e.g. C<$x = 0.1; print "$x"+1> might
+print C<1>, and JSON::XS might output illegal JSON as JSON::XS relies on
+perl to stringify numbers).
+
+The solution is simple: don't call C<setlocale>, or use it only for those
+categories you need, such as C<LC_TIME> or C<LC_CTYPE>.
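+
+If some code really does need a locale-dependent numeric format
+temporarily, one way to confine the damage is to save and restore the
+locale around it (a sketch of my own using the core POSIX module; the
+locale name and the formatting function are made-up placeholders):
+
+   use POSIX qw(setlocale LC_NUMERIC);
+
+   my $old = setlocale (LC_NUMERIC);          # remember the current locale
+   setlocale (LC_NUMERIC, "de_DE.UTF-8");     # hypothetical locale name
+   my $pretty = format_for_humans ($number);  # hypothetical locale-dependent code
+   setlocale (LC_NUMERIC, $old);              # restore before calling JSON::XS again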
+ +If you need C, you should enable it only around the code that +actually needs it (avoiding stringification of numbers), and restore it +afterwards. + + +=head1 BUGS + +While the goal of this module is to be correct, that unfortunately does +not mean it's bug-free, only that I think its design is bug-free. If you +keep reporting bugs they will be fixed swiftly, though. + +Please refrain from using rt.cpan.org or any other bug reporting +service. I put the contact address into my modules for a reason. + +=cut + +BEGIN { + *true = \$Types::Serialiser::true; + *true = \&Types::Serialiser::true; + *false = \$Types::Serialiser::false; + *false = \&Types::Serialiser::false; + *is_bool = \&Types::Serialiser::is_bool; + + *JSON::XS::Boolean:: = *Types::Serialiser::Boolean::; +} + +XSLoader::load "JSON::XS", $VERSION; + +=head1 SEE ALSO + +The F command line utility for quick experiments. + +=head1 AUTHOR + + Marc Lehmann + http://home.schmorp.de/ + +=cut + +1 + diff --git a/XS.xs b/XS.xs new file mode 100644 index 0000000..9c9c3cf --- /dev/null +++ b/XS.xs @@ -0,0 +1,2298 @@ +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__BORLANDC__) || defined(_MSC_VER) +# define snprintf _snprintf // C compilers have this in stdio.h +#endif + +// some old perls do not have this, try to make it work, no +// guarantees, though. if it breaks, you get to keep the pieces. +#ifndef UTF8_MAXBYTES +# define UTF8_MAXBYTES 13 +#endif + +// compatibility with perl <5.18 +#ifndef HvNAMELEN_get +# define HvNAMELEN_get(hv) strlen (HvNAME (hv)) +#endif +#ifndef HvNAMELEN +# define HvNAMELEN(hv) HvNAMELEN_get (hv) +#endif +#ifndef HvNAMEUTF8 +# define HvNAMEUTF8(hv) 0 +#endif + +// three extra for rounding, sign, and end of string +#define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 3) + +#define F_ASCII 0x00000001UL +#define F_LATIN1 0x00000002UL +#define F_UTF8 0x00000004UL +#define F_INDENT 0x00000008UL +#define F_CANONICAL 0x00000010UL +#define F_SPACE_BEFORE 0x00000020UL +#define F_SPACE_AFTER 0x00000040UL +#define F_ALLOW_NONREF 0x00000100UL +#define F_SHRINK 0x00000200UL +#define F_ALLOW_BLESSED 0x00000400UL +#define F_CONV_BLESSED 0x00000800UL +#define F_RELAXED 0x00001000UL +#define F_ALLOW_UNKNOWN 0x00002000UL +#define F_ALLOW_TAGS 0x00004000UL +#define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing + +#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER + +#define INIT_SIZE 32 // initial scalar size to be allocated +#define INDENT_STEP 3 // spaces per indentation level + +#define SHORT_STRING_LEN 16384 // special-case strings of up to this size + +#define DECODE_WANTS_OCTETS(json) ((json)->flags & F_UTF8) + +#define SB do { +#define SE } while (0) + +#if __GNUC__ >= 3 +# define expect(expr,value) __builtin_expect ((expr), (value)) +# define INLINE static inline +#else +# define expect(expr,value) (expr) +# define INLINE static +#endif + +#define expect_false(expr) expect ((expr) != 0, 0) +#define expect_true(expr) expect ((expr) != 0, 1) + +#define IN_RANGE_INC(type,val,beg,end) \ + ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ + <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) + +#define ERR_NESTING_EXCEEDED "json text or perl structure exceeds maximum nesting level (max_depth set too low?)" + +#ifdef USE_ITHREADS +# define JSON_SLOW 1 +# define JSON_STASH (json_stash ? json_stash : gv_stashpv ("JSON::XS", 1)) +# define BOOL_STASH (bool_stash ? 
bool_stash : gv_stashpv ("Types::Serialiser::Boolean", 1)) +#else +# define JSON_SLOW 0 +# define JSON_STASH json_stash +# define BOOL_STASH bool_stash +#endif + +// the amount of HEs to allocate on the stack, when sorting keys +#define STACK_HES 64 + +static HV *json_stash, *bool_stash; // JSON::XS::, Types::Serialiser::Boolean:: +static SV *bool_true, *bool_false, *sv_json; + +enum { + INCR_M_WS = 0, // initial whitespace skipping, must be 0 + INCR_M_STR, // inside string + INCR_M_BS, // inside backslash + INCR_M_C0, // inside comment in initial whitespace sequence + INCR_M_C1, // inside comment in other places + INCR_M_JSON // outside anything, count nesting +}; + +#define INCR_DONE(json) ((json)->incr_nest <= 0 && (json)->incr_mode == INCR_M_JSON) + +typedef struct { + U32 flags; + U32 max_depth; + STRLEN max_size; + + SV *cb_object; + HV *cb_sk_object; + + // for the incremental parser + SV *incr_text; // the source text so far + STRLEN incr_pos; // the current offset into the text + int incr_nest; // {[]}-nesting level + unsigned char incr_mode; +} JSON; + +INLINE void +json_init (JSON *json) +{ + Zero (json, 1, JSON); + json->max_depth = 512; +} + +///////////////////////////////////////////////////////////////////////////// +// utility functions + +INLINE SV * +get_bool (const char *name) +{ + SV *sv = get_sv (name, 1); + + SvREADONLY_on (sv); + SvREADONLY_on (SvRV (sv)); + + return sv; +} + +INLINE void +shrink (SV *sv) +{ + sv_utf8_downgrade (sv, 1); + + if (SvLEN (sv) > SvCUR (sv) + 1) + { +#ifdef SvPV_shrink_to_cur + SvPV_shrink_to_cur (sv); +#elif defined (SvPV_renew) + SvPV_renew (sv, SvCUR (sv) + 1); +#endif + } +} + +/* adds two STRLENs together, slow, and with paranoia */ +STRLEN +strlen_sum (STRLEN l1, STRLEN l2) +{ + size_t sum = l1 + l2; + + if (sum < (size_t)l2 || sum != (size_t)(STRLEN)sum) + croak ("JSON::XS: string size overflow"); + + return sum; +} + +/* similar to SvGROW, but somewhat safer and guarantees exponential realloc strategy */ +static char * +json_sv_grow (SV *sv, size_t len1, size_t len2) +{ + len1 = strlen_sum (len1, len2); + len1 = strlen_sum (len1, len1 >> 1); + + if (len1 > 4096 - 24) + len1 = (len1 | 4095) - 24; + + return SvGROW (sv, len1); +} + +// decode an utf-8 character and return it, or (UV)-1 in +// case of an error. +// we special-case "safe" characters from U+80 .. U+7FF, +// but use the very good perl function to parse anything else. 
+// note that we never call this function for a ascii codepoints +INLINE UV +decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) +{ + if (expect_true (len >= 2 + && IN_RANGE_INC (char, s[0], 0xc2, 0xdf) + && IN_RANGE_INC (char, s[1], 0x80, 0xbf))) + { + *clen = 2; + return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); + } + else + return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); +} + +// likewise for encoding, also never called for ascii codepoints +// this function takes advantage of this fact, although current gccs +// seem to optimise the check for >= 0x80 away anyways +INLINE unsigned char * +encode_utf8 (unsigned char *s, UV ch) +{ + if (expect_false (ch < 0x000080)) + *s++ = ch; + else if (expect_true (ch < 0x000800)) + *s++ = 0xc0 | ( ch >> 6), + *s++ = 0x80 | ( ch & 0x3f); + else if ( ch < 0x010000) + *s++ = 0xe0 | ( ch >> 12), + *s++ = 0x80 | ((ch >> 6) & 0x3f), + *s++ = 0x80 | ( ch & 0x3f); + else if ( ch < 0x110000) + *s++ = 0xf0 | ( ch >> 18), + *s++ = 0x80 | ((ch >> 12) & 0x3f), + *s++ = 0x80 | ((ch >> 6) & 0x3f), + *s++ = 0x80 | ( ch & 0x3f); + + return s; +} + +// convert offset pointer to character index, sv must be string +static STRLEN +ptr_to_index (SV *sv, char *offset) +{ + return SvUTF8 (sv) + ? utf8_distance (offset, SvPVX (sv)) + : offset - SvPVX (sv); +} + +///////////////////////////////////////////////////////////////////////////// +// fp hell + +// scan a group of digits, and a trailing exponent +static void +json_atof_scan1 (const char *s, NV *accum, int *expo, int postdp, int maxdepth) +{ + UV uaccum = 0; + int eaccum = 0; + + // if we recurse too deep, skip all remaining digits + // to avoid a stack overflow attack + if (expect_false (--maxdepth <= 0)) + while (((U8)*s - '0') < 10) + ++s; + + for (;;) + { + U8 dig = (U8)*s - '0'; + + if (expect_false (dig >= 10)) + { + if (dig == (U8)((U8)'.' - (U8)'0')) + { + ++s; + json_atof_scan1 (s, accum, expo, 1, maxdepth); + } + else if ((dig | ' ') == 'e' - '0') + { + int exp2 = 0; + int neg = 0; + + ++s; + + if (*s == '-') + { + ++s; + neg = 1; + } + else if (*s == '+') + ++s; + + while ((dig = (U8)*s - '0') < 10) + exp2 = exp2 * 10 + *s++ - '0'; + + *expo += neg ? -exp2 : exp2; + } + + break; + } + + ++s; + + uaccum = uaccum * 10 + dig; + ++eaccum; + + // if we have too many digits, then recurse for more + // we actually do this for rather few digits + if (uaccum >= (UV_MAX - 9) / 10) + { + if (postdp) *expo -= eaccum; + json_atof_scan1 (s, accum, expo, postdp, maxdepth); + if (postdp) *expo += eaccum; + + break; + } + } + + // this relies greatly on the quality of the pow () + // implementation of the platform, but a good + // implementation is hard to beat. + // (IEEE 754 conformant ones are required to be exact) + if (postdp) *expo -= eaccum; + *accum += uaccum * Perl_pow (10., *expo); + *expo += eaccum; +} + +static NV +json_atof (const char *s) +{ + NV accum = 0.; + int expo = 0; + int neg = 0; + + if (*s == '-') + { + ++s; + neg = 1; + } + + // a recursion depth of ten gives us >>500 bits + json_atof_scan1 (s, &accum, &expo, 0, 10); + + return neg ? -accum : accum; +} + +// target of scalar reference is bool? -1 == nope, 0 == false, 1 == true +static int +ref_bool_type (SV *sv) +{ + svtype svt = SvTYPE (sv); + + if (svt < SVt_PVAV) + { + STRLEN len = 0; + char *pv = svt ? 
SvPV (sv, len) : 0; + + if (len == 1) + if (*pv == '1') + return 1; + else if (*pv == '0') + return 0; + } + + return -1; +} + +// returns whether scalar is not a reference in the sense of allow_nonref +static int +json_nonref (SV *scalar) +{ + if (!SvROK (scalar)) + return 1; + + scalar = SvRV (scalar); + + if (SvTYPE (scalar) >= SVt_PVMG) + { + if (SvSTASH (scalar) == bool_stash) + return 1; + + if (!SvOBJECT (scalar) && ref_bool_type (scalar) >= 0) + return 1; + } + + return 0; +} + +///////////////////////////////////////////////////////////////////////////// +// encoder + +// structure used for encoding JSON +typedef struct +{ + char *cur; // SvPVX (sv) + current output position + char *end; // SvEND (sv) + SV *sv; // result scalar + JSON json; + U32 indent; // indentation level + UV limit; // escape character values >= this value when encoding +} enc_t; + +INLINE void +need (enc_t *enc, STRLEN len) +{ + if (expect_false ((uintptr_t)(enc->end - enc->cur) < len)) + { + STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); + char *buf = json_sv_grow (enc->sv, cur, len); + enc->cur = buf + cur; + enc->end = buf + SvLEN (enc->sv) - 1; + } +} + +INLINE void +encode_ch (enc_t *enc, char ch) +{ + need (enc, 1); + *enc->cur++ = ch; +} + +static void +encode_str (enc_t *enc, char *str, STRLEN len, int is_utf8) +{ + char *end = str + len; + + need (enc, len); + + while (str < end) + { + unsigned char ch = *(unsigned char *)str; + + if (expect_true (ch >= 0x20 && ch < 0x80)) // most common case + { + if (expect_false (ch == '"')) // but with slow exceptions + { + need (enc, len + 1); + *enc->cur++ = '\\'; + *enc->cur++ = '"'; + } + else if (expect_false (ch == '\\')) + { + need (enc, len + 1); + *enc->cur++ = '\\'; + *enc->cur++ = '\\'; + } + else + *enc->cur++ = ch; + + ++str; + } + else + { + switch (ch) + { + case '\010': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break; + case '\011': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break; + case '\012': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break; + case '\014': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break; + case '\015': need (enc, len + 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break; + + default: + { + STRLEN clen; + UV uch; + + if (is_utf8) + { + uch = decode_utf8 (str, end - str, &clen); + if (clen == (STRLEN)-1) + croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str); + } + else + { + uch = ch; + clen = 1; + } + + if (uch < 0x80/*0x20*/ || uch >= enc->limit) + { + if (uch >= 0x10000UL) + { + if (uch >= 0x110000UL) + croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); + + need (enc, len + 11); + sprintf (enc->cur, "\\u%04x\\u%04x", + (int)((uch - 0x10000) / 0x400 + 0xD800), + (int)((uch - 0x10000) % 0x400 + 0xDC00)); + enc->cur += 12; + } + else + { + need (enc, len + 5); + *enc->cur++ = '\\'; + *enc->cur++ = 'u'; + *enc->cur++ = PL_hexdigit [ uch >> 12 ]; + *enc->cur++ = PL_hexdigit [(uch >> 8) & 15]; + *enc->cur++ = PL_hexdigit [(uch >> 4) & 15]; + *enc->cur++ = PL_hexdigit [(uch >> 0) & 15]; + } + + str += clen; + } + else if (enc->json.flags & F_LATIN1) + { + *enc->cur++ = uch; + str += clen; + } + else if (is_utf8) + { + need (enc, len + clen); + do + { + *enc->cur++ = *str++; + } + while (--clen); + } + else + { + need (enc, len + UTF8_MAXBYTES - 1); // never more than 11 bytes needed + enc->cur = encode_utf8 (enc->cur, uch); + ++str; + } + } 
+ } + } + + --len; + } +} + +INLINE void +encode_indent (enc_t *enc) +{ + if (enc->json.flags & F_INDENT) + { + int spaces = enc->indent * INDENT_STEP; + + need (enc, spaces); + memset (enc->cur, ' ', spaces); + enc->cur += spaces; + } +} + +INLINE void +encode_space (enc_t *enc) +{ + need (enc, 1); + encode_ch (enc, ' '); +} + +INLINE void +encode_nl (enc_t *enc) +{ + if (enc->json.flags & F_INDENT) + { + need (enc, 1); + encode_ch (enc, '\n'); + } +} + +INLINE void +encode_comma (enc_t *enc) +{ + encode_ch (enc, ','); + + if (enc->json.flags & F_INDENT) + encode_nl (enc); + else if (enc->json.flags & F_SPACE_AFTER) + encode_space (enc); +} + +static void encode_sv (enc_t *enc, SV *sv); + +static void +encode_av (enc_t *enc, AV *av) +{ + int i, len = av_len (av); + + if (enc->indent >= enc->json.max_depth) + croak (ERR_NESTING_EXCEEDED); + + encode_ch (enc, '['); + + if (len >= 0) + { + encode_nl (enc); ++enc->indent; + + for (i = 0; i <= len; ++i) + { + SV **svp = av_fetch (av, i, 0); + + encode_indent (enc); + + if (svp) + encode_sv (enc, *svp); + else + encode_str (enc, "null", 4, 0); + + if (i < len) + encode_comma (enc); + } + + encode_nl (enc); --enc->indent; encode_indent (enc); + } + + encode_ch (enc, ']'); +} + +static void +encode_hk (enc_t *enc, HE *he) +{ + encode_ch (enc, '"'); + + if (HeKLEN (he) == HEf_SVKEY) + { + SV *sv = HeSVKEY (he); + STRLEN len; + char *str; + + SvGETMAGIC (sv); + str = SvPV (sv, len); + + encode_str (enc, str, len, SvUTF8 (sv)); + } + else + encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he)); + + encode_ch (enc, '"'); + + if (enc->json.flags & F_SPACE_BEFORE) encode_space (enc); + encode_ch (enc, ':'); + if (enc->json.flags & F_SPACE_AFTER ) encode_space (enc); +} + +// compare hash entries, used when all keys are bytestrings +static int +he_cmp_fast (const void *a_, const void *b_) +{ + int cmp; + + HE *a = *(HE **)a_; + HE *b = *(HE **)b_; + + STRLEN la = HeKLEN (a); + STRLEN lb = HeKLEN (b); + + if (!(cmp = memcmp (HeKEY (b), HeKEY (a), lb < la ? lb : la))) + cmp = lb - la; + + return cmp; +} + +// compare hash entries, used when some keys are sv's or utf-x +static int +he_cmp_slow (const void *a, const void *b) +{ + return sv_cmp (HeSVKEY_force (*(HE **)b), HeSVKEY_force (*(HE **)a)); +} + +static void +encode_hv (enc_t *enc, HV *hv) +{ + HE *he; + + if (enc->indent >= enc->json.max_depth) + croak (ERR_NESTING_EXCEEDED); + + encode_ch (enc, '{'); + + // for canonical output we have to sort by keys first + // actually, this is mostly due to the stupid so-called + // security workaround added somewhere in 5.8.x + // that randomises hash orderings + if (enc->json.flags & F_CANONICAL && !SvRMAGICAL (hv)) + { + int count = hv_iterinit (hv); + + if (SvMAGICAL (hv)) + { + // need to count by iterating. could improve by dynamically building the vector below + // but I don't care for the speed of this special case. + // note also that we will run into undefined behaviour when the two iterations + // do not result in the same count, something I might care for in some later release. 
+ + count = 0; + while (hv_iternext (hv)) + ++count; + + hv_iterinit (hv); + } + + if (count) + { + int i, fast = 1; + HE *hes_stack [STACK_HES]; + HE **hes = hes_stack; + + // allocate larger arrays on the heap + if (count > STACK_HES) + { + SV *sv = sv_2mortal (NEWSV (0, count * sizeof (*hes))); + hes = (HE **)SvPVX (sv); + } + + i = 0; + while ((he = hv_iternext (hv))) + { + hes [i++] = he; + if (HeKLEN (he) < 0 || HeKUTF8 (he)) + fast = 0; + } + + assert (i == count); + + if (fast) + qsort (hes, count, sizeof (HE *), he_cmp_fast); + else + { + // hack to forcefully disable "use bytes" + COP cop = *PL_curcop; + cop.op_private = 0; + + ENTER; + SAVETMPS; + + SAVEVPTR (PL_curcop); + PL_curcop = &cop; + + qsort (hes, count, sizeof (HE *), he_cmp_slow); + + FREETMPS; + LEAVE; + } + + encode_nl (enc); ++enc->indent; + + while (count--) + { + encode_indent (enc); + he = hes [count]; + encode_hk (enc, he); + encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he)); + + if (count) + encode_comma (enc); + } + + encode_nl (enc); --enc->indent; encode_indent (enc); + } + } + else + { + if (hv_iterinit (hv) || SvMAGICAL (hv)) + if ((he = hv_iternext (hv))) + { + encode_nl (enc); ++enc->indent; + + for (;;) + { + encode_indent (enc); + encode_hk (enc, he); + encode_sv (enc, expect_false (SvMAGICAL (hv)) ? hv_iterval (hv, he) : HeVAL (he)); + + if (!(he = hv_iternext (hv))) + break; + + encode_comma (enc); + } + + encode_nl (enc); --enc->indent; encode_indent (enc); + } + } + + encode_ch (enc, '}'); +} + +// encode objects, arrays and special \0=false and \1=true values. +static void +encode_rv (enc_t *enc, SV *sv) +{ + svtype svt; + GV *method; + + SvGETMAGIC (sv); + svt = SvTYPE (sv); + + if (expect_false (SvOBJECT (sv))) + { + HV *stash = SvSTASH (sv); + + if (stash == bool_stash) + { + if (SvIV (sv)) + encode_str (enc, "true", 4, 0); + else + encode_str (enc, "false", 5, 0); + } + else if ((enc->json.flags & F_ALLOW_TAGS) && (method = gv_fetchmethod_autoload (stash, "FREEZE", 0))) + { + int count; + dSP; + + ENTER; SAVETMPS; + SAVESTACK_POS (); + PUSHMARK (SP); + EXTEND (SP, 2); + // we re-bless the reference to get overload and other niceties right + PUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); + PUSHs (sv_json); + + PUTBACK; + count = call_sv ((SV *)GvCV (method), G_ARRAY); + SPAGAIN; + + // catch this surprisingly common error + if (SvROK (TOPs) && SvRV (TOPs) == sv) + croak ("%s::FREEZE method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); + + encode_ch (enc, '('); + encode_ch (enc, '"'); + encode_str (enc, HvNAME (stash), HvNAMELEN (stash), HvNAMEUTF8 (stash)); + encode_ch (enc, '"'); + encode_ch (enc, ')'); + encode_ch (enc, '['); + + while (count) + { + encode_sv (enc, SP[1 - count--]); + + if (count) + encode_ch (enc, ','); + } + + encode_ch (enc, ']'); + + FREETMPS; LEAVE; + } + else if ((enc->json.flags & F_CONV_BLESSED) && (method = gv_fetchmethod_autoload (stash, "TO_JSON", 0))) + { + dSP; + + ENTER; SAVETMPS; + PUSHMARK (SP); + // we re-bless the reference to get overload and other niceties right + XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), stash)); + + // calling with G_SCALAR ensures that we always get a 1 return value + PUTBACK; + call_sv ((SV *)GvCV (method), G_SCALAR); + SPAGAIN; + + // catch this surprisingly common error + if (SvROK (TOPs) && SvRV (TOPs) == sv) + croak ("%s::TO_JSON method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); + + sv = POPs; + PUTBACK; + + 
encode_sv (enc, sv); + + FREETMPS; LEAVE; + } + else if (enc->json.flags & F_ALLOW_BLESSED) + encode_str (enc, "null", 4, 0); + else + croak ("encountered object '%s', but neither allow_blessed, convert_blessed nor allow_tags settings are enabled (or TO_JSON/FREEZE method missing)", + SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + } + else if (svt == SVt_PVHV) + encode_hv (enc, (HV *)sv); + else if (svt == SVt_PVAV) + encode_av (enc, (AV *)sv); + else if (svt < SVt_PVAV) + { + int bool_type = ref_bool_type (sv); + + if (bool_type == 1) + encode_str (enc, "true", 4, 0); + else if (bool_type == 0) + encode_str (enc, "false", 5, 0); + else if (enc->json.flags & F_ALLOW_UNKNOWN) + encode_str (enc, "null", 4, 0); + else + croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1", + SvPV_nolen (sv_2mortal (newRV_inc (sv)))); + } + else if (enc->json.flags & F_ALLOW_UNKNOWN) + encode_str (enc, "null", 4, 0); + else + croak ("encountered %s, but JSON can only represent references to arrays or hashes", + SvPV_nolen (sv_2mortal (newRV_inc (sv)))); +} + +static void +encode_sv (enc_t *enc, SV *sv) +{ + SvGETMAGIC (sv); + + if (SvPOKp (sv)) + { + STRLEN len; + char *str = SvPV (sv, len); + encode_ch (enc, '"'); + encode_str (enc, str, len, SvUTF8 (sv)); + encode_ch (enc, '"'); + } + else if (SvNOKp (sv)) + { + // trust that perl will do the right thing w.r.t. JSON syntax. + need (enc, NV_DIG + 32); + Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur); + enc->cur += strlen (enc->cur); + } + else if (SvIOKp (sv)) + { + // we assume we can always read an IV as a UV and vice versa + // we assume two's complement + // we assume no aliasing issues in the union + if (SvIsUV (sv) ? SvUVX (sv) <= 59000 + : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000) + { + // optimise the "small number case" + // code will likely be branchless and use only a single multiplication + // works for numbers up to 59074 + I32 i = SvIVX (sv); + U32 u; + char digit, nz = 0; + + need (enc, 6); + + *enc->cur = '-'; enc->cur += i < 0 ? 1 : 0; + u = i < 0 ? -i : i; + + // convert to 4.28 fixed-point representation + u = u * ((0xfffffff + 10000) / 10000); // 10**5, 5 fractional digits + + // now output digit by digit, each time masking out the integer part + // and multiplying by 5 while moving the decimal point one to the right, + // resulting in a net multiplication by 10. + // we always write the digit to memory but conditionally increment + // the pointer, to enable the use of conditional move instructions. + digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5; + digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5; + digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5; + digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5; + digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' + } + else + { + // large integer, use the (rather slow) snprintf way. + need (enc, IVUV_MAXCHARS); + enc->cur += + SvIsUV(sv) + ? 
snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv)) + : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv)); + } + } + else if (SvROK (sv)) + encode_rv (enc, SvRV (sv)); + else if (!SvOK (sv) || enc->json.flags & F_ALLOW_UNKNOWN) + encode_str (enc, "null", 4, 0); + else + croak ("encountered perl type (%s,0x%x) that JSON cannot handle, check your input data", + SvPV_nolen (sv), (unsigned int)SvFLAGS (sv)); +} + +static SV * +encode_json (SV *scalar, JSON *json) +{ + enc_t enc; + + if (!(json->flags & F_ALLOW_NONREF) && json_nonref (scalar)) + croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)"); + + enc.json = *json; + enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); + enc.cur = SvPVX (enc.sv); + enc.end = SvEND (enc.sv); + enc.indent = 0; + enc.limit = enc.json.flags & F_ASCII ? 0x000080UL + : enc.json.flags & F_LATIN1 ? 0x000100UL + : 0x110000UL; + + SvPOK_only (enc.sv); + encode_sv (&enc, scalar); + encode_nl (&enc); + + SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); + *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings + + if (!(enc.json.flags & (F_ASCII | F_LATIN1 | F_UTF8))) + SvUTF8_on (enc.sv); + + if (enc.json.flags & F_SHRINK) + shrink (enc.sv); + + return enc.sv; +} + +///////////////////////////////////////////////////////////////////////////// +// decoder + +// structure used for decoding JSON +typedef struct +{ + char *cur; // current parser pointer + char *end; // end of input string + const char *err; // parse error, if != 0 + JSON json; + U32 depth; // recursion depth + U32 maxdepth; // recursion depth limit +} dec_t; + +INLINE void +decode_comment (dec_t *dec) +{ + // only '#'-style comments allowed a.t.m. + + while (*dec->cur && *dec->cur != 0x0a && *dec->cur != 0x0d) + ++dec->cur; +} + +INLINE void +decode_ws (dec_t *dec) +{ + for (;;) + { + char ch = *dec->cur; + + if (ch > 0x20) + { + if (expect_false (ch == '#')) + { + if (dec->json.flags & F_RELAXED) + decode_comment (dec); + else + break; + } + else + break; + } + else if (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09) + break; // parse error, but let higher level handle it, gives better error messages + + ++dec->cur; + } +} + +#define ERR(reason) SB dec->err = reason; goto fail; SE + +#define EXPECT_CH(ch) SB \ + if (*dec->cur != ch) \ + ERR (# ch " expected"); \ + ++dec->cur; \ + SE + +#define DEC_INC_DEPTH if (++dec->depth > dec->json.max_depth) ERR (ERR_NESTING_EXCEEDED) +#define DEC_DEC_DEPTH --dec->depth + +static SV *decode_sv (dec_t *dec); + +static signed char decode_hexdigit[256]; + +static UV +decode_4hex (dec_t *dec) +{ + signed char d1, d2, d3, d4; + unsigned char *cur = (unsigned char *)dec->cur; + + d1 = decode_hexdigit [cur [0]]; if (expect_false (d1 < 0)) ERR ("exactly four hexadecimal digits expected"); + d2 = decode_hexdigit [cur [1]]; if (expect_false (d2 < 0)) ERR ("exactly four hexadecimal digits expected"); + d3 = decode_hexdigit [cur [2]]; if (expect_false (d3 < 0)) ERR ("exactly four hexadecimal digits expected"); + d4 = decode_hexdigit [cur [3]]; if (expect_false (d4 < 0)) ERR ("exactly four hexadecimal digits expected"); + + dec->cur += 4; + + return ((UV)d1) << 12 + | ((UV)d2) << 8 + | ((UV)d3) << 4 + | ((UV)d4); + +fail: + return (UV)-1; +} + +static SV * +decode_str (dec_t *dec) +{ + SV *sv = 0; + int utf8 = 0; + char *dec_cur = dec->cur; + + do + { + char buf [SHORT_STRING_LEN + UTF8_MAXBYTES]; + char *cur = buf; + + do + { + unsigned char ch = *(unsigned char *)dec_cur++; + + if (expect_false (ch == 
'"')) + { + --dec_cur; + break; + } + else if (expect_false (ch == '\\')) + { + switch (*dec_cur) + { + case '\\': + case '/': + case '"': *cur++ = *dec_cur++; break; + + case 'b': ++dec_cur; *cur++ = '\010'; break; + case 't': ++dec_cur; *cur++ = '\011'; break; + case 'n': ++dec_cur; *cur++ = '\012'; break; + case 'f': ++dec_cur; *cur++ = '\014'; break; + case 'r': ++dec_cur; *cur++ = '\015'; break; + + case 'u': + { + UV lo, hi; + ++dec_cur; + + dec->cur = dec_cur; + hi = decode_4hex (dec); + dec_cur = dec->cur; + if (hi == (UV)-1) + goto fail; + + // possibly a surrogate pair + if (hi >= 0xd800) + if (hi < 0xdc00) + { + if (dec_cur [0] != '\\' || dec_cur [1] != 'u') + ERR ("missing low surrogate character in surrogate pair"); + + dec_cur += 2; + + dec->cur = dec_cur; + lo = decode_4hex (dec); + dec_cur = dec->cur; + if (lo == (UV)-1) + goto fail; + + if (lo < 0xdc00 || lo >= 0xe000) + ERR ("surrogate pair expected"); + + hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000; + } + else if (hi < 0xe000) + ERR ("missing high surrogate character in surrogate pair"); + + if (hi >= 0x80) + { + utf8 = 1; + + cur = encode_utf8 (cur, hi); + } + else + *cur++ = hi; + } + break; + + default: + --dec_cur; + ERR ("illegal backslash escape sequence in string"); + } + } + else if (expect_true (ch >= 0x20 && ch < 0x80)) + *cur++ = ch; + else if (ch >= 0x80) + { + STRLEN clen; + + --dec_cur; + + decode_utf8 (dec_cur, dec->end - dec_cur, &clen); + if (clen == (STRLEN)-1) + ERR ("malformed UTF-8 character in JSON string"); + + do + *cur++ = *dec_cur++; + while (--clen); + + utf8 = 1; + } + else if (ch == '\t' && dec->json.flags & F_RELAXED) + *cur++ = ch; + else + { + --dec_cur; + + if (!ch) + ERR ("unexpected end of string while parsing JSON string"); + else + ERR ("invalid character encountered while parsing JSON string"); + } + } + while (cur < buf + SHORT_STRING_LEN); + + { + STRLEN len = cur - buf; + + if (sv) + { + STRLEN cur = SvCUR (sv); + + if (SvLEN (sv) - cur <= len) + json_sv_grow (sv, cur, len); + + memcpy (SvPVX (sv) + SvCUR (sv), buf, len); + SvCUR_set (sv, SvCUR (sv) + len); + } + else + sv = newSVpvn (buf, len); + } + } + while (*dec_cur != '"'); + + ++dec_cur; + + if (sv) + { + SvPOK_only (sv); + *SvEND (sv) = 0; + + if (utf8) + SvUTF8_on (sv); + } + else + sv = newSVpvn ("", 0); + + dec->cur = dec_cur; + return sv; + +fail: + dec->cur = dec_cur; + return 0; +} + +static SV * +decode_num (dec_t *dec) +{ + int is_nv = 0; + char *start = dec->cur; + + // [minus] + if (*dec->cur == '-') + ++dec->cur; + + if (*dec->cur == '0') + { + ++dec->cur; + if (*dec->cur >= '0' && *dec->cur <= '9') + ERR ("malformed number (leading zero must not be followed by another digit)"); + } + else if (*dec->cur < '0' || *dec->cur > '9') + ERR ("malformed number (no digits after initial minus)"); + else + do + { + ++dec->cur; + } + while (*dec->cur >= '0' && *dec->cur <= '9'); + + // [frac] + if (*dec->cur == '.') + { + ++dec->cur; + + if (*dec->cur < '0' || *dec->cur > '9') + ERR ("malformed number (no digits after decimal point)"); + + do + { + ++dec->cur; + } + while (*dec->cur >= '0' && *dec->cur <= '9'); + + is_nv = 1; + } + + // [exp] + if (*dec->cur == 'e' || *dec->cur == 'E') + { + ++dec->cur; + + if (*dec->cur == '-' || *dec->cur == '+') + ++dec->cur; + + if (*dec->cur < '0' || *dec->cur > '9') + ERR ("malformed number (no digits after exp sign)"); + + do + { + ++dec->cur; + } + while (*dec->cur >= '0' && *dec->cur <= '9'); + + is_nv = 1; + } + + if (!is_nv) + { + int len = dec->cur - start; + + // 
special case the rather common 1..5-digit-int case + if (*start == '-') + switch (len) + { + case 2: return newSViv (-(IV)( start [1] - '0' * 1)); + case 3: return newSViv (-(IV)( start [1] * 10 + start [2] - '0' * 11)); + case 4: return newSViv (-(IV)( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); + case 5: return newSViv (-(IV)( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); + case 6: return newSViv (-(IV)(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); + } + else + switch (len) + { + case 1: return newSViv ( start [0] - '0' * 1); + case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); + case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); + case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); + case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); + } + + { + UV uv; + int numtype = grok_number (start, len, &uv); + if (numtype & IS_NUMBER_IN_UV) + if (numtype & IS_NUMBER_NEG) + { + if (uv < (UV)IV_MIN) + return newSViv (-(IV)uv); + } + else + return newSVuv (uv); + } + + len -= *start == '-' ? 1 : 0; + + // does not fit into IV or UV, try NV + if (len <= NV_DIG) + // fits into NV without loss of precision + return newSVnv (json_atof (start)); + + // everything else fails, convert it to a string + return newSVpvn (start, dec->cur - start); + } + + // loss of precision here + return newSVnv (json_atof (start)); + +fail: + return 0; +} + +static SV * +decode_av (dec_t *dec) +{ + AV *av = newAV (); + + DEC_INC_DEPTH; + decode_ws (dec); + + if (*dec->cur == ']') + ++dec->cur; + else + for (;;) + { + SV *value; + + value = decode_sv (dec); + if (!value) + goto fail; + + av_push (av, value); + + decode_ws (dec); + + if (*dec->cur == ']') + { + ++dec->cur; + break; + } + + if (*dec->cur != ',') + ERR (", or ] expected while parsing array"); + + ++dec->cur; + + decode_ws (dec); + + if (*dec->cur == ']' && dec->json.flags & F_RELAXED) + { + ++dec->cur; + break; + } + } + + DEC_DEC_DEPTH; + return newRV_noinc ((SV *)av); + +fail: + SvREFCNT_dec (av); + DEC_DEC_DEPTH; + return 0; +} + +static SV * +decode_hv (dec_t *dec) +{ + SV *sv; + HV *hv = newHV (); + + DEC_INC_DEPTH; + decode_ws (dec); + + if (*dec->cur == '}') + ++dec->cur; + else + for (;;) + { + EXPECT_CH ('"'); + + // heuristic: assume that + // a) decode_str + hv_store_ent are abysmally slow. + // b) most hash keys are short, simple ascii text. + // => try to "fast-match" such strings to avoid + // the overhead of decode_str + hv_store_ent. 
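+        //    (in practice: keys of up to 24 bytes of plain printable ascii
+        //    without backslashes - e.g. "id" or "name" - hit the fast path
+        //    below and avoid creating a temporary key SV; everything else
+        //    falls back to the generic decode_str path.)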
+ { + SV *value; + char *p = dec->cur; + char *e = p + 24; // only try up to 24 bytes + + for (;;) + { + // the >= 0x80 is false on most architectures + if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\') + { + // slow path, back up and use decode_str + SV *key = decode_str (dec); + if (!key) + goto fail; + + decode_ws (dec); EXPECT_CH (':'); + + decode_ws (dec); + value = decode_sv (dec); + if (!value) + { + SvREFCNT_dec (key); + goto fail; + } + + hv_store_ent (hv, key, value, 0); + SvREFCNT_dec (key); + + break; + } + else if (*p == '"') + { + // fast path, got a simple key + char *key = dec->cur; + int len = p - key; + dec->cur = p + 1; + + decode_ws (dec); EXPECT_CH (':'); + + decode_ws (dec); + value = decode_sv (dec); + if (!value) + goto fail; + + hv_store (hv, key, len, value, 0); + + break; + } + + ++p; + } + } + + decode_ws (dec); + + if (*dec->cur == '}') + { + ++dec->cur; + break; + } + + if (*dec->cur != ',') + ERR (", or } expected while parsing object/hash"); + + ++dec->cur; + + decode_ws (dec); + + if (*dec->cur == '}' && dec->json.flags & F_RELAXED) + { + ++dec->cur; + break; + } + } + + DEC_DEC_DEPTH; + sv = newRV_noinc ((SV *)hv); + + // check filter callbacks + if (dec->json.flags & F_HOOK) + { + if (dec->json.cb_sk_object && HvKEYS (hv) == 1) + { + HE *cb, *he; + + hv_iterinit (hv); + he = hv_iternext (hv); + hv_iterinit (hv); + + // the next line creates a mortal sv each time it's called. + // might want to optimise this for common cases. + cb = hv_fetch_ent (dec->json.cb_sk_object, hv_iterkeysv (he), 0, 0); + + if (cb) + { + dSP; + int count; + + ENTER; SAVETMPS; + SAVESTACK_POS (); + PUSHMARK (SP); + XPUSHs (HeVAL (he)); + sv_2mortal (sv); + + PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN; + + if (count == 1) + { + sv = newSVsv (POPs); + FREETMPS; LEAVE; + return sv; + } + + SvREFCNT_inc (sv); + FREETMPS; LEAVE; + } + } + + if (dec->json.cb_object) + { + dSP; + int count; + + ENTER; SAVETMPS; + SAVESTACK_POS (); + PUSHMARK (SP); + XPUSHs (sv_2mortal (sv)); + + PUTBACK; count = call_sv (dec->json.cb_object, G_ARRAY); SPAGAIN; + + if (count == 1) + { + sv = newSVsv (POPs); + FREETMPS; LEAVE; + return sv; + } + + SvREFCNT_inc (sv); + FREETMPS; LEAVE; + } + } + + return sv; + +fail: + SvREFCNT_dec (hv); + DEC_DEC_DEPTH; + return 0; +} + +static SV * +decode_tag (dec_t *dec) +{ + SV *tag = 0; + SV *val = 0; + + if (!(dec->json.flags & F_ALLOW_TAGS)) + ERR ("malformed JSON string, neither array, object, number, string or atom"); + + ++dec->cur; + + decode_ws (dec); + + tag = decode_sv (dec); + if (!tag) + goto fail; + + if (!SvPOK (tag)) + ERR ("malformed JSON string, (tag) must be a string"); + + decode_ws (dec); + + if (*dec->cur != ')') + ERR (") expected after tag"); + + ++dec->cur; + + decode_ws (dec); + + val = decode_sv (dec); + if (!val) + goto fail; + + if (!SvROK (val) || SvTYPE (SvRV (val)) != SVt_PVAV) + ERR ("malformed JSON string, tag value must be an array"); + + { + AV *av = (AV *)SvRV (val); + int i, len = av_len (av) + 1; + HV *stash = gv_stashsv (tag, 0); + SV *sv; + + if (!stash) + ERR ("cannot decode perl-object (package does not exist)"); + + GV *method = gv_fetchmethod_autoload (stash, "THAW", 0); + + if (!method) + ERR ("cannot decode perl-object (package does not have a THAW method)"); + + dSP; + + ENTER; SAVETMPS; + PUSHMARK (SP); + EXTEND (SP, len + 2); + // we re-bless the reference to get overload and other niceties right + PUSHs (tag); + PUSHs (sv_json); + + for (i = 0; i < len; ++i) + PUSHs (*av_fetch (av, i, 1)); + + PUTBACK; + 
call_sv ((SV *)GvCV (method), G_SCALAR); + SPAGAIN; + + SvREFCNT_dec (tag); + SvREFCNT_dec (val); + sv = SvREFCNT_inc (POPs); + + PUTBACK; + + FREETMPS; LEAVE; + + return sv; + } + +fail: + SvREFCNT_dec (tag); + SvREFCNT_dec (val); + return 0; +} + +static SV * +decode_sv (dec_t *dec) +{ + // the beauty of JSON: you need exactly one character lookahead + // to parse everything. + switch (*dec->cur) + { + case '"': ++dec->cur; return decode_str (dec); + case '[': ++dec->cur; return decode_av (dec); + case '{': ++dec->cur; return decode_hv (dec); + case '(': return decode_tag (dec); + + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return decode_num (dec); + + case 't': + if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4)) + { + dec->cur += 4; +#if JSON_SLOW + bool_true = get_bool ("Types::Serialiser::true"); +#endif + return newSVsv (bool_true); + } + else + ERR ("'true' expected"); + + break; + + case 'f': + if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5)) + { + dec->cur += 5; +#if JSON_SLOW + bool_false = get_bool ("Types::Serialiser::false"); +#endif + return newSVsv (bool_false); + } + else + ERR ("'false' expected"); + + break; + + case 'n': + if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4)) + { + dec->cur += 4; + return newSVsv (&PL_sv_undef); + } + else + ERR ("'null' expected"); + + break; + + default: + ERR ("malformed JSON string, neither tag, array, object, number, string or atom"); + break; + } + +fail: + return 0; +} + +static SV * +decode_json (SV *string, JSON *json, STRLEN *offset_return) +{ + dec_t dec; + SV *sv; + + /* work around bugs in 5.10 where manipulating magic values + * makes perl ignore the magic in subsequent accesses. + * also make a copy of non-PV values, to get them into a clean + * state (SvPV should do that, but it's buggy, see below). + * + * SvIsCOW_shared_hash works around a bug in perl (possibly 5.16), + * as reported by Reini Urban. + */ + /*SvGETMAGIC (string);*/ + if (SvMAGICAL (string) || !SvPOK (string) || SvIsCOW_shared_hash (string)) + string = sv_2mortal (newSVsv (string)); + + SvUPGRADE (string, SVt_PV); + + /* work around a bug in perl 5.10, which causes SvCUR to fail an + * assertion with -DDEBUGGING, although SvCUR is documented to + * return the xpv_cur field which certainly exists after upgrading. + * according to nicholas clark, calling SvPOK fixes this. + * But it doesn't fix it, so try another workaround, call SvPV_nolen + * and hope for the best. + * Damnit, SvPV_nolen still trips over yet another assertion. This + * assertion business is seriously broken, try yet another workaround + * for the broken -DDEBUGGING. + */ + { +#ifdef DEBUGGING + STRLEN offset = SvOK (string) ? 
sv_len (string) : 0; +#else + STRLEN offset = SvCUR (string); +#endif + + if (offset > json->max_size && json->max_size) + croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", + (unsigned long)SvCUR (string), (unsigned long)json->max_size); + } + + if (DECODE_WANTS_OCTETS (json)) + sv_utf8_downgrade (string, 0); + else + sv_utf8_upgrade (string); + + SvGROW (string, SvCUR (string) + 1); // should basically be a NOP + + dec.json = *json; + dec.cur = SvPVX (string); + dec.end = SvEND (string); + dec.err = 0; + dec.depth = 0; + + if (dec.json.cb_object || dec.json.cb_sk_object) + dec.json.flags |= F_HOOK; + + *dec.end = 0; // this should basically be a nop, too, but make sure it's there + + decode_ws (&dec); + sv = decode_sv (&dec); + + if (offset_return) + *offset_return = dec.cur - SvPVX (string); + else if (sv) + { + // check for trailing garbage + decode_ws (&dec); + + if (*dec.cur) + { + dec.err = "garbage after JSON object"; + SvREFCNT_dec (sv); + sv = 0; + } + } + + if (!sv) + { + SV *uni = sv_newmortal (); + + // horrible hack to silence warning inside pv_uni_display + COP cop = *PL_curcop; + cop.cop_warnings = pWARN_NONE; + ENTER; + SAVEVPTR (PL_curcop); + PL_curcop = &cop; + pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ); + LEAVE; + + croak ("%s, at character offset %d (before \"%s\")", + dec.err, + (int)ptr_to_index (string, dec.cur), + dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)"); + } + + sv = sv_2mortal (sv); + + if (!(dec.json.flags & F_ALLOW_NONREF) && json_nonref (sv)) + croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)"); + + return sv; +} + +///////////////////////////////////////////////////////////////////////////// +// incremental parser + +static void +incr_parse (JSON *self) +{ + const char *p = SvPVX (self->incr_text) + self->incr_pos; + + // the state machine here is a bit convoluted and could be simplified a lot + // but this would make it slower, so... + + for (;;) + { + //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D + switch (self->incr_mode) + { + // only used for initial whitespace skipping + case INCR_M_WS: + for (;;) + { + if (*p > 0x20) + { + if (*p == '#') + { + self->incr_mode = INCR_M_C0; + goto incr_m_c; + } + else + { + self->incr_mode = INCR_M_JSON; + goto incr_m_json; + } + } + else if (!*p) + goto interrupt; + + ++p; + } + + // skip a single char inside a string (for \\-processing) + case INCR_M_BS: + if (!*p) + goto interrupt; + + ++p; + self->incr_mode = INCR_M_STR; + goto incr_m_str; + + // inside #-style comments + case INCR_M_C0: + case INCR_M_C1: + incr_m_c: + for (;;) + { + if (*p == '\n') + { + self->incr_mode = self->incr_mode == INCR_M_C0 ? 
INCR_M_WS : INCR_M_JSON; + break; + } + else if (!*p) + goto interrupt; + + ++p; + } + + break; + + // inside a string + case INCR_M_STR: + incr_m_str: + for (;;) + { + if (*p == '"') + { + ++p; + self->incr_mode = INCR_M_JSON; + + if (!self->incr_nest) + goto interrupt; + + goto incr_m_json; + } + else if (*p == '\\') + { + ++p; // "virtually" consumes character after \ + + if (!*p) // if at end of string we have to switch modes + { + self->incr_mode = INCR_M_BS; + goto interrupt; + } + } + else if (!*p) + goto interrupt; + + ++p; + } + + // after initial ws, outside string + case INCR_M_JSON: + incr_m_json: + for (;;) + { + switch (*p++) + { + case 0: + --p; + goto interrupt; + + case 0x09: + case 0x0a: + case 0x0d: + case 0x20: + if (!self->incr_nest) + { + --p; // do not eat the whitespace, let the next round do it + goto interrupt; + } + break; + + case '"': + self->incr_mode = INCR_M_STR; + goto incr_m_str; + + case '[': + case '{': + case '(': + if (++self->incr_nest > self->max_depth) + croak (ERR_NESTING_EXCEEDED); + break; + + case ']': + case '}': + if (--self->incr_nest <= 0) + goto interrupt; + break; + + case ')': + --self->incr_nest; + break; + + case '#': + self->incr_mode = INCR_M_C1; + goto incr_m_c; + } + } + } + + modechange: + ; + } + +interrupt: + self->incr_pos = p - SvPVX (self->incr_text); + //printf ("interrupt<%.*s>\n", self->incr_pos, SvPVX(self->incr_text));//D + //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D +} + +///////////////////////////////////////////////////////////////////////////// +// XS interface functions + +MODULE = JSON::XS PACKAGE = JSON::XS + +BOOT: +{ + int i; + + for (i = 0; i < 256; ++i) + decode_hexdigit [i] = + i >= '0' && i <= '9' ? i - '0' + : i >= 'a' && i <= 'f' ? i - 'a' + 10 + : i >= 'A' && i <= 'F' ? i - 'A' + 10 + : -1; + + json_stash = gv_stashpv ("JSON::XS" , 1); + bool_stash = gv_stashpv ("Types::Serialiser::Boolean", 1); + bool_true = get_bool ("Types::Serialiser::true"); + bool_false = get_bool ("Types::Serialiser::false"); + + sv_json = newSVpv ("JSON", 0); + SvREADONLY_on (sv_json); + + CvNODEBUG_on (get_cv ("JSON::XS::incr_text", 0)); /* the debugger completely breaks lvalue subs */ +} + +PROTOTYPES: DISABLE + +void CLONE (...) + CODE: + json_stash = 0; + bool_stash = 0; + +void new (char *klass) + PPCODE: +{ + SV *pv = NEWSV (0, sizeof (JSON)); + SvPOK_only (pv); + json_init ((JSON *)SvPVX (pv)); + XPUSHs (sv_2mortal (sv_bless ( + newRV_noinc (pv), + strEQ (klass, "JSON::XS") ? 
JSON_STASH : gv_stashpv (klass, 1) + ))); +} + +void ascii (JSON *self, int enable = 1) + ALIAS: + ascii = F_ASCII + latin1 = F_LATIN1 + utf8 = F_UTF8 + indent = F_INDENT + canonical = F_CANONICAL + space_before = F_SPACE_BEFORE + space_after = F_SPACE_AFTER + pretty = F_PRETTY + allow_nonref = F_ALLOW_NONREF + shrink = F_SHRINK + allow_blessed = F_ALLOW_BLESSED + convert_blessed = F_CONV_BLESSED + relaxed = F_RELAXED + allow_unknown = F_ALLOW_UNKNOWN + allow_tags = F_ALLOW_TAGS + PPCODE: +{ + if (enable) + self->flags |= ix; + else + self->flags &= ~ix; + + XPUSHs (ST (0)); +} + +void get_ascii (JSON *self) + ALIAS: + get_ascii = F_ASCII + get_latin1 = F_LATIN1 + get_utf8 = F_UTF8 + get_indent = F_INDENT + get_canonical = F_CANONICAL + get_space_before = F_SPACE_BEFORE + get_space_after = F_SPACE_AFTER + get_allow_nonref = F_ALLOW_NONREF + get_shrink = F_SHRINK + get_allow_blessed = F_ALLOW_BLESSED + get_convert_blessed = F_CONV_BLESSED + get_relaxed = F_RELAXED + get_allow_unknown = F_ALLOW_UNKNOWN + get_allow_tags = F_ALLOW_TAGS + PPCODE: + XPUSHs (boolSV (self->flags & ix)); + +void max_depth (JSON *self, U32 max_depth = 0x80000000UL) + PPCODE: + self->max_depth = max_depth; + XPUSHs (ST (0)); + +U32 get_max_depth (JSON *self) + CODE: + RETVAL = self->max_depth; + OUTPUT: + RETVAL + +void max_size (JSON *self, U32 max_size = 0) + PPCODE: + self->max_size = max_size; + XPUSHs (ST (0)); + +int get_max_size (JSON *self) + CODE: + RETVAL = self->max_size; + OUTPUT: + RETVAL + +void filter_json_object (JSON *self, SV *cb = &PL_sv_undef) + PPCODE: +{ + SvREFCNT_dec (self->cb_object); + self->cb_object = SvOK (cb) ? newSVsv (cb) : 0; + + XPUSHs (ST (0)); +} + +void filter_json_single_key_object (JSON *self, SV *key, SV *cb = &PL_sv_undef) + PPCODE: +{ + if (!self->cb_sk_object) + self->cb_sk_object = newHV (); + + if (SvOK (cb)) + hv_store_ent (self->cb_sk_object, key, newSVsv (cb), 0); + else + { + hv_delete_ent (self->cb_sk_object, key, G_DISCARD, 0); + + if (!HvKEYS (self->cb_sk_object)) + { + SvREFCNT_dec (self->cb_sk_object); + self->cb_sk_object = 0; + } + } + + XPUSHs (ST (0)); +} + +void encode (JSON *self, SV *scalar) + PPCODE: + PUTBACK; scalar = encode_json (scalar, self); SPAGAIN; + XPUSHs (scalar); + +void decode (JSON *self, SV *jsonstr) + PPCODE: + PUTBACK; jsonstr = decode_json (jsonstr, self, 0); SPAGAIN; + XPUSHs (jsonstr); + +void decode_prefix (JSON *self, SV *jsonstr) + PPCODE: +{ + SV *sv; + STRLEN offset; + PUTBACK; sv = decode_json (jsonstr, self, &offset); SPAGAIN; + EXTEND (SP, 2); + PUSHs (sv); + PUSHs (sv_2mortal (newSVuv (ptr_to_index (jsonstr, SvPV_nolen (jsonstr) + offset)))); +} + +void incr_parse (JSON *self, SV *jsonstr = 0) + PPCODE: +{ + if (!self->incr_text) + self->incr_text = newSVpvn ("", 0); + + /* if utf8-ness doesn't match the decoder, need to upgrade/downgrade */ + if (!DECODE_WANTS_OCTETS (self) == !SvUTF8 (self->incr_text)) + if (DECODE_WANTS_OCTETS (self)) + { + if (self->incr_pos) + self->incr_pos = utf8_length ((U8 *)SvPVX (self->incr_text), + (U8 *)SvPVX (self->incr_text) + self->incr_pos); + + sv_utf8_downgrade (self->incr_text, 0); + } + else + { + sv_utf8_upgrade (self->incr_text); + + if (self->incr_pos) + self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) + - (U8 *)SvPVX (self->incr_text); + } + + // append data, if any + if (jsonstr) + { + /* make sure both strings have same encoding */ + if (SvUTF8 (jsonstr) != SvUTF8 (self->incr_text)) + if (SvUTF8 (jsonstr)) + sv_utf8_downgrade (jsonstr, 0); + else + 
sv_utf8_upgrade (jsonstr); + + /* and then just blindly append */ + { + STRLEN len; + const char *str = SvPV (jsonstr, len); + STRLEN cur = SvCUR (self->incr_text); + + if (SvLEN (self->incr_text) - cur <= len) + json_sv_grow (self->incr_text, cur, len); + + Move (str, SvEND (self->incr_text), len, char); + SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len); + *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there + } + } + + if (GIMME_V != G_VOID) + do + { + SV *sv; + STRLEN offset; + + if (!INCR_DONE (self)) + { + incr_parse (self); + + if (self->incr_pos > self->max_size && self->max_size) + croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", + (unsigned long)self->incr_pos, (unsigned long)self->max_size); + + if (!INCR_DONE (self)) + { + // as an optimisation, do not accumulate white space in the incr buffer + if (self->incr_mode == INCR_M_WS && self->incr_pos) + { + self->incr_pos = 0; + SvCUR_set (self->incr_text, 0); + } + + break; + } + } + + PUTBACK; sv = decode_json (self->incr_text, self, &offset); SPAGAIN; + XPUSHs (sv); + + self->incr_pos -= offset; + self->incr_nest = 0; + self->incr_mode = 0; + + sv_chop (self->incr_text, SvPVX (self->incr_text) + offset); + } + while (GIMME_V == G_ARRAY); +} + +SV *incr_text (JSON *self) + ATTRS: lvalue + CODE: +{ + if (self->incr_pos) + croak ("incr_text can not be called when the incremental parser already started parsing"); + + RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef; +} + OUTPUT: + RETVAL + +void incr_skip (JSON *self) + CODE: +{ + if (self->incr_pos) + { + sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos); + self->incr_pos = 0; + self->incr_nest = 0; + self->incr_mode = 0; + } +} + +void incr_reset (JSON *self) + CODE: +{ + SvREFCNT_dec (self->incr_text); + self->incr_text = 0; + self->incr_pos = 0; + self->incr_nest = 0; + self->incr_mode = 0; +} + +void DESTROY (JSON *self) + CODE: + SvREFCNT_dec (self->cb_sk_object); + SvREFCNT_dec (self->cb_object); + SvREFCNT_dec (self->incr_text); + +PROTOTYPES: ENABLE + +void encode_json (SV *scalar) + PPCODE: +{ + JSON json; + json_init (&json); + json.flags |= F_UTF8; + PUTBACK; scalar = encode_json (scalar, &json); SPAGAIN; + XPUSHs (scalar); +} + +void decode_json (SV *jsonstr) + PPCODE: +{ + JSON json; + json_init (&json); + json.flags |= F_UTF8; + PUTBACK; jsonstr = decode_json (jsonstr, &json, 0); SPAGAIN; + XPUSHs (jsonstr); +} + diff --git a/XS/Boolean.pm b/XS/Boolean.pm new file mode 100644 index 0000000..67f704b --- /dev/null +++ b/XS/Boolean.pm @@ -0,0 +1,31 @@ +=head1 NAME + +JSON::XS::Boolean - dummy module providing JSON::XS::Boolean + +=head1 SYNOPSIS + + # do not "use" yourself + +=head1 DESCRIPTION + +This module exists only to provide overload resolution for Storable and +similar modules. It's only needed for compatibility with data serialised +(by other modules such as Storable) that was decoded by JSON::XS versions +before 3.0. + +Since 3.0, JSON::PP::Boolean has replaced it. Support for +JSON::XS::Boolean will be removed in a future release. 
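+
+As an illustrative sketch (not part of the original interface description):
+booleans decoded by current versions of JSON::XS are L<Types::Serialiser>
+booleans (blessed into C<JSON::PP::Boolean>) rather than JSON::XS::Boolean
+objects, which can be checked like this:
+
+   use JSON::XS;
+   use Types::Serialiser;
+
+   my $data = decode_json '{"flag":true}';
+
+   if (Types::Serialiser::is_bool ($data->{flag})) {
+      print "decoded a boolean\n";
+   }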
+
+=cut
+
+use JSON::XS ();
+
+1;
+
+=head1 AUTHOR
+
+ Marc Lehmann 
+ http://home.schmorp.de/
+
+=cut
+
diff --git a/bin/json_xs b/bin/json_xs
new file mode 100755
index 0000000..11d36c6
--- /dev/null
+++ b/bin/json_xs
@@ -0,0 +1,248 @@
+#!/opt/bin/perl
+
+=head1 NAME
+
+json_xs - JSON::XS commandline utility
+
+=head1 SYNOPSIS
+
+   json_xs [-v] [-f inputformat] [-t outputformat]
+
+=head1 DESCRIPTION
+
+F<json_xs> converts between some input and output formats (one of them is
+JSON).
+
+The default input format is C<json> and the default output format is
+C<json-pretty>.
+
+=head1 OPTIONS
+
+=over 4
+
+=item -v
+
+Be slightly more verbose.
+
+=item -f fromformat
+
+Read a file in the given format from STDIN.
+
+C<fromformat> can be one of:
+
+=over 4
+
+=item json - a json text encoded, either utf-8, utf16-be/le, utf32-be/le
+
+=item cbor - CBOR (RFC 7049, L), a kind of binary JSON
+
+=item storable - a L<Storable> frozen value
+
+=item storable-file - a L<Storable> file (Storable has two incompatible formats)
+
+=item bencode - use L<Convert::Bencode>, if available (used by torrent files, among others)
+
+=item clzf - L<Compress::LZF> format (requires that module to be installed)
+
+=item eval - evaluate the given code as (non-utf-8) Perl, basically the reverse of "-t dump"
+
+=item yaml - L<YAML::XS> format (requires that module to be installed)
+
+=item string - do not attempt to decode the file data
+
+=item none - nothing is read, creates an C<undef> scalar - mainly useful with C<-e>
+
+=back
+
+=item -t toformat
+
+Write the file in the given format to STDOUT.
+
+C<toformat> can be one of:
+
+=over 4
+
+=item json, json-utf-8 - json, utf-8 encoded
+
+=item json-pretty - as above, but pretty-printed
+
+=item json-utf-16le, json-utf-16be - little endian/big endian utf-16
+
+=item json-utf-32le, json-utf-32be - little endian/big endian utf-32
+
+=item cbor - CBOR (RFC 7049, L), a kind of binary JSON
+
+=item cbor-packed - CBOR using extensions to make it smaller
+
+=item storable - a L<Storable> frozen value in network format
+
+=item storable-file - a L<Storable> file in network format (Storable has two incompatible formats)
+
+=item bencode - use L<Convert::Bencode>, if available (used by torrent files, among others)
+
+=item clzf - L<Compress::LZF> format
+
+=item yaml - L<YAML::XS> format
+
+=item dump - L<Data::Dump>
+
+=item dumper - L<Data::Dumper>
+
+=item string - writes the data out as if it were a string
+
+=item none - nothing gets written, mainly useful together with C<-e>
+
+Note that Data::Dumper doesn't handle self-referential data structures
+correctly - use "dump" instead.
+
+=back
+
+=item -e code
+
+Evaluate perl code after reading the data and before writing it out again
+- can be used to filter, create or extract data. The data that has been
+read is in C<$_>, and whatever is in there is written out afterwards.
+
+=back
+
+=head1 EXAMPLES
+
+   json_xs -t none as JSON - if it
+is valid JSON, the command outputs nothing, otherwise it will print an
+error message and exit with non-zero exit status.
+
+   pretty.json
+
+Prettify the JSON file F to F.
+
+   json_xs -f storable-file and print a human-readable JSON
+version of it to STDOUT.
+
+   json_xs -f storable-file -t yaml {"announce-list"}}' -t string
+
+Print the tracker list inside a torrent file.
+
+   lwp-request http://cpantesters.perl.org/show/JSON-XS.json | json_xs
+
+Fetch the cpan-testers result summary C<JSON-XS.json> and pretty-print it.
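+
+As an additional, illustrative sketch (the data shown here is made up and
+not part of the original examples), the C<none> input format together with
+C<-e> can be used to generate JSON from scratch, relying on the default
+C<json-pretty> output format:
+
+   json_xs -f none -e '$_ = { name => "example", list => [1, 2, 3] }'
+
+Build a data structure in perl code and pretty-print it as JSON on STDOUT.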
+ +=head1 AUTHOR + +Copyright (C) 2008 Marc Lehmann + +=cut + +use strict; + +use Getopt::Long; +use Storable (); +use Encode; + +use JSON::XS; + +my $opt_verbose; +my $opt_from = "json"; +my $opt_to = "json-pretty"; +my $opt_eval; + +Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order"); + +GetOptions( + "v" => \$opt_verbose, + "f=s" => \$opt_from, + "t=s" => \$opt_to, + "e=s" => \$opt_eval, +) or die "Usage: $0 [-v] -f fromformat [-e code] [-t toformat]\n"; + +my %F = ( + "none" => sub { undef }, + "string" => sub { $_ }, + "json" => sub { + my $enc = + /^\x00\x00\x00/s ? "utf-32be" + : /^\x00.\x00/s ? "utf-16be" + : /^.\x00\x00\x00/s ? "utf-32le" + : /^.\x00.\x00/s ? "utf-16le" + : "utf-8"; + warn "input text encoding is $enc\n" if $opt_verbose; + JSON::XS->new->decode (decode $enc, $_) + }, + "cbor" => sub { require CBOR::XS; CBOR::XS->new->allow_cycles->decode ($_) }, + "storable" => sub { Storable::thaw $_ }, + "storable-file" => sub { open my $fh, "<", \$_; Storable::fd_retrieve $fh }, + "bencode" => sub { require Convert::Bencode; Convert::Bencode::bdecode ($_) }, + "clzf" => sub { require Compress::LZF; Compress::LZF::sthaw ($_) }, + "yaml" => sub { require YAML::XS; YAML::XS::Load ($_) }, + "eval" => sub { my $v = eval "no strict; no warnings; no utf8;\n#line 1 \"input\"\n$_"; die "$@" if $@; $v }, +); + +my %T = ( + "none" => sub { "" }, + "string" => sub { $_ }, + "json" => sub { encode_json $_ }, + "json-utf-8" => sub { encode_json $_ }, + "json-pretty" => sub { JSON::XS->new->utf8->pretty->encode ($_) }, + "json-utf-16le" => sub { encode "utf-16le", JSON::XS->new->encode ($_) }, + "json-utf-16be" => sub { encode "utf-16be", JSON::XS->new->encode ($_) }, + "json-utf-32le" => sub { encode "utf-32le", JSON::XS->new->encode ($_) }, + "json-utf-32be" => sub { encode "utf-32be", JSON::XS->new->encode ($_) }, + "cbor" => sub { require CBOR::XS; CBOR::XS::encode_cbor ($_) }, + "cbor-packed" => sub { require CBOR::XS; CBOR::XS->new->pack_strings->encode ($_) }, + "storable" => sub { Storable::nfreeze $_ }, + "storable-file" => sub { open my $fh, ">", \my $buf; Storable::nstore_fd $_, $fh; $buf }, + "bencode" => sub { require Convert::Bencode; Convert::Bencode::bencode ($_) }, + "clzf" => sub { require Compress::LZF; Compress::LZF::sfreeze_cr ($_) }, + "yaml" => sub { require YAML::XS; YAML::XS::Dump ($_) }, + "dumper" => sub { + require Data::Dumper; + #local $Data::Dumper::Purity = 1; # hopeless case + local $Data::Dumper::Terse = 1; + local $Data::Dumper::Indent = 1; + local $Data::Dumper::Useqq = 1; + local $Data::Dumper::Quotekeys = 0; + local $Data::Dumper::Sortkeys = 1; + Data::Dumper::Dumper($_) + }, + "dump" => sub { + require Data::Dump; + local $Data::Dump::TRY_BASE64 = 0; + Data::Dump::dump ($_) . 
"\n" + }, +); + +$F{$opt_from} + or die "$opt_from: not a valid fromformat\n"; + +$T{$opt_to} + or die "$opt_to: not a valid toformat\n"; + +if ($opt_from ne "none") { + local $/; + binmode STDIN; # stupid perl sometimes thinks its funny + $_ = ; +} + +$_ = $F{$opt_from}->(); + +eval $opt_eval; +die $@ if $@; + +$_ = $T{$opt_to}->(); + +binmode STDOUT; +syswrite STDOUT, $_; + + + diff --git a/eg/bench b/eg/bench new file mode 100755 index 0000000..7ac7174 --- /dev/null +++ b/eg/bench @@ -0,0 +1,90 @@ +#!/opt/bin/perl + +# Usage: bench json-file + +# which modules to test (JSON::PP usually excluded because its so slow) +my %tst = ( +# "JSON" => ['JSON::encode_json $perl' , 'JSON::decode_json $json'], + "JSON::PP" => ['$pp->encode ($perl)' , '$pp->decode ($json)'], + "JSON::DWIW/FJ" => ['$dwiw->to_json ($perl)' , '$dwiw->from_json ($json)'], + "JSON::DWIW/DS" => ['$dwiw->to_json ($perl)' , 'JSON::DWIW::deserialize $json'], +# "JSON::PC" => ['$pc->convert ($perl)' , '$pc->parse ($json)'], + "JSON::Syck" => ['JSON::Syck::Dump $perl' , 'JSON::Syck::Load $json'], + "JSON::XS" => ['encode_json $perl' , 'decode_json $json'], + "JSON::XS/2" => ['$xs2->encode ($perl)' , '$xs2->decode ($json)'], + "JSON::XS/3" => ['$xs3->encode ($perl)' , '$xs3->decode ($json)'], + "Storable" => ['Storable::nfreeze $perl' , 'Storable::thaw $pst'], +); + +use JSON (); +use JSON::DWIW; +use JSON::PC; +use JSON::PP (); +use JSON::XS qw(encode_json decode_json); +use JSON::Syck; +use Storable (); + +use Time::HiRes; +use List::Util; + +use utf8; + +my $dwiw = new JSON::DWIW; +my $pc = new JSON::PC; +my $pp = JSON::PP->new->max_depth (512); +my $xs2 = JSON::XS->new->utf8->pretty->canonical; +my $xs3 = JSON::XS->new->utf8->shrink; + +my $json; # the test string + +local $/; +$json = <>; + +# fix syck-brokenised stuff +#$json = JSON::XS->new->ascii(1)->encode (JSON::Syck::Load $json); + +#srand 0; $json = JSON::XS->new->utf8(1)->ascii(0)->encode ([join "", map +(chr rand 255), 0..2047]); + +#if (1) { +# use Storable; +# open my $fh, "<:unix", "/opt/crossfire/share/cfserver/faces" or die "$!"; +# my $faces = Storable::thaw do { <$fh> }; +# $json = objToJson $faces; +# open my $fh2, ">:unix", "faces.json" or die "$!"; +# print $fh2 $json; +# warn length $json; +#} + +sub bench($) { + my ($code) = @_; + + my $pst = Storable::nfreeze JSON::XS::decode_json $json; # seperately decode as storable stringifies :/ + my $perl = JSON::XS::decode_json $json; + + my $count = 5; + my $times = 200; + + my $cent = eval "sub { my \$t = Time::HiRes::time; " . (join ";", ($code) x $count) . 
"; Time::HiRes::time - \$t }"; + $cent->(); + + my $min = 1e99; + + for (1..$times) { + my $t = $cent->(); + + $min = $t if $t < $min; + } + + return $count / $min; +} + +printf "%-13s | %10s | %10s |\n", "module", "encode", "decode"; +printf "--------------|------------|------------|\n"; +for my $module (sort keys %tst) { + my $enc = bench $tst{$module}[0]; + my $dec = bench $tst{$module}[1]; + + printf "%-13s | %10.3f | %10.3f |\n", $module, $enc, $dec; +} +printf "--------------+------------+------------+\n"; + diff --git a/t/00_load.t b/t/00_load.t new file mode 100644 index 0000000..dab153e --- /dev/null +++ b/t/00_load.t @@ -0,0 +1,5 @@ +BEGIN { $| = 1; print "1..1\n"; } +END {print "not ok 1\n" unless $loaded;} +use JSON::XS; +$loaded = 1; +print "ok 1\n"; diff --git a/t/01_utf8.t b/t/01_utf8.t new file mode 100644 index 0000000..06b3d28 --- /dev/null +++ b/t/01_utf8.t @@ -0,0 +1,23 @@ +BEGIN { $| = 1; print "1..9\n"; } + +use utf8; +use JSON::XS; + +our $test; +sub ok($) { + print $_[0] ? "" : "not ", "ok ", ++$test, "\n"; +} + +ok (JSON::XS->new->allow_nonref (1)->utf8 (1)->encode ("ü") eq "\"\xc3\xbc\""); +ok (JSON::XS->new->allow_nonref (1)->encode ("ü") eq "\"ü\""); +ok (JSON::XS->new->allow_nonref (1)->ascii (1)->utf8 (1)->encode (chr 0x8000) eq '"\u8000"'); +ok (JSON::XS->new->allow_nonref (1)->ascii (1)->utf8 (1)->pretty (1)->encode (chr 0x10402) eq "\"\\ud801\\udc02\"\n"); + +eval { JSON::XS->new->allow_nonref (1)->utf8 (1)->decode ('"ü"') }; +ok $@ =~ /malformed UTF-8/; + +ok (JSON::XS->new->allow_nonref (1)->decode ('"ü"') eq "ü"); +ok (JSON::XS->new->allow_nonref (1)->decode ('"\u00fc"') eq "ü"); +ok (JSON::XS->new->allow_nonref (1)->decode ('"\ud801\udc02' . "\x{10204}\"") eq "\x{10402}\x{10204}"); +ok (JSON::XS->new->allow_nonref (1)->decode ('"\"\n\\\\\r\t\f\b"') eq "\"\012\\\015\011\014\010"); + diff --git a/t/02_error.t b/t/02_error.t new file mode 100644 index 0000000..b82bfa2 --- /dev/null +++ b/t/02_error.t @@ -0,0 +1,47 @@ +BEGIN { $| = 1; print "1..31\n"; } + +use utf8; +use JSON::XS; +no warnings; + +our $test; +sub ok($) { + print $_[0] ? 
"" : "not ", "ok ", ++$test, "\n"; +} + +eval { JSON::XS->new->encode ([\-1]) }; ok $@ =~ /cannot encode reference/; +eval { JSON::XS->new->encode ([\undef]) }; ok $@ =~ /cannot encode reference/; +eval { JSON::XS->new->encode ([\2]) }; ok $@ =~ /cannot encode reference/; +eval { JSON::XS->new->encode ([\{}]) }; ok $@ =~ /cannot encode reference/; +eval { JSON::XS->new->encode ([\[]]) }; ok $@ =~ /cannot encode reference/; +eval { JSON::XS->new->encode ([\\1]) }; ok $@ =~ /cannot encode reference/; + +eval { JSON::XS->new->allow_nonref (1)->decode ('"\u1234\udc00"') }; ok $@ =~ /missing high /; +eval { JSON::XS->new->allow_nonref->decode ('"\ud800"') }; ok $@ =~ /missing low /; +eval { JSON::XS->new->allow_nonref (1)->decode ('"\ud800\u1234"') }; ok $@ =~ /surrogate pair /; + +eval { JSON::XS->new->decode ('null') }; ok $@ =~ /allow_nonref/; +eval { JSON::XS->new->allow_nonref (1)->decode ('+0') }; ok $@ =~ /malformed/; +eval { JSON::XS->new->allow_nonref->decode ('.2') }; ok $@ =~ /malformed/; +eval { JSON::XS->new->allow_nonref (1)->decode ('bare') }; ok $@ =~ /malformed/; +eval { JSON::XS->new->allow_nonref->decode ('naughty') }; ok $@ =~ /null/; +eval { JSON::XS->new->allow_nonref (1)->decode ('01') }; ok $@ =~ /leading zero/; +eval { JSON::XS->new->allow_nonref->decode ('00') }; ok $@ =~ /leading zero/; +eval { JSON::XS->new->allow_nonref (1)->decode ('-0.') }; ok $@ =~ /decimal point/; +eval { JSON::XS->new->allow_nonref->decode ('-0e') }; ok $@ =~ /exp sign/; +eval { JSON::XS->new->allow_nonref (1)->decode ('-e+1') }; ok $@ =~ /initial minus/; +eval { JSON::XS->new->allow_nonref->decode ("\"\n\"") }; ok $@ =~ /invalid character/; +eval { JSON::XS->new->allow_nonref (1)->decode ("\"\x01\"") }; ok $@ =~ /invalid character/; +eval { JSON::XS->new->decode ('[5') }; ok $@ =~ /parsing array/; +eval { JSON::XS->new->decode ('{"5"') }; ok $@ =~ /':' expected/; +eval { JSON::XS->new->decode ('{"5":null') }; ok $@ =~ /parsing object/; + +eval { JSON::XS->new->decode (undef) }; ok $@ =~ /malformed/; +eval { JSON::XS->new->decode (\5) }; ok !!$@; # Can't coerce readonly +eval { JSON::XS->new->decode ([]) }; ok $@ =~ /malformed/; +eval { JSON::XS->new->decode (\*STDERR) }; ok $@ =~ /malformed/; +eval { JSON::XS->new->decode (*STDERR) }; ok !!$@; # cannot coerce GLOB + +eval { decode_json ("\"\xa0") }; ok $@ =~ /malformed.*character/; +eval { decode_json ("\"\xa0\"") }; ok $@ =~ /malformed.*character/; + diff --git a/t/03_types.t b/t/03_types.t new file mode 100644 index 0000000..201e387 --- /dev/null +++ b/t/03_types.t @@ -0,0 +1,60 @@ +BEGIN { $| = 1; print "1..76\n"; } + +use utf8; +use Types::Serialiser; +use JSON::XS; + +our $test; +sub ok($) { + print $_[0] ? 
"" : "not ", "ok ", ++$test, "\n"; +} + +ok (!defined JSON::XS->new->allow_nonref (1)->decode ('null')); +ok (JSON::XS->new->allow_nonref (1)->decode ('true') == 1); +ok (JSON::XS->new->allow_nonref (1)->decode ('false') == 0); + +my $true = JSON::XS->new->allow_nonref (1)->decode ('true'); +ok ($true eq 1); +ok (Types::Serialiser::is_bool $true); +my $false = JSON::XS->new->allow_nonref (1)->decode ('false'); +ok ($false == !$true); +ok (Types::Serialiser::is_bool $false); +ok (++$false == 1); +ok (!Types::Serialiser::is_bool $false); + +ok (JSON::XS->new->allow_nonref (1)->decode ('5') == 5); +ok (JSON::XS->new->allow_nonref (1)->decode ('-5') == -5); +ok (JSON::XS->new->allow_nonref (1)->decode ('5e1') == 50); +ok (JSON::XS->new->allow_nonref (1)->decode ('-333e+0') == -333); +ok (JSON::XS->new->allow_nonref (1)->decode ('2.5') == 2.5); + +ok (JSON::XS->new->allow_nonref (1)->decode ('""') eq ""); +ok ('[1,2,3,4]' eq encode_json decode_json ('[1,2, 3,4]')); +ok ('[{},[],[],{}]' eq encode_json decode_json ('[{},[], [ ] ,{ }]')); +ok ('[{"1":[5]}]' eq encode_json [{1 => [5]}]); +ok ('{"1":2,"3":4}' eq JSON::XS->new->canonical (1)->encode (decode_json '{ "1" : 2, "3" : 4 }')); +ok ('{"1":2,"3":1.2}' eq JSON::XS->new->canonical (1)->encode (decode_json '{ "1" : 2, "3" : 1.2 }')); + +ok ('[true]' eq encode_json [Types::Serialiser::true]); +ok ('[false]' eq encode_json [Types::Serialiser::false]); +ok ('[true]' eq encode_json [\1]); +ok ('[false]' eq encode_json [\0]); +ok ('[null]' eq encode_json [undef]); +ok ('[true]' eq encode_json [Types::Serialiser::true]); +ok ('[false]' eq encode_json [Types::Serialiser::false]); + +for $v (1, 2, 3, 5, -1, -2, -3, -4, 100, 1000, 10000, -999, -88, -7, 7, 88, 999, -1e5, 1e6, 1e7, 1e8) { + ok ($v == ((decode_json "[$v]")->[0])); + ok ($v == ((decode_json encode_json [$v])->[0])); +} + +ok (30123 == ((decode_json encode_json [30123])->[0])); +ok (32123 == ((decode_json encode_json [32123])->[0])); +ok (32456 == ((decode_json encode_json [32456])->[0])); +ok (32789 == ((decode_json encode_json [32789])->[0])); +ok (32767 == ((decode_json encode_json [32767])->[0])); +ok (32768 == ((decode_json encode_json [32768])->[0])); + +my @sparse; @sparse[0,3] = (1, 4); +ok ("[1,null,null,4]" eq encode_json \@sparse); + diff --git a/t/04_dwiw_encode.t b/t/04_dwiw_encode.t new file mode 100644 index 0000000..e133bb1 --- /dev/null +++ b/t/04_dwiw_encode.t @@ -0,0 +1,69 @@ +#! perl + +# copied over from JSON::DWIW and modified to use JSON::XS + +# Creation date: 2007-02-20 19:51:06 +# Authors: don + +use strict; +use Test; + +# main +{ + BEGIN { plan tests => 5 } + + use JSON::XS; + + my $data; + + # my $expected_str = '{"var1":"val1","var2":["first_element",{"sub_element":"sub_val","sub_element2":"sub_val2"}],"var3":"val3"}'; + + my $expected_str1 = '{"var1":"val1","var2":["first_element",{"sub_element":"sub_val","sub_element2":"sub_val2"}]}'; + my $expected_str2 = '{"var2":["first_element",{"sub_element":"sub_val","sub_element2":"sub_val2"}],"var1":"val1"}'; + my $expected_str3 = '{"var2":["first_element",{"sub_element2":"sub_val2","sub_element":"sub_val"}],"var1":"val1"}'; + my $expected_str4 = '{"var1":"val1","var2":["first_element",{"sub_element2":"sub_val2","sub_element":"sub_val"}]}'; + + my $json_obj = JSON::XS->new->allow_nonref (1); + my $json_str; + # print STDERR "\n" . $json_str . 
"\n\n"; + + my $expected_str; + + $data = 'stuff'; + $json_str = $json_obj->encode($data); + ok($json_str eq '"stuff"'); + + $data = "stu\nff"; + $json_str = $json_obj->encode($data); + ok($json_str eq '"stu\nff"'); + + $data = [ 1, 2, 3 ]; + $expected_str = '[1,2,3]'; + $json_str = $json_obj->encode($data); + + ok($json_str eq $expected_str); + + $data = { var1 => 'val1', var2 => 'val2' }; + $json_str = $json_obj->encode($data); + + ok($json_str eq '{"var1":"val1","var2":"val2"}' + or $json_str eq '{"var2":"val2","var1":"val1"}'); + + $data = { var1 => 'val1', + var2 => [ 'first_element', + { sub_element => 'sub_val', sub_element2 => 'sub_val2' }, + ], + # var3 => 'val3', + }; + + $json_str = $json_obj->encode($data); + + ok($json_str eq $expected_str1 or $json_str eq $expected_str2 + or $json_str eq $expected_str3 or $json_str eq $expected_str4); +} + +exit 0; + +############################################################################### +# Subroutines + diff --git a/t/05_dwiw_decode.t b/t/05_dwiw_decode.t new file mode 100644 index 0000000..38ebb12 --- /dev/null +++ b/t/05_dwiw_decode.t @@ -0,0 +1,91 @@ +#! perl + +# copied over from JSON::DWIW and modified to use JSON::XS + +# Creation date: 2007-02-20 21:54:09 +# Authors: don + +use strict; +use warnings; +use Test; + +# main +{ + BEGIN { plan tests => 7 } + + use JSON::XS; + + my $json_str = '{"var1":"val1","var2":["first_element",{"sub_element":"sub_val","sub_element2":"sub_val2"}],"var3":"val3"}'; + + my $json_obj = JSON::XS->new->allow_nonref(1); + my $data = $json_obj->decode($json_str); + + my $pass = 1; + if ($data->{var1} eq 'val1' and $data->{var3} eq 'val3') { + if ($data->{var2}) { + my $array = $data->{var2}; + if (ref($array) eq 'ARRAY') { + if ($array->[0] eq 'first_element') { + my $hash = $array->[1]; + if (ref($hash) eq 'HASH') { + unless ($hash->{sub_element} eq 'sub_val' + and $hash->{sub_element2} eq 'sub_val2') { + $pass = 0; + } + } + else { + $pass = 0; + } + } + else { + $pass = 0; + } + } + else { + $pass = 0; + } + } + else { + $pass = 0; + } + } + + ok($pass); + + $json_str = '"val1"'; + $data = $json_obj->decode($json_str); + ok($data eq 'val1'); + + $json_str = '567'; + $data = $json_obj->decode($json_str); + ok($data == 567); + + $json_str = "5e1"; + $data = $json_obj->decode($json_str); + ok($data == 50); + + $json_str = "5e3"; + $data = $json_obj->decode($json_str); + ok($data == 5000); + + $json_str = "5e+1"; + $data = $json_obj->decode($json_str); + ok($data == 50); + + $json_str = "5e-1"; + $data = $json_obj->decode($json_str); + ok($data == 0.5); + + + + +# use Data::Dumper; +# print STDERR Dumper($test_data) . "\n\n"; + +} + +exit 0; + +############################################################################### +# Subroutines + diff --git a/t/06_pc_pretty.t b/t/06_pc_pretty.t new file mode 100644 index 0000000..89249cc --- /dev/null +++ b/t/06_pc_pretty.t @@ -0,0 +1,66 @@ +#! 
perl + +# copied over from JSON::PC and modified to use JSON::XS + +use strict; +use Test::More; +BEGIN { plan tests => 9 }; + +use JSON::XS; + +my ($js,$obj,$json); +my $pc = new JSON::XS; + +$obj = {foo => "bar"}; +$js = $pc->encode($obj); +is($js,q|{"foo":"bar"}|); + +$obj = [10, "hoge", {foo => "bar"}]; +$pc->pretty (1); +$js = $pc->encode($obj); +is($js,q|[ + 10, + "hoge", + { + "foo" : "bar" + } +] +|); + +$obj = { foo => [ {a=>"b"}, 0, 1, 2 ] }; +$pc->pretty(0); +$js = $pc->encode($obj); +is($js,q|{"foo":[{"a":"b"},0,1,2]}|); + + +$obj = { foo => [ {a=>"b"}, 0, 1, 2 ] }; +$pc->pretty(1); +$js = $pc->encode($obj); +is($js,q|{ + "foo" : [ + { + "a" : "b" + }, + 0, + 1, + 2 + ] +} +|); + +$obj = { foo => [ {a=>"b"}, 0, 1, 2 ] }; +$pc->pretty(0); +$js = $pc->encode($obj); +is($js,q|{"foo":[{"a":"b"},0,1,2]}|); + + +$obj = {foo => "bar"}; +$pc->indent(1); +is($pc->encode($obj), qq|{\n "foo":"bar"\n}\n|, "nospace"); +$pc->space_after(1); +is($pc->encode($obj), qq|{\n "foo": "bar"\n}\n|, "after"); +$pc->space_before(1); +is($pc->encode($obj), qq|{\n "foo" : "bar"\n}\n|, "both"); +$pc->space_after(0); +is($pc->encode($obj), qq|{\n "foo" :"bar"\n}\n|, "before"); + diff --git a/t/07_pc_esc.t b/t/07_pc_esc.t new file mode 100644 index 0000000..cc97794 --- /dev/null +++ b/t/07_pc_esc.t @@ -0,0 +1,80 @@ +# +# このファイルのエンコーディングはUTF-8 +# + +# copied over from JSON::PC and modified to use JSON::XS + +use Test::More; +use strict; +use utf8; +BEGIN { plan tests => 17 }; +use JSON::XS; + +######################### +my ($js,$obj,$str); + +my $pc = new JSON::XS; + +$obj = {test => qq|abc"def|}; +$str = $pc->encode($obj); +is($str,q|{"test":"abc\"def"}|); + +$obj = {qq|te"st| => qq|abc"def|}; +$str = $pc->encode($obj); +is($str,q|{"te\"st":"abc\"def"}|); + +$obj = {test => qq|abc/def|}; # / => \/ +$str = $pc->encode($obj); # but since version 0.99 +is($str,q|{"test":"abc/def"}|); # this handling is deleted. 
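+# (since 0.99 "/" is emitted unescaped; decoding the result still round-trips)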
+$obj = $pc->decode($str); +is($obj->{test},q|abc/def|); + +$obj = {test => q|abc\def|}; +$str = $pc->encode($obj); +is($str,q|{"test":"abc\\\\def"}|); + +$obj = {test => "abc\bdef"}; +$str = $pc->encode($obj); +is($str,q|{"test":"abc\bdef"}|); + +$obj = {test => "abc\fdef"}; +$str = $pc->encode($obj); +is($str,q|{"test":"abc\fdef"}|); + +$obj = {test => "abc\ndef"}; +$str = $pc->encode($obj); +is($str,q|{"test":"abc\ndef"}|); + +$obj = {test => "abc\rdef"}; +$str = $pc->encode($obj); +is($str,q|{"test":"abc\rdef"}|); + +$obj = {test => "abc-def"}; +$str = $pc->encode($obj); +is($str,q|{"test":"abc-def"}|); + +$obj = {test => "abc(def"}; +$str = $pc->encode($obj); +is($str,q|{"test":"abc(def"}|); + +$obj = {test => "abc\\def"}; +$str = $pc->encode($obj); +is($str,q|{"test":"abc\\\\def"}|); + +$obj = {test => "あいうえお"}; +$str = $pc->encode($obj); +is($str,q|{"test":"あいうえお"}|); + +$obj = {"あいうえお" => "かきくけこ"}; +$str = $pc->encode($obj); +is($str,q|{"あいうえお":"かきくけこ"}|); + +$obj = $pc->decode(q|{"id":"abc\ndef"}|); +is($obj->{id},"abc\ndef",q|{"id":"abc\ndef"}|); + +$obj = $pc->decode(q|{"id":"abc\\\ndef"}|); +is($obj->{id},"abc\\ndef",q|{"id":"abc\\\ndef"}|); + +$obj = $pc->decode(q|{"id":"abc\\\\\ndef"}|); +is($obj->{id},"abc\\\ndef",q|{"id":"abc\\\\\ndef"}|); + diff --git a/t/08_pc_base.t b/t/08_pc_base.t new file mode 100644 index 0000000..88d6ba5 --- /dev/null +++ b/t/08_pc_base.t @@ -0,0 +1,95 @@ +use Test::More; + +# copied over from JSON::PC and modified to use JSON::XS + +use strict; +BEGIN { plan tests => 20 }; +use JSON::XS; + +my ($js,$obj); + +my $pc = new JSON::XS; + +$js = q|{}|; + +$obj = $pc->decode($js); +$js = $pc->encode($obj); +is($js,'{}', '{}'); + +$js = q|[]|; +$obj = $pc->decode($js); +$js = $pc->encode($obj); +is($js,'[]', '[]'); + + +$js = q|{"foo":"bar"}|; +$obj = $pc->decode($js); +is($obj->{foo},'bar'); +$js = $pc->encode($obj); +is($js,'{"foo":"bar"}', '{"foo":"bar"}'); + +$js = q|{"foo":""}|; +$obj = $pc->decode($js); +$js = $pc->encode($obj); +is($js,'{"foo":""}', '{"foo":""}'); + +$js = q|{"foo":" "}|; +$obj = $pc->decode($js); +$js = $pc->encode($obj); +is($js,'{"foo":" "}' ,'{"foo":" "}'); + +$js = q|{"foo":"0"}|; +$obj = $pc->decode($js); +$js = $pc->encode($obj); +is($js,'{"foo":"0"}',q|{"foo":"0"} - autoencode (default)|); + + +$js = q|{"foo":"0 0"}|; +$obj = $pc->decode($js); +$js = $pc->encode($obj); +is($js,'{"foo":"0 0"}','{"foo":"0 0"}'); + +$js = q|[1,2,3]|; +$obj = $pc->decode($js); +is($obj->[1],2); +$js = $pc->encode($obj); +is($js,'[1,2,3]'); + +$js = q|{"foo":{"bar":"hoge"}}|; +$obj = $pc->decode($js); +is($obj->{foo}->{bar},'hoge'); +$js = $pc->encode($obj); +is($js,q|{"foo":{"bar":"hoge"}}|); + +$js = q|[{"foo":[1,2,3]},-0.12,{"a":"b"}]|; +$obj = $pc->decode($js); +$js = $pc->encode($obj); +is($js,q|[{"foo":[1,2,3]},-0.12,{"a":"b"}]|); + + +$obj = ["\x01"]; +is($js = $pc->encode($obj),'["\\u0001"]'); +$obj = $pc->decode($js); +is($obj->[0],"\x01"); + +$obj = ["\e"]; +is($js = $pc->encode($obj),'["\\u001b"]'); +$obj = $pc->decode($js); +is($obj->[0],"\e"); + +$js = '{"id":"}'; +eval q{ $pc->decode($js) }; +like($@, qr/unexpected end/i); + +$obj = { foo => sub { "bar" } }; +eval q{ $js = $pc->encode($obj) }; +like($@, qr/JSON can only/i, 'invalid value (coderef)'); + +#$obj = { foo => bless {}, "Hoge" }; +#eval q{ $js = $pc->encode($obj) }; +#like($@, qr/JSON can only/i, 'invalid value (blessd object)'); + +$obj = { foo => \$js }; +eval q{ $js = $pc->encode($obj) }; +like($@, qr/cannot encode reference/i, 'invalid value (ref)'); + diff --git 
a/t/09_pc_extra_number.t b/t/09_pc_extra_number.t new file mode 100644 index 0000000..0b59ba3 --- /dev/null +++ b/t/09_pc_extra_number.t @@ -0,0 +1,35 @@ +# copied over from JSON::PC and modified to use JSON::XS + +use Test::More; +use strict; +BEGIN { plan tests => 6 }; +use JSON::XS; +use utf8; + +######################### +my ($js,$obj); +my $pc = new JSON::XS; + +$js = '{"foo":0}'; +$obj = $pc->decode($js); +is($obj->{foo}, 0, "normal 0"); + +$js = '{"foo":0.1}'; +$obj = $pc->decode($js); +is($obj->{foo}, 0.1, "normal 0.1"); + + +$js = '{"foo":10}'; +$obj = $pc->decode($js); +is($obj->{foo}, 10, "normal 10"); + +$js = '{"foo":-10}'; +$obj = $pc->decode($js); +is($obj->{foo}, -10, "normal -10"); + + +$js = '{"foo":0, "bar":0.1}'; +$obj = $pc->decode($js); +is($obj->{foo},0, "normal 0"); +is($obj->{bar},0.1,"normal 0.1"); + diff --git a/t/10_pc_keysort.t b/t/10_pc_keysort.t new file mode 100644 index 0000000..ec6699b --- /dev/null +++ b/t/10_pc_keysort.t @@ -0,0 +1,16 @@ +# copied over from JSON::PC and modified to use JSON::XS + +use Test::More; +use strict; +BEGIN { plan tests => 1 }; +use JSON::XS; +######################### + +my ($js,$obj); +my $pc = JSON::XS->new->canonical(1); + +$obj = {a=>1, b=>2, c=>3, d=>4, e=>5, f=>6, g=>7, h=>8, i=>9}; + +$js = $pc->encode($obj); +is($js, q|{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6,"g":7,"h":8,"i":9}|); + diff --git a/t/11_pc_expo.t b/t/11_pc_expo.t new file mode 100644 index 0000000..357a873 --- /dev/null +++ b/t/11_pc_expo.t @@ -0,0 +1,36 @@ +# copied over from JSON::PC and modified to use JSON::XS + +use Test::More; +use strict; +BEGIN { plan tests => 8 }; +use JSON::XS; + +######################### +my ($js,$obj); +my $pc = new JSON::XS; + +$js = q|[-12.34]|; +$obj = $pc->decode($js); +is($obj->[0], -12.34, 'digit -12.34'); +$js = $pc->encode($obj); +is($js,'[-12.34]', 'digit -12.34'); + +$js = q|[-1.234e5]|; +$obj = $pc->decode($js); +is($obj->[0], -123400, 'digit -1.234e5'); +$js = $pc->encode($obj); +is($js,'[-123400]', 'digit -1.234e5'); + +$js = q|[1.23E-4]|; +$obj = $pc->decode($js); +is($obj->[0], 0.000123, 'digit 1.23E-4'); +$js = $pc->encode($obj); +is($js,'[0.000123]', 'digit 1.23E-4'); + + +$js = q|[1.01e+30]|; +$obj = $pc->decode($js); +is($obj->[0], 1.01e+30, 'digit 1.01e+30'); +$js = $pc->encode($obj); +like($js,qr/\[1.01[Ee]\+0?30\]/, 'digit 1.01e+30'); + diff --git a/t/12_blessed.t b/t/12_blessed.t new file mode 100644 index 0000000..c079c82 --- /dev/null +++ b/t/12_blessed.t @@ -0,0 +1,50 @@ +BEGIN { $| = 1; print "1..16\n"; } + +use JSON::XS; + +our $test; +sub ok($;$) { + print $_[0] ? 
"" : "not ", "ok ", ++$test, "\n"; +} + +my $o1 = bless { a => 3 }, "XX"; +my $o2 = bless \(my $dummy = 1), "YY"; + +sub XX::TO_JSON { + {__,""} +} + +my $js = JSON::XS->new; + +eval { $js->encode ($o1) }; ok ($@ =~ /allow_blessed/); +eval { $js->encode ($o2) }; ok ($@ =~ /allow_blessed/); +$js->allow_blessed; +ok ($js->encode ($o1) eq "null"); +ok ($js->encode ($o2) eq "null"); +$js->convert_blessed; +ok ($js->encode ($o1) eq '{"__":""}'); +ok ($js->encode ($o2) eq "null"); + +$js->filter_json_object (sub { 5 }); +$js->filter_json_single_key_object (a => sub { shift }); +$js->filter_json_single_key_object (b => sub { 7 }); + +ok ("ARRAY" eq ref $js->decode ("[]")); +ok (5 eq join ":", @{ $js->decode ('[{}]') }); +ok (6 eq join ":", @{ $js->decode ('[{"a":6}]') }); +ok (5 eq join ":", @{ $js->decode ('[{"a":4,"b":7}]') }); + +$js->filter_json_object; +ok (7 == $js->decode ('[{"a":4,"b":7}]')->[0]{b}); +ok (3 eq join ":", @{ $js->decode ('[{"a":3}]') }); + +$js->filter_json_object (sub { }); +ok (7 == $js->decode ('[{"a":4,"b":7}]')->[0]{b}); +ok (9 eq join ":", @{ $js->decode ('[{"a":9}]') }); + +$js->filter_json_single_key_object ("a"); +ok (4 == $js->decode ('[{"a":4}]')->[0]{a}); + +$js->filter_json_single_key_object (a => sub { }); +ok (4 == $js->decode ('[{"a":4}]')->[0]{a}); + diff --git a/t/13_limit.t b/t/13_limit.t new file mode 100644 index 0000000..4b5d5f2 --- /dev/null +++ b/t/13_limit.t @@ -0,0 +1,29 @@ +BEGIN { $| = 1; print "1..11\n"; } + +use JSON::XS; + +our $test; +sub ok($;$) { + print $_[0] ? "" : "not ", "ok ", ++$test, "\n"; +} + +my $def = 512; + +my $js = JSON::XS->new; + +ok (!eval { $js->decode (("[" x ($def + 1)) . ("]" x ($def + 1))) }); +ok (ref $js->decode (("[" x $def) . ("]" x $def))); +ok (ref $js->decode (("{\"\":" x ($def - 1)) . "[]" . ("}" x ($def - 1)))); +ok (!eval { $js->decode (("{\"\":" x $def) . "[]" . ("}" x $def)) }); + +ok (ref $js->max_depth (32)->decode (("[" x 32) . ("]" x 32))); + +ok ($js->max_depth(1)->encode ([])); +ok (!eval { $js->encode ([[]]), 1 }); + +ok ($js->max_depth(2)->encode ([{}])); +ok (!eval { $js->encode ([[{}]]), 1 }); + +ok (eval { ref $js->max_size (8)->decode ("[ ]") }); +eval { $js->max_size (8)->decode ("[ ]") }; ok ($@ =~ /max_size/); + diff --git a/t/14_latin1.t b/t/14_latin1.t new file mode 100644 index 0000000..c66217d --- /dev/null +++ b/t/14_latin1.t @@ -0,0 +1,12 @@ +BEGIN { $| = 1; print "1..4\n"; } + +use JSON::XS; + +my $xs = JSON::XS->new->latin1->allow_nonref; + +print $xs->encode ("\x{12}\x{89} ") eq "\"\\u0012\x{89} \"" ? "" : "not ", "ok 1\n"; +print $xs->encode ("\x{12}\x{89}\x{abc}") eq "\"\\u0012\x{89}\\u0abc\"" ? "" : "not ", "ok 2\n"; + +print $xs->decode ("\"\\u0012\x{89}\"" ) eq "\x{12}\x{89}" ? "" : "not ", "ok 3\n"; +print $xs->decode ("\"\\u0012\x{89}\\u0abc\"") eq "\x{12}\x{89}\x{abc}" ? "" : "not ", "ok 4\n"; + diff --git a/t/15_prefix.t b/t/15_prefix.t new file mode 100644 index 0000000..7955633 --- /dev/null +++ b/t/15_prefix.t @@ -0,0 +1,13 @@ +BEGIN { $| = 1; print "1..4\n"; } + +use JSON::XS; + +my $xs = JSON::XS->new->latin1->allow_nonref; + +eval { $xs->decode ("[] ") }; +print $@ ? "not " : "", "ok 1\n"; +eval { $xs->decode ("[] x") }; +print $@ ? "" : "not ", "ok 2\n"; +print 2 == ($xs->decode_prefix ("[][]"))[1] ? "" : "not ", "ok 3\n"; +print 3 == ($xs->decode_prefix ("[1] t"))[1] ? 
"" : "not ", "ok 4\n"; + diff --git a/t/16_tied.t b/t/16_tied.t new file mode 100644 index 0000000..b80a4b8 --- /dev/null +++ b/t/16_tied.t @@ -0,0 +1,22 @@ +BEGIN { $| = 1; print "1..2\n"; } + +use JSON::XS; +use Tie::Hash; +use Tie::Array; + +our $test; +sub ok($;$) { + print $_[0] ? "" : "not ", "ok ", ++$test, "\n"; +} + +my $js = JSON::XS->new; + +tie my %h, 'Tie::StdHash'; +%h = (a => 1); + +ok ($js->encode (\%h) eq '{"a":1}'); + +tie my @a, 'Tie::StdArray'; +@a = (1, 2); + +ok ($js->encode (\@a) eq '[1,2]'); diff --git a/t/17_relaxed.t b/t/17_relaxed.t new file mode 100644 index 0000000..9dbcfb7 --- /dev/null +++ b/t/17_relaxed.t @@ -0,0 +1,22 @@ +BEGIN { $| = 1; print "1..8\n"; } + +use utf8; +use JSON::XS; + +our $test; +sub ok($) { + print $_[0] ? "" : "not ", "ok ", ++$test, "\n"; +} + +my $json = JSON::XS->new->relaxed; + +ok ('[1,2,3]' eq encode_json $json->decode (' [1,2, 3]')); +ok ('[1,2,4]' eq encode_json $json->decode ('[1,2, 4 , ]')); +ok (!eval { $json->decode ('[1,2, 3,4,,]') }); +ok (!eval { $json->decode ('[,1]') }); + +ok ('{"1":2}' eq encode_json $json->decode (' {"1":2}')); +ok ('{"1":2}' eq encode_json $json->decode ('{"1":2,}')); +ok (!eval { $json->decode ('{,}') }); + +ok ('[1,2]' eq encode_json $json->decode ("[1#,2\n ,2,# ] \n\t]")); diff --git a/t/18_json_checker.t b/t/18_json_checker.t new file mode 100644 index 0000000..7f67365 --- /dev/null +++ b/t/18_json_checker.t @@ -0,0 +1,170 @@ +#! perl + +# use the testsuite from http://www.json.org/JSON_checker/ +# except for fail18.json, as we do not support a depth of 20 (but 16 and 32). + +use strict; +no warnings; +use Test::More; +BEGIN { plan tests => 39 }; + +use JSON::XS; + +# emulate JSON_checker default config +my $json = JSON::XS->new->utf8->max_depth(32)->canonical; + +binmode DATA; + +for (;;) { + $/ = "\n# "; + chomp (my $test = ) + or last; + $/ = "\n"; + my $name = ; + + if (my $perl = eval { $json->decode ($test) }) { + ok ($name =~ /^pass/, $name); + is ($json->encode ($json->decode ($json->encode ($perl))), $json->encode ($perl)); + } else { + ok ($name =~ /^fail/, "$name ($@)"); + } +} + +__DATA__ +"A JSON payload should be an object or array, not a string." 
+# fail1.json
+{"Extra value after close": true} "misplaced quoted value"
+# fail10.json
+{"Illegal expression": 1 + 2}
+# fail11.json
+{"Illegal invocation": alert()}
+# fail12.json
+{"Numbers cannot have leading zeroes": 013}
+# fail13.json
+{"Numbers cannot be hex": 0x14}
+# fail14.json
+["Illegal backslash escape: \x15"]
+# fail15.json
+[\naked]
+# fail16.json
+["Illegal backslash escape: \017"]
+# fail17.json
+[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
+# fail18.json
+{"Missing colon" null}
+# fail19.json
+["Unclosed array"
+# fail2.json
+{"Double colon":: null}
+# fail20.json
+{"Comma instead of colon", null}
+# fail21.json
+["Colon instead of comma": false]
+# fail22.json
+["Bad value", truth]
+# fail23.json
+['single quote']
+# fail24.json
+["	tab	character	in	string	"]
+# fail25.json
+["tab\ character\ in\ string\ "]
+# fail26.json
+["line
+break"]
+# fail27.json
+["line\
+break"]
+# fail28.json
+[0e]
+# fail29.json
+{unquoted_key: "keys must be quoted"}
+# fail3.json
+[0e+]
+# fail30.json
+[0e+-1]
+# fail31.json
+{"Comma instead if closing brace": true,
+# fail32.json
+["mismatch"}
+# fail33.json
+["extra comma",]
+# fail4.json
+["double extra comma",,]
+# fail5.json
+[ , "<-- missing value"]
+# fail6.json
+["Comma after the close"],
+# fail7.json
+["Extra close"]]
+# fail8.json
+{"Extra comma": true,}
+# fail9.json
+[
+ "JSON Test Pattern pass1",
+ {"object with 1 member":["array with 1 element"]},
+ {},
+ [],
+ -42,
+ true,
+ false,
+ null,
+ {
+ "integer": 1234567890,
+ "real": -9876.543210,
+ "e": 0.123456789e-12,
+ "E": 1.234567890E+34,
+ "": 23456789012E66,
+ "zero": 0,
+ "one": 1,
+ "space": " ",
+ "quote": "\"",
+ "backslash": "\\",
+ "controls": "\b\f\n\r\t",
+ "slash": "/ & \/",
+ "alpha": "abcdefghijklmnopqrstuvwyz",
+ "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+ "digit": "0123456789",
+ "0123456789": "digit",
+ "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
+ "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+ "true": true,
+ "false": false,
+ "null": null,
+ "array":[ ],
+ "object":{ },
+ "address": "50 St. James Street",
+ "url": "http://www.JSON.org/",
+ "comment": "// /* <!-- --",
+ "# -- --> */": " ",
+ " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7],
+ "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+ "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
+ "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+ },
+ 0.5 ,98.6
+,
+99.44
+,
+
+1066,
+1e1,
+0.1e1,
+1e-1,
+1e00,2e+00,2e-00
+,"rosebud"]
+# pass1.json
+[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
+# pass2.json
+{
+ "JSON Test Pattern pass3": {
+ "The outermost value": "must be an object or array.",
+ "In this test": "It is an object."
+ }
+}
+
+# pass3.json
diff --git a/t/19_incr.t b/t/19_incr.t
new file mode 100644
index 0000000..0acc6ca
--- /dev/null
+++ b/t/19_incr.t
@@ -0,0 +1,97 @@
+#! perl
+
+use strict;
+no warnings;
+use Test::More;
+BEGIN { plan tests => 697 };
+
+use JSON::XS;
+
+sub splitter {
+ my ($coder, $text) = @_;
+
+ # work around hash randomisation bug introduced in 5.18
+ $coder->canonical;
+
+ for (0 ..
length $text) { + my $a = substr $text, 0, $_; + my $b = substr $text, $_; + + $coder->incr_parse ($a); + $coder->incr_parse ($b); + + my $data = $coder->incr_parse; + ok ($data); + ok ($coder->encode ($data) eq $coder->encode ($coder->decode ($text)), "data"); + ok ($coder->incr_text =~ /^\s*$/, "tailws"); + } +} + +splitter +JSON::XS->new , ' ["x\\"","\\u1000\\\\n\\nx",1,{"\\\\" :5 , "": "x"}]'; +splitter +JSON::XS->new , '[ "x\\"","\\u1000\\\\n\\nx" , 1,{"\\\\ " :5 , "": " x"} ] '; +splitter +JSON::XS->new->allow_nonref, '"test"'; +splitter +JSON::XS->new->allow_nonref, ' "5" '; + +{ + my $text = '[5],{"":1} , [ 1,2, 3], {"3":null}'; + my $coder = new JSON::XS; + for (0 .. length $text) { + my $a = substr $text, 0, $_; + my $b = substr $text, $_; + + $coder->incr_parse ($a); + $coder->incr_parse ($b); + + my $j1 = $coder->incr_parse; ok ($coder->incr_text =~ s/^\s*,//, "cskip1"); + my $j2 = $coder->incr_parse; ok ($coder->incr_text =~ s/^\s*,//, "cskip2"); + my $j3 = $coder->incr_parse; ok ($coder->incr_text =~ s/^\s*,//, "cskip3"); + my $j4 = $coder->incr_parse; ok ($coder->incr_text !~ s/^\s*,//, "cskip4"); + my $j5 = $coder->incr_parse; ok ($coder->incr_text !~ s/^\s*,//, "cskip5"); + + ok ('[5]' eq encode_json $j1, "cjson1"); + ok ('{"":1}' eq encode_json $j2, "cjson2"); + ok ('[1,2,3]' eq encode_json $j3, "cjson3"); + ok ('{"3":null}' eq encode_json $j4, "cjson4"); + ok (!defined $j5, "cjson5"); + } +} + +{ + my $text = '[x][5]'; + my $coder = new JSON::XS; + $coder->incr_parse ($text); + ok (!eval { $coder->incr_parse }, "sparse1"); + ok (!eval { $coder->incr_parse }, "sparse2"); + $coder->incr_skip; + ok ('[5]' eq $coder->encode (scalar $coder->incr_parse), "sparse3"); +} + +{ + my $coder = JSON::XS->new->max_size (5); + ok (!$coder->incr_parse ("[ "), "incsize1"); + eval { !$coder->incr_parse ("] ") }; ok ($@ =~ /6 bytes/, "incsize2 $@"); +} + +{ + my $coder = JSON::XS->new->max_depth (3); + ok (!$coder->incr_parse ("[[["), "incdepth1"); + eval { !$coder->incr_parse (" [] ") }; ok ($@ =~ /maximum nesting/, "incdepth2 $@"); +} + +# contributed by yuval kogman, reformatted to fit style +{ + my $coder = JSON::XS->new; + + my $res = eval { $coder->incr_parse("]") }; + my $e = $@; # test more clobbers $@, we need it twice + + ok (!$res, "unbalanced bracket"); + ok ($e, "got error"); + like ($e, qr/malformed/, "malformed json string error"); + + $coder->incr_skip; + + is_deeply (eval { $coder->incr_parse("[42]") }, [42], "valid data after incr_skip"); +} + + diff --git a/t/20_faihu.t b/t/20_faihu.t new file mode 100644 index 0000000..cb9a6c6 --- /dev/null +++ b/t/20_faihu.t @@ -0,0 +1,28 @@ +#! 
perl + +# adapted from a test by Aristotle Pagaltzis (http://intertwingly.net/blog/2007/11/15/Astral-Plane-Characters-in-Json) + +use strict; +use warnings; + +use JSON::XS; +use Encode qw(encode decode); + +use Test::More tests => 3; + +my ($faihu, $faihu_json, $roundtrip, $js) = "\x{10346}"; + +$js = JSON::XS->new->allow_nonref->ascii; +$faihu_json = $js->encode($faihu); +$roundtrip = $js->decode($faihu_json); +is ($roundtrip, $faihu, 'JSON in ASCII roundtrips correctly'); + +$js = JSON::XS->new->allow_nonref->utf8; +$faihu_json = $js->encode ($faihu); +$roundtrip = $js->decode ($faihu_json); +is ($roundtrip, $faihu, 'JSON in UTF-8 roundtrips correctly'); + +$js = JSON::XS->new->allow_nonref; +$faihu_json = encode 'UTF-16BE', $js->encode ($faihu); +$roundtrip = $js->decode( decode 'UTF-16BE', $faihu_json); +is ($roundtrip, $faihu, 'JSON with external recoding roundtrips correctly' ); diff --git a/t/21_evans.t b/t/21_evans.t new file mode 100644 index 0000000..d574f57 --- /dev/null +++ b/t/21_evans.t @@ -0,0 +1,23 @@ +#! perl + +# adapted from a test by Martin Evans + +use strict; +use warnings; + +use JSON::XS; + +print "1..1\n"; + +my $data = ["\x{53f0}\x{6240}\x{306e}\x{6d41}\x{3057}", + "\x{6c60}\x{306e}\x{30ab}\x{30a8}\x{30eb}"]; +my $js = JSON::XS->new->encode ($data); +my $j = new JSON::XS; +my $object = $j->incr_parse ($js); + +die "no object" if !$object; + +eval { $j->incr_text }; + +print $@ ? "not " : "", "ok 1 # $@\n"; + diff --git a/t/22_comment_at_eof.t b/t/22_comment_at_eof.t new file mode 100644 index 0000000..7825ec5 --- /dev/null +++ b/t/22_comment_at_eof.t @@ -0,0 +1,46 @@ +# provided by IKEGAMI@cpan.org + +use strict; +use warnings; + +use Test::More tests => 13; + +use JSON::XS; + +use Data::Dumper qw( Dumper ); + +sub decoder { + my ($str) = @_; + + my $json = JSON::XS->new->relaxed; + + $json->incr_parse($_[0]); + + my $rv; + if (!eval { $rv = $json->incr_parse(); 1 }) { + $rv = "died with $@"; + } + + local $Data::Dumper::Useqq = 1; + local $Data::Dumper::Terse = 1; + local $Data::Dumper::Indent = 0; + + return Dumper($rv); +} + +is( decoder( "[]" ), '[]', 'array baseline' ); +is( decoder( " []" ), '[]', 'space ignored before array' ); +is( decoder( "\n[]" ), '[]', 'newline ignored before array' ); +is( decoder( "# foo\n[]" ), '[]', 'comment ignored before array' ); +is( decoder( "# fo[o\n[]"), '[]', 'comment ignored before array' ); +is( decoder( "# fo]o\n[]"), '[]', 'comment ignored before array' ); +is( decoder( "[# fo]o\n]"), '[]', 'comment ignored inside array' ); + +is( decoder( "" ), 'undef', 'eof baseline' ); +is( decoder( " " ), 'undef', 'space ignored before eof' ); +is( decoder( "\n" ), 'undef', 'newline ignored before eof' ); +is( decoder( "#,foo\n" ), 'undef', 'comment ignored before eof' ); +is( decoder( "# []o\n" ), 'undef', 'comment ignored before eof' ); + +is( decoder(qq/#\n[#foo\n"#\\n"#\n]/), '["#\n"]', 'array and string in multiple lines' ); + diff --git a/t/52_object.t b/t/52_object.t new file mode 100644 index 0000000..33f6afb --- /dev/null +++ b/t/52_object.t @@ -0,0 +1,52 @@ +BEGIN { $| = 1; print "1..20\n"; } +BEGIN { $^W = 0 } # hate + +use JSON::XS; + +$json = JSON::XS->new->convert_blessed->allow_tags->allow_nonref; + +print "ok 1\n"; + +sub JSON::XS::tojson::TO_JSON { + print @_ == 1 ? "" : "not ", "ok 3\n"; + print JSON::XS::tojson:: eq ref $_[0] ? "" : "not ", "ok 4\n"; + print $_[0]{k} == 1 ? 
"" : "not ", "ok 5\n"; + 7 +} + +$obj = bless { k => 1 }, JSON::XS::tojson::; + +print "ok 2\n"; + +$enc = $json->encode ($obj); +print $enc eq 7 ? "" : "not ", "ok 6 # $enc\n"; + +print "ok 7\n"; + +sub JSON::XS::freeze::FREEZE { + print @_ == 2 ? "" : "not ", "ok 8\n"; + print $_[1] eq "JSON" ? "" : "not ", "ok 9\n"; + print JSON::XS::freeze:: eq ref $_[0] ? "" : "not ", "ok 10\n"; + print $_[0]{k} == 1 ? "" : "not ", "ok 11\n"; + (3, 1, 2) +} + +sub JSON::XS::freeze::THAW { + print @_ == 5 ? "" : "not ", "ok 13\n"; + print JSON::XS::freeze:: eq $_[0] ? "" : "not ", "ok 14\n"; + print $_[1] eq "JSON" ? "" : "not ", "ok 15\n"; + print $_[2] == 3 ? "" : "not ", "ok 16\n"; + print $_[3] == 1 ? "" : "not ", "ok 17\n"; + print $_[4] == 2 ? "" : "not ", "ok 18\n"; + 777 +} + +$obj = bless { k => 1 }, JSON::XS::freeze::; +$enc = $json->encode ($obj); +print $enc eq '("JSON::XS::freeze")[3,1,2]' ? "" : "not ", "ok 12 # $enc\n"; + +$dec = $json->decode ($enc); +print $dec eq 777 ? "" : "not ", "ok 19\n"; + +print "ok 20\n"; + diff --git a/t/99_binary.t b/t/99_binary.t new file mode 100644 index 0000000..53d5a8b --- /dev/null +++ b/t/99_binary.t @@ -0,0 +1,42 @@ +BEGIN { $| = 1; print "1..24576\n"; } + +use JSON::XS; + +our $test; +sub ok($;$) { + print $_[0] ? "" : "not ", "ok ", ++$test, " - $_[1]\n"; +} + +sub test($) { + my $js; + + $js = JSON::XS->new->allow_nonref(0)->utf8->ascii->shrink->encode ([$_[0]]); + ok ($_[0] eq ((decode_json $js)->[0]), 0); + $js = JSON::XS->new->allow_nonref(0)->utf8->ascii->encode ([$_[0]]); + ok ($_[0] eq (JSON::XS->new->utf8->shrink->decode($js))->[0], 1); + + $js = JSON::XS->new->allow_nonref(0)->utf8->shrink->encode ([$_[0]]); + ok ($_[0] eq ((decode_json $js)->[0]), 2); + $js = JSON::XS->new->allow_nonref(1)->utf8->encode ([$_[0]]); + ok ($_[0] eq (JSON::XS->new->utf8->shrink->decode($js))->[0], 3); + + $js = JSON::XS->new->allow_nonref(1)->ascii->encode ([$_[0]]); + ok ($_[0] eq JSON::XS->new->decode ($js)->[0], 4); + $js = JSON::XS->new->allow_nonref(0)->ascii->encode ([$_[0]]); + ok ($_[0] eq JSON::XS->new->shrink->decode ($js)->[0], 5); + + $js = JSON::XS->new->allow_nonref(1)->shrink->encode ([$_[0]]); + ok ($_[0] eq JSON::XS->new->decode ($js)->[0], 6); + $js = JSON::XS->new->allow_nonref(0)->encode ([$_[0]]); + ok ($_[0] eq JSON::XS->new->shrink->decode ($js)->[0], 7); +} + +srand 0; # doesn't help too much, but its at least more deterministic + +for (1..768) { + test join "", map chr ($_ & 255), 0..$_; + test join "", map chr rand 255, 0..$_; + test join "", map chr ($_ * 97 & ~0x4000), 0..$_; + test join "", map chr (rand (2**20) & ~0x800), 0..$_; +} + diff --git a/typemap b/typemap new file mode 100644 index 0000000..a41c763 --- /dev/null +++ b/typemap @@ -0,0 +1,15 @@ +JSON * T_JSON + +INPUT + +T_JSON + if (!( + SvROK ($arg) + && SvOBJECT (SvRV ($arg)) + && (SvSTASH (SvRV ($arg)) == JSON_STASH || sv_derived_from ($arg, \"JSON::XS\")) + )) + croak (\"object is not of type JSON::XS\"); + /**/ + $var = (JSON *)SvPVX (SvRV ($arg)); + +