diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/s3select/rapidjson/doc | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
54 files changed, 12589 insertions, 0 deletions
diff --git a/src/s3select/rapidjson/doc/CMakeLists.txt b/src/s3select/rapidjson/doc/CMakeLists.txt new file mode 100644 index 000000000..c5345ba69 --- /dev/null +++ b/src/s3select/rapidjson/doc/CMakeLists.txt @@ -0,0 +1,27 @@ +find_package(Doxygen) + +IF(NOT DOXYGEN_FOUND) + MESSAGE(STATUS "No Doxygen found. Documentation won't be built") +ELSE() + file(GLOB SOURCES ${CMAKE_CURRENT_LIST_DIR}/../include/*) + file(GLOB MARKDOWN_DOC ${CMAKE_CURRENT_LIST_DIR}/../doc/*.md) + list(APPEND MARKDOWN_DOC ${CMAKE_CURRENT_LIST_DIR}/../readme.md) + + CONFIGURE_FILE(Doxyfile.in Doxyfile @ONLY) + CONFIGURE_FILE(Doxyfile.zh-cn.in Doxyfile.zh-cn @ONLY) + + file(GLOB DOXYFILES ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile*) + + add_custom_command(OUTPUT html + COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile + COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile.zh-cn + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/html + DEPENDS ${MARKDOWN_DOC} ${SOURCES} ${DOXYFILES} + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../ + ) + + add_custom_target(doc ALL DEPENDS html) + install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html + DESTINATION ${DOC_INSTALL_DIR} + COMPONENT doc) +ENDIF() diff --git a/src/s3select/rapidjson/doc/Doxyfile.in b/src/s3select/rapidjson/doc/Doxyfile.in new file mode 100644 index 000000000..6e79f9371 --- /dev/null +++ b/src/s3select/rapidjson/doc/Doxyfile.in @@ -0,0 +1,2369 @@ +# Doxyfile 1.8.7 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). 
+ +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = RapidJSON + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give the viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "A fast JSON parser/generator for C++ with both SAX/DOM style API" + +# With the PROJECT_LOGO tag one can specify a logo or icon that is included in +# the documentation. The maximum height of the logo should not exceed 55 pixels +# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo +# to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. 
If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@ + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise cause +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. 
+ +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. 
+ +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path +# before file names in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful if your file system doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. 
+ +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a +# new page for each member. If set to NO, the documentation of a member will be +# part of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". 
You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. 
In the latter case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibility issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word +# or globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. 
+# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen replace the get and set methods with a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. 
+# The default value is: NO. + +INLINE_GROUPED_CLASSES = YES + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. 
+ +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO only methods in the interface are +# included. +# The default value is: NO. 
+ +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespaces +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO these classes will be included in the various overviews. This option has +# no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. 
+ +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. +# The default value is: YES. 
+ +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. 
+ +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the +# todo list. This list is created by putting \todo commands in the +# documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the +# test list. This list is created by putting \test commands in the +# documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if <section_label> ... \endif and \cond <section_label> +# ... \endcond blocks. + +ENABLED_SECTIONS = $(RAPIDJSON_SECTIONS) + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. 
Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES the list +# will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = NO + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. 
You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. Do not use file names with spaces, bibtex cannot handle them. See +# also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. 
+ +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO doxygen will only warn about wrong or incomplete parameter +# documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. +# Note: If this tag is empty the current directory is searched. 
+ +INPUT = readme.md \ + CHANGELOG.md \ + include/rapidjson/rapidjson.h \ + include/ \ + doc/features.md \ + doc/tutorial.md \ + doc/pointer.md \ + doc/stream.md \ + doc/encoding.md \ + doc/dom.md \ + doc/sax.md \ + doc/schema.md \ + doc/performance.md \ + doc/internals.md \ + doc/faq.md + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank the +# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, +# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, +# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, +# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, +# *.qsf, *.as and *.js. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.inc \ + *.md + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. 
+ +EXCLUDE = ./include/rapidjson/msinttypes/ + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = internal + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. 
+ +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = ./doc + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER ) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. 
A pattern will override the setting for FILTER_PATTERNS (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = readme.md + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = NO + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed.
+# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES, then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified.
Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES, then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance. This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# compiled with the --with-libclang option. +# The default value is: NO. + +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = NO + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. 
The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. 
For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = ./doc/misc/header.html + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = ./doc/misc/footer.html + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- +# defined cascading style sheet that is included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet file to the output directory. For an example +# see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_EXTRA_STYLESHEET = ./doc/misc/doxygenextra.css + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the stylesheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. 
+# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries to 1 will produce a fully collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a fully expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory.
Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler ( hhc.exe). If non-empty +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated ( +# YES) or that it should be included in the master .chm file ( NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated ( +# YES) or a normal table of contents ( NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. 
+# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = YES + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = YES + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES.
+ +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using prerendered bitmaps. Use this if you do not have LaTeX +# installed or if you want the formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://www.mathjax.org/mathjax + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering.
For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use <access key> + S +# (what the <access key> is depends on the OS and browser, but it is typically +# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down +# key> to jump into the search results window, the results can be navigated +# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel +# the search. The filter options can be selected when the cursor is inside the +# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys> +# to select a filter and <Enter> or <escape> to activate or cancel the filter +# option. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a web server instead of a web client using Javascript. 
There +# are two flavors of web server based searching depending on the EXTERNAL_SEARCH +# setting. When disabled, doxygen will generate a PHP script for searching and +# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing +# and searching needs to be provided by external tools. See the section +# "External Indexing and Searching" for details. +# The default value is: NO. +# This tag requires that the tag SEARCHENGINE is set to YES. + +SERVER_BASED_SEARCH = NO + +# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP +# script for searching. Instead the search results are written to an XML file +# which needs to be processed by an external indexer. Doxygen will invoke an +# external search engine pointed to by the SEARCHENGINE_URL option to obtain the +# search results. +# +# Doxygen ships with an example indexer ( doxyindexer) and search engine +# (doxysearch.cgi) which are based on the open source search engine library +# Xapian (see: http://xapian.org/). +# +# See the section "External Indexing and Searching" for details. +# The default value is: NO. +# This tag requires that the tag SEARCHENGINE is set to YES. + +EXTERNAL_SEARCH = NO + +# The SEARCHENGINE_URL should point to a search engine hosted by a web server +# which will return the search results when EXTERNAL_SEARCH is enabled. +# +# Doxygen ships with an example indexer ( doxyindexer) and search engine +# (doxysearch.cgi) which are based on the open source search engine library +# Xapian (see: http://xapian.org/). See the section "External Indexing and +# Searching" for details. +# This tag requires that the tag SEARCHENGINE is set to YES. + +SEARCHENGINE_URL = + +# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed +# search data is written to a file for indexing by an external tool. With the +# SEARCHDATA_FILE tag the name of this file can be specified. +# The default file is: searchdata.xml. 
+# This tag requires that the tag SEARCHENGINE is set to YES. + +SEARCHDATA_FILE = searchdata.xml + +# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the +# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is +# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple +# projects and redirect the results back to the right project. +# This tag requires that the tag SEARCHENGINE is set to YES. + +EXTERNAL_SEARCH_ID = + +# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen +# projects other than the one defined by this configuration file, but that are +# all added to the same external search index. Each project needs to have a +# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id of +# to a relative location where the documentation can be found. The format is: +# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ... +# This tag requires that the tag SEARCHENGINE is set to YES. + +EXTRA_SEARCH_MAPPINGS = + +#--------------------------------------------------------------------------- +# Configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output. +# The default value is: YES. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: latex. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. +# +# Note that when enabling USE_PDFLATEX this option is only used for generating +# bitmaps for formulas in the HTML output, but not in the Makefile that is +# written to the output directory. +# The default file is: latex. 
+# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate +# index for LaTeX. +# The default file is: makeindex. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX +# documents. This may be useful for small projects and may help to save some +# trees in general. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used by the +# printer. +# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x +# 14 inches) and executive (7.25 x 10.5 inches). +# The default value is: a4. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names +# that should be included in the LaTeX output. To get the times font for +# instance you can specify +# EXTRA_PACKAGES=times +# If left blank no extra packages will be included. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the +# generated LaTeX document. The header should contain everything until the first +# chapter. If it is left blank doxygen will generate a standard header. See +# section "Doxygen usage" for information on how to let doxygen write the +# default header to a separate file. +# +# Note: Only use a user-defined header if you know what you are doing! The +# following commands have a special meaning inside the header: $title, +# $datetime, $date, $doxygenversion, $projectname, $projectnumber. 
Doxygen will +# replace them by respectively the title of the page, the current date and time, +# only the current date, the version number of doxygen, the project name (see +# PROJECT_NAME), or the project number (see PROJECT_NUMBER). +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the +# generated LaTeX document. The footer should contain everything after the last +# chapter. If it is left blank doxygen will generate a standard footer. +# +# Note: Only use a user-defined footer if you know what you are doing! +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_FOOTER = + +# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the LATEX_OUTPUT output +# directory. Note that the files will be copied as-is; there are no commands or +# markers available. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_EXTRA_FILES = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is +# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will +# contain links (just like the HTML output) instead of page references. This +# makes the output suitable for online browsing using a PDF viewer. +# The default value is: YES. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate +# the PDF file directly from the LaTeX files. Set this option to YES to get a +# higher quality PDF documentation. +# The default value is: YES. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode +# command to the generated LaTeX files.
This will instruct LaTeX to keep running +# if errors occur, instead of asking the user for help. This option is also used +# when generating formulas in HTML. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_BATCHMODE = NO + +# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the +# index chapters (such as File Index, Compound Index, etc.) in the output. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_HIDE_INDICES = NO + +# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source +# code with syntax highlighting in the LaTeX output. +# +# Note that which sources are shown also depends on other settings such as +# SOURCE_BROWSER. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. See +# http://en.wikipedia.org/wiki/BibTeX and \cite for more info. +# The default value is: plain. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_BIB_STYLE = plain + +#--------------------------------------------------------------------------- +# Configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The +# RTF output is optimized for Word 97 and may not look too pretty with other RTF +# readers/editors. +# The default value is: NO. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: rtf. +# This tag requires that the tag GENERATE_RTF is set to YES. 
+ +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF +# documents. This may be useful for small projects and may help to save some +# trees in general. +# The default value is: NO. +# This tag requires that the tag GENERATE_RTF is set to YES. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will +# contain hyperlink fields. The RTF file will contain links (just like the HTML +# output) instead of page references. This makes the output suitable for online +# browsing using Word or some other Word compatible readers that support those +# fields. +# +# Note: WordPad (write) and others do not support links. +# The default value is: NO. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's config +# file, i.e. a series of assignments. You only have to provide replacements, +# missing definitions are set to their default value. +# +# See also section "Doxygen usage" for information on how to generate the +# default style sheet that doxygen normally uses. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an RTF document. Syntax is +# similar to doxygen's config file. A template extensions file can be generated +# using doxygen -e rtf extensionFile. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for +# classes and files. +# The default value is: NO. + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. A directory man3 will be created inside the directory specified by +# MAN_OUTPUT. +# The default directory is: man. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to the generated +# man pages. In case the manual section does not start with a number, the number +# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is +# optional. +# The default value is: .3. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_EXTENSION = .3 + +# The MAN_SUBDIR tag determines the name of the directory created within +# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by +# MAN_EXTENSION with the initial . removed. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_SUBDIR = + +# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it +# will generate one additional man file for each entity documented in the real +# man page(s). These additional files only source the real man page, but without +# them the man command would be unable to find the correct page. +# The default value is: NO. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that +# captures the structure of the code including all documentation. +# The default value is: NO. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES. + +XML_OUTPUT = xml + +# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program +# listings (including syntax highlighting and cross-referencing information) to +# the XML output. Note that enabling this will significantly increase the size +# of the XML output. +# The default value is: YES. +# This tag requires that the tag GENERATE_XML is set to YES. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the DOCBOOK output +#--------------------------------------------------------------------------- + +# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files +# that can be used to generate PDF. +# The default value is: NO. + +GENERATE_DOCBOOK = NO + +# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in +# front of it. +# The default directory is: docbook. +# This tag requires that the tag GENERATE_DOCBOOK is set to YES. + +DOCBOOK_OUTPUT = docbook + +#--------------------------------------------------------------------------- +# Configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen +# Definitions (see http://autogen.sf.net) file that captures the structure of +# the code including all documentation. Note that this feature is still +# experimental and incomplete at the moment. +# The default value is: NO. 
+ +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module +# file that captures the structure of the code including all documentation. +# +# Note that this feature is still experimental and incomplete at the moment. +# The default value is: NO. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary +# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI +# output from the Perl module output. +# The default value is: NO. +# This tag requires that the tag GENERATE_PERLMOD is set to YES. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely +# formatted so it can be parsed by a human reader. This is useful if you want to +# understand what is going on. On the other hand, if this tag is set to NO the +# size of the Perl module output will be much smaller and Perl will parse it +# just the same. +# The default value is: YES. +# This tag requires that the tag GENERATE_PERLMOD is set to YES. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file are +# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful +# so different doxyrules.make files included by the same Makefile don't +# overwrite each other's variables. +# This tag requires that the tag GENERATE_PERLMOD is set to YES. 
+ +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all +# C-preprocessor directives found in the sources and include files. +# The default value is: YES. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names +# in the source code. If set to NO only conditional compilation will be +# performed. Macro expansion can be done in a controlled way by setting +# EXPAND_ONLY_PREDEF to YES. +# The default value is: NO. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +MACRO_EXPANSION = YES + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then +# the macro expansion is limited to the macros specified with the PREDEFINED and +# EXPAND_AS_DEFINED tags. +# The default value is: NO. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +EXPAND_ONLY_PREDEF = YES + +# If the SEARCH_INCLUDES tag is set to YES the includes files in the +# INCLUDE_PATH will be searched if a #include is found. +# The default value is: YES. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by the +# preprocessor. +# This tag requires that the tag SEARCH_INCLUDES is set to YES. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will be +# used. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
+ +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that are +# defined before the preprocessor is started (similar to the -D option of e.g. +# gcc). The argument of the tag is a list of macros of the form: name or +# name=definition (no spaces). If the definition and the "=" are omitted, "=1" +# is assumed. To prevent a macro definition from being undefined via #undef or +# recursively expanded use the := operator instead of the = operator. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +PREDEFINED = \ + RAPIDJSON_DOXYGEN_RUNNING \ + RAPIDJSON_NAMESPACE_BEGIN="namespace rapidjson {" \ + RAPIDJSON_NAMESPACE_END="}" \ + RAPIDJSON_REMOVEFPTR_(x)=x \ + RAPIDJSON_ENABLEIF_RETURN(cond,returntype)="RAPIDJSON_REMOVEFPTR_ returntype" \ + RAPIDJSON_DISABLEIF_RETURN(cond,returntype)="RAPIDJSON_REMOVEFPTR_ returntype" + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this +# tag can be used to specify a list of macro names that should be expanded. The +# macro definition that is found in the sources will be used. Use the PREDEFINED +# tag if you want to use a different macro definition that overrules the +# definition found in the source code. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +EXPAND_AS_DEFINED = \ + RAPIDJSON_NOEXCEPT + +# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will +# remove all references to function-like macros that are alone on a line, have +# an all uppercase name, and do not end with a semicolon. Such function macros +# are typically used for boiler-plate code, and will confuse the parser if not +# removed. +# The default value is: YES. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
+ +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES tag can be used to specify one or more tag files. For each tag +# file the location of the external documentation should be added. The format of +# a tag file without this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where loc1 and loc2 can be relative or absolute paths or URLs. See the +# section "Linking to external documentation" for more information about the use +# of tag files. +# Note: Each tag file must have a unique name (where the name does NOT include +# the path). If a tag file is not located in the directory in which doxygen is +# run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create a +# tag file that is based on the input files it reads. See section "Linking to +# external documentation" for more information about the usage of tag files. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed in the +# class index. If set to NO only the inherited external classes will be listed. +# The default value is: NO. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in +# the modules index. If set to NO, only the current project's groups will be +# listed. +# The default value is: YES. + +EXTERNAL_GROUPS = YES + +# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in +# the related pages index. If set to NO, only the current project's pages will +# be listed. +# The default value is: YES.
+ +EXTERNAL_PAGES = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of 'which perl'). +# The default file (with absolute path) is: /usr/bin/perl. + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram +# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to +# NO turns the diagrams off. Note that this option also works with HAVE_DOT +# disabled, but it is recommended to install and use dot, since it yields more +# powerful graphs. +# The default value is: YES. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see: +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# You can include diagrams made with dia in doxygen documentation. Doxygen will +# then run dia to produce the diagram and insert it in the documentation. The +# DIA_PATH tag allows you to specify the directory where the dia binary resides. +# If left empty dia is assumed to be found in the default search path. + +DIA_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide inheritance +# and usage relations if the target is undocumented or is not a class. +# The default value is: YES. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path.
This tool is part of Graphviz (see: +# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent +# Bell Labs. The other options in this section have no effect if this option is +# set to NO. +# The default value is: NO. + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed +# to run in parallel. When set to 0 doxygen will base this on the number of +# processors available in the system. You can set it explicitly to a value +# larger than 0 to get control over the balance between CPU load and processing +# speed. +# Minimum value: 0, maximum value: 32, default value: 0. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_NUM_THREADS = 0 + +# When you want a differently looking font in the dot files that doxygen +# generates you can specify the font name using DOT_FONTNAME. You need to make +# sure dot is able to find the font, which can be done by putting it in a +# standard location or by setting the DOTFONTPATH environment variable or by +# setting DOT_FONTPATH to the directory containing the font. +# The default value is: Helvetica. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of +# dot graphs. +# Minimum value: 4, maximum value: 24, default value: 10. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the default font as specified with +# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set +# the path where dot can find it using this tag. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_FONTPATH = + +# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for +# each documented class showing the direct and indirect inheritance relations. +# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a +# graph for each documented class showing the direct and indirect implementation +# dependencies (inheritance, containment, and class references variables) of the +# class with other documented classes. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for +# groups, showing the direct groups dependencies. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +UML_LOOK = NO + +# If the UML_LOOK tag is enabled, the fields and methods are shown inside the +# class node. If there are many fields or methods and many nodes the graph may +# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the +# number of items for each type to make the size more manageable. Set this to 0 +# for no limit. Note that the threshold may be exceeded by 50% before the limit +# is enforced. So when you set the threshold to 10, up to 15 fields may appear, +# but if the number exceeds 15, the total amount of fields shown is limited to +# 10. +# Minimum value: 0, maximum value: 100, default value: 10. +# This tag requires that the tag HAVE_DOT is set to YES. + +UML_LIMIT_NUM_FIELDS = 10 + +# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and +# collaboration graphs will show the relations between templates and their +# instances. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +TEMPLATE_RELATIONS = NO + +# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to +# YES then doxygen will generate a graph for each documented file showing the +# direct and indirect include dependencies of the file with other documented +# files. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +INCLUDE_GRAPH = YES + +# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are +# set to YES then doxygen will generate a graph for each documented file showing +# the direct and indirect include dependencies of the file with other documented +# files. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH tag is set to YES then doxygen will generate a call +# dependency graph for every global function or class method. +# +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller +# dependency graph for every global function or class method. +# +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable caller graphs for selected +# functions only using the \callergraph command. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a graphical +# hierarchy of all classes instead of a textual one. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the +# dependencies a directory has on other directories in a graphical way. The +# dependency relations are determined by the #include relations between the +# files in the directories. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. +# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order +# to make the SVG files visible in IE 9+ (other browsers do not have this +# requirement). +# Possible values are: png, jpg, gif and svg. +# The default value is: png. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# +# Note that this requires a modern browser other than Internet Explorer. Tested +# and working are Firefox, Chrome, Safari, and Opera. +# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make +# the SVG files visible. Older versions of IE do not have SVG support. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +INTERACTIVE_SVG = NO + +# The DOT_PATH tag can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the \dotfile +# command). +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the \mscfile +# command). + +MSCFILE_DIRS = + +# The DIAFILE_DIRS tag can be used to specify one or more directories that +# contain dia files that are included in the documentation (see the \diafile +# command). + +DIAFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes +# that will be shown in the graph. If the number of nodes in a graph becomes +# larger than this value, doxygen will truncate the graph, which is visualized +# by representing a node as a red box. Note that if the number of direct +# children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that +# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. +# Minimum value: 0, maximum value: 10000, default value: 50. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs +# generated by dot. A depth value of 3 means that only nodes reachable from the +# root by following a path via at most 3 edges will be shown. Nodes that lie +# further from the root node will be omitted. Note that setting this option to 1 +# or 2 may greatly reduce the computation time needed for large code bases. Also +# note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. +# Minimum value: 0, maximum value: 1000, default value: 0. +# This tag requires that the tag HAVE_DOT is set to YES. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not seem +# to support this out of the box. 
+# +# Warning: Depending on the platform used, enabling this option may lead to +# badly anti-aliased labels on the edges of a graph (i.e. they become hard to +# read). +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) support +# this, this feature is disabled by default. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page +# explaining the meaning of the various boxes and arrows in the dot generated +# graphs. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot +# files that are used to generate the various graphs. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_CLEANUP = YES diff --git a/src/s3select/rapidjson/doc/Doxyfile.zh-cn.in b/src/s3select/rapidjson/doc/Doxyfile.zh-cn.in new file mode 100644 index 000000000..6a08f72d6 --- /dev/null +++ b/src/s3select/rapidjson/doc/Doxyfile.zh-cn.in @@ -0,0 +1,2369 @@ +# Doxyfile 1.8.7 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). 
+ +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = RapidJSON + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "一个C++快速JSON解析器及生成器,包含SAX/DOM风格API" + +# With the PROJECT_LOGO tag one can specify an logo or icon that is included in +# the documentation. The maximum height of the logo should not exceed 55 pixels +# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo +# to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. 
If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@ + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise cause +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. 
+ +OUTPUT_LANGUAGE = Chinese + +# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. 
+ +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path +# before each file name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used. +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful if your file system doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. 
+ +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a +# new page for each member. If set to NO, the documentation of a member will be +# part of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". 
You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. 
In the latter case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibility issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word +# or globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. 
+# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. 
+# The default value is: NO. + +INLINE_GROUPED_CLASSES = YES + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. 
+ +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO only methods in the interface are +# included. +# The default value is: NO. 
+ +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO these classes will be included in the various overviews. This option has +# no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. 
+ +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. +# The default value is: YES. 
+ +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. 
+ +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the +# todo list. This list is created by putting \todo commands in the +# documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the +# test list. This list is created by putting \test commands in the +# documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if <section_label> ... \endif and \cond <section_label> +# ... \endcond blocks. + +ENABLED_SECTIONS = $(RAPIDJSON_SECTIONS) + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. 
Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES the list +# will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = NO + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. 
You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. Do not use file names with spaces, bibtex cannot handle them. See +# also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. 
+ +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO doxygen will only warn about wrong or incomplete parameter +# documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. +# Note: If this tag is empty the current directory is searched. 
+ +INPUT = readme.zh-cn.md \ + CHANGELOG.md \ + include/rapidjson/rapidjson.h \ + include/ \ + doc/features.zh-cn.md \ + doc/tutorial.zh-cn.md \ + doc/pointer.zh-cn.md \ + doc/stream.zh-cn.md \ + doc/encoding.zh-cn.md \ + doc/dom.zh-cn.md \ + doc/sax.zh-cn.md \ + doc/schema.zh-cn.md \ + doc/performance.zh-cn.md \ + doc/internals.zh-cn.md \ + doc/faq.zh-cn.md + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank the +# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, +# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, +# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, +# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, +# *.qsf, *.as and *.js. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.inc \ + *.md + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. 
+ +EXCLUDE = ./include/rapidjson/msinttypes/ + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# ANamespace::AClass, ANamespace::*Test +# +# Note that, unlike EXCLUDE_PATTERNS, the wildcards here are matched against +# symbol names rather than file paths, so a path pattern such as */test/* does +# not apply; use a form like ANamespace::*Test instead. + +EXCLUDE_SYMBOLS = internal + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. 
+ +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = ./doc + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER ) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. 
A pattern will override the setting for FILTER_PATTERNS (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = readme.zh-cn.md + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = NO + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. 
+# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES, then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen's built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. 
Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES, then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance. This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# compiled with the --with-libclang option. +# The default value is: NO. + +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = NO + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. 
The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output. +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html/zh-cn + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file must include any scripts and style sheets +# that doxygen needs, which depend on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. 
For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = ./doc/misc/header.html + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = ./doc/misc/footer.html + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- +# defined cascading style sheet that is included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet file to the output directory. For an example +# see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_EXTRA_STYLESHEET = ./doc/misc/doxygenextra.css + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the stylesheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. 
+# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. 
Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler ( hhc.exe). If non-empty +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated ( +# YES) or that it should be included in the master .chm file ( NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated ( +# YES) or a normal table of contents ( NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. 
+# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = YES + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = YES + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes take effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using prerendered bitmaps. Use this if you do not have LaTeX +# installed or if you want the formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://www.mathjax.org/mathjax + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. 
For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use <access key> + S +# (what the <access key> is depends on the OS and browser, but it is typically +# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down +# key> to jump into the search results window, the results can be navigated +# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel +# the search. The filter options can be selected when the cursor is inside the +# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys> +# to select a filter and <Enter> or <escape> to activate or cancel the filter +# option. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a web server instead of a web client using Javascript. 
There +# are two flavors of web server based searching depending on the EXTERNAL_SEARCH +# setting. When disabled, doxygen will generate a PHP script for searching and +# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing +# and searching needs to be provided by external tools. See the section +# "External Indexing and Searching" for details. +# The default value is: NO. +# This tag requires that the tag SEARCHENGINE is set to YES. + +SERVER_BASED_SEARCH = NO + +# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP +# script for searching. Instead the search results are written to an XML file +# which needs to be processed by an external indexer. Doxygen will invoke an +# external search engine pointed to by the SEARCHENGINE_URL option to obtain the +# search results. +# +# Doxygen ships with an example indexer ( doxyindexer) and search engine +# (doxysearch.cgi) which are based on the open source search engine library +# Xapian (see: http://xapian.org/). +# +# See the section "External Indexing and Searching" for details. +# The default value is: NO. +# This tag requires that the tag SEARCHENGINE is set to YES. + +EXTERNAL_SEARCH = NO + +# The SEARCHENGINE_URL should point to a search engine hosted by a web server +# which will return the search results when EXTERNAL_SEARCH is enabled. +# +# Doxygen ships with an example indexer ( doxyindexer) and search engine +# (doxysearch.cgi) which are based on the open source search engine library +# Xapian (see: http://xapian.org/). See the section "External Indexing and +# Searching" for details. +# This tag requires that the tag SEARCHENGINE is set to YES. + +SEARCHENGINE_URL = + +# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed +# search data is written to a file for indexing by an external tool. With the +# SEARCHDATA_FILE tag the name of this file can be specified. +# The default file is: searchdata.xml. 
+# This tag requires that the tag SEARCHENGINE is set to YES. + +SEARCHDATA_FILE = searchdata.xml + +# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the +# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is +# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple +# projects and redirect the results back to the right project. +# This tag requires that the tag SEARCHENGINE is set to YES. + +EXTERNAL_SEARCH_ID = + +# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen +# projects other than the one defined by this configuration file, but that are +# all added to the same external search index. Each project needs to have a +# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id +# to a relative location where the documentation can be found. The format is: +# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ... +# This tag requires that the tag SEARCHENGINE is set to YES. + +EXTRA_SEARCH_MAPPINGS = + +#--------------------------------------------------------------------------- +# Configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output. +# The default value is: YES. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: latex. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. +# +# Note that when enabling USE_PDFLATEX this option is only used for generating +# bitmaps for formulas in the HTML output, but not in the Makefile that is +# written to the output directory. +# The default file is: latex. 
+# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate +# index for LaTeX. +# The default file is: makeindex. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX +# documents. This may be useful for small projects and may help to save some +# trees in general. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used by the +# printer. +# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x +# 14 inches) and executive (7.25 x 10.5 inches). +# The default value is: a4. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names +# that should be included in the LaTeX output. To get the times font for +# instance you can specify +# EXTRA_PACKAGES=times +# If left blank no extra packages will be included. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the +# generated LaTeX document. The header should contain everything until the first +# chapter. If it is left blank doxygen will generate a standard header. See +# section "Doxygen usage" for information on how to let doxygen write the +# default header to a separate file. +# +# Note: Only use a user-defined header if you know what you are doing! The +# following commands have a special meaning inside the header: $title, +# $datetime, $date, $doxygenversion, $projectname, $projectnumber. 
Doxygen will +# replace them by respectively the title of the page, the current date and time, +# only the current date, the version number of doxygen, the project name (see +# PROJECT_NAME), or the project number (see PROJECT_NUMBER). +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the +# generated LaTeX document. The footer should contain everything after the last +# chapter. If it is left blank doxygen will generate a standard footer. +# +# Note: Only use a user-defined footer if you know what you are doing! +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_FOOTER = + +# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the LATEX_OUTPUT output +# directory. Note that the files will be copied as-is; there are no commands or +# markers available. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_EXTRA_FILES = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is +# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will +# contain links (just like the HTML output) instead of page references. This +# makes the output suitable for online browsing using a PDF viewer. +# The default value is: YES. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate +# the PDF file directly from the LaTeX files. Set this option to YES to get a +# higher quality PDF documentation. +# The default value is: YES. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode +# command to the generated LaTeX files. 
This will instruct LaTeX to keep running +# if errors occur, instead of asking the user for help. This option is also used +# when generating formulas in HTML. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_BATCHMODE = NO + +# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the +# index chapters (such as File Index, Compound Index, etc.) in the output. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_HIDE_INDICES = NO + +# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source +# code with syntax highlighting in the LaTeX output. +# +# Note that which sources are shown also depends on other settings such as +# SOURCE_BROWSER. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. See +# http://en.wikipedia.org/wiki/BibTeX and \cite for more info. +# The default value is: plain. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_BIB_STYLE = plain + +#--------------------------------------------------------------------------- +# Configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The +# RTF output is optimized for Word 97 and may not look too pretty with other RTF +# readers/editors. +# The default value is: NO. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: rtf. +# This tag requires that the tag GENERATE_RTF is set to YES. 
+ +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF +# documents. This may be useful for small projects and may help to save some +# trees in general. +# The default value is: NO. +# This tag requires that the tag GENERATE_RTF is set to YES. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will +# contain hyperlink fields. The RTF file will contain links (just like the HTML +# output) instead of page references. This makes the output suitable for online +# browsing using Word or some other Word compatible readers that support those +# fields. +# +# Note: WordPad (write) and others do not support links. +# The default value is: NO. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's config +# file, i.e. a series of assignments. You only have to provide replacements, +# missing definitions are set to their default value. +# +# See also section "Doxygen usage" for information on how to generate the +# default style sheet that doxygen normally uses. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an RTF document. Syntax is +# similar to doxygen's config file. A template extensions file can be generated +# using doxygen -e rtf extensionFile. +# This tag requires that the tag GENERATE_RTF is set to YES. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for +# classes and files. +# The default value is: NO. + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. A directory man3 will be created inside the directory specified by +# MAN_OUTPUT. +# The default directory is: man. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to the generated +# man pages. In case the manual section does not start with a number, the number +# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is +# optional. +# The default value is: .3. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_EXTENSION = .3 + +# The MAN_SUBDIR tag determines the name of the directory created within +# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by +# MAN_EXTENSION with the initial . removed. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_SUBDIR = + +# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it +# will generate one additional man file for each entity documented in the real +# man page(s). These additional files only source the real man page, but without +# them the man command would be unable to find the correct page. +# The default value is: NO. +# This tag requires that the tag GENERATE_MAN is set to YES. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that +# captures the structure of the code including all documentation. +# The default value is: NO. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: xml. 
+# This tag requires that the tag GENERATE_XML is set to YES. + +XML_OUTPUT = xml + +# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program +# listings (including syntax highlighting and cross-referencing information) to +# the XML output. Note that enabling this will significantly increase the size +# of the XML output. +# The default value is: YES. +# This tag requires that the tag GENERATE_XML is set to YES. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the DOCBOOK output +#--------------------------------------------------------------------------- + +# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files +# that can be used to generate PDF. +# The default value is: NO. + +GENERATE_DOCBOOK = NO + +# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in +# front of it. +# The default directory is: docbook. +# This tag requires that the tag GENERATE_DOCBOOK is set to YES. + +DOCBOOK_OUTPUT = docbook + +#--------------------------------------------------------------------------- +# Configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen +# Definitions (see http://autogen.sf.net) file that captures the structure of +# the code including all documentation. Note that this feature is still +# experimental and incomplete at the moment. +# The default value is: NO. 
+ +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module +# file that captures the structure of the code including all documentation. +# +# Note that this feature is still experimental and incomplete at the moment. +# The default value is: NO. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary +# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI +# output from the Perl module output. +# The default value is: NO. +# This tag requires that the tag GENERATE_PERLMOD is set to YES. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely +# formatted so it can be parsed by a human reader. This is useful if you want to +# understand what is going on. On the other hand, if this tag is set to NO the +# size of the Perl module output will be much smaller and Perl will parse it +# just the same. +# The default value is: YES. +# This tag requires that the tag GENERATE_PERLMOD is set to YES. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file are +# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful +# so different doxyrules.make files included by the same Makefile don't +# overwrite each other's variables. +# This tag requires that the tag GENERATE_PERLMOD is set to YES. 
+ +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all +# C-preprocessor directives found in the sources and include files. +# The default value is: YES. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names +# in the source code. If set to NO only conditional compilation will be +# performed. Macro expansion can be done in a controlled way by setting +# EXPAND_ONLY_PREDEF to YES. +# The default value is: NO. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +MACRO_EXPANSION = YES + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then +# the macro expansion is limited to the macros specified with the PREDEFINED and +# EXPAND_AS_DEFINED tags. +# The default value is: NO. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +EXPAND_ONLY_PREDEF = YES + +# If the SEARCH_INCLUDES tag is set to YES the include files in the +# INCLUDE_PATH will be searched if a #include is found. +# The default value is: YES. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by the +# preprocessor. +# This tag requires that the tag SEARCH_INCLUDES is set to YES. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will be +# used. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
+ +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that are +# defined before the preprocessor is started (similar to the -D option of e.g. +# gcc). The argument of the tag is a list of macros of the form: name or +# name=definition (no spaces). If the definition and the "=" are omitted, "=1" +# is assumed. To prevent a macro definition from being undefined via #undef or +# recursively expanded use the := operator instead of the = operator. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +PREDEFINED = \ + RAPIDJSON_DOXYGEN_RUNNING \ + RAPIDJSON_NAMESPACE_BEGIN="namespace rapidjson {" \ + RAPIDJSON_NAMESPACE_END="}" \ + RAPIDJSON_REMOVEFPTR_(x)=x \ + RAPIDJSON_ENABLEIF_RETURN(cond,returntype)="RAPIDJSON_REMOVEFPTR_ returntype" \ + RAPIDJSON_DISABLEIF_RETURN(cond,returntype)="RAPIDJSON_REMOVEFPTR_ returntype" + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this +# tag can be used to specify a list of macro names that should be expanded. The +# macro definition that is found in the sources will be used. Use the PREDEFINED +# tag if you want to use a different macro definition that overrules the +# definition found in the source code. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. + +EXPAND_AS_DEFINED = \ + RAPIDJSON_NOEXCEPT + +# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will +# remove all references to function-like macros that are alone on a line, have +# an all uppercase name, and do not end with a semicolon. Such function macros +# are typically used for boiler-plate code, and will confuse the parser if not +# removed. +# The default value is: YES. +# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
+ +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES tag can be used to specify one or more tag files. For each tag +# file the location of the external documentation should be added. The format of +# a tag file without this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where loc1 and loc2 can be relative or absolute paths or URLs. See the +# section "Linking to external documentation" for more information about the use +# of tag files. +# Note: Each tag file must have a unique name (where the name does NOT include +# the path). If a tag file is not located in the directory in which doxygen is +# run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create a +# tag file that is based on the input files it reads. See section "Linking to +# external documentation" for more information about the usage of tag files. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed in the +# class index. If set to NO only the inherited external classes will be listed. +# The default value is: NO. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in +# the modules index. If set to NO, only the current project's groups will be +# listed. +# The default value is: YES. + +EXTERNAL_GROUPS = YES + +# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in +# the related pages index. If set to NO, only the current project's pages will +# be listed. +# The default value is: YES. 
+ +EXTERNAL_PAGES = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of 'which perl'). +# The default file (with absolute path) is: /usr/bin/perl. + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram +# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to +# NO turns the diagrams off. Note that this option also works with HAVE_DOT +# disabled, but it is recommended to install and use dot, since it yields more +# powerful graphs. +# The default value is: YES. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see: +# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# You can include diagrams made with dia in doxygen documentation. Doxygen will +# then run dia to produce the diagram and insert it in the documentation. The +# DIA_PATH tag allows you to specify the directory where the dia binary resides. +# If left empty dia is assumed to be found in the default search path. + +DIA_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide inheritance +# and usage relations if the target is undocumented or is not a class. +# The default value is: YES. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. 
This tool is part of Graphviz (see: +# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent +# Bell Labs. The other options in this section have no effect if this option is +# set to NO. +# The default value is: NO. + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed +# to run in parallel. When set to 0 doxygen will base this on the number of +# processors available in the system. You can set it explicitly to a value +# larger than 0 to get control over the balance between CPU load and processing +# speed. +# Minimum value: 0, maximum value: 32, default value: 0. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_NUM_THREADS = 0 + +# When you want a different-looking font in the dot files that doxygen +# generates you can specify the font name using DOT_FONTNAME. You need to make +# sure dot is able to find the font, which can be done by putting it in a +# standard location or by setting the DOTFONTPATH environment variable or by +# setting DOT_FONTPATH to the directory containing the font. +# The default value is: Helvetica. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of +# dot graphs. +# Minimum value: 4, maximum value: 24, default value: 10. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the default font as specified with +# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set +# the path where dot can find it using this tag. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_FONTPATH = + +# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for +# each documented class showing the direct and indirect inheritance relations. +# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# The default value is: YES. 
+# This tag requires that the tag HAVE_DOT is set to YES. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a +# graph for each documented class showing the direct and indirect implementation +# dependencies (inheritance, containment, and class references variables) of the +# class with other documented classes. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for +# groups, showing the direct groups dependencies. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +UML_LOOK = NO + +# If the UML_LOOK tag is enabled, the fields and methods are shown inside the +# class node. If there are many fields or methods and many nodes the graph may +# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the +# number of items for each type to make the size more manageable. Set this to 0 +# for no limit. Note that the threshold may be exceeded by 50% before the limit +# is enforced. So when you set the threshold to 10, up to 15 fields may appear, +# but if the number exceeds 15, the total amount of fields shown is limited to +# 10. +# Minimum value: 0, maximum value: 100, default value: 10. +# This tag requires that the tag HAVE_DOT is set to YES. + +UML_LIMIT_NUM_FIELDS = 10 + +# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and +# collaboration graphs will show the relations between templates and their +# instances. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +TEMPLATE_RELATIONS = NO + +# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to +# YES then doxygen will generate a graph for each documented file showing the +# direct and indirect include dependencies of the file with other documented +# files. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +INCLUDE_GRAPH = YES + +# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are +# set to YES then doxygen will generate a graph for each documented file showing +# the direct and indirect include dependencies of the file with other documented +# files. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH tag is set to YES then doxygen will generate a call +# dependency graph for every global function or class method. +# +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller +# dependency graph for every global function or class method. +# +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable caller graphs for selected +# functions only using the \callergraph command. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a +# graphical hierarchy of all classes instead of a textual one. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the +# dependencies a directory has on other directories in a graphical way. The +# dependency relations are determined by the #include relations between the +# files in the directories. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. +# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order +# to make the SVG files visible in IE 9+ (other browsers do not have this +# requirement). +# Possible values are: png, jpg, gif and svg. +# The default value is: png. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# +# Note that this requires a modern browser other than Internet Explorer. Tested +# and working are Firefox, Chrome, Safari, and Opera. +# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make +# the SVG files visible. Older versions of IE do not have SVG support. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +INTERACTIVE_SVG = NO + +# The DOT_PATH tag can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the \dotfile +# command). +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the \mscfile +# command). + +MSCFILE_DIRS = + +# The DIAFILE_DIRS tag can be used to specify one or more directories that +# contain dia files that are included in the documentation (see the \diafile +# command). + +DIAFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes +# that will be shown in the graph. If the number of nodes in a graph becomes +# larger than this value, doxygen will truncate the graph, which is visualized +# by representing a node as a red box. Note that if the number of direct +# children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that +# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. +# Minimum value: 0, maximum value: 10000, default value: 50. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs +# generated by dot. A depth value of 3 means that only nodes reachable from the +# root by following a path via at most 3 edges will be shown. Nodes that lie +# further from the root node will be omitted. Note that setting this option to 1 +# or 2 may greatly reduce the computation time needed for large code bases. Also +# note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. +# Minimum value: 0, maximum value: 1000, default value: 0. +# This tag requires that the tag HAVE_DOT is set to YES. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not seem +# to support this out of the box. 
+# +# Warning: Depending on the platform used, enabling this option may lead to +# badly anti-aliased labels on the edges of a graph (i.e. they become hard to +# read). +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) support +# this, this feature is disabled by default. +# The default value is: NO. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page +# explaining the meaning of the various boxes and arrows in the dot generated +# graphs. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot +# files that are used to generate the various graphs. +# The default value is: YES. +# This tag requires that the tag HAVE_DOT is set to YES. 
+ +DOT_CLEANUP = YES diff --git a/src/s3select/rapidjson/doc/diagram/architecture.dot b/src/s3select/rapidjson/doc/diagram/architecture.dot new file mode 100644 index 000000000..c816c8718 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/architecture.dot @@ -0,0 +1,50 @@ +digraph { + compound=true + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + ranksep=0.2 + nodesep=0.5 + penwidth=0.5 + colorscheme=spectral7 + + node [shape=box, fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5, style=filled, fillcolor=white] + edge [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + + subgraph cluster1 { + margin="10,10" + labeljust="left" + label = "SAX" + style=filled + fillcolor=6 + + Reader -> Writer [style=invis] + } + + subgraph cluster2 { + margin="10,10" + labeljust="left" + label = "DOM" + style=filled + fillcolor=7 + + Value + Document + } + + Handler [label="<<concept>>\nHandler"] + + { + edge [arrowtail=onormal, dir=back] + Value -> Document + Handler -> Document + Handler -> Writer + } + + { + edge [arrowhead=vee, style=dashed, constraint=false] + Reader -> Handler [label="calls"] + Value -> Handler [label="calls"] + Document -> Reader [label="uses"] + } +}
\ No newline at end of file diff --git a/src/s3select/rapidjson/doc/diagram/architecture.png b/src/s3select/rapidjson/doc/diagram/architecture.png Binary files differnew file mode 100644 index 000000000..556c7e729 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/architecture.png diff --git a/src/s3select/rapidjson/doc/diagram/insituparsing.dot b/src/s3select/rapidjson/doc/diagram/insituparsing.dot new file mode 100644 index 000000000..eca0e3854 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/insituparsing.dot @@ -0,0 +1,65 @@ +digraph { + compound=true + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + ranksep=0.2 + penwidth=0.5 + + node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal] + + { + node [shape=record, fontsize="8", margin="0.04", height=0.2, color=gray] + oldjson [label="\{|\"|m|s|g|\"|:|\"|H|e|l|l|o|\\|n|W|o|r|l|d|!|\"|,|\"|\\|u|0|0|7|3|t|a|r|s|\"|:|1|0|\}", xlabel="Before Parsing"] + //newjson [label="\{|\"|<a>m|s|g|\\0|:|\"|<b>H|e|l|l|o|\\n|W|o|r|l|d|!|\\0|\"|,|\"|<c>s|t|a|r|s|\\0|t|a|r|s|:|1|0|\}", xlabel="After Parsing"] + newjson [shape=plaintext, label=< +<table BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="2"><tr> +<td>{</td> +<td>"</td><td port="a">m</td><td>s</td><td>g</td><td bgcolor="yellow">\\0</td> +<td>:</td> +<td>"</td><td port="b">H</td><td>e</td><td>l</td><td>l</td><td>o</td><td bgcolor="yellow">\\n</td><td bgcolor="yellow">W</td><td bgcolor="yellow">o</td><td bgcolor="yellow">r</td><td bgcolor="yellow">l</td><td bgcolor="yellow">d</td><td bgcolor="yellow">!</td><td bgcolor="yellow">\\0</td><td>"</td> +<td>,</td> +<td>"</td><td port="c" bgcolor="yellow">s</td><td bgcolor="yellow">t</td><td bgcolor="yellow">a</td><td bgcolor="yellow">r</td><td bgcolor="yellow">s</td><td bgcolor="yellow">\\0</td><td>t</td><td>a</td><td>r</td><td>s</td> +<td>:</td> +<td>1</td><td>0</td> +<td>}</td> +</tr></table> +>, xlabel="After 
Parsing"] + } + + subgraph cluster1 { + margin="10,10" + labeljust="left" + label = "Document by In situ Parsing" + style=filled + fillcolor=gray95 + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + root [label="{object|}", fillcolor=3] + + { + msg [label="{string|<a>}", fillcolor=5] + helloworld [label="{string|<a>}", fillcolor=5] + stars [label="{string|<a>}", fillcolor=5] + ten [label="{number|10}", fillcolor=6] + } + } + + oldjson -> root [label=" ParseInsitu()" lhead="cluster1"] + edge [arrowhead=vee] + root -> { msg; stars } + + edge [arrowhead="none"] + msg -> helloworld + stars -> ten + + { + edge [arrowhead=vee, arrowtail=dot, arrowsize=0.5, dir=both, tailclip=false] + msg:a:c -> newjson:a + helloworld:a:c -> newjson:b + stars:a:c -> newjson:c + } + + //oldjson -> newjson [style=invis] +}
\ No newline at end of file diff --git a/src/s3select/rapidjson/doc/diagram/insituparsing.png b/src/s3select/rapidjson/doc/diagram/insituparsing.png Binary files differnew file mode 100644 index 000000000..4400c8846 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/insituparsing.png diff --git a/src/s3select/rapidjson/doc/diagram/iterative-parser-states-diagram.dot b/src/s3select/rapidjson/doc/diagram/iterative-parser-states-diagram.dot new file mode 100644 index 000000000..82ebfe1f0 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/iterative-parser-states-diagram.dot @@ -0,0 +1,62 @@ +digraph { + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + penwidth=0.0 + + node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + edge [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + + node [shape = doublecircle]; Start; Finish; + node [shape = box; style = "rounded, filled"; fillcolor=white ]; + + Start -> ArrayInitial [label=" ["]; + Start -> ObjectInitial [label=" {"]; + + subgraph clusterArray { + margin="10,10" + style=filled + fillcolor=gray95 + label = "Array" + + ArrayInitial; Element; ElementDelimiter; ArrayFinish; + } + + subgraph clusterObject { + margin="10,10" + style=filled + fillcolor=gray95 + label = "Object" + + ObjectInitial; MemberKey; KeyValueDelimiter; MemberValue; MemberDelimiter; ObjectFinish; + } + + ArrayInitial -> ArrayInitial [label="["]; + ArrayInitial -> ArrayFinish [label=" ]"]; + ArrayInitial -> ObjectInitial [label="{", constraint=false]; + ArrayInitial -> Element [label="string\nfalse\ntrue\nnull\nnumber"]; + + Element -> ArrayFinish [label="]"]; + Element -> ElementDelimiter [label=","]; + + ElementDelimiter -> ArrayInitial [label=" ["]; + ElementDelimiter -> ObjectInitial [label="{"]; + ElementDelimiter -> Element [label="string\nfalse\ntrue\nnull\nnumber"]; + + ObjectInitial -> ObjectFinish [label=" }"]; + ObjectInitial -> MemberKey [label=" string "]; + + MemberKey -> KeyValueDelimiter 
[label=":"]; + + KeyValueDelimiter -> ArrayInitial [label="["]; + KeyValueDelimiter -> ObjectInitial [label=" {"]; + KeyValueDelimiter -> MemberValue [label=" string\n false\n true\n null\n number"]; + + MemberValue -> ObjectFinish [label="}"]; + MemberValue -> MemberDelimiter [label=","]; + + MemberDelimiter -> MemberKey [label=" string "]; + + ArrayFinish -> Finish; + ObjectFinish -> Finish; +} diff --git a/src/s3select/rapidjson/doc/diagram/iterative-parser-states-diagram.png b/src/s3select/rapidjson/doc/diagram/iterative-parser-states-diagram.png Binary files differnew file mode 100644 index 000000000..f315494db --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/iterative-parser-states-diagram.png diff --git a/src/s3select/rapidjson/doc/diagram/makefile b/src/s3select/rapidjson/doc/diagram/makefile new file mode 100644 index 000000000..348397765 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/makefile @@ -0,0 +1,8 @@ +%.pdf: %.dot + dot $< -Tpdf -o $@ + +%.png: %.dot + dot $< -Tpng -o $@ + +DOTFILES = $(basename $(wildcard *.dot)) +all: $(addsuffix .png, $(DOTFILES)) $(addsuffix .pdf, $(DOTFILES)) diff --git a/src/s3select/rapidjson/doc/diagram/move1.dot b/src/s3select/rapidjson/doc/diagram/move1.dot new file mode 100644 index 000000000..a7c1464ad --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/move1.dot @@ -0,0 +1,47 @@ +digraph { + compound=true + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + ranksep=0.2 + penwidth=0.5 + + node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal] + + subgraph cluster1 { + margin="10,10" + labeljust="left" + label = "Before" + style=filled + fillcolor=gray95 + + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + { + rank = same + b1 [label="{b:number|456}", fillcolor=6] + a1 [label="{a:number|123}", fillcolor=6] + } + + a1 -> b1 [style="dashed", label="Move", dir=back] + } + + subgraph cluster2 { + 
margin="10,10" + labeljust="left" + label = "After" + style=filled + fillcolor=gray95 + + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + { + rank = same + b2 [label="{b:null|}", fillcolor=1] + a2 [label="{a:number|456}", fillcolor=6] + } + a2 -> b2 [style=invis, dir=back] + } + b1 -> b2 [style=invis] +}
\ No newline at end of file diff --git a/src/s3select/rapidjson/doc/diagram/move1.png b/src/s3select/rapidjson/doc/diagram/move1.png Binary files differnew file mode 100644 index 000000000..ab322d084 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/move1.png diff --git a/src/s3select/rapidjson/doc/diagram/move2.dot b/src/s3select/rapidjson/doc/diagram/move2.dot new file mode 100644 index 000000000..2319871b9 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/move2.dot @@ -0,0 +1,62 @@ +digraph { + compound=true + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + ranksep=0.2 + penwidth=0.5 + + node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal] + + subgraph cluster1 { + margin="10,10" + labeljust="left" + label = "Before Copying (Hypothetic)" + style=filled + fillcolor=gray95 + + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + c1 [label="{contacts:array|}", fillcolor=4] + c11 [label="{|}"] + c12 [label="{|}"] + c13 [shape="none", label="...", style="solid"] + o1 [label="{o:object|}", fillcolor=3] + ghost [label="{o:object|}", style=invis] + + c1 -> o1 [style="dashed", label="AddMember", constraint=false] + + edge [arrowhead=vee] + c1 -> { c11; c12; c13 } + o1 -> ghost [style=invis] + } + + subgraph cluster2 { + margin="10,10" + labeljust="left" + label = "After Copying (Hypothetic)" + style=filled + fillcolor=gray95 + + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + c2 [label="{contacts:array|}", fillcolor=4] + c3 [label="{array|}", fillcolor=4] + c21 [label="{|}"] + c22 [label="{|}"] + c23 [shape=none, label="...", style="solid"] + o2 [label="{o:object|}", fillcolor=3] + cs [label="{string|\"contacts\"}", fillcolor=5] + c31 [label="{|}"] + c32 [label="{|}"] + c33 [shape="none", label="...", style="solid"] + + edge [arrowhead=vee] + c2 -> { c21; c22; c23 } + o2 -> cs + cs -> c3 [arrowhead=none] + c3 -> { c31; c32; c33 } + } + 
ghost -> o2 [style=invis] +} diff --git a/src/s3select/rapidjson/doc/diagram/move2.png b/src/s3select/rapidjson/doc/diagram/move2.png Binary files differnew file mode 100644 index 000000000..8d4fc5bcc --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/move2.png diff --git a/src/s3select/rapidjson/doc/diagram/move3.dot b/src/s3select/rapidjson/doc/diagram/move3.dot new file mode 100644 index 000000000..57adb4f9d --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/move3.dot @@ -0,0 +1,60 @@ +digraph { + compound=true + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + ranksep=0.2 + penwidth=0.5 + forcelabels=true + + node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal] + + subgraph cluster1 { + margin="10,10" + labeljust="left" + label = "Before Moving" + style=filled + fillcolor=gray95 + + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + c1 [label="{contacts:array|}", fillcolor=4] + c11 [label="{|}"] + c12 [label="{|}"] + c13 [shape=none, label="...", style="solid"] + o1 [label="{o:object|}", fillcolor=3] + ghost [label="{o:object|}", style=invis] + + c1 -> o1 [style="dashed", constraint=false, label="AddMember"] + + edge [arrowhead=vee] + c1 -> { c11; c12; c13 } + o1 -> ghost [style=invis] + } + + subgraph cluster2 { + margin="10,10" + labeljust="left" + label = "After Moving" + style=filled + fillcolor=gray95 + + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + c2 [label="{contacts:null|}", fillcolor=1] + c3 [label="{array|}", fillcolor=4] + c21 [label="{|}"] + c22 [label="{|}"] + c23 [shape="none", label="...", style="solid"] + o2 [label="{o:object|}", fillcolor=3] + cs [label="{string|\"contacts\"}", fillcolor=5] + c2 -> o2 [style="dashed", constraint=false, label="AddMember", style=invis] + + edge [arrowhead=vee] + c3 -> { c21; c22; c23 } + o2 -> cs + cs -> c3 [arrowhead=none] + } + ghost -> o2 [style=invis] +} diff --git 
a/src/s3select/rapidjson/doc/diagram/move3.png b/src/s3select/rapidjson/doc/diagram/move3.png Binary files differnew file mode 100644 index 000000000..558470f1f --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/move3.png diff --git a/src/s3select/rapidjson/doc/diagram/normalparsing.dot b/src/s3select/rapidjson/doc/diagram/normalparsing.dot new file mode 100644 index 000000000..b15941ba0 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/normalparsing.dot @@ -0,0 +1,56 @@ +digraph { + compound=true + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + ranksep=0.2 + penwidth=0.5 + + node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal] + + { + node [shape=record, fontsize="8", margin="0.04", height=0.2, color=gray] + normaljson [label="\{|\"|m|s|g|\"|:|\"|H|e|l|l|o|\\|n|W|o|r|l|d|!|\"|,|\"|\\|u|0|0|7|3|t|a|r|s\"|:|1|0|\}"] + + { + rank = same + msgstring [label="m|s|g|\\0"] + helloworldstring [label="H|e|l|l|o|\\n|W|o|r|l|d|!|\\0"] + starsstring [label="s|t|a|r|s\\0"] + } + } + + subgraph cluster1 { + margin="10,10" + labeljust="left" + label = "Document by Normal Parsing" + style=filled + fillcolor=gray95 + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + root [label="{object|}", fillcolor=3] + + { + msg [label="{string|<a>}", fillcolor=5] + helloworld [label="{string|<a>}", fillcolor=5] + stars [label="{string|<a>}", fillcolor=5] + ten [label="{number|10}", fillcolor=6] + } + } + + normaljson -> root [label=" Parse()" lhead="cluster1"] + edge [arrowhead=vee] + root -> { msg; stars } + + edge [arrowhead="none"] + msg -> helloworld + stars -> ten + + edge [arrowhead=vee, arrowtail=dot, arrowsize=0.5, dir=both, tailclip=false] + msg:a:c -> msgstring:w + helloworld:a:c -> helloworldstring:w + stars:a:c -> starsstring:w + + msgstring -> helloworldstring -> starsstring [style=invis] +}
\ No newline at end of file diff --git a/src/s3select/rapidjson/doc/diagram/normalparsing.png b/src/s3select/rapidjson/doc/diagram/normalparsing.png Binary files differnew file mode 100644 index 000000000..702512ca3 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/normalparsing.png diff --git a/src/s3select/rapidjson/doc/diagram/simpledom.dot b/src/s3select/rapidjson/doc/diagram/simpledom.dot new file mode 100644 index 000000000..959cdbb41 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/simpledom.dot @@ -0,0 +1,54 @@ +digraph { + compound=true + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + ranksep=0.2 + penwidth=0.5 + + node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + edge [fontname="Inconsolata, Consolas", fontsize=10, arrowhead=normal] + + { + node [shape=record, fontsize="8", margin="0.04", height=0.2, color=gray] + srcjson [label="\{|\"|p|r|o|j|e|c|t|\"|:|\"|r|a|p|i|d|j|s|o|n|\"|,|\"|s|t|a|r|s|\"|:|1|0|\}"] + dstjson [label="\{|\"|p|r|o|j|e|c|t|\"|:|\"|r|a|p|i|d|j|s|o|n|\"|,|\"|s|t|a|r|s|\"|:|1|1|\}"] + } + + { + node [shape="box", style="filled", fillcolor="gray95"] + Document2 [label="(Modified) Document"] + Writer + } + + subgraph cluster1 { + margin="10,10" + labeljust="left" + label = "Document" + style=filled + fillcolor=gray95 + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + root [label="{object|}", fillcolor=3] + + { + project [label="{string|\"project\"}", fillcolor=5] + rapidjson [label="{string|\"rapidjson\"}", fillcolor=5] + stars [label="{string|\"stars\"}", fillcolor=5] + ten [label="{number|10}", fillcolor=6] + } + + edge [arrowhead=vee] + root -> { project; stars } + + edge [arrowhead="none"] + project -> rapidjson + stars -> ten + } + + srcjson -> root [label=" Parse()", lhead="cluster1"] + + ten -> Document2 [label=" Increase \"stars\"", ltail="cluster1" ] + Document2 -> Writer [label=" Traverse DOM by Accept()"] + Writer -> dstjson [label=" Output to StringBuffer"] +}
\ No newline at end of file diff --git a/src/s3select/rapidjson/doc/diagram/simpledom.png b/src/s3select/rapidjson/doc/diagram/simpledom.png Binary files differnew file mode 100644 index 000000000..38d9c5dc1 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/simpledom.png diff --git a/src/s3select/rapidjson/doc/diagram/tutorial.dot b/src/s3select/rapidjson/doc/diagram/tutorial.dot new file mode 100644 index 000000000..138ddc381 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/tutorial.dot @@ -0,0 +1,58 @@ +digraph { + compound=true + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + ranksep=0.2 + penwidth=0.5 + + node [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + edge [fontname="Inconsolata, Consolas", fontsize=10] + + subgraph cluster1 { + margin="10,10" + labeljust="left" + label = "Document" + style=filled + fillcolor=gray95 + node [shape=Mrecord, style=filled, colorscheme=spectral7] + + root [label="{object|}", fillcolor=3] + + { + hello [label="{string|\"hello\"}", fillcolor=5] + t [label="{string|\"t\"}", fillcolor=5] + f [label="{string|\"f\"}", fillcolor=5] + n [label="{string|\"n\"}", fillcolor=5] + i [label="{string|\"i\"}", fillcolor=5] + pi [label="{string|\"pi\"}", fillcolor=5] + a [label="{string|\"a\"}", fillcolor=5] + + world [label="{string|\"world\"}", fillcolor=5] + true [label="{true|}", fillcolor=7] + false [label="{false|}", fillcolor=2] + null [label="{null|}", fillcolor=1] + i1 [label="{number|123}", fillcolor=6] + pi1 [label="{number|3.1416}", fillcolor=6] + array [label="{array|size=4}", fillcolor=4] + + a1 [label="{number|1}", fillcolor=6] + a2 [label="{number|2}", fillcolor=6] + a3 [label="{number|3}", fillcolor=6] + a4 [label="{number|4}", fillcolor=6] + } + + edge [arrowhead=vee] + root -> { hello; t; f; n; i; pi; a } + array -> { a1; a2; a3; a4 } + + edge [arrowhead=none] + hello -> world + t -> true + f -> false + n -> null + i -> i1 + pi -> pi1 + a -> array + } +}
\ No newline at end of file diff --git a/src/s3select/rapidjson/doc/diagram/tutorial.png b/src/s3select/rapidjson/doc/diagram/tutorial.png Binary files differnew file mode 100644 index 000000000..8a12924ac --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/tutorial.png diff --git a/src/s3select/rapidjson/doc/diagram/utilityclass.dot b/src/s3select/rapidjson/doc/diagram/utilityclass.dot new file mode 100644 index 000000000..1492a8a7a --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/utilityclass.dot @@ -0,0 +1,73 @@ +digraph { + rankdir=LR + compound=true + fontname="Inconsolata, Consolas" + fontsize=10 + margin="0,0" + ranksep=0.3 + nodesep=0.15 + penwidth=0.5 + colorscheme=spectral7 + + node [shape=box, fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5, style=filled, fillcolor=white] + edge [fontname="Inconsolata, Consolas", fontsize=10, penwidth=0.5] + + subgraph cluster0 { + style=filled + fillcolor=4 + + Encoding [label="<<concept>>\nEncoding"] + + edge [arrowtail=onormal, dir=back] + Encoding -> { UTF8; UTF16; UTF32; ASCII; AutoUTF } + UTF16 -> { UTF16LE; UTF16BE } + UTF32 -> { UTF32LE; UTF32BE } + } + + subgraph cluster1 { + style=filled + fillcolor=5 + + Stream [label="<<concept>>\nStream"] + InputByteStream [label="<<concept>>\nInputByteStream"] + OutputByteStream [label="<<concept>>\nOutputByteStream"] + + edge [arrowtail=onormal, dir=back] + Stream -> { + StringStream; InsituStringStream; StringBuffer; + EncodedInputStream; EncodedOutputStream; + AutoUTFInputStream; AutoUTFOutputStream + InputByteStream; OutputByteStream + } + + InputByteStream -> { MemoryStream; FlieReadStream } + OutputByteStream -> { MemoryBuffer; FileWriteStream } + } + + subgraph cluster2 { + style=filled + fillcolor=3 + + Allocator [label="<<concept>>\nAllocator"] + + edge [arrowtail=onormal, dir=back] + Allocator -> { CrtAllocator; MemoryPoolAllocator } + } + + { + edge [arrowtail=odiamond, arrowhead=vee, dir=both] + EncodedInputStream -> InputByteStream + 
EncodedOutputStream -> OutputByteStream + AutoUTFInputStream -> InputByteStream + AutoUTFOutputStream -> OutputByteStream + MemoryPoolAllocator -> Allocator [label="base", tailport=s] + } + + { + edge [arrowhead=vee, style=dashed] + AutoUTFInputStream -> AutoUTF + AutoUTFOutputStream -> AutoUTF + } + + //UTF32LE -> Stream [style=invis] +}
\ No newline at end of file diff --git a/src/s3select/rapidjson/doc/diagram/utilityclass.png b/src/s3select/rapidjson/doc/diagram/utilityclass.png Binary files differnew file mode 100644 index 000000000..ce029a4d0 --- /dev/null +++ b/src/s3select/rapidjson/doc/diagram/utilityclass.png diff --git a/src/s3select/rapidjson/doc/dom.md b/src/s3select/rapidjson/doc/dom.md new file mode 100644 index 000000000..6992e77d1 --- /dev/null +++ b/src/s3select/rapidjson/doc/dom.md @@ -0,0 +1,281 @@ +# DOM + +Document Object Model (DOM) is an in-memory representation of JSON for query and manipulation. The basic usage of DOM is described in [Tutorial](doc/tutorial.md). This section will describe some details and more advanced usages. + +[TOC] + +# Template {#Template} + +In the tutorial, `Value` and `Document` were used. Similarly to `std::string`, these are actually `typedef`s of template classes: + +~~~~~~~~~~cpp +namespace rapidjson { + +template <typename Encoding, typename Allocator = MemoryPoolAllocator<> > +class GenericValue { + // ... +}; + +template <typename Encoding, typename Allocator = MemoryPoolAllocator<> > +class GenericDocument : public GenericValue<Encoding, Allocator> { + // ... +}; + +typedef GenericValue<UTF8<> > Value; +typedef GenericDocument<UTF8<> > Document; + +} // namespace rapidjson +~~~~~~~~~~ + +User can customize these template parameters. + +## Encoding {#Encoding} + +The `Encoding` parameter specifies the encoding of JSON String value in memory. Possible options are `UTF8`, `UTF16`, `UTF32`. Note that these 3 types are also template classes. `UTF8<>` is `UTF8<char>`, which means using char to store the characters. You may refer to [Encoding](doc/encoding.md) for details. + +Suppose a Windows application would query localization strings stored in JSON files. Unicode-enabled functions in Windows use UTF-16 (wide character) encoding. No matter what encoding was used in JSON files, we can store the strings in UTF-16 in memory. 
+ +~~~~~~~~~~cpp +using namespace rapidjson; + +typedef GenericDocument<UTF16<> > WDocument; +typedef GenericValue<UTF16<> > WValue; + +FILE* fp = fopen("localization.json", "rb"); // non-Windows use "r" + +char readBuffer[256]; +FileReadStream bis(fp, readBuffer, sizeof(readBuffer)); + +AutoUTFInputStream<unsigned, FileReadStream> eis(bis); // wraps bis into eis + +WDocument d; +d.ParseStream<0, AutoUTF<unsigned> >(eis); + +const WValue locale(L"ja"); // Japanese + +MessageBoxW(hWnd, d[locale].GetString(), L"Test", MB_OK); +~~~~~~~~~~ + +## Allocator {#Allocator} + +The `Allocator` defines which allocator class is used when allocating/deallocating memory for `Document`/`Value`. `Document` owns, or references to an `Allocator` instance. On the other hand, `Value` does not do so, in order to reduce memory consumption. + +The default allocator used in `GenericDocument` is `MemoryPoolAllocator`. This allocator actually allocate memory sequentially, and cannot deallocate one by one. This is very suitable when parsing a JSON into a DOM tree. + +Another allocator is `CrtAllocator`, of which CRT is short for C RunTime library. This allocator simply calls the standard `malloc()`/`realloc()`/`free()`. When there is a lot of add and remove operations, this allocator may be preferred. But this allocator is far less efficient than `MemoryPoolAllocator`. + +# Parsing {#Parsing} + +`Document` provides several functions for parsing. In below, (1) is the fundamental function, while the others are helpers which call (1). 
+ +~~~~~~~~~~cpp +using namespace rapidjson; + +// (1) Fundamental +template <unsigned parseFlags, typename SourceEncoding, typename InputStream> +GenericDocument& GenericDocument::ParseStream(InputStream& is); + +// (2) Using the same Encoding for stream +template <unsigned parseFlags, typename InputStream> +GenericDocument& GenericDocument::ParseStream(InputStream& is); + +// (3) Using default parse flags +template <typename InputStream> +GenericDocument& GenericDocument::ParseStream(InputStream& is); + +// (4) In situ parsing +template <unsigned parseFlags> +GenericDocument& GenericDocument::ParseInsitu(Ch* str); + +// (5) In situ parsing, using default parse flags +GenericDocument& GenericDocument::ParseInsitu(Ch* str); + +// (6) Normal parsing of a string +template <unsigned parseFlags, typename SourceEncoding> +GenericDocument& GenericDocument::Parse(const Ch* str); + +// (7) Normal parsing of a string, using same Encoding of Document +template <unsigned parseFlags> +GenericDocument& GenericDocument::Parse(const Ch* str); + +// (8) Normal parsing of a string, using default parse flags +GenericDocument& GenericDocument::Parse(const Ch* str); +~~~~~~~~~~ + +The examples of [tutorial](doc/tutorial.md) uses (8) for normal parsing of string. The examples of [stream](doc/stream.md) uses the first three. *In situ* parsing will be described soon. + +The `parseFlags` are combination of the following bit-flags: + +Parse flags | Meaning +------------------------------|----------------------------------- +`kParseNoFlags` | No flag is set. +`kParseDefaultFlags` | Default parse flags. It is equal to macro `RAPIDJSON_PARSE_DEFAULT_FLAGS`, which is defined as `kParseNoFlags`. +`kParseInsituFlag` | In-situ(destructive) parsing. +`kParseValidateEncodingFlag` | Validate encoding of JSON strings. +`kParseIterativeFlag` | Iterative(constant complexity in terms of function call stack size) parsing. 
+`kParseStopWhenDoneFlag` | After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate `kParseErrorDocumentRootNotSingular` error. Using this flag for parsing multiple JSONs in the same stream. +`kParseFullPrecisionFlag` | Parse number in full precision (slower). If this flag is not set, the normal precision (faster) is used. Normal precision has maximum 3 [ULP](http://en.wikipedia.org/wiki/Unit_in_the_last_place) error. +`kParseCommentsFlag` | Allow one-line `// ...` and multi-line `/* ... */` comments (relaxed JSON syntax). +`kParseNumbersAsStringsFlag` | Parse numerical type values as strings. +`kParseTrailingCommasFlag` | Allow trailing commas at the end of objects and arrays (relaxed JSON syntax). +`kParseNanAndInfFlag` | Allow parsing `NaN`, `Inf`, `Infinity`, `-Inf` and `-Infinity` as `double` values (relaxed JSON syntax). +`kParseEscapedApostropheFlag` | Allow escaped apostrophe `\'` in strings (relaxed JSON syntax). + +By using a non-type template parameter, instead of a function parameter, C++ compiler can generate code which is optimized for specified combinations, improving speed, and reducing code size (if only using a single specialization). The downside is the flags needed to be determined in compile-time. + +The `SourceEncoding` parameter defines what encoding is in the stream. This can be differed to the `Encoding` of the `Document`. See [Transcoding and Validation](#TranscodingAndValidation) section for details. + +And the `InputStream` is type of input stream. + +## Parse Error {#ParseError} + +When the parse processing succeeded, the `Document` contains the parse results. When there is an error, the original DOM is *unchanged*. And the error state of parsing can be obtained by `bool HasParseError()`, `ParseErrorCode GetParseError()` and `size_t GetErrorOffset()`. 
+ +Parse Error Code | Description +--------------------------------------------|--------------------------------------------------- +`kParseErrorNone` | No error. +`kParseErrorDocumentEmpty` | The document is empty. +`kParseErrorDocumentRootNotSingular` | The document root must not follow by other values. +`kParseErrorValueInvalid` | Invalid value. +`kParseErrorObjectMissName` | Missing a name for object member. +`kParseErrorObjectMissColon` | Missing a colon after a name of object member. +`kParseErrorObjectMissCommaOrCurlyBracket` | Missing a comma or `}` after an object member. +`kParseErrorArrayMissCommaOrSquareBracket` | Missing a comma or `]` after an array element. +`kParseErrorStringUnicodeEscapeInvalidHex` | Incorrect hex digit after `\\u` escape in string. +`kParseErrorStringUnicodeSurrogateInvalid` | The surrogate pair in string is invalid. +`kParseErrorStringEscapeInvalid` | Invalid escape character in string. +`kParseErrorStringMissQuotationMark` | Missing a closing quotation mark in string. +`kParseErrorStringInvalidEncoding` | Invalid encoding in string. +`kParseErrorNumberTooBig` | Number too big to be stored in `double`. +`kParseErrorNumberMissFraction` | Miss fraction part in number. +`kParseErrorNumberMissExponent` | Miss exponent in number. + +The offset of error is defined as the character number from beginning of stream. Currently RapidJSON does not keep track of line number. + +To get an error message, RapidJSON provided a English messages in `rapidjson/error/en.h`. User can customize it for other locales, or use a custom localization system. + +Here shows an example of parse error handling. + +~~~~~~~~~~cpp +#include "rapidjson/document.h" +#include "rapidjson/error/en.h" + +// ... +Document d; +if (d.Parse(json).HasParseError()) { + fprintf(stderr, "\nError(offset %u): %s\n", + (unsigned)d.GetErrorOffset(), + GetParseError_En(d.GetParseError())); + // ... 
+} +~~~~~~~~~~ + +## In Situ Parsing {#InSituParsing} + +From [Wikipedia](http://en.wikipedia.org/wiki/In_situ): + +> *In situ* ... is a Latin phrase that translates literally to "on site" or "in position". It means "locally", "on site", "on the premises" or "in place" to describe an event where it takes place, and is used in many different contexts. +> ... +> (In computer science) An algorithm is said to be an in situ algorithm, or in-place algorithm, if the extra amount of memory required to execute the algorithm is O(1), that is, does not exceed a constant no matter how large the input. For example, heapsort is an in situ sorting algorithm. + +In normal parsing process, a large overhead is to decode JSON strings and copy them to other buffers. *In situ* parsing decodes those JSON strings at the place where they are stored. It is possible in JSON because the length of decoded string is always shorter than or equal to the one in JSON. In this context, decoding a JSON string means to process the escapes, such as `"\n"`, `"\u1234"`, etc., and add a null terminator (`'\0'`) at the end of string. + +The following diagrams compare normal and *in situ* parsing. The JSON string values contain pointers to the decoded string. + +![normal parsing](diagram/normalparsing.png) + +In normal parsing, the decoded strings are copied to freshly allocated buffers. `"\\n"` (2 characters) is decoded as `"\n"` (1 character). `"\\u0073"` (6 characters) is decoded as `"s"` (1 character). + +![in situ parsing](diagram/insituparsing.png) + +*In situ* parsing just modifies the original JSON. Updated characters are highlighted in the diagram. If the JSON string does not contain escape character, such as `"msg"`, the parsing process merely replaces the closing double quotation mark with a null character. + +Since *in situ* parsing modifies the input, the parsing API needs `char*` instead of `const char*`. 
+ +~~~~~~~~~~cpp +// Read whole file into a buffer +FILE* fp = fopen("test.json", "r"); +fseek(fp, 0, SEEK_END); +size_t filesize = (size_t)ftell(fp); +fseek(fp, 0, SEEK_SET); +char* buffer = (char*)malloc(filesize + 1); +size_t readLength = fread(buffer, 1, filesize, fp); +buffer[readLength] = '\0'; +fclose(fp); + +// In situ parsing the buffer into d, buffer will also be modified +Document d; +d.ParseInsitu(buffer); + +// Query/manipulate the DOM here... + +free(buffer); +// Note: At this point, d may have dangling pointers pointed to the deallocated buffer. +~~~~~~~~~~ + +The JSON strings are marked as const-string. But they may not be really "constant". The life cycle of it depends on the JSON buffer. + +In situ parsing minimizes allocation overheads and memory copying. Generally this improves cache coherence, which is an important factor of performance in modern computer. + +There are some limitations of *in situ* parsing: + +1. The whole JSON is in memory. +2. The source encoding in stream and target encoding in document must be the same. +3. The buffer need to be retained until the document is no longer used. +4. If the DOM need to be used for long period after parsing, and there are few JSON strings in the DOM, retaining the buffer may be a memory waste. + +*In situ* parsing is mostly suitable for short-term JSON that only need to be processed once, and then be released from memory. In practice, these situation is very common, for example, deserializing JSON to C++ objects, processing web requests represented in JSON, etc. + +## Transcoding and Validation {#TranscodingAndValidation} + +RapidJSON supports conversion between Unicode formats (officially termed UCS Transformation Format) internally. During DOM parsing, the source encoding of the stream can be different from the encoding of the DOM. For example, the source stream contains a UTF-8 JSON, while the DOM is using UTF-16 encoding. There is an example code in [EncodedInputStream](doc/stream.md). 
+ +When writing a JSON from DOM to output stream, transcoding can also be used. An example is in [EncodedOutputStream](doc/stream.md). + +During transcoding, the source string is decoded into Unicode code points, and then the code points are encoded in the target format. During decoding, it will validate the byte sequence in the source string. If it is not a valid sequence, the parser will be stopped with `kParseErrorStringInvalidEncoding` error. + +When the source encoding of stream is the same as encoding of DOM, by default, the parser will *not* validate the sequence. User may use `kParseValidateEncodingFlag` to force validation. + +# Techniques {#Techniques} + +Some techniques for using the DOM API are discussed here. + +## DOM as SAX Event Publisher + +In RapidJSON, stringifying a DOM with `Writer` may look a little bit weird. + +~~~~~~~~~~cpp +// ... +Writer<StringBuffer> writer(buffer); +d.Accept(writer); +~~~~~~~~~~ + +Actually, `Value::Accept()` is responsible for publishing SAX events about the value to the handler. With this design, `Value` and `Writer` are decoupled. `Value` can generate SAX events, and `Writer` can handle those events. + +User may create custom handlers for transforming the DOM into other formats. For example, a handler which converts the DOM into XML. + +For more about SAX events and handler, please refer to [SAX](doc/sax.md). + +## User Buffer {#UserBuffer} + +Some applications may try to avoid memory allocations whenever possible. + +`MemoryPoolAllocator` can support this by letting the user provide a buffer. The buffer can be on the program stack, or a "scratch buffer" which is statically allocated (a static/global array) for storing temporary data. + +`MemoryPoolAllocator` will use the user buffer to satisfy allocations. When the user buffer is used up, it will allocate a chunk of memory from the base allocator (by default the `CrtAllocator`). + +Here is an example of using stack memory. 
The first allocator is for storing values, while the second allocator is for storing temporary data during parsing. + +~~~~~~~~~~cpp +typedef GenericDocument<UTF8<>, MemoryPoolAllocator<>, MemoryPoolAllocator<>> DocumentType; +char valueBuffer[4096]; +char parseBuffer[1024]; +MemoryPoolAllocator<> valueAllocator(valueBuffer, sizeof(valueBuffer)); +MemoryPoolAllocator<> parseAllocator(parseBuffer, sizeof(parseBuffer)); +DocumentType d(&valueAllocator, sizeof(parseBuffer), &parseAllocator); +d.Parse(json); +~~~~~~~~~~ + +If the total size of allocation is less than 4096+1024 bytes during parsing, this code does not invoke any heap allocation (via `new` or `malloc()`) at all. + +User can query the current memory consumption in bytes via `MemoryPoolAllocator::Size()`. And then user can determine a suitable size of user buffer. diff --git a/src/s3select/rapidjson/doc/dom.zh-cn.md b/src/s3select/rapidjson/doc/dom.zh-cn.md new file mode 100644 index 000000000..7a555dc1c --- /dev/null +++ b/src/s3select/rapidjson/doc/dom.zh-cn.md @@ -0,0 +1,285 @@ +# DOM + +文档对象模型(Document Object Model, DOM)是一种罝于内存中的 JSON 表示方式,以供查询及操作。我们已于 [教程](doc/tutorial.zh-cn.md) 中介绍了 DOM 的基本用法,本节将讲述一些细节及高级用法。 + +[TOC] + +# 模板 {#Template} + +教程中使用了 `Value` 和 `Document` 类型。与 `std::string` 相似,这些类型其实是两个模板类的 `typedef`: + +~~~~~~~~~~cpp +namespace rapidjson { + +template <typename Encoding, typename Allocator = MemoryPoolAllocator<> > +class GenericValue { + // ... +}; + +template <typename Encoding, typename Allocator = MemoryPoolAllocator<> > +class GenericDocument : public GenericValue<Encoding, Allocator> { + // ... 
+}; + +typedef GenericValue<UTF8<> > Value; +typedef GenericDocument<UTF8<> > Document; + +} // namespace rapidjson +~~~~~~~~~~ + +使用者可以自定义这些模板参数。 + +## 编码 {#Encoding} + +`Encoding` 参数指明在内存中的 JSON String 使用哪种编码。可行的选项有 `UTF8`、`UTF16`、`UTF32`。要注意这 3 个类型其实也是模板类。`UTF8<>` 等同 `UTF8<char>`,这代表它使用 `char` 来存储字符串。更多细节可以参考 [编码](doc/encoding.zh-cn.md)。 + +这里是一个例子。假设一个 Windows 应用软件希望查询存储于 JSON 中的本地化字符串。Windows 中含 Unicode 的函数使用 UTF-16(宽字符)编码。无论 JSON 文件使用哪种编码,我们都可以把字符串以 UTF-16 形式存储在内存。 + +~~~~~~~~~~cpp +using namespace rapidjson; + +typedef GenericDocument<UTF16<> > WDocument; +typedef GenericValue<UTF16<> > WValue; + +FILE* fp = fopen("localization.json", "rb"); // 非 Windows 平台使用 "r" + +char readBuffer[256]; +FileReadStream bis(fp, readBuffer, sizeof(readBuffer)); + +AutoUTFInputStream<unsigned, FileReadStream> eis(bis); // 包装 bis 成 eis + +WDocument d; +d.ParseStream<0, AutoUTF<unsigned> >(eis); + +const WValue locale(L"ja"); // Japanese + +MessageBoxW(hWnd, d[locale].GetString(), L"Test", MB_OK); +~~~~~~~~~~ + +## 分配器 {#Allocator} + +`Allocator` 定义当 `Document`/`Value` 分配或释放内存时使用那个分配类。`Document` 拥有或引用到一个 `Allocator` 实例。而为了节省内存,`Value` 没有这么做。 + +`GenericDocument` 的缺省分配器是 `MemoryPoolAllocator`。此分配器实际上会顺序地分配内存,并且不能逐一释放。当要解析一个 JSON 并生成 DOM,这种分配器是非常合适的。 + +RapidJSON 还提供另一个分配器 `CrtAllocator`,当中 CRT 是 C 运行库(C RunTime library)的缩写。此分配器简单地读用标准的 `malloc()`/`realloc()`/`free()`。当我们需要许多增减操作,这种分配器会更为适合。然而这种分配器远远比 `MemoryPoolAllocator` 低效。 + +# 解析 {#Parsing} + +`Document` 提供几个解析函数。以下的 (1) 是根本的函数,其他都是调用 (1) 的协助函数。 + +~~~~~~~~~~cpp +using namespace rapidjson; + +// (1) 根本 +template <unsigned parseFlags, typename SourceEncoding, typename InputStream> +GenericDocument& GenericDocument::ParseStream(InputStream& is); + +// (2) 使用流的编码 +template <unsigned parseFlags, typename InputStream> +GenericDocument& GenericDocument::ParseStream(InputStream& is); + +// (3) 使用缺省标志 +template <typename InputStream> +GenericDocument& GenericDocument::ParseStream(InputStream& is); + +// (4) 原位解析 +template <unsigned 
parseFlags> +GenericDocument& GenericDocument::ParseInsitu(Ch* str); + +// (5) 原位解析,使用缺省标志 +GenericDocument& GenericDocument::ParseInsitu(Ch* str); + +// (6) 正常解析一个字符串 +template <unsigned parseFlags, typename SourceEncoding> +GenericDocument& GenericDocument::Parse(const Ch* str); + +// (7) 正常解析一个字符串,使用 Document 的编码 +template <unsigned parseFlags> +GenericDocument& GenericDocument::Parse(const Ch* str); + +// (8) 正常解析一个字符串,使用缺省标志 +GenericDocument& GenericDocument::Parse(const Ch* str); +~~~~~~~~~~ + +[教程](doc/tutorial.zh-cn.md) 中的例使用 (8) 去正常解析字符串。而 [流](doc/stream.zh-cn.md) 的例子使用前 3 个函数。我们将稍后介绍原位(*In situ*) 解析。 + +`parseFlags` 是以下位标置的组合: + +解析位标志 | 意义 +------------------------------|----------------------------------- +`kParseNoFlags` | 没有任何标志。 +`kParseDefaultFlags` | 缺省的解析选项。它等于 `RAPIDJSON_PARSE_DEFAULT_FLAGS` 宏,此宏定义为 `kParseNoFlags`。 +`kParseInsituFlag` | 原位(破坏性)解析。 +`kParseValidateEncodingFlag` | 校验 JSON 字符串的编码。 +`kParseIterativeFlag` | 迭代式(调用堆栈大小为常数复杂度)解析。 +`kParseStopWhenDoneFlag` | 当从流解析了一个完整的 JSON 根节点之后,停止继续处理余下的流。当使用了此标志,解析器便不会产生 `kParseErrorDocumentRootNotSingular` 错误。可使用本标志去解析同一个流里的多个 JSON。 +`kParseFullPrecisionFlag` | 使用完整的精确度去解析数字(较慢)。如不设置此标节,则会使用正常的精确度(较快)。正常精确度会有最多 3 个 [ULP](http://en.wikipedia.org/wiki/Unit_in_the_last_place) 的误差。 +`kParseCommentsFlag` | 容许单行 `// ...` 及多行 `/* ... 
*/` 注释(放宽的 JSON 语法)。 +`kParseNumbersAsStringsFlag` | 把数字类型解析成字符串。 +`kParseTrailingCommasFlag` | 容许在对象和数组结束前含有逗号(放宽的 JSON 语法)。 +`kParseNanAndInfFlag` | 容许 `NaN`、`Inf`、`Infinity`、`-Inf` 及 `-Infinity` 作为 `double` 值(放宽的 JSON 语法)。 +`kParseEscapedApostropheFlag` | 容许字符串中转义单引号 `\'` (放宽的 JSON 语法)。 + +由于使用了非类型模板参数,而不是函数参数,C++ 编译器能为个别组合生成代码,以改善性能及减少代码尺寸(当只用单种特化)。缺点是需要在编译期决定标志。 + +`SourceEncoding` 参数定义流使用了什么编码。这与 `Document` 的 `Encoding` 不相同。细节可参考 [转码和校验](#TranscodingAndValidation) 一节。 + +此外 `InputStream` 是输入流的类型。 + +## 解析错误 {#ParseError} + +当解析过程顺利完成,`Document` 便会含有解析结果。当过程出现错误,原来的 DOM 会*维持不变*。可使用 `bool HasParseError()`、`ParseErrorCode GetParseError()` 及 `size_t GetErrorOffset()` 获取解析的错误状态。 + +解析错误代号 | 描述 +--------------------------------------------|--------------------------------------------------- +`kParseErrorNone` | 无错误。 +`kParseErrorDocumentEmpty` | 文档是空的。 +`kParseErrorDocumentRootNotSingular` | 文档的根后面不能有其它值。 +`kParseErrorValueInvalid` | 不合法的值。 +`kParseErrorObjectMissName` | Object 成员缺少名字。 +`kParseErrorObjectMissColon` | Object 成员名字后缺少冒号。 +`kParseErrorObjectMissCommaOrCurlyBracket` | Object 成员后缺少逗号或 `}`。 +`kParseErrorArrayMissCommaOrSquareBracket` | Array 元素后缺少逗号或 `]` 。 +`kParseErrorStringUnicodeEscapeInvalidHex` | String 中的 `\\u` 转义符后含非十六进位数字。 +`kParseErrorStringUnicodeSurrogateInvalid` | String 中的代理对(surrogate pair)不合法。 +`kParseErrorStringEscapeInvalid` | String 含非法转义字符。 +`kParseErrorStringMissQuotationMark` | String 缺少关闭引号。 +`kParseErrorStringInvalidEncoding` | String 含非法编码。 +`kParseErrorNumberTooBig` | Number 的值太大,不能存储于 `double`。 +`kParseErrorNumberMissFraction` | Number 缺少了小数部分。 +`kParseErrorNumberMissExponent` | Number 缺少了指数。 + +错误的偏移量定义为从流开始至错误处的字符数量。目前 RapidJSON 不记录错误行号。 + +要取得错误讯息,RapidJSON 在 `rapidjson/error/en.h` 中提供了英文错误讯息。使用者可以修改它用于其他语言环境,或使用一个自定义的本地化系统。 + +以下是一个处理错误的例子。 + +~~~~~~~~~~cpp +#include "rapidjson/document.h" +#include "rapidjson/error/en.h" + +// ... 
+Document d; +if (d.Parse(json).HasParseError()) { + fprintf(stderr, "\nError(offset %u): %s\n", + (unsigned)d.GetErrorOffset(), + GetParseError_En(d.GetParseError())); + // ... +} +~~~~~~~~~~ + +## 原位解析 {#InSituParsing} + +根据 [维基百科](http://en.wikipedia.org/wiki/In_situ): + +> *In situ* ... is a Latin phrase that translates literally to "on site" or "in position". It means "locally", "on site", "on the premises" or "in place" to describe an event where it takes place, and is used in many different contexts. +> ... +> (In computer science) An algorithm is said to be an in situ algorithm, or in-place algorithm, if the extra amount of memory required to execute the algorithm is O(1), that is, does not exceed a constant no matter how large the input. For example, heapsort is an in situ sorting algorithm. + +> 翻译:*In situ*……是一个拉丁文片语,字面上的意思是指「现场」、「在位置」。在许多不同语境中,它描述一个事件发生的位置,意指「本地」、「现场」、「在处所」、「就位」。 +> …… +> (在计算机科学中)一个算法若称为原位算法,或在位算法,是指执行该算法所需的额外内存空间是 O(1) 的,换句话说,无论输入大小都只需要常数空间。例如,堆排序是一个原位排序算法。 + +在正常的解析过程中,对 JSON string 解码并复制至其他缓冲区是一个很大的开销。原位解析(*in situ* parsing)把这些 JSON string 直接解码于它原来存储的地方。由于解码后的 string 长度总是短于或等于原来储存于 JSON 的 string,所以这是可行的。在这个语境下,对 JSON string 进行解码是指处理转义符,如 `"\n"`、`"\u1234"` 等,以及在 string 末端加入空终止符号 (`'\0'`)。 + +以下的图比较正常及原位解析。JSON string 值包含指向解码后的字符串。 + +![正常解析](diagram/normalparsing.png) + +在正常解析中,解码后的字符串被复制至全新分配的缓冲区中。`"\\n"`(2 个字符)被解码成 `"\n"`(1 个字符)。`"\\u0073"`(6 个字符)被解码成 `"s"`(1 个字符)。 + +![原位解析](diagram/insituparsing.png) + +原位解析直接修改了原来的 JSON。图中高亮了被更新的字符。若 JSON string 不含转义符,例如 `"msg"`,那么解析过程仅仅是以空字符代替结束双引号。 + +由于原位解析修改了输入,其解析 API 需要 `char*` 而非 `const char*`。 + +~~~~~~~~~~cpp +// 把整个文件读入 buffer +FILE* fp = fopen("test.json", "r"); +fseek(fp, 0, SEEK_END); +size_t filesize = (size_t)ftell(fp); +fseek(fp, 0, SEEK_SET); +char* buffer = (char*)malloc(filesize + 1); +size_t readLength = fread(buffer, 1, filesize, fp); +buffer[readLength] = '\0'; +fclose(fp); + +// 原位解析 buffer 至 d,buffer 内容会被修改。 +Document d; +d.ParseInsitu(buffer); + +// 在此查询、修改 DOM…… + 
+free(buffer); +// 注意:在这个位置,d 可能含有指向已被释放的 buffer 的悬空指针 +~~~~~~~~~~ + +JSON string 会被打上 const-string 的标志。但它们可能并非真正的「常数」。它的生命周期取决于存储 JSON 的缓冲区。 + +原位解析把分配开销及内存复制减至最小。通常这样做能改善缓存一致性,而这对现代计算机来说是一个重要的性能因素。 + +原位解析有以下限制: + +1. 整个 JSON 须存储在内存之中。 +2. 流的来源缓码与文档的目标编码必须相同。 +3. 需要保留缓冲区,直至文档不再被使用。 +4. 若 DOM 需要在解析后被长期使用,而 DOM 内只有很少 JSON string,保留缓冲区可能造成内存浪费。 + +原位解析最适合用于短期的、用完即弃的 JSON。实际应用中,这些场合是非常普遍的,例如反序列化 JSON 至 C++ 对象、处理以 JSON 表示的 web 请求等。 + +## 转码与校验 {#TranscodingAndValidation} + +RapidJSON 内部支持不同 Unicode 格式(正式的术语是 UCS 变换格式)间的转换。在 DOM 解析时,流的来源编码与 DOM 的编码可以不同。例如,来源流可能含有 UTF-8 的 JSON,而 DOM 则使用 UTF-16 编码。在 [EncodedInputStream](doc/stream.zh-cn.md) 一节里有一个例子。 + +当从 DOM 输出一个 JSON 至输出流之时,也可以使用转码功能。在 [EncodedOutputStream](doc/stream.zh-cn.md) 一节里有一个例子。 + +在转码过程中,会把来源 string 解码成 Unicode 码点,然后把码点编码成目标格式。在解码时,它会校验来源 string 的字节序列是否合法。若遇上非合法序列,解析器会停止并返回 `kParseErrorStringInvalidEncoding` 错误。 + +当来源编码与 DOM 的编码相同,解析器缺省地 * 不会 * 校验序列。使用者可开启 `kParseValidateEncodingFlag` 去强制校验。 + +# 技巧 {#Techniques} + +这里讨论一些 DOM API 的使用技巧。 + +## 把 DOM 作为 SAX 事件发表者 + +在 RapidJSON 中,利用 `Writer` 把 DOM 生成 JSON 的做法,看来有点奇怪。 + +~~~~~~~~~~cpp +// ... 
+Writer<StringBuffer> writer(buffer); +d.Accept(writer); +~~~~~~~~~~ + +实际上,`Value::Accept()` 是负责发布该值相关的 SAX 事件至处理器的。通过这个设计,`Value` 及 `Writer` 解除了偶合。`Value` 可生成 SAX 事件,而 `Writer` 则可以处理这些事件。 + +使用者可以创建自定义的处理器,去把 DOM 转换成其它格式。例如,一个把 DOM 转换成 XML 的处理器。 + +要知道更多关于 SAX 事件与处理器,可参阅 [SAX](doc/sax.zh-cn.md)。 + +## 使用者缓冲区 {#UserBuffer} + +许多应用软件可能需要尽量减少内存分配。 + +`MemoryPoolAllocator` 可以帮助这方面,它容许使用者提供一个缓冲区。该缓冲区可能置于程序堆栈,或是一个静态分配的「草稿缓冲区(scratch buffer)」(一个静态/全局的数组),用于储存临时数据。 + +`MemoryPoolAllocator` 会先用使用者缓冲区去解决分配请求。当使用者缓冲区用完,就会从基础分配器(缺省为 `CrtAllocator`)分配一块内存。 + +以下是使用堆栈内存的例子,第一个分配器用于存储值,第二个用于解析时的临时缓冲。 + +~~~~~~~~~~cpp +typedef GenericDocument<UTF8<>, MemoryPoolAllocator<>, MemoryPoolAllocator<>> DocumentType; +char valueBuffer[4096]; +char parseBuffer[1024]; +MemoryPoolAllocator<> valueAllocator(valueBuffer, sizeof(valueBuffer)); +MemoryPoolAllocator<> parseAllocator(parseBuffer, sizeof(parseBuffer)); +DocumentType d(&valueAllocator, sizeof(parseBuffer), &parseAllocator); +d.Parse(json); +~~~~~~~~~~ + +若解析时分配总量少于 4096+1024 字节时,这段代码不会造成任何堆内存分配(经 `new` 或 `malloc()`)。 + +使用者可以通过 `MemoryPoolAllocator::Size()` 查询当前已分的内存大小。那么使用者可以拟定使用者缓冲区的合适大小。 diff --git a/src/s3select/rapidjson/doc/encoding.md b/src/s3select/rapidjson/doc/encoding.md new file mode 100644 index 000000000..e663aeac9 --- /dev/null +++ b/src/s3select/rapidjson/doc/encoding.md @@ -0,0 +1,146 @@ +# Encoding + +According to [ECMA-404](http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf), + +> (in Introduction) JSON text is a sequence of Unicode code points. + +The earlier [RFC4627](http://www.ietf.org/rfc/rfc4627.txt) stated that, + +> (in §3) JSON text SHALL be encoded in Unicode. The default encoding is UTF-8. + +> (in §6) JSON may be represented using UTF-8, UTF-16, or UTF-32. When JSON is written in UTF-8, JSON is 8bit compatible. When JSON is written in UTF-16 or UTF-32, the binary content-transfer-encoding must be used. + +RapidJSON supports various encodings. 
It can also validate the encodings of JSON, and transcode JSON among encodings. All these features are implemented internally, without the need for external libraries (e.g. [ICU](http://site.icu-project.org/)). + +[TOC] + +# Unicode {#Unicode} +From [Unicode's official website](http://www.unicode.org/standard/WhatIsUnicode.html): +> Unicode provides a unique number for every character, +> no matter what the platform, +> no matter what the program, +> no matter what the language. + +Those unique numbers are called code points, which are in the range `0x0` to `0x10FFFF`. + +## Unicode Transformation Format {#UTF} + +There are various encodings for storing Unicode code points. These are called Unicode Transformation Format (UTF). RapidJSON supports the most commonly used UTFs, including + +* UTF-8: 8-bit variable-width encoding. It maps a code point to 1–4 bytes. +* UTF-16: 16-bit variable-width encoding. It maps a code point to 1–2 16-bit code units (i.e., 2–4 bytes). +* UTF-32: 32-bit fixed-width encoding. It directly maps a code point to a single 32-bit code unit (i.e. 4 bytes). + +For UTF-16 and UTF-32, the byte order (endianness) does matter. Within computer memory, they are often stored in the computer's endianness. However, when they are stored in a file or transferred over a network, we need to state the byte order of the byte sequence, either little-endian (LE) or big-endian (BE). 
+ +RapidJSON provide these encodings via the structs in `rapidjson/encodings.h`: + +~~~~~~~~~~cpp +namespace rapidjson { + +template<typename CharType = char> +struct UTF8; + +template<typename CharType = wchar_t> +struct UTF16; + +template<typename CharType = wchar_t> +struct UTF16LE; + +template<typename CharType = wchar_t> +struct UTF16BE; + +template<typename CharType = unsigned> +struct UTF32; + +template<typename CharType = unsigned> +struct UTF32LE; + +template<typename CharType = unsigned> +struct UTF32BE; + +} // namespace rapidjson +~~~~~~~~~~ + +For processing text in memory, we normally use `UTF8`, `UTF16` or `UTF32`. For processing text via I/O, we may use `UTF8`, `UTF16LE`, `UTF16BE`, `UTF32LE` or `UTF32BE`. + +When using the DOM-style API, the `Encoding` template parameter in `GenericValue<Encoding>` and `GenericDocument<Encoding>` indicates the encoding to be used to represent JSON string in memory. So normally we will use `UTF8`, `UTF16` or `UTF32` for this template parameter. The choice depends on operating systems and other libraries that the application is using. For example, Windows API represents Unicode characters in UTF-16, while most Linux distributions and applications prefer UTF-8. + +Example of UTF-16 DOM declaration: + +~~~~~~~~~~cpp +typedef GenericDocument<UTF16<> > WDocument; +typedef GenericValue<UTF16<> > WValue; +~~~~~~~~~~ + +For a detail example, please check the example in [DOM's Encoding](doc/stream.md) section. + +## Character Type {#CharacterType} + +As shown in the declaration, each encoding has a `CharType` template parameter. Actually, it may be a little bit confusing, but each `CharType` stores a code unit, not a character (code point). As mentioned in previous section, a code point may be encoded to 1–4 code units for UTF-8. + +For `UTF16(LE|BE)`, `UTF32(LE|BE)`, the `CharType` must be integer type of at least 2 and 4 bytes respectively. 
+ +Note that C++11 introduces `char16_t` and `char32_t`, which can be used for `UTF16` and `UTF32` respectively. + +## AutoUTF {#AutoUTF} + +Previous encodings are statically bound in compile-time. In other words, user must know exactly which encodings will be used in the memory or streams. However, sometimes we may need to read/write files of different encodings. The encoding needed to be decided in runtime. + +`AutoUTF` is an encoding designed for this purpose. It chooses which encoding to be used according to the input or output stream. Currently, it should be used with `EncodedInputStream` and `EncodedOutputStream`. + +## ASCII {#ASCII} + +Although the JSON standards did not mention about [ASCII](http://en.wikipedia.org/wiki/ASCII), sometimes we would like to write 7-bit ASCII JSON for applications that cannot handle UTF-8. Since any JSON can represent unicode characters in escaped sequence `\uXXXX`, JSON can always be encoded in ASCII. + +Here is an example for writing a UTF-8 DOM into ASCII: + +~~~~~~~~~~cpp +using namespace rapidjson; +Document d; // UTF8<> +// ... +StringBuffer buffer; +Writer<StringBuffer, Document::EncodingType, ASCII<> > writer(buffer); +d.Accept(writer); +std::cout << buffer.GetString(); +~~~~~~~~~~ + +ASCII can be used in input stream. If the input stream contains bytes with values above 127, it will cause `kParseErrorStringInvalidEncoding` error. + +ASCII *cannot* be used in memory (encoding of `Document` or target encoding of `Reader`), as it cannot represent Unicode code points. + +# Validation & Transcoding {#ValidationTranscoding} + +When RapidJSON parses a JSON, it can validate the input JSON, whether it is a valid sequence of a specified encoding. This option can be turned on by adding `kParseValidateEncodingFlag` in `parseFlags` template parameter. + +If the input encoding and output encoding is different, `Reader` and `Writer` will automatically transcode (convert) the text. 
In this case, `kParseValidateEncodingFlag` is not necessary, as it must decode the input sequence. And if the sequence was unable to be decoded, it must be invalid. + +## Transcoder {#Transcoder} + +Although the encoding functions in RapidJSON are designed for JSON parsing/generation, user may abuse them for transcoding of non-JSON strings. + +Here is an example for transcoding a string from UTF-8 to UTF-16: + +~~~~~~~~~~cpp +#include "rapidjson/encodings.h" + +using namespace rapidjson; + +const char* s = "..."; // UTF-8 string +StringStream source(s); +GenericStringBuffer<UTF16<> > target; + +bool hasError = false; +while (source.Peek() != '\0') + if (!Transcoder<UTF8<>, UTF16<> >::Transcode(source, target)) { + hasError = true; + break; + } + +if (!hasError) { + const wchar_t* t = target.GetString(); + // ... +} +~~~~~~~~~~ + +You may also use `AutoUTF` and the associated streams for setting source/target encoding in runtime. diff --git a/src/s3select/rapidjson/doc/encoding.zh-cn.md b/src/s3select/rapidjson/doc/encoding.zh-cn.md new file mode 100644 index 000000000..808ba525f --- /dev/null +++ b/src/s3select/rapidjson/doc/encoding.zh-cn.md @@ -0,0 +1,152 @@ +# 编码 + +根据 [ECMA-404](http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf): + +> (in Introduction) JSON text is a sequence of Unicode code points. +> +> 翻译:JSON 文本是 Unicode 码点的序列。 + +较早的 [RFC4627](http://www.ietf.org/rfc/rfc4627.txt) 申明: + +> (in §3) JSON text SHALL be encoded in Unicode. The default encoding is UTF-8. +> +> 翻译:JSON 文本应该以 Unicode 编码。缺省的编码为 UTF-8。 + +> (in §6) JSON may be represented using UTF-8, UTF-16, or UTF-32. When JSON is written in UTF-8, JSON is 8bit compatible. When JSON is written in UTF-16 or UTF-32, the binary content-transfer-encoding must be used. 
+> +> 翻译:JSON 可使用 UTF-8、UTF-16 或 UTF-32 表示。当 JSON 以 UTF-8 写入,该 JSON 是 8 位兼容的。当 JSON 以 UTF-16 或 UTF-32 写入,就必须使用二进制的内容传送编码。 + +RapidJSON 支持多种编码。它也能检查 JSON 的编码,以及在不同编码中进行转码。所有这些功能都是在内部实现,无需使用外部的程序库(如 [ICU](http://site.icu-project.org/))。 + +[TOC] + +# Unicode {#Unicode} +根据 [Unicode 的官方网站](http://www.unicode.org/standard/translations/t-chinese.html): +>Unicode 给每个字符提供了一个唯一的数字, +不论是什么平台、 +不论是什么程序、 +不论是什么语言。 + +这些唯一数字称为码点(code point),其范围介乎 `0x0` 至 `0x10FFFF` 之间。 + +## Unicode 转换格式 {#UTF} + +存储 Unicode 码点有多种编码方式。这些称为 Unicode 转换格式(Unicode Transformation Format, UTF)。RapidJSON 支持最常用的 UTF,包括: + +* UTF-8:8 位可变长度编码。它把一个码点映射至 1 至 4 个字节。 +* UTF-16:16 位可变长度编码。它把一个码点映射至 1 至 2 个 16 位编码单元(即 2 至 4 个字节)。 +* UTF-32:32 位固定长度编码。它直接把码点映射至单个 32 位编码单元(即 4 字节)。 + +对于 UTF-16 及 UTF-32 来说,字节序(endianness)是有影响的。在内存中,它们通常都是以该计算机的字节序来存储。然而,当要储存在文件中或在网上传输,我们需要指明字节序列的字节序,是小端(little endian, LE)还是大端(big-endian, BE)。 + +RapidJSON 通过 `rapidjson/encodings.h` 中的 struct 去提供各种编码: + +~~~~~~~~~~cpp +namespace rapidjson { + +template<typename CharType = char> +struct UTF8; + +template<typename CharType = wchar_t> +struct UTF16; + +template<typename CharType = wchar_t> +struct UTF16LE; + +template<typename CharType = wchar_t> +struct UTF16BE; + +template<typename CharType = unsigned> +struct UTF32; + +template<typename CharType = unsigned> +struct UTF32LE; + +template<typename CharType = unsigned> +struct UTF32BE; + +} // namespace rapidjson +~~~~~~~~~~ + +对于在内存中的文本,我们正常会使用 `UTF8`、`UTF16` 或 `UTF32`。对于处理经过 I/O 的文本,我们可使用 `UTF8`、`UTF16LE`、`UTF16BE`、`UTF32LE` 或 `UTF32BE`。 + +当使用 DOM 风格的 API,`GenericValue<Encoding>` 及 `GenericDocument<Encoding>` 里的 `Encoding` 模板参数是用于指明内存中存储的 JSON 字符串使用哪种编码。因此通常我们会在此参数中使用 `UTF8`、`UTF16` 或 `UTF32`。如何选择,视乎应用软件所使用的操作系统及其他程序库。例如,Windows API 使用 UTF-16 表示 Unicode 字符,而多数的 Linux 发行版本及应用软件则更喜欢 UTF-8。 + +使用 UTF-16 的 DOM 声明例子: + +~~~~~~~~~~cpp +typedef GenericDocument<UTF16<> > WDocument; +typedef GenericValue<UTF16<> > WValue; +~~~~~~~~~~ + +可以在 [DOM's Encoding](doc/stream.zh-cn.md) 
一节看到更详细的使用例子。 + +## 字符类型 {#CharacterType} + +从之前的声明中可以看到,每个编码都有一个 `CharType` 模板参数。这可能比较容易混淆,实际上,每个 `CharType` 存储一个编码单元,而不是一个字符(码点)。如之前所谈及,在 UTF-8 中一个码点可能会编码成 1 至 4 个编码单元。 + +对于 `UTF16(LE|BE)` 及 `UTF32(LE|BE)` 来说,`CharType` 必须分别是一个至少 2 及 4 字节的整数类型。 + +注意 C++11 新添了 `char16_t` 及 `char32_t` 类型,也可分别用于 `UTF16` 及 `UTF32`。 + +## AutoUTF {#AutoUTF} + +上述所介绍的编码都是在编译期静态绑定的。换句话说,使用者必须知道内存或流之中使用了哪种编码。然而,有时候我们可能需要读写不同编码的文件,而且这些编码需要在运行时才能决定。 + +`AutoUTF` 是为此而设计的编码。它根据输入或输出流来选择使用哪种编码。目前它应该与 `EncodedInputStream` 及 `EncodedOutputStream` 结合使用。 + +## ASCII {#ASCII} + +虽然 JSON 标准并未提及 [ASCII](http://en.wikipedia.org/wiki/ASCII),有时候我们希望写入 7 位的 ASCII JSON,以供未能处理 UTF-8 的应用程序使用。由于任何 JSON 都可以把 Unicode 字符表示为 `\uXXXX` 转义序列,JSON 总是可用 ASCII 来编码。 + +以下的例子把 UTF-8 的 DOM 写成 ASCII 的 JSON: + +~~~~~~~~~~cpp +using namespace rapidjson; +Document d; // UTF8<> +// ... +StringBuffer buffer; +Writer<StringBuffer, Document::EncodingType, ASCII<> > writer(buffer); +d.Accept(writer); +std::cout << buffer.GetString(); +~~~~~~~~~~ + +ASCII 可用于输入流。当输入流包含大于 127 的字节,就会导致 `kParseErrorStringInvalidEncoding` 错误。 + +ASCII *不能* 用于内存(`Document` 的编码,或 `Reader` 的目标编码),因为它不能表示 Unicode 码点。 + +# 校验及转码 {#ValidationTranscoding} + +当 RapidJSON 解析一个 JSON 时,它能校验输入 JSON,判断它是否为所标明编码的合法序列。要开启此选项,请把 `kParseValidateEncodingFlag` 加入 `parseFlags` 模板参数。 + +若输入编码和输出编码并不相同,`Reader` 及 `Writer` 会自动把文本转码。在这种情况下,并不需要 `kParseValidateEncodingFlag`,因为它必须解码输入序列。若序列不能被解码,它必然是不合法的。 + +## 转码器 {#Transcoder} + +虽然 RapidJSON 的编码功能是为 JSON 解析/生成而设计,使用者也可以“滥用”它们来为非 JSON 字符串转码。 + +以下的例子把 UTF-8 字符串转码成 UTF-16: + +~~~~~~~~~~cpp +#include "rapidjson/encodings.h" + +using namespace rapidjson; + +const char* s = "..."; // UTF-8 string +StringStream source(s); +GenericStringBuffer<UTF16<> > target; + +bool hasError = false; +while (source.Peek() != '\0') + if (!Transcoder<UTF8<>, UTF16<> >::Transcode(source, target)) { + hasError = true; + break; + } + +if (!hasError) { + const wchar_t* t = target.GetString(); + // ... 
+} +~~~~~~~~~~ + +你也可以用 `AutoUTF` 及对应的流来在运行时设置内源/目的之编码。 diff --git a/src/s3select/rapidjson/doc/faq.md b/src/s3select/rapidjson/doc/faq.md new file mode 100644 index 000000000..55fa2af88 --- /dev/null +++ b/src/s3select/rapidjson/doc/faq.md @@ -0,0 +1,289 @@ +# FAQ + +[TOC] + +## General + +1. What is RapidJSON? + + RapidJSON is a C++ library for parsing and generating JSON. You may check all [features](doc/features.md) of it. + +2. Why is RapidJSON named so? + + It is inspired by [RapidXML](http://rapidxml.sourceforge.net/), which is a fast XML DOM parser. + +3. Is RapidJSON similar to RapidXML? + + RapidJSON borrowed some designs of RapidXML, including *in situ* parsing, header-only library. But the two APIs are completely different. Also RapidJSON provide many features that are not in RapidXML. + +4. Is RapidJSON free? + + Yes, it is free under MIT license. It can be used in commercial applications. Please check the details in [license.txt](https://github.com/Tencent/rapidjson/blob/master/license.txt). + +5. Is RapidJSON small? What are its dependencies? + + Yes. A simple executable which parses a JSON and prints its statistics is less than 30KB on Windows. + + RapidJSON depends on C++ standard library only. + +6. How to install RapidJSON? + + Check [Installation section](https://miloyip.github.io/rapidjson/). + +7. Can RapidJSON run on my platform? + + RapidJSON has been tested in many combinations of operating systems, compilers and CPU architecture by the community. But we cannot ensure that it can be run on your particular platform. Building and running the unit test suite will give you the answer. + +8. Does RapidJSON support C++03? C++11? + + RapidJSON was firstly implemented for C++03. Later it added optional support of some C++11 features (e.g., move constructor, `noexcept`). RapidJSON shall be compatible with C++03 or C++11 compliant compilers. + +9. Does RapidJSON really work in real applications? + + Yes. 
It is deployed in both client and server real applications. A community member reported that RapidJSON in their system parses 50 million JSONs daily. + +10. How is RapidJSON tested? + + RapidJSON contains a unit test suite for automatic testing. [Travis](https://travis-ci.org/Tencent/rapidjson/)(for Linux) and [AppVeyor](https://ci.appveyor.com/project/Tencent/rapidjson/)(for Windows) will compile and run the unit test suite for all modifications. The test process also uses Valgrind (in Linux) to detect memory leaks. + +11. Is RapidJSON well documented? + + RapidJSON provides a user guide and API documentation. + +12. Are there alternatives? + + Yes, there are a lot of alternatives. For example, [nativejson-benchmark](https://github.com/miloyip/nativejson-benchmark) has a listing of open-source C/C++ JSON libraries. [json.org](http://www.json.org/) also has a list. + +## JSON + +1. What is JSON? + + JSON (JavaScript Object Notation) is a lightweight data-interchange format. It uses human readable text format. More details of JSON can be found in [RFC7159](http://www.ietf.org/rfc/rfc7159.txt) and [ECMA-404](http://www.ecma-international.org/publications/standards/Ecma-404.htm). + +2. What are applications of JSON? + + JSON is commonly used in web applications for transferring structured data. It is also used as a file format for data persistence. + +3. Does RapidJSON conform to the JSON standard? + + Yes. RapidJSON is fully compliant with [RFC7159](http://www.ietf.org/rfc/rfc7159.txt) and [ECMA-404](http://www.ecma-international.org/publications/standards/Ecma-404.htm). It can handle corner cases, such as supporting null character and surrogate pairs in JSON strings. + +4. Does RapidJSON support relaxed syntax? + + Currently no. RapidJSON only supports the strict standardized format. Support for relaxed syntax is under discussion in this [issue](https://github.com/Tencent/rapidjson/issues/36). + +## DOM and SAX + +1. What is DOM style API? 
+ + Document Object Model (DOM) is an in-memory representation of JSON for query and manipulation. + +2. What is SAX style API? + + SAX is an event-driven API for parsing and generation. + +3. Should I choose DOM or SAX? + + DOM is easy for query and manipulation. SAX is very fast and memory-saving but often more difficult to be applied. + +4. What is *in situ* parsing? + + *in situ* parsing decodes the JSON strings directly into the input JSON. This is an optimization which can reduce memory consumption and improve performance, but the input JSON will be modified. Check [in-situ parsing](doc/dom.md) for details. + +5. When does parsing generate an error? + + The parser generates an error when the input JSON contains invalid syntax, or a value can not be represented (a number is too big), or the handler of parsers terminate the parsing. Check [parse error](doc/dom.md) for details. + +6. What error information is provided? + + The error is stored in `ParseResult`, which includes the error code and offset (number of characters from the beginning of JSON). The error code can be translated into human-readable error message. + +7. Why not just using `double` to represent JSON number? + + Some applications use 64-bit unsigned/signed integers. And these integers cannot be converted into `double` without loss of precision. So the parsers detects whether a JSON number is convertible to different types of integers and/or `double`. + +8. How to clear-and-minimize a document or value? + + Call one of the `SetXXX()` methods - they call destructor which deallocates DOM data: + + ~~~~~~~~~~cpp + Document d; + ... + d.SetObject(); // clear and minimize + ~~~~~~~~~~ + + Alternatively, use equivalent of the [C++ swap with temporary idiom](https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Clear-and-minimize): + ~~~~~~~~~~cpp + Value(kObjectType).Swap(d); + ~~~~~~~~~~ + or equivalent, but slightly longer to type: + ~~~~~~~~~~cpp + d.Swap(Value(kObjectType).Move()); + ~~~~~~~~~~ + +9. 
How to insert a document node into another document? + + Let's take the following two DOM trees represented as JSON documents: + ~~~~~~~~~~cpp + Document person; + person.Parse("{\"person\":{\"name\":{\"first\":\"Adam\",\"last\":\"Thomas\"}}}"); + + Document address; + address.Parse("{\"address\":{\"city\":\"Moscow\",\"street\":\"Quiet\"}}"); + ~~~~~~~~~~ + Let's assume we want to merge them in such way that the whole `address` document becomes a node of the `person`: + ~~~~~~~~~~js + { "person": { + "name": { "first": "Adam", "last": "Thomas" }, + "address": { "city": "Moscow", "street": "Quiet" } + } + } + ~~~~~~~~~~ + + The most important requirement to take care of document and value life-cycle as well as consistent memory management using the right allocator during the value transfer. + + Simple yet most efficient way to achieve that is to modify the `address` definition above to initialize it with allocator of the `person` document, then we just add the root member of the value: + ~~~~~~~~~~cpp + Document address(&person.GetAllocator()); + ... + person["person"].AddMember("address", address["address"], person.GetAllocator()); + ~~~~~~~~~~ +Alternatively, if we don't want to explicitly refer to the root value of `address` by name, we can refer to it via iterator: + ~~~~~~~~~~cpp + auto addressRoot = address.MemberBegin(); + person["person"].AddMember(addressRoot->name, addressRoot->value, person.GetAllocator()); + ~~~~~~~~~~ + + Second way is to deep-clone the value from the address document: + ~~~~~~~~~~cpp + Value addressValue = Value(address["address"], person.GetAllocator()); + person["person"].AddMember("address", addressValue, person.GetAllocator()); + ~~~~~~~~~~ + +## Document/Value (DOM) + +1. What is move semantics? Why? + + Instead of copy semantics, move semantics is used in `Value`. That means, when assigning a source value to a target value, the ownership of source value is moved to the target value. 
+ + Since moving is faster than copying, this design decision forces the user to be aware of the copying overhead. + +2. How to copy a value? + + There are two APIs: constructor with allocator, and `CopyFrom()`. See [Deep Copy Value](doc/tutorial.md) for an example. + +3. Why do I need to provide the length of string? + + Since C string is null-terminated, the length of string needs to be computed via `strlen()`, with linear runtime complexity. This incurs an unnecessary overhead of many operations, if the user already knows the length of string. + + Also, RapidJSON can handle `\u0000` (null character) within a string. If a string contains null characters, `strlen()` cannot return the true length of it. In such a case the user must provide the length of string explicitly. + +4. Why do I need to provide allocator parameter in many DOM manipulation API? + + Since the APIs are member functions of `Value`, we do not want to save an allocator pointer in every `Value`. + +5. Does it convert between numerical types? + + When using `GetInt()`, `GetUint()`, ... conversion may occur. For integer-to-integer conversion, it only converts when it is safe (otherwise it will assert). However, when converting a 64-bit signed/unsigned integer to double, it will convert but be aware that it may lose precision. A number with fraction, or an integer larger than 64-bit, can only be obtained by `GetDouble()`. + +## Reader/Writer (SAX) + +1. Why don't we just `printf` a JSON? Why do we need a `Writer`? + + Most importantly, `Writer` will ensure the output JSON is well-formed. Calling SAX events incorrectly (e.g. `StartObject()` pairing with `EndArray()`) will assert. Besides, `Writer` will escape strings (e.g., `\n`). Finally, the numeric output of `printf()` may not be a valid JSON number, especially in some locale with digit delimiters. And the number-to-string conversion in `Writer` is implemented with very fast algorithms, which outperform `printf()` or `iostream`. + +2. 
Can I pause the parsing process and resume it later? + + This is not directly supported in the current version due to performance consideration. However, if the execution environment supports multi-threading, user can parse a JSON in a separate thread, and pause it by blocking in the input stream. + +## Unicode + +1. Does it support UTF-8, UTF-16 and other format? + + Yes. It fully support UTF-8, UTF-16 (LE/BE), UTF-32 (LE/BE) and ASCII. + +2. Can it validate the encoding? + + Yes, just pass `kParseValidateEncodingFlag` to `Parse()`. If there is invalid encoding in the stream, it will generate `kParseErrorStringInvalidEncoding` error. + +3. What is surrogate pair? Does RapidJSON support it? + + JSON uses UTF-16 encoding when escaping unicode character, e.g. `\u5927` representing Chinese character "big". To handle characters other than those in basic multilingual plane (BMP), UTF-16 encodes those characters with two 16-bit values, which is called UTF-16 surrogate pair. For example, the Emoji character U+1F602 can be encoded as `\uD83D\uDE02` in JSON. + + RapidJSON fully support parsing/generating UTF-16 surrogates. + +4. Can it handle `\u0000` (null character) in JSON string? + + Yes. RapidJSON fully support null character in JSON string. However, user need to be aware of it and using `GetStringLength()` and related APIs to obtain the true length of string. + +5. Can I output `\uxxxx` for all non-ASCII character? + + Yes, use `ASCII<>` as output encoding template parameter in `Writer` can enforce escaping those characters. + +## Stream + +1. I have a big JSON file. Should I load the whole file to memory? + + User can use `FileReadStream` to read the file chunk-by-chunk. But for *in situ* parsing, the whole file must be loaded. + +2. Can I parse JSON while it is streamed from network? + + Yes. User can implement a custom stream for this. Please refer to the implementation of `FileReadStream`. + +3. I don't know what encoding will the JSON be. How to handle them? 
+ + You may use `AutoUTFInputStream` which detects the encoding of input stream automatically. However, it will incur some performance overhead. + +4. What is BOM? How RapidJSON handle it? + + [Byte order mark (BOM)](http://en.wikipedia.org/wiki/Byte_order_mark) sometimes reside at the beginning of file/stream to indicate the UTF encoding type of it. + + RapidJSON's `EncodedInputStream` can detect/consume BOM. `EncodedOutputStream` can optionally write a BOM. See [Encoded Streams](doc/stream.md) for example. + +5. Why little/big endian is related? + + little/big endian of stream is an issue for UTF-16 and UTF-32 streams, but not UTF-8 stream. + +## Performance + +1. Is RapidJSON really fast? + + Yes. It may be the fastest open source JSON library. There is a [benchmark](https://github.com/miloyip/nativejson-benchmark) for evaluating performance of C/C++ JSON libraries. + +2. Why is it fast? + + Many design decisions of RapidJSON is aimed at time/space performance. These may reduce user-friendliness of APIs. Besides, it also employs low-level optimizations (intrinsics, SIMD) and special algorithms (custom double-to-string, string-to-double conversions). + +3. What is SIMD? How it is applied in RapidJSON? + + [SIMD](http://en.wikipedia.org/wiki/SIMD) instructions can perform parallel computation in modern CPUs. RapidJSON support Intel's SSE2/SSE4.2 and ARM's Neon to accelerate whitespace/tabspace/carriage-return/line-feed skipping. This improves performance of parsing indent formatted JSON. Define `RAPIDJSON_SSE2`, `RAPIDJSON_SSE42` or `RAPIDJSON_NEON` macro to enable this feature. However, running the executable on a machine without such instruction set support will make it crash. + +4. Does it consume a lot of memory? + + The design of RapidJSON aims at reducing memory footprint. + + In the SAX API, `Reader` consumes memory proportional to maximum depth of JSON tree, plus maximum length of JSON string. 
+ + In the DOM API, each `Value` consumes exactly 16/24 bytes for 32/64-bit architecture respectively. RapidJSON also uses a special memory allocator to minimize overhead of allocations. + +5. What is the purpose of being high performance? + + Some applications need to process very large JSON files. Some server-side applications need to process huge amount of JSONs. Being high performance can improve both latency and throughput. In a broad sense, it will also save energy. + +## Gossip + +1. Who are the developers of RapidJSON? + + Milo Yip ([miloyip](https://github.com/miloyip)) is the original author of RapidJSON. Many contributors from the world have improved RapidJSON. Philipp A. Hartmann ([pah](https://github.com/pah)) has implemented a lot of improvements, setting up automatic testing and also involves in a lot of discussions for the community. Don Ding ([thebusytypist](https://github.com/thebusytypist)) implemented the iterative parser. Andrii Senkovych ([jollyroger](https://github.com/jollyroger)) completed the CMake migration. Kosta ([Kosta-Github](https://github.com/Kosta-Github)) provided a very neat short-string optimization. Thank you for all other contributors and community members as well. + +2. Why do you develop RapidJSON? + + It was just a hobby project initially in 2011. Milo Yip is a game programmer and he just knew about JSON at that time and would like to apply JSON in future projects. As JSON seems very simple he would like to write a header-only and fast library. + +3. Why there is a long empty period of development? + + It is basically due to personal issues, such as getting new family members. Also, Milo Yip has spent a lot of spare time on translating "Game Engine Architecture" by Jason Gregory into Chinese. + +4. Why did the repository move from Google Code to GitHub? + + This is the trend. And GitHub is much more powerful and convenient. 
diff --git a/src/s3select/rapidjson/doc/faq.zh-cn.md b/src/s3select/rapidjson/doc/faq.zh-cn.md new file mode 100644 index 000000000..cf1124d82 --- /dev/null +++ b/src/s3select/rapidjson/doc/faq.zh-cn.md @@ -0,0 +1,290 @@ +# 常见问题 + +[TOC] + +## 一般问题 + +1. RapidJSON 是什么? + + RapidJSON 是一个 C++ 库,用于解析及生成 JSON。读者可参考它的所有 [特点](doc/features.zh-cn.md)。 + +2. 为什么称作 RapidJSON? + + 它的灵感来自于 [RapidXML](http://rapidxml.sourceforge.net/),RapidXML 是一个高速的 XML DOM 解析器。 + +3. RapidJSON 与 RapidXML 相似么? + + RapidJSON 借镜了 RapidXML 的一些设计, 包括原位(*in situ*)解析、只有头文件的库。但两者的 API 是完全不同的。此外 RapidJSON 也提供许多 RapidXML 没有的特点。 + +4. RapidJSON 是免费的么? + + 是的,它在 MIT 协议下免费。它可用于商业软件。详情请参看 [license.txt](https://github.com/Tencent/rapidjson/blob/master/license.txt)。 + +5. RapidJSON 很小么?它有何依赖? + + 是的。在 Windows 上,一个解析 JSON 并打印出统计的可执行文件少于 30KB。 + + RapidJSON 仅依赖于 C++ 标准库。 + +6. 怎样安装 RapidJSON? + + 见 [安装一节](../readme.zh-cn.md#安装)。 + +7. RapidJSON 能否运行于我的平台? + + 社区已在多个操作系统/编译器/CPU 架构的组合上测试 RapidJSON。但我们无法确保它能运行于你特定的平台上。只需要生成及执行单元测试便能获取答案。 + +8. RapidJSON 支持 C++03 么?C++11 呢? + + RapidJSON 开始时在 C++03 上实现。后来加入了可选的 C++11 特性支持(如转移构造函数、`noexcept`)。RapidJSON 应该兼容所有遵从 C++03 或 C++11 的编译器。 + +9. RapidJSON 是否真的用于实际应用? + + 是的。它被配置于前台及后台的真实应用中。一个社区成员说 RapidJSON 在他们的系统中每日解析 5 千万个 JSON。 + +10. RapidJSON 是如何被测试的? + + RapidJSON 包含一组单元测试去执行自动测试。[Travis](https://travis-ci.org/Tencent/rapidjson/)(供 Linux 平台)及 [AppVeyor](https://ci.appveyor.com/project/Tencent/rapidjson/)(供 Windows 平台)会对所有修改进行编译及执行单元测试。在 Linux 下还会使用 Valgrind 去检测内存泄漏。 + +11. RapidJSON 是否有完整的文档? + + RapidJSON 提供了使用手册及 API 说明文档。 + +12. 有没有其他替代品? + + 有许多替代品。例如 [nativejson-benchmark](https://github.com/miloyip/nativejson-benchmark) 列出了一些开源的 C/C++ JSON 库。[json.org](http://www.json.org/) 也有一个列表。 + +## JSON + +1. 什么是 JSON? + + JSON (JavaScript Object Notation) 是一个轻量的数据交换格式。它使用人类可读的文本格式。更多关于 JSON 的细节可考 [RFC7159](http://www.ietf.org/rfc/rfc7159.txt) 及 [ECMA-404](http://www.ecma-international.org/publications/standards/Ecma-404.htm)。 + +2. JSON 有什么应用场合? 
+ + JSON 常用于网页应用程序,以传送结构化数据。它也可作为文件格式用于数据持久化。 + +3. RapidJSON 是否符合 JSON 标准? + + 是。RapidJSON 完全符合 [RFC7159](http://www.ietf.org/rfc/rfc7159.txt) 及 [ECMA-404](http://www.ecma-international.org/publications/standards/Ecma-404.htm)。它能处理一些特殊情况,例如支持 JSON 字符串中含有空字符及代理对(surrogate pair)。 + +4. RapidJSON 是否支持宽松的语法? + + 目前不支持。RapidJSON 只支持严格的标准格式。宽松语法可以在这个 [issue](https://github.com/Tencent/rapidjson/issues/36) 中进行讨论。 + +## DOM 与 SAX + +1. 什么是 DOM 风格 API? + + Document Object Model(DOM)是一个储存于内存的 JSON 表示方式,用于查询及修改 JSON。 + +2. 什么是 SAX 风格 API? + + SAX 是一个事件驱动的 API,用于解析及生成 JSON。 + +3. 我应用 DOM 还是 SAX? + + DOM 易于查询及修改。SAX 则是非常快及省内存的,但通常较难使用。 + +4. 什么是原位(*in situ*)解析? + + 原位解析会把 JSON 字符串直接解码至输入的 JSON 中。这是一个优化,可减少内存消耗及提升性能,但输入的 JSON 会被更改。进一步细节请参考 [原位解析](doc/dom.zh-cn.md) 。 + +5. 什么时候会产生解析错误? + + 当输入的 JSON 包含非法语法,或不能表示一个值(如 Number 太大),或解析器的处理器中断解析过程,解析器都会产生一个错误。详情请参考 [解析错误](doc/dom.zh-cn.md)。 + +6. 有什么错误信息? + + 错误信息存储在 `ParseResult`,它包含错误代号及偏移值(从 JSON 开始至错误处的字符数目)。可以把错误代号翻译为人类可读的错误讯息。 + +7. 为何不只使用 `double` 去表示 JSON number? + + 一些应用需要使用 64 位无号/有号整数。这些整数不能无损地转换成 `double`。因此解析器会检测一个 JSON number 是否能转换至各种整数类型及 `double`。 + +8. 如何清空并最小化 `document` 或 `value` 的容量? + + 调用 `SetXXX()` 方法 - 这些方法会调用析构函数,并重建空的 Object 或 Array: + + ~~~~~~~~~~cpp + Document d; + ... + d.SetObject(); // clear and minimize + ~~~~~~~~~~ + + 另外,也可以参考在 [C++ swap with temporary idiom](https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Clear-and-minimize) 中的一种等价的方法: + ~~~~~~~~~~cpp + Value(kObjectType).Swap(d); + ~~~~~~~~~~ + 或者,使用这个稍微长一点的代码也能完成同样的事情: + ~~~~~~~~~~cpp + d.Swap(Value(kObjectType).Move()); + ~~~~~~~~~~ + +9. 如何将一个 `document` 节点插入到另一个 `document` 中? 
+ + 比如有以下两个 document(DOM): + ~~~~~~~~~~cpp + Document person; + person.Parse("{\"person\":{\"name\":{\"first\":\"Adam\",\"last\":\"Thomas\"}}}"); + + Document address; + address.Parse("{\"address\":{\"city\":\"Moscow\",\"street\":\"Quiet\"}}"); + ~~~~~~~~~~ + 假设我们希望将整个 `address` 插入到 `person` 中,作为其一个子节点: + ~~~~~~~~~~js + { "person": { + "name": { "first": "Adam", "last": "Thomas" }, + "address": { "city": "Moscow", "street": "Quiet" } + } + } + ~~~~~~~~~~ + + 在插入节点的过程中需要注意 `document` 和 `value` 的生命周期并且正确地使用 allocator 进行内存分配和管理。 + + 一个简单有效的方法就是修改上述 `address` 变量的定义,让其使用 `person` 的 allocator 初始化,然后将其添加到根节点。 + + ~~~~~~~~~~cpp + Document address(&person.GetAllocator()); + ... + person["person"].AddMember("address", address["address"], person.GetAllocator()); + ~~~~~~~~~~ + 当然,如果你不想通过显式地写出 `address` 的 key 来得到其值,可以使用迭代器来实现: + ~~~~~~~~~~cpp + auto addressRoot = address.MemberBegin(); + person["person"].AddMember(addressRoot->name, addressRoot->value, person.GetAllocator()); + ~~~~~~~~~~ + + 此外,还可以通过深拷贝 address document 来实现: + ~~~~~~~~~~cpp + Value addressValue = Value(address["address"], person.GetAllocator()); + person["person"].AddMember("address", addressValue, person.GetAllocator()); + ~~~~~~~~~~ + +## Document/Value (DOM) + +1. 什么是转移语义?为什么? + + `Value` 不用复制语义,而使用了转移语义。这是指,当把来源值赋值于目标值时,来源值的所有权会转移至目标值。 + + 由于转移快于复制,此设计决定强迫使用者注意到复制的消耗。 + +2. 怎样去复制一个值? + + 有两个 API 可用:含 allocator 的构造函数,以及 `CopyFrom()`。可参考 [深复制 Value](doc/tutorial.zh-cn.md) 里的用例。 + +3. 为什么我需要提供字符串的长度? + + 由于 C 字符串是空字符结尾的,需要使用 `strlen()` 去计算其长度,这是线性复杂度的操作。若使用者已知字符串的长度,对很多操作来说会造成不必要的消耗。 + + 此外,RapidJSON 可处理含有 `\u0000`(空字符)的字符串。若一个字符串含有空字符,`strlen()` 便不能返回真正的字符串长度。在这种情况下使用者必须明确地提供字符串长度。 + +4. 为什么在许多 DOM 操作 API 中要提供分配器作为参数? + + 由于这些 API 是 `Value` 的成员函数,我们不希望为每个 `Value` 储存一个分配器指针。 + +5. 它会转换各种数值类型么? + + 当使用 `GetInt()`、`GetUint()` 等 API 时,可能会发生转换。对于整数至整数转换,仅当保证转换安全才会转换(否则会断言失败)。然而,当把一个 64 位有号/无号整数转换至 double 时,它会转换,但有可能会损失精度。含有小数的数字、或大于 64 位的整数,都只能使用 `GetDouble()` 获取其值。 + +## Reader/Writer (SAX) + +1. 
为什么不仅仅用 `printf` 输出一个 JSON?为什么需要 `Writer`? + + 最重要的是,`Writer` 能确保输出的 JSON 是格式正确的。错误地调用 SAX 事件(如 `StartObject()` 错配 `EndArray()`)会造成断言失败。此外,`Writer` 会把字符串进行转义(如 `\n`)。最后,`printf()` 的数值输出可能并不是一个合法的 JSON number,特别是某些 locale 会有数字分隔符。而且 `Writer` 的数值字符串转换是使用非常快的算法来实现的,胜过 `printf()` 及 `iostream`。 + +2. 我能否暂停解析过程,并在稍后继续? + + 基于性能考虑,目前版本并不直接支持此功能。然而,若执行环境支持多线程,使用者可以在另一线程解析 JSON,并通过阻塞输入流去暂停。 + +## Unicode + +1. 它是否支持 UTF-8、UTF-16 及其他格式? + + 是。它完全支持 UTF-8、UTF-16(大端/小端)、UTF-32(大端/小端)及 ASCII。 + +2. 它能否检测编码的合法性? + + 能。只需把 `kParseValidateEncodingFlag` 参考传给 `Parse()`。若发现在输入流中有非法的编码,它就会产生 `kParseErrorStringInvalidEncoding` 错误。 + +3. 什么是代理对(surrogate pair)?RapidJSON 是否支持? + + JSON 使用 UTF-16 编码去转义 Unicode 字符,例如 `\u5927` 表示中文字“大”。要处理基本多文种平面(basic multilingual plane,BMP)以外的字符时,UTF-16 会把那些字符编码成两个 16 位值,这称为 UTF-16 代理对。例如,绘文字字符 U+1F602 在 JSON 中可被编码成 `\uD83D\uDE02`。 + + RapidJSON 完全支持解析及生成 UTF-16 代理对。 + +4. 它能否处理 JSON 字符串中的 `\u0000`(空字符)? + + 能。RapidJSON 完全支持 JSON 字符串中的空字符。然而,使用者需要注意到这件事,并使用 `GetStringLength()` 及相关 API 去取得字符串真正长度。 + +5. 能否对所有非 ASCII 字符输出成 `\uxxxx` 形式? + + 可以。只要在 `Writer` 中使用 `ASCII<>` 作为输出编码参数,就可以强逼转义那些字符。 + +## 流 + +1. 我有一个很大的 JSON 文件。我应否把它整个载入内存中? + + 使用者可使用 `FileReadStream` 去逐块读入文件。但若使用于原位解析,必须载入整个文件。 + +2. 我能否解析一个从网络上串流进来的 JSON? + + 可以。使用者可根据 `FileReadStream` 的实现,去实现一个自定义的流。 + +3. 我不知道一些 JSON 将会使用哪种编码。怎样处理它们? + + 你可以使用 `AutoUTFInputStream`,它能自动检测输入流的编码。然而,它会带来一些性能开销。 + +4. 什么是 BOM?RapidJSON 怎样处理它? + + [字节顺序标记(byte order mark, BOM)](http://en.wikipedia.org/wiki/Byte_order_mark) 有时会出现于文件/流的开始,以表示其 UTF 编码类型。 + + RapidJSON 的 `EncodedInputStream` 可检测/跳过 BOM。`EncodedOutputStream` 可选择是否写入 BOM。可参考 [编码流](doc/stream.zh-cn.md) 中的例子。 + +5. 为什么会涉及大端/小端? + + 流的大端/小端是 UTF-16 及 UTF-32 流要处理的问题,而 UTF-8 不需要处理。 + +## 性能 + +1. RapidJSON 是否真的快? + + 是。它可能是最快的开源 JSON 库。有一个 [评测](https://github.com/miloyip/nativejson-benchmark) 评估 C/C++ JSON 库的性能。 + +2. 为什么它会快? 
+ + RapidJSON 的许多设计是针对时间/空间性能来设计的,这些决定可能会影响 API 的易用性。此外,它也使用了许多底层优化(内部函数/intrinsic、SIMD)及特别的算法(自定义的 double 至字符串转换、字符串至 double 的转换)。 + +3. 什么是 SIMD?它如何用于 RapidJSON? + + [SIMD](http://en.wikipedia.org/wiki/SIMD) 指令可以在现代 CPU 中执行并行运算。RapidJSON 支持使用 Intel 的 SSE2/SSE4.2 和 ARM 的 Neon 来加速对空白符、制表符、回车符和换行符的过滤处理。在解析含缩进的 JSON 时,这能提升性能。只要定义名为 `RAPIDJSON_SSE2`、`RAPIDJSON_SSE42` 或 `RAPIDJSON_NEON` 的宏,就能启动这个功能。然而,若在不支持这些指令集的机器上执行这些可执行文件,会导致崩溃。 + +4. 它会消耗许多内存么? + + RapidJSON 的设计目标是减低内存占用。 + + 在 SAX API 中,`Reader` 消耗的内存与 JSON 树深度加上最长 JSON 字符串的长度成正比。 + + 在 DOM API 中,每个 `Value` 在 32/64 位架构下分别消耗 16/24 字节。RapidJSON 也使用一个特殊的内存分配器去减少分配的额外开销。 + +5. 高性能的意义何在? + + 有些应用程序需要处理非常大的 JSON 文件。而有些后台应用程序需要处理大量的 JSON。达到高性能同时改善延时及吞吐量。更广义来说,这也可以节省能源。 + +## 八卦 + +1. 谁是 RapidJSON 的开发者? + + 叶劲峰(Milo Yip,[miloyip](https://github.com/miloyip))是 RapidJSON 的原作者。全世界许多贡献者一直在改善 RapidJSON。Philipp A. Hartmann([pah](https://github.com/pah))实现了许多改进,也设置了自动化测试,而且还参与许多社区讨论。丁欧南(Don Ding,[thebusytypist](https://github.com/thebusytypist))实现了迭代式解析器。Andrii Senkovych([jollyroger](https://github.com/jollyroger))完成了向 CMake 的迁移。Kosta([Kosta-Github](https://github.com/Kosta-Github))提供了一个非常灵巧的短字符串优化。也需要感谢其他贡献者及社区成员。 + +2. 为何你要开发 RapidJSON? + + 在 2011 年开始这项目时,它只是一个兴趣项目。Milo Yip 是一个游戏程序员,他在那时候认识到 JSON 并希望在未来的项目中使用。由于 JSON 好像很简单,他希望写一个快速的仅有头文件的程序库。 + +3. 为什么开发中段有一段长期空档? + + 主要是个人因素,例如加入新家庭成员。另外,Milo Yip 也花了许多业余时间去翻译 Jason Gregory 的《Game Engine Architecture》至中文版《游戏引擎架构》。 + +4. 为什么这个项目从 Google Code 搬到 GitHub? + + 这是大势所趋,而且 GitHub 更为强大及方便。 diff --git a/src/s3select/rapidjson/doc/features.md b/src/s3select/rapidjson/doc/features.md new file mode 100644 index 000000000..0d79e7f89 --- /dev/null +++ b/src/s3select/rapidjson/doc/features.md @@ -0,0 +1,104 @@ +# Features + +## General + +* Cross-platform + * Compilers: Visual Studio, gcc, clang, etc. + * Architectures: x86, x64, ARM, etc. + * Operating systems: Windows, Mac OS X, Linux, iOS, Android, etc. +* Easy installation + * Header files only library. Just copy the headers to your project. 
+* Self-contained, minimal dependencies + * No STL, BOOST, etc. + * Only included `<cstdio>`, `<cstdlib>`, `<cstring>`, `<inttypes.h>`, `<new>`, `<stdint.h>`. +* Without C++ exception, RTTI +* High performance + * Use template and inline functions to reduce function call overheads. + * Internal optimized Grisu2 and floating point parsing implementations. + * Optional SSE2/SSE4.2 support. + +## Standard compliance + +* RapidJSON should be fully RFC4627/ECMA-404 compliant. +* Support JSON Pointer (RFC6901). +* Support JSON Schema Draft v4. +* Support Unicode surrogate. +* Support null character (`"\u0000"`) + * For example, `["Hello\u0000World"]` can be parsed and handled gracefully. There is an API for getting/setting lengths of string. +* Support optional relaxed syntax. + * Single line (`// ...`) and multiple line (`/* ... */`) comments (`kParseCommentsFlag`). + * Trailing commas at the end of objects and arrays (`kParseTrailingCommasFlag`). + * `NaN`, `Inf`, `Infinity`, `-Inf` and `-Infinity` as `double` values (`kParseNanAndInfFlag`) +* [NPM compliant](http://github.com/Tencent/rapidjson/blob/master/doc/npm.md). + +## Unicode + +* Support UTF-8, UTF-16, UTF-32 encodings, including little endian and big endian. + * These encodings are used in input/output streams and in-memory representation. +* Support automatic detection of encodings in input stream. +* Support transcoding between encodings internally. + * For example, you can read a UTF-8 file and let RapidJSON transcode the JSON strings into UTF-16 in the DOM. +* Support encoding validation internally. + * For example, you can read a UTF-8 file, and let RapidJSON check whether all JSON strings are valid UTF-8 byte sequences. +* Support custom character types. + * By default the character types are `char` for UTF8, `wchar_t` for UTF16, `uint32_t` for UTF32. +* Support custom encodings. 
+ +## API styles + +* SAX (Simple API for XML) style API + * Similar to [SAX](http://en.wikipedia.org/wiki/Simple_API_for_XML), RapidJSON provides an event-based sequential access parser API (`rapidjson::GenericReader`). It also provides a generator API (`rapidjson::Writer`) which consumes the same set of events. +* DOM (Document Object Model) style API + * Similar to [DOM](http://en.wikipedia.org/wiki/Document_Object_Model) for HTML/XML, RapidJSON can parse JSON into a DOM representation (`rapidjson::GenericDocument`), for easy manipulation, and finally stringify back to JSON if needed. + * The DOM style API (`rapidjson::GenericDocument`) is actually implemented with SAX style API (`rapidjson::GenericReader`). SAX is faster but sometimes DOM is easier. Users can pick their choices according to scenarios. + +## Parsing + +* Recursive (default) and iterative parser + * Recursive parser is faster but prone to stack overflow in extreme cases. + * Iterative parser uses a custom stack to keep the parsing state. +* Support *in situ* parsing. + * Parse JSON string values in-place at the source JSON, and then the DOM points to addresses of those strings. + * Faster than conventional parsing: no allocation for strings, no copy (if string does not contain escapes), cache-friendly. +* Support 32-bit/64-bit signed/unsigned integer and `double` for JSON number type. +* Support parsing multiple JSONs in input stream (`kParseStopWhenDoneFlag`). +* Error Handling + * Support comprehensive error code if parsing failed. + * Support error message localization. + +## DOM (Document) + +* RapidJSON checks range of numerical values for conversions. +* Optimization for string literal + * Only store pointer instead of copying +* Optimization for "short" strings + * Store short string in `Value` internally without additional allocation. + * For UTF-8 string: maximum 11 characters in 32-bit, 21 characters in 64-bit (13 characters in x86-64).
+* Optionally support `std::string` (define `RAPIDJSON_HAS_STDSTRING=1`) + +## Generation + +* Support `rapidjson::PrettyWriter` for adding newlines and indentations. + +## Stream + +* Support `rapidjson::GenericStringBuffer` for storing the output JSON as string. +* Support `rapidjson::FileReadStream` and `rapidjson::FileWriteStream` for input/output `FILE` object. +* Support custom streams. + +## Memory + +* Minimize memory overheads for DOM. + * Each JSON value occupies exactly 16/20 bytes for most 32/64-bit machines (excluding text string). +* Support fast default allocator. + * A stack-based allocator (allocate sequentially, prohibit to free individual allocations, suitable for parsing). + * User can provide a pre-allocated buffer. (Possible to parse a number of JSONs without any CRT allocation) +* Support standard CRT(C-runtime) allocator. +* Support custom allocators. + +## Miscellaneous + +* Some C++11 support (optional) + * Rvalue reference + * `noexcept` specifier + * Range-based for loop diff --git a/src/s3select/rapidjson/doc/features.zh-cn.md b/src/s3select/rapidjson/doc/features.zh-cn.md new file mode 100644 index 000000000..7662cc13e --- /dev/null +++ b/src/s3select/rapidjson/doc/features.zh-cn.md @@ -0,0 +1,103 @@ +# 特点 + +## 总体 + +* 跨平台 + * 编译器:Visual Studio、gcc、clang 等 + * 架构:x86、x64、ARM 等 + * 操作系统:Windows、Mac OS X、Linux、iOS、Android 等 +* 容易安装 + * 只有头文件的库。只需把头文件复制至你的项目中。 +* 独立、最小依赖 + * 不需依赖 STL、BOOST 等。 + * 只包含 `<cstdio>`, `<cstdlib>`, `<cstring>`, `<inttypes.h>`, `<new>`, `<stdint.h>`。 +* 没使用 C++ 异常、RTTI +* 高性能 + * 使用模版及内联函数去降低函数调用开销。 + * 内部经优化的 Grisu2 及浮点数解析实现。 + * 可选的 SSE2/SSE4.2 支持。 + +## 符合标准 + +* RapidJSON 应完全符合 RFC4627/ECMA-404 标准。 +* 支持 JSON Pointer (RFC6901). +* 支持 JSON Schema Draft v4. +* 支持 Unicode 代理对(surrogate pair)。 +* 支持空字符(`"\u0000"`)。 + * 例如,可以优雅地解析及处理 `["Hello\u0000World"]`。含读写字符串长度的 API。 +* 支持可选的放宽语法 + * 单行(`// ...`)及多行(`/* ... 
*/`) 注释 (`kParseCommentsFlag`)。 + * 在对象和数组结束前含逗号 (`kParseTrailingCommasFlag`)。 + * `NaN`、`Inf`、`Infinity`、`-Inf` 及 `-Infinity` 作为 `double` 值 (`kParseNanAndInfFlag`) +* [NPM 兼容](https://github.com/Tencent/rapidjson/blob/master/doc/npm.md). + +## Unicode + +* 支持 UTF-8、UTF-16、UTF-32 编码,包括小端序和大端序。 + * 这些编码用于输入输出流,以及内存中的表示。 +* 支持从输入流自动检测编码。 +* 内部支持编码的转换。 + * 例如,你可以读取一个 UTF-8 文件,让 RapidJSON 把 JSON 字符串转换至 UTF-16 的 DOM。 +* 内部支持编码校验。 + * 例如,你可以读取一个 UTF-8 文件,让 RapidJSON 检查是否所有 JSON 字符串是合法的 UTF-8 字节序列。 +* 支持自定义的字符类型。 + * 预设的字符类型是:UTF-8 为 `char`,UTF-16 为 `wchar_t`,UTF32 为 `uint32_t`。 +* 支持自定义的编码。 + +## API 风格 + +* SAX(Simple API for XML)风格 API + * 类似于 [SAX](http://en.wikipedia.org/wiki/Simple_API_for_XML), RapidJSON 提供一个事件循序访问的解析器 API(`rapidjson::GenericReader`)。RapidJSON 也提供一个生成器 API(`rapidjson::Writer`),可以处理相同的事件集合。 +* DOM(Document Object Model)风格 API + * 类似于 HTML/XML 的 [DOM](http://en.wikipedia.org/wiki/Document_Object_Model),RapidJSON 可把 JSON 解析至一个 DOM 表示方式(`rapidjson::GenericDocument`),以方便操作。如有需要,可把 DOM 转换(stringify)回 JSON。 + * DOM 风格 API(`rapidjson::GenericDocument`)实际上是由 SAX 风格 API(`rapidjson::GenericReader`)实现的。SAX 更快,但有时 DOM 更易用。用户可根据情况作出选择。 + +## 解析 + +* 递归式(预设)及迭代式解析器 + * 递归式解析器较快,但在极端情况下可出现堆栈溢出。 + * 迭代式解析器使用自定义的堆栈去维持解析状态。 +* 支持原位(*in situ*)解析。 + * 把 JSON 字符串的值解析至原 JSON 之中,然后让 DOM 指向那些字符串。 + * 比常规分析更快:不需字符串的内存分配、不需复制(如字符串不含转义符)、缓存友好。 +* 对于 JSON 数字类型,支持 32-bit/64-bit 的有号/无号整数,以及 `double`。 +* 错误处理 + * 支持详尽的解析错误代号。 + * 支持本地化错误信息。 + +## DOM (Document) + +* RapidJSON 在类型转换时会检查数值的范围。 +* 字符串字面量的优化 + * 只储存指针,不作复制 +* 优化“短”字符串 + * 在 `Value` 内储存短字符串,无需额外分配。 + * 对 UTF-8 字符串来说,32 位架构下可存储最多 11 字符,64 位下 21 字符(x86-64 下 13 字符)。 +* 可选地支持 `std::string`(定义 `RAPIDJSON_HAS_STDSTRING=1`) + +## 生成 + +* 支持 `rapidjson::PrettyWriter` 去加入换行及缩进。 + +## 输入输出流 + +* 支持 `rapidjson::GenericStringBuffer`,把输出的 JSON 储存于字符串内。 +* 支持 `rapidjson::FileReadStream` 及 `rapidjson::FileWriteStream`,使用 `FILE` 对象作输入输出。 +* 支持自定义输入输出流。 + +## 内存 + +* 最小化 DOM 的内存开销。 + * 对大部分 32/64 位机器而言,每个 JSON 值只占 16 或 20 字节(不包含字符串)。 
+* 支持快速的预设分配器。 + * 它是一个堆栈形式的分配器(顺序分配,不容许单独释放,适合解析过程之用)。 + * 使用者也可提供一个预分配的缓冲区。(有可能达至无需 CRT 分配就能解析多个 JSON) +* 支持标准 CRT(C-runtime)分配器。 +* 支持自定义分配器。 + +## 其他 + +* 一些 C++11 的支持(可选) + * 右值引用(rvalue reference) + * `noexcept` 修饰符 + * 范围 for 循环 diff --git a/src/s3select/rapidjson/doc/internals.md b/src/s3select/rapidjson/doc/internals.md new file mode 100644 index 000000000..81fe9c16e --- /dev/null +++ b/src/s3select/rapidjson/doc/internals.md @@ -0,0 +1,368 @@ +# Internals + +This section records some design and implementation details. + +[TOC] + +# Architecture {#Architecture} + +## SAX and DOM + +The basic relationship between SAX and DOM is shown in the following UML diagram. + +![Architecture UML class diagram](diagram/architecture.png) + +The core of the relationship is the `Handler` concept. From the SAX side, `Reader` parses a JSON from a stream and publishes events to a `Handler`. `Writer` implements the `Handler` concept to handle the same set of events. From the DOM side, `Document` implements the `Handler` concept to build a DOM according to the events. `Value` supports a `Value::Accept(Handler&)` function, which traverses the DOM to publish events. + +With this design, SAX is not dependent on DOM. Even `Reader` and `Writer` have no dependencies between them. This provides flexibility to chain event publishers and handlers. Besides, `Value` does not depend on SAX either. So, in addition to stringifying a DOM to JSON, the user may also stringify it to an XML writer, or do anything else. + +## Utility Classes + +Both SAX and DOM APIs depend on 3 additional concepts: `Allocator`, `Encoding` and `Stream`. Their inheritance hierarchy is shown below. + +![Utility classes UML class diagram](diagram/utilityclass.png) + +# Value {#Value} + +`Value` (actually a typedef of `GenericValue<UTF8<>>`) is the core of DOM API. This section describes the design of it. + +## Data Layout {#DataLayout} + +`Value` is a [variant type](http://en.wikipedia.org/wiki/Variant_type).
In RapidJSON's context, an instance of `Value` can contain 1 of 6 JSON value types. This is possible by using `union`. Each `Value` contains two members: `union Data data_` and a`unsigned flags_`. The `flags_` indicates the JSON type, and also additional information. + +The following tables show the data layout of each type. The 32-bit/64-bit columns indicates the size of the field in bytes. + +| Null | |32-bit|64-bit| +|-------------------|----------------------------------|:----:|:----:| +| (unused) | |4 |8 | +| (unused) | |4 |4 | +| (unused) | |4 |4 | +| `unsigned flags_` | `kNullType kNullFlag` |4 |4 | + +| Bool | |32-bit|64-bit| +|-------------------|----------------------------------------------------|:----:|:----:| +| (unused) | |4 |8 | +| (unused) | |4 |4 | +| (unused) | |4 |4 | +| `unsigned flags_` | `kBoolType` (either `kTrueFlag` or `kFalseFlag`) |4 |4 | + +| String | |32-bit|64-bit| +|---------------------|-------------------------------------|:----:|:----:| +| `Ch* str` | Pointer to the string (may own) |4 |8 | +| `SizeType length` | Length of string |4 |4 | +| (unused) | |4 |4 | +| `unsigned flags_` | `kStringType kStringFlag ...` |4 |4 | + +| Object | |32-bit|64-bit| +|---------------------|-------------------------------------|:----:|:----:| +| `Member* members` | Pointer to array of members (owned) |4 |8 | +| `SizeType size` | Number of members |4 |4 | +| `SizeType capacity` | Capacity of members |4 |4 | +| `unsigned flags_` | `kObjectType kObjectFlag` |4 |4 | + +| Array | |32-bit|64-bit| +|---------------------|-------------------------------------|:----:|:----:| +| `Value* values` | Pointer to array of values (owned) |4 |8 | +| `SizeType size` | Number of values |4 |4 | +| `SizeType capacity` | Capacity of values |4 |4 | +| `unsigned flags_` | `kArrayType kArrayFlag` |4 |4 | + +| Number (Int) | |32-bit|64-bit| +|---------------------|-------------------------------------|:----:|:----:| +| `int i` | 32-bit signed integer |4 |4 | +| (zero padding) 
| 0 |4 |4 | +| (unused) | |4 |8 | +| `unsigned flags_` | `kNumberType kNumberFlag kIntFlag kInt64Flag ...` |4 |4 | + +| Number (UInt) | |32-bit|64-bit| +|---------------------|-------------------------------------|:----:|:----:| +| `unsigned u` | 32-bit unsigned integer |4 |4 | +| (zero padding) | 0 |4 |4 | +| (unused) | |4 |8 | +| `unsigned flags_` | `kNumberType kNumberFlag kUintFlag kUint64Flag ...` |4 |4 | + +| Number (Int64) | |32-bit|64-bit| +|---------------------|-------------------------------------|:----:|:----:| +| `int64_t i64` | 64-bit signed integer |8 |8 | +| (unused) | |4 |8 | +| `unsigned flags_` | `kNumberType kNumberFlag kInt64Flag ...` |4 |4 | + +| Number (Uint64) | |32-bit|64-bit| +|---------------------|-------------------------------------|:----:|:----:| +| `uint64_t i64` | 64-bit unsigned integer |8 |8 | +| (unused) | |4 |8 | +| `unsigned flags_` | `kNumberType kNumberFlag kInt64Flag ...` |4 |4 | + +| Number (Double) | |32-bit|64-bit| +|---------------------|-------------------------------------|:----:|:----:| +| `uint64_t i64` | Double precision floating-point |8 |8 | +| (unused) | |4 |8 | +| `unsigned flags_` | `kNumberType kNumberFlag kDoubleFlag` |4 |4 | + +Here are some notes: +* To reduce memory consumption for 64-bit architecture, `SizeType` is typedef as `unsigned` instead of `size_t`. +* Zero padding for 32-bit number may be placed after or before the actual type, according to the endianness. This makes possible for interpreting a 32-bit integer as a 64-bit integer, without any conversion. +* An `Int` is always an `Int64`, but the converse is not always true. + +## Flags {#Flags} + +The 32-bit `flags_` contains both JSON type and other additional information. As shown in the above tables, each JSON type contains redundant `kXXXType` and `kXXXFlag`. This design is for optimizing the operation of testing bit-flags (`IsNumber()`) and obtaining a sequential number for each type (`GetType()`). + +String has two optional flags. 
`kCopyFlag` means that the string owns a copy of the string. `kInlineStrFlag` means using [Short-String Optimization](#ShortString). + +Number is a bit more complicated. For normal integer values, it can contain `kIntFlag`, `kUintFlag`, `kInt64Flag` and/or `kUint64Flag`, according to the range of the integer. For numbers with fraction, and integers larger than 64-bit range, they will be stored as `double` with `kDoubleFlag`. + +## Short-String Optimization {#ShortString} + + [Kosta](https://github.com/Kosta-Github) provided a very neat short-string optimization. The optimization idea is given as follows. Excluding the `flags_`, a `Value` has 12 or 16 bytes (32-bit or 64-bit) for storing actual data. Instead of storing a pointer to a string, it is possible to store short strings in this space internally. For encoding with 1-byte character type (e.g. `char`), it can store a string of at most 11 or 15 characters inside the `Value` type. + +| ShortString (Ch=char) | |32-bit|64-bit| +|---------------------|-------------------------------------|:----:|:----:| +| `Ch str[MaxChars]` | String buffer |11 |15 | +| `Ch invLength` | MaxChars - Length |1 |1 | +| `unsigned flags_` | `kStringType kStringFlag ...` |4 |4 | + +A special technique is applied. Instead of storing the length of string directly, it stores (MaxChars - length). This makes it possible to store 11 characters with trailing `\0`. + +This optimization can reduce memory usage for copy-string. It can also improve cache-coherence and thus improve runtime performance. + +# Allocator {#InternalAllocator} + +`Allocator` is a concept in RapidJSON: +~~~cpp +concept Allocator { + static const bool kNeedFree; //!< Whether this allocator needs to call Free(). + + // Allocate a memory block. + // \param size Size of the memory block in bytes. + // \returns pointer to the memory block. + void* Malloc(size_t size); + + // Resize a memory block. + // \param originalPtr The pointer to current memory block. Null pointer is permitted.
+ // \param originalSize The current size in bytes. (Design issue: since some allocators may not book-keep this, explicitly passing it can save memory.) + // \param newSize the new size in bytes. + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize); + + // Free a memory block. + // \param ptr Pointer to the memory block. Null pointer is permitted. + static void Free(void *ptr); +}; +~~~ + +Note that `Malloc()` and `Realloc()` are member functions but `Free()` is a static member function. + +## MemoryPoolAllocator {#MemoryPoolAllocator} + +`MemoryPoolAllocator` is the default allocator for DOM. It allocates but does not free memory. This is suitable for building a DOM tree. + +Internally, it allocates chunks of memory from the base allocator (by default `CrtAllocator`) and stores the chunks as a singly linked list. When user requests an allocation, it allocates memory in the following order: + +1. User supplied buffer if it is available. (See [User Buffer section in DOM](doc/dom.md)) +2. If user supplied buffer is full, use the current memory chunk. +3. If the current block is full, allocate a new block of memory. + +# Parsing Optimization {#ParsingOptimization} + +## Skip Whitespaces with SIMD {#SkipwhitespaceWithSIMD} + +When parsing JSON from a stream, the parser needs to skip 4 whitespace characters: + +1. Space (`U+0020`) +2. Character Tabulation (`U+0009`) +3. Line Feed (`U+000A`) +4. Carriage Return (`U+000D`) + +A simple implementation would be: +~~~cpp +void SkipWhitespace(InputStream& s) { + while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t') + s.Take(); +} +~~~ + +However, this requires 4 comparisons and a few branches for each character. This was found to be a hot spot. + +To accelerate this process, SIMD was applied to compare 16 characters with 4 white spaces for each iteration. Currently RapidJSON supports SSE2, SSE4.2 and ARM Neon instructions for this.
And it is only activated for UTF-8 memory streams, including string stream or *in situ* parsing. + +To enable this optimization, you need to define `RAPIDJSON_SSE2`, `RAPIDJSON_SSE42` or `RAPIDJSON_NEON` before including `rapidjson.h`. Some compilers can detect the setting, as in `perftest.h`: + +~~~cpp +// __SSE2__ and __SSE4_2__ are recognized by gcc, clang, and the Intel compiler. +// We use -march=native with gmake to enable -msse2 and -msse4.2, if supported. +// Likewise, __ARM_NEON is used to detect Neon. +#if defined(__SSE4_2__) +# define RAPIDJSON_SSE42 +#elif defined(__SSE2__) +# define RAPIDJSON_SSE2 +#elif defined(__ARM_NEON) +# define RAPIDJSON_NEON +#endif +~~~ + +Note that these are compile-time settings. Running the executable on a machine without such instruction set support will make it crash. + +### Page boundary issue + +In an early version of RapidJSON, [an issue](https://code.google.com/archive/p/rapidjson/issues/104) reported that `SkipWhitespace_SIMD()` crashed very rarely (around 1 in 500,000). After investigation, it is suspected that `_mm_loadu_si128()` accessed bytes after `'\0'`, and across a protected page boundary. + +In [Intel® 64 and IA-32 Architectures Optimization Reference Manual +](http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html), section 10.2.1: + +> To support algorithms requiring unaligned 128-bit SIMD memory accesses, memory buffer allocation by a caller function should consider adding some pad space so that a callee function can safely use the address pointer safely with unaligned 128-bit SIMD memory operations. +> The minimal padding size should be the width of the SIMD register that might be used in conjunction with unaligned SIMD memory access. + +This is not feasible as RapidJSON should not enforce such a requirement. + +To fix this issue, the routine currently processes bytes up to the next aligned address.
After that, aligned reads are used to perform SIMD processing. Also see [#85](https://github.com/Tencent/rapidjson/issues/85). + +## Local Stream Copy {#LocalStreamCopy} + +During optimization, it was found that some compilers cannot localize some member data access of streams into local variables or registers. Experimental results show that for some stream types, making a copy of the stream and using it in the inner loop can improve performance. For example, the (non-SIMD) `SkipWhitespace()` is actually implemented as: + +~~~cpp +template<typename InputStream> +void SkipWhitespace(InputStream& is) { + internal::StreamLocalCopy<InputStream> copy(is); + InputStream& s(copy.s); + + while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t') + s.Take(); +} +~~~ + +Depending on the traits of stream, `StreamLocalCopy` will make (or not make) a copy of the stream object, use it locally and copy the states of stream back to the original stream. + +## Parsing to Double {#ParsingDouble} + +Parsing string into `double` is difficult. The standard library function `strtod()` can do the job but it is slow. By default, the parsers use normal precision setting. This has a maximum error of 3 [ULP](http://en.wikipedia.org/wiki/Unit_in_the_last_place) and is implemented in `internal::StrtodNormalPrecision()`. + +When using `kParseFullPrecisionFlag`, the parser calls `internal::StrtodFullPrecision()` instead, and this function actually implements 3 versions of conversion methods. +1. [Fast-Path](http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/). +2. Custom DIY-FP implementation as in [double-conversion](https://github.com/floitsch/double-conversion). +3. Big Integer Method as in (Clinger, William D. How to read floating point numbers accurately. Vol. 25. No. 6. ACM, 1990). + +If the first conversion method fails, it will try the second, and so on.
+ +# Generation Optimization {#GenerationOptimization} + +## Integer-to-String conversion {#itoa} + +The naive algorithm for integer-to-string conversion involves a division for each decimal digit. We have implemented various implementations and evaluated them in [itoa-benchmark](https://github.com/miloyip/itoa-benchmark). + +Although the SSE2 version is the fastest, the difference is minor compared to the runner-up `branchlut`. And `branchlut` is a pure C++ implementation, so we adopt `branchlut` in RapidJSON. + +## Double-to-String conversion {#dtoa} + +Originally RapidJSON used `snprintf(..., ..., "%g")` to achieve double-to-string conversion. This is not accurate as the default precision is 6. Later we also found that this is slow and there is an alternative. + +Google's V8 [double-conversion](https://github.com/floitsch/double-conversion +) implemented a newer, fast algorithm called Grisu3 (Loitsch, Florian. "Printing floating-point numbers quickly and accurately with integers." ACM Sigplan Notices 45.6 (2010): 233-243.). + +However, since it is not header-only, we implemented a header-only version of Grisu2. This algorithm guarantees that the result is always accurate. And in most cases it produces the shortest (optimal) string representation. + +The header-only conversion function has been evaluated in [dtoa-benchmark](https://github.com/miloyip/dtoa-benchmark). + +# Parser {#Parser} + +## Iterative Parser {#IterativeParser} + +The iterative parser is a recursive descent LL(1) parser +implemented in a non-recursive manner.
+ +### Grammar {#IterativeParserGrammar} + +The grammar used for this parser is based on strict JSON syntax: +~~~~~~~~~~ +S -> array | object +array -> [ values ] +object -> { members } +values -> non-empty-values | ε +non-empty-values -> value addition-values +addition-values -> ε | , non-empty-values +members -> non-empty-members | ε +non-empty-members -> member addition-members +addition-members -> ε | , non-empty-members +member -> STRING : value +value -> STRING | NUMBER | NULL | BOOLEAN | object | array +~~~~~~~~~~ + +Note that left factoring is applied to non-terminals `values` and `members` +to make the grammar be LL(1). + +### Parsing Table {#IterativeParserParsingTable} + +Based on the grammar, we can construct the FIRST and FOLLOW set. + +The FIRST set of non-terminals is listed below: + +| NON-TERMINAL | FIRST | +|:-----------------:|:--------------------------------:| +| array | [ | +| object | { | +| values | ε STRING NUMBER NULL BOOLEAN { [ | +| addition-values | ε COMMA | +| members | ε STRING | +| addition-members | ε COMMA | +| member | STRING | +| value | STRING NUMBER NULL BOOLEAN { [ | +| S | [ { | +| non-empty-members | STRING | +| non-empty-values | STRING NUMBER NULL BOOLEAN { [ | + +The FOLLOW set is listed below: + +| NON-TERMINAL | FOLLOW | +|:-----------------:|:-------:| +| S | $ | +| array | , $ } ] | +| object | , $ } ] | +| values | ] | +| non-empty-values | ] | +| addition-values | ] | +| members | } | +| non-empty-members | } | +| addition-members | } | +| member | , } | +| value | , } ] | + +Finally the parsing table can be constructed from FIRST and FOLLOW set: + +| NON-TERMINAL | [ | { | , | : | ] | } | STRING | NUMBER | NULL | BOOLEAN | +|:-----------------:|:---------------------:|:---------------------:|:-------------------:|:-:|:-:|:-:|:-----------------------:|:---------------------:|:---------------------:|:---------------------:| +| S | array | object | | | | | | | | | +| array | [ values ] | | | | | | | | | | +| object | 
| { members } | | | | | | | | | +| values | non-empty-values | non-empty-values | | | ε | | non-empty-values | non-empty-values | non-empty-values | non-empty-values | +| non-empty-values | value addition-values | value addition-values | | | | | value addition-values | value addition-values | value addition-values | value addition-values | +| addition-values | | | , non-empty-values | | ε | | | | | | +| members | | | | | | ε | non-empty-members | | | | +| non-empty-members | | | | | | | member addition-members | | | | +| addition-members | | | , non-empty-members | | | ε | | | | | +| member | | | | | | | STRING : value | | | | +| value | array | object | | | | | STRING | NUMBER | NULL | BOOLEAN | + +There is a great [tool](http://hackingoff.com/compilers/predict-first-follow-set) for above grammar analysis. + +### Implementation {#IterativeParserImplementation} + +Based on the parsing table, a direct(or conventional) implementation +that pushes the production body in reverse order +while generating a production could work. + +In RapidJSON, several modifications(or adaptations to current design) are made to a direct implementation. + +First, the parsing table is encoded in a state machine in RapidJSON. +States are constructed by the head and body of production. +State transitions are constructed by production rules. +Besides, extra states are added for productions involved with `array` and `object`. +In this way the generation of array values or object members would be a single state transition, +rather than several pop/push operations in the direct implementation. +This also makes the estimation of stack size more easier. + +The state diagram is shown as follows: + +![State Diagram](diagram/iterative-parser-states-diagram.png) + +Second, the iterative parser also keeps track of array's value count and object's member count +in its internal stack, which may be different from a conventional implementation. 
diff --git a/src/s3select/rapidjson/doc/internals.zh-cn.md b/src/s3select/rapidjson/doc/internals.zh-cn.md new file mode 100644 index 000000000..d414fc140 --- /dev/null +++ b/src/s3select/rapidjson/doc/internals.zh-cn.md @@ -0,0 +1,363 @@ +# 内部架构 + +本部分记录了一些设计和实现细节。 + +[TOC] + +# 架构 {#Architecture} + +## SAX 和 DOM + +下面的 UML 图显示了 SAX 和 DOM 的基本关系。 + +![架构 UML 类图](diagram/architecture.png) + +关系的核心是 `Handler` 概念。在 SAX 一边,`Reader` 从流解析 JSON 并将事件发送到 `Handler`。`Writer` 实现了 `Handler` 概念,用于处理相同的事件。在 DOM 一边,`Document` 实现了 `Handler` 概念,用于通过这些时间来构建 DOM。`Value` 支持了 `Value::Accept(Handler&)` 函数,它可以将 DOM 转换为事件进行发送。 + +在这个设计,SAX 是不依赖于 DOM 的。甚至 `Reader` 和 `Writer` 之间也没有依赖。这提供了连接事件发送器和处理器的灵活性。除此之外,`Value` 也是不依赖于 SAX 的。所以,除了将 DOM 序列化为 JSON 之外,用户也可以将其序列化为 XML,或者做任何其他事情。 + +## 工具类 + +SAX 和 DOM API 都依赖于3个额外的概念:`Allocator`、`Encoding` 和 `Stream`。它们的继承层次结构如下图所示。 + +![工具类 UML 类图](diagram/utilityclass.png) + +# 值(Value) {#Value} + +`Value` (实际上被定义为 `GenericValue<UTF8<>>`)是 DOM API 的核心。本部分描述了它的设计。 + +## 数据布局 {#DataLayout} + +`Value` 是[可变类型](http://en.wikipedia.org/wiki/Variant_type)。在 RapidJSON 的上下文中,一个 `Value` 的实例可以包含6种 JSON 数据类型之一。通过使用 `union` ,这是可能实现的。每一个 `Value` 包含两个成员:`union Data data_` 和 `unsigned flags_`。`flags_` 表明了 JSON 类型,以及附加的信息。 + +下表显示了所有类型的数据布局。32位/64位列表明了字段所占用的字节数。 + +| Null | | 32位 | 64位 | +|-------------------|----------------------------------|:----:|:----:| +| (未使用) | |4 |8 | +| (未使用) | |4 |4 | +| (未使用) | |4 |4 | +| `unsigned flags_` | `kNullType kNullFlag` |4 |4 | + +| Bool | | 32位 | 64位 | +|-------------------|----------------------------------------------------|:----:|:----:| +| (未使用) | |4 |8 | +| (未使用) | |4 |4 | +| (未使用) | |4 |4 | +| `unsigned flags_` | `kBoolType` (either `kTrueFlag` or `kFalseFlag`) |4 |4 | + +| String | | 32位 | 64位 | +|---------------------|-------------------------------------|:----:|:----:| +| `Ch* str` | 指向字符串的指针(可能拥有所有权) |4 |8 | +| `SizeType length` | 字符串长度 |4 |4 | +| (未使用) | |4 |4 | +| `unsigned flags_` | `kStringType kStringFlag ...` |4 |4 | 
+ +| Object | | 32位 | 64位 | +|---------------------|-------------------------------------|:----:|:----:| +| `Member* members` | 指向成员数组的指针(拥有所有权) |4 |8 | +| `SizeType size` | 成员数量 |4 |4 | +| `SizeType capacity` | 成员容量 |4 |4 | +| `unsigned flags_` | `kObjectType kObjectFlag` |4 |4 | + +| Array | | 32位 | 64位 | +|---------------------|-------------------------------------|:----:|:----:| +| `Value* values` | 指向值数组的指针(拥有所有权) |4 |8 | +| `SizeType size` | 值数量 |4 |4 | +| `SizeType capacity` | 值容量 |4 |4 | +| `unsigned flags_` | `kArrayType kArrayFlag` |4 |4 | + +| Number (Int) | | 32位 | 64位 | +|---------------------|-------------------------------------|:----:|:----:| +| `int i` | 32位有符号整数 |4 |4 | +| (零填充) | 0 |4 |4 | +| (未使用) | |4 |8 | +| `unsigned flags_` | `kNumberType kNumberFlag kIntFlag kInt64Flag ...` |4 |4 | + +| Number (UInt) | | 32位 | 64位 | +|---------------------|-------------------------------------|:----:|:----:| +| `unsigned u` | 32位无符号整数 |4 |4 | +| (零填充) | 0 |4 |4 | +| (未使用) | |4 |8 | +| `unsigned flags_` | `kNumberType kNumberFlag kUintFlag kUint64Flag ...` |4 |4 | + +| Number (Int64) | | 32位 | 64位 | +|---------------------|-------------------------------------|:----:|:----:| +| `int64_t i64` | 64位有符号整数 |8 |8 | +| (未使用) | |4 |8 | +| `unsigned flags_` | `kNumberType kNumberFlag kInt64Flag ...` |4 |4 | + +| Number (Uint64) | | 32位 | 64位 | +|---------------------|-------------------------------------|:----:|:----:| +| `uint64_t i64` | 64位无符号整数 |8 |8 | +| (未使用) | |4 |8 | +| `unsigned flags_` | `kNumberType kNumberFlag kInt64Flag ...` |4 |4 | + +| Number (Double) | | 32位 | 64位 | +|---------------------|-------------------------------------|:----:|:----:| +| `uint64_t i64` | 双精度浮点数 |8 |8 | +| (未使用) | |4 |8 | +| `unsigned flags_` |`kNumberType kNumberFlag kDoubleFlag`|4 |4 | + +这里有一些需要注意的地方: +* 为了减少在64位架构上的内存消耗,`SizeType` 被定义为 `unsigned` 而不是 `size_t`。 +* 32位整数的零填充可能被放在实际类型的前面或后面,这依赖于字节序。这使得它可以将32位整数不经过任何转换就可以解释为64位整数。 +* `Int` 永远是 `Int64`,反之不然。 + +## 标志 {#Flags} + 
+32位的 `flags_` 包含了 JSON 类型和其他信息。如前文中的表所述,每一种 JSON 类型包含了冗余的 `kXXXType` 和 `kXXXFlag`。这个设计是为了优化测试位标志(`IsNumber()`)和获取每一种类型的序列号(`GetType()`)。 + +字符串有两个可选的标志。`kCopyFlag` 表明这个字符串拥有字符串拷贝的所有权。而 `kInlineStrFlag` 意味着使用了[短字符串优化](#ShortString)。 + +数字更加复杂一些。对于普通的整数值,它可以包含 `kIntFlag`、`kUintFlag`、 `kInt64Flag` 和/或 `kUint64Flag`,这由整数的范围决定。带有小数或者超过64位所能表达的范围的整数的数字会被存储为带有 `kDoubleFlag` 的 `double`。 + +## 短字符串优化 {#ShortString} + +[Kosta](https://github.com/Kosta-Github) 提供了很棒的短字符串优化。这个优化的思路如下所述。除去 `flags_` ,`Value` 有12或16字节(对于32位或64位)来存储实际的数据。这为在其内部直接存储短字符串而不是存储字符串的指针创造了可能。对于1字节的字符类型(例如 `char`),它可以在 `Value` 类型内部存储至多11或15个字符的字符串。 + +|ShortString (Ch=char)| | 32位 | 64位 | +|---------------------|-------------------------------------|:----:|:----:| +| `Ch str[MaxChars]` | 字符串缓冲区 |11 |15 | +| `Ch invLength` | MaxChars - Length |1 |1 | +| `unsigned flags_` | `kStringType kStringFlag ...` |4 |4 | + +这里使用了一项特殊的技术。它存储了 (MaxChars - length) 而不直接存储字符串的长度。这使得存储11个字符并且带有后缀 `\0` 成为可能。 + +这个优化可以减少字符串拷贝内存占用。它也改善了缓存一致性,并进一步提高了运行时性能。 + +# 分配器(Allocator) {#InternalAllocator} + +`Allocator` 是 RapidJSON 中的概念: +~~~cpp +concept Allocator { + static const bool kNeedFree; //!< 表明这个分配器是否需要调用 Free()。 + + // 申请内存块。 + // \param size 内存块的大小,以字节计。 + // \returns 指向内存块的指针。 + void* Malloc(size_t size); + + // 调整内存块的大小。 + // \param originalPtr 当前内存块的指针。空指针是被允许的。 + // \param originalSize 当前大小,以字节计。(设计问题:因为有些分配器可能不会记录它,显式地传递它可以节约内存。) + // \param newSize 新大小,以字节计。 + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize); + + // 释放内存块。 + // \param ptr 指向内存块的指针。空指针是被允许的。 + static void Free(void *ptr); +}; +~~~ + +需要注意的是 `Malloc()` 和 `Realloc()` 是成员函数而 `Free()` 是静态成员函数。 + +## MemoryPoolAllocator {#MemoryPoolAllocator} + +`MemoryPoolAllocator` 是 DOM 的默认内存分配器。它只申请内存而不释放内存。这对于构建 DOM 树非常合适。 + +在它的内部,它从基础的内存分配器申请内存块(默认为 `CrtAllocator`)并将这些内存块存储为单向链表。当用户请求申请内存,它会遵循下列步骤来申请内存: + +1. 如果可用,使用用户提供的缓冲区。(见 [User Buffer section in DOM](doc/dom.md)) +2. 如果用户提供的缓冲区已满,使用当前内存块。 +3.
如果当前内存块已满,申请新的内存块。 + +# 解析优化 {#ParsingOptimization} + +## 使用 SIMD 跳过空格 {#SkipwhitespaceWithSIMD} + +当从流中解析 JSON 时,解析器需要跳过4种空格字符: + +1. 空格 (`U+0020`) +2. 制表符 (`U+0009`) +3. 换行 (`U+000A`) +4. 回车 (`U+000D`) + +这是一份简单的实现: +~~~cpp +void SkipWhitespace(InputStream& s) { + while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t') + s.Take(); +} +~~~ + +但是,这需要对每个字符进行4次比较以及一些分支。这被发现是一个热点。 + +为了加速这一处理,RapidJSON 使用 SIMD 来在一次迭代中比较16个字符和4个空格。目前 RapidJSON 支持 SSE2 , SSE4.2 和 ARM Neon 指令。同时它也只会对 UTF-8 内存流启用,包括字符串流或 *原位* 解析。 + +你可以通过在包含 `rapidjson.h` 之前定义 `RAPIDJSON_SSE2` , `RAPIDJSON_SSE42` 或 `RAPIDJSON_NEON` 来启用这个优化。一些编译器可以检测这个设置,如 `perftest.h`: + +~~~cpp +// __SSE2__ 和 __SSE4_2__ 可被 gcc、clang 和 Intel 编译器识别: +// 如果支持的话,我们在 gmake 中使用了 -march=native 来启用 -msse2 和 -msse4.2 +// 同样的, __ARM_NEON 被用于识别Neon +#if defined(__SSE4_2__) +# define RAPIDJSON_SSE42 +#elif defined(__SSE2__) +# define RAPIDJSON_SSE2 +#elif defined(__ARM_NEON) +# define RAPIDJSON_NEON +#endif +~~~ + +需要注意的是,这是编译期的设置。在不支持这些指令的机器上运行可执行文件会使它崩溃。 + +### 页面对齐问题 + +在 RapidJSON 的早期版本中,被报告了[一个问题](https://code.google.com/archive/p/rapidjson/issues/104):`SkipWhitespace_SIMD()` 会罕见地导致崩溃(约五十万分之一的几率)。在调查之后,怀疑是 `_mm_loadu_si128()` 访问了 `'\0'` 之后的内存,并越过被保护的页面边界。 + +在 [Intel® 64 and IA-32 Architectures Optimization Reference Manual +](http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html) 中,章节 10.2.1: + +> 为了支持需要非对齐的128位 SIMD 内存访问的算法,调用者的内存缓冲区申请应当考虑添加一些填充空间,这样被调用的函数可以安全地将地址指针用于未对齐的128位 SIMD 内存操作。 +> 在结合非对齐的 SIMD 内存操作中,最小的填充大小应该等于 SIMD 寄存器的大小。 + +对于 RapidJSON 来说,这显然是不可行的,因为 RapidJSON 不应当强迫用户进行内存填充。 + +为了修复这个问题,当前的代码会先按字节处理直到下一个对齐的地址。在这之后,使用对齐读取来进行 SIMD 处理。见 [#85](https://github.com/Tencent/rapidjson/issues/85)。 + +## 局部流拷贝 {#LocalStreamCopy} + +在优化的过程中,我们发现一些编译器不能将访问流的一些成员数据放入局部变量或者寄存器中。测试结果显示,对于一些流类型,创建流的拷贝并将其用于内层循环中可以改善性能。例如,实际(非 SIMD)的 `SkipWhitespace()` 被实现为: + +~~~cpp +template<typename InputStream> +void SkipWhitespace(InputStream& is) { +
internal::StreamLocalCopy<InputStream> copy(is); + InputStream& s(copy.s); + + while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t') + s.Take(); +} +~~~ + +基于流的特征,`StreamLocalCopy` 会创建(或不创建)流对象的拷贝,在局部使用它并将流的状态拷贝回原来的流。 + +## 解析为双精度浮点数 {#ParsingDouble} + +将字符串解析为 `double` 并不简单。标准库函数 `strtod()` 可以胜任这项工作,但它比较缓慢。默认情况下,解析器使用默认的精度设置。这最多有 3[ULP](http://en.wikipedia.org/wiki/Unit_in_the_last_place) 的误差,并实现在 `internal::StrtodNormalPrecision()` 中。 + +当使用 `kParseFullPrecisionFlag` 时,解析器会改为调用 `internal::StrtodFullPrecision()` ,这个函数会自动调用三个版本的转换。 +1. [Fast-Path](http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/)。 +2. [double-conversion](https://github.com/floitsch/double-conversion) 中的自定义 DIY-FP 实现。 +3. (Clinger, William D. How to read floating point numbers accurately. Vol. 25. No. 6. ACM, 1990) 中的大整数算法。 + +如果第一个转换方法失败,则尝试使用第二种方法,以此类推。 + +# 生成优化 {#GenerationOptimization} + +## 整数到字符串的转换 {#itoa} + +整数到字符串转换的朴素算法需要对每一个十进制位进行一次除法。我们实现了若干版本并在 [itoa-benchmark](https://github.com/miloyip/itoa-benchmark) 中对它们进行了评估。 + +虽然 SSE2 版本是最快的,但它和第二快的 `branchlut` 差距不大。而且 `branchlut` 是纯C++实现,所以我们在 RapidJSON 中使用了 `branchlut`。 + +## 双精度浮点数到字符串的转换 {#dtoa} + +原来 RapidJSON 使用 `snprintf(..., ..., "%g")` 来进行双精度浮点数到字符串的转换。这是不准确的,因为默认的精度是6。随后我们发现它很缓慢,而且有其它的替代品。 + +Google 的 V8 [double-conversion](https://github.com/floitsch/double-conversion +) 实现了更新的、快速的被称为 Grisu3 的算法(Loitsch, Florian. "Printing floating-point numbers quickly and accurately with integers." 
ACM Sigplan Notices 45.6 (2010): 233-243.)。 + +然而,这个实现不是仅头文件的,所以我们实现了一个仅头文件的 Grisu2 版本。这个算法保证了结果永远精确。而且在大多数情况下,它会生成最短的(可选)字符串表示。 + +这个仅头文件的转换函数在 [dtoa-benchmark](https://github.com/miloyip/dtoa-benchmark) 中进行评估。 + +# 解析器 {#Parser} + +## 迭代解析 {#IterativeParser} + +迭代解析器是一个以非递归方式实现的递归下降的 LL(1) 解析器。 + +### 语法 {#IterativeParserGrammar} + +解析器使用的语法是基于严格 JSON 语法的: +~~~~~~~~~~ +S -> array | object +array -> [ values ] +object -> { members } +values -> non-empty-values | ε +non-empty-values -> value addition-values +addition-values -> ε | , non-empty-values +members -> non-empty-members | ε +non-empty-members -> member addition-members +addition-members -> ε | , non-empty-members +member -> STRING : value +value -> STRING | NUMBER | NULL | BOOLEAN | object | array +~~~~~~~~~~ + +注意到左因子被加入了非终结符的 `values` 和 `members` 来保证语法是 LL(1) 的。 + +### 解析表 {#IterativeParserParsingTable} + +基于这份语法,我们可以构造 FIRST 和 FOLLOW 集合。 + +非终结符的 FIRST 集合如下所示: + +| NON-TERMINAL | FIRST | +|:-----------------:|:--------------------------------:| +| array | [ | +| object | { | +| values | ε STRING NUMBER NULL BOOLEAN { [ | +| addition-values | ε COMMA | +| members | ε STRING | +| addition-members | ε COMMA | +| member | STRING | +| value | STRING NUMBER NULL BOOLEAN { [ | +| S | [ { | +| non-empty-members | STRING | +| non-empty-values | STRING NUMBER NULL BOOLEAN { [ | + +FOLLOW 集合如下所示: + +| NON-TERMINAL | FOLLOW | +|:-----------------:|:-------:| +| S | $ | +| array | , $ } ] | +| object | , $ } ] | +| values | ] | +| non-empty-values | ] | +| addition-values | ] | +| members | } | +| non-empty-members | } | +| addition-members | } | +| member | , } | +| value | , } ] | + +最终可以从 FIRST 和 FOLLOW 集合生成解析表: + +| NON-TERMINAL | [ | { | , | : | ] | } | STRING | NUMBER | NULL | BOOLEAN | +|:-----------------:|:---------------------:|:---------------------:|:-------------------:|:-:|:-:|:-:|:-----------------------:|:---------------------:|:---------------------:|:---------------------:| +| S | array | object | 
| | | | | | | | +| array | [ values ] | | | | | | | | | | +| object | | { members } | | | | | | | | | +| values | non-empty-values | non-empty-values | | | ε | | non-empty-values | non-empty-values | non-empty-values | non-empty-values | +| non-empty-values | value addition-values | value addition-values | | | | | value addition-values | value addition-values | value addition-values | value addition-values | +| addition-values | | | , non-empty-values | | ε | | | | | | +| members | | | | | | ε | non-empty-members | | | | +| non-empty-members | | | | | | | member addition-members | | | | +| addition-members | | | , non-empty-members | | | ε | | | | | +| member | | | | | | | STRING : value | | | | +| value | array | object | | | | | STRING | NUMBER | NULL | BOOLEAN | + +对于上面的语法分析,这里有一个很棒的[工具](http://hackingoff.com/compilers/predict-first-follow-set)。 + +### 实现 {#IterativeParserImplementation} + +基于这份解析表,一个直接的(常规的)将规则反向入栈的实现可以正常工作。 + +在 RapidJSON 中,对直接的实现进行了一些修改: + +首先,在 RapidJSON 中,这份解析表被编码为状态机。 +规则由头部和主体组成。 +状态转换由规则构造。 +除此之外,额外的状态被添加到与 `array` 和 `object` 有关的规则。 +通过这种方式,生成数组值或对象成员可以只用一次状态转移便可完成, +而不需要在直接的实现中的多次出栈/入栈操作。 +这也使得估计栈的大小更加容易。 + +状态图如如下所示: + +![状态图](diagram/iterative-parser-states-diagram.png) + +第二,迭代解析器也在内部栈保存了数组的值个数和对象成员的数量,这也与传统的实现不同。 diff --git a/src/s3select/rapidjson/doc/logo/rapidjson.png b/src/s3select/rapidjson/doc/logo/rapidjson.png Binary files differnew file mode 100644 index 000000000..b3b2f80cc --- /dev/null +++ b/src/s3select/rapidjson/doc/logo/rapidjson.png diff --git a/src/s3select/rapidjson/doc/logo/rapidjson.svg b/src/s3select/rapidjson/doc/logo/rapidjson.svg new file mode 100644 index 000000000..9708d5a06 --- /dev/null +++ b/src/s3select/rapidjson/doc/logo/rapidjson.svg @@ -0,0 +1,119 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + 
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="217.15039" + height="60.831055" + id="svg2" + version="1.1" + inkscape:version="0.48.4 r9939" + sodipodi:docname="rapidjson.svg"> + <defs + id="defs4"> + <linearGradient + id="linearGradient3801"> + <stop + style="stop-color:#000000;stop-opacity:1;" + offset="0" + id="stop3803" /> + <stop + style="stop-color:#000000;stop-opacity:0;" + offset="1" + id="stop3805" /> + </linearGradient> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3801" + id="linearGradient3807" + x1="81.25" + y1="52.737183" + x2="122.25" + y2="52.737183" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(1.2378503,0,0,1.1662045,-226.99279,64.427324)" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3801" + id="linearGradient3935" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(-1.4768835,0,0,2.2904698,246.48785,81.630301)" + x1="81.25" + y1="52.737183" + x2="115.96579" + y2="48.439766" /> + <linearGradient + inkscape:collect="always" + xlink:href="#linearGradient3801" + id="linearGradient3947" + gradientUnits="userSpaceOnUse" + gradientTransform="matrix(1.2378503,0,0,1.1662045,-226.99279,-10.072676)" + x1="81.25" + y1="52.737183" + x2="122.25" + y2="52.737183" /> + </defs> + <sodipodi:namedview + id="base" + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1.0" + inkscape:pageopacity="0.0" + inkscape:pageshadow="2" + inkscape:zoom="2" + inkscape:cx="207.8959" + inkscape:cy="-3.2283687" + inkscape:document-units="px" + inkscape:current-layer="layer1" + showgrid="false" + inkscape:window-width="1920" + inkscape:window-height="1137" + inkscape:window-x="-8" + inkscape:window-y="-8" + 
inkscape:window-maximized="1" + fit-margin-top="10" + fit-margin-left="10" + fit-margin-right="10" + fit-margin-bottom="10" /> + <metadata + id="metadata7"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + <dc:title></dc:title> + </cc:Work> + </rdf:RDF> + </metadata> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1" + transform="translate(-39.132812,-38.772339)"> + <text + sodipodi:linespacing="125%" + id="text3939" + y="79.862183" + x="147.5" + style="font-size:20px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Microsoft JhengHei;-inkscape-font-specification:Microsoft JhengHei" + xml:space="preserve"><tspan + style="font-size:48px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Inconsolata;-inkscape-font-specification:Inconsolata" + y="79.862183" + x="147.5" + id="tspan3941" + sodipodi:role="line"><tspan + id="tspan3943" + style="font-size:42px;font-style:oblique;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Segoe UI;-inkscape-font-specification:Segoe UI Oblique">Rapid</tspan><tspan + id="tspan3945" + style="font-weight:bold;-inkscape-font-specification:Inconsolata Bold">JSON</tspan></tspan></text> + </g> +</svg> diff --git a/src/s3select/rapidjson/doc/misc/DoxygenLayout.xml b/src/s3select/rapidjson/doc/misc/DoxygenLayout.xml new file mode 100644 index 000000000..b7c958665 --- /dev/null +++ b/src/s3select/rapidjson/doc/misc/DoxygenLayout.xml @@ -0,0 +1,194 @@ +<doxygenlayout version="1.0"> + <!-- Generated by doxygen 1.8.7 --> + <!-- Navigation index tabs for HTML output --> + <navindex> + <tab type="mainpage" 
visible="yes" title=""/> + <tab type="pages" visible="yes" title="" intro=""/> + <tab type="modules" visible="yes" title="" intro=""/> + <tab type="namespaces" visible="yes" title=""> + <tab type="namespacelist" visible="yes" title="" intro=""/> + <tab type="namespacemembers" visible="yes" title="" intro=""/> + </tab> + <tab type="classes" visible="yes" title=""> + <tab type="classlist" visible="yes" title="" intro=""/> + <tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/> + <tab type="hierarchy" visible="yes" title="" intro=""/> + <tab type="classmembers" visible="yes" title="" intro=""/> + </tab> + <tab type="files" visible="yes" title=""> + <tab type="filelist" visible="yes" title="" intro=""/> + <tab type="globals" visible="yes" title="" intro=""/> + </tab> + <tab type="examples" visible="yes" title="" intro=""/> + </navindex> + + <!-- Layout definition for a class page --> + <class> + <briefdescription visible="yes"/> + <includes visible="$SHOW_INCLUDE_FILES"/> + <inheritancegraph visible="$CLASS_GRAPH"/> + <collaborationgraph visible="$COLLABORATION_GRAPH"/> + <memberdecl> + <nestedclasses visible="yes" title=""/> + <publictypes title=""/> + <services title=""/> + <interfaces title=""/> + <publicslots title=""/> + <signals title=""/> + <publicmethods title=""/> + <publicstaticmethods title=""/> + <publicattributes title=""/> + <publicstaticattributes title=""/> + <protectedtypes title=""/> + <protectedslots title=""/> + <protectedmethods title=""/> + <protectedstaticmethods title=""/> + <protectedattributes title=""/> + <protectedstaticattributes title=""/> + <packagetypes title=""/> + <packagemethods title=""/> + <packagestaticmethods title=""/> + <packageattributes title=""/> + <packagestaticattributes title=""/> + <properties title=""/> + <events title=""/> + <privatetypes title=""/> + <privateslots title=""/> + <privatemethods title=""/> + <privatestaticmethods title=""/> + <privateattributes title=""/> + <privatestaticattributes title=""/> + 
<friends title=""/> + <related title="" subtitle=""/> + <membergroups visible="yes"/> + </memberdecl> + <detaileddescription title=""/> + <memberdef> + <inlineclasses title=""/> + <typedefs title=""/> + <enums title=""/> + <services title=""/> + <interfaces title=""/> + <constructors title=""/> + <functions title=""/> + <related title=""/> + <variables title=""/> + <properties title=""/> + <events title=""/> + </memberdef> + <allmemberslink visible="yes"/> + <usedfiles visible="$SHOW_USED_FILES"/> + <authorsection visible="yes"/> + </class> + + <!-- Layout definition for a namespace page --> + <namespace> + <briefdescription visible="yes"/> + <memberdecl> + <nestednamespaces visible="yes" title=""/> + <constantgroups visible="yes" title=""/> + <classes visible="yes" title=""/> + <typedefs title=""/> + <enums title=""/> + <functions title=""/> + <variables title=""/> + <membergroups visible="yes"/> + </memberdecl> + <detaileddescription title=""/> + <memberdef> + <inlineclasses title=""/> + <typedefs title=""/> + <enums title=""/> + <functions title=""/> + <variables title=""/> + </memberdef> + <authorsection visible="yes"/> + </namespace> + + <!-- Layout definition for a file page --> + <file> + <briefdescription visible="yes"/> + <includes visible="$SHOW_INCLUDE_FILES"/> + <includegraph visible="$INCLUDE_GRAPH"/> + <includedbygraph visible="$INCLUDED_BY_GRAPH"/> + <sourcelink visible="yes"/> + <memberdecl> + <classes visible="yes" title=""/> + <namespaces visible="yes" title=""/> + <constantgroups visible="yes" title=""/> + <defines title=""/> + <typedefs title=""/> + <enums title=""/> + <functions title=""/> + <variables title=""/> + <membergroups visible="yes"/> + </memberdecl> + <detaileddescription title=""/> + <memberdef> + <inlineclasses title=""/> + <defines title=""/> + <typedefs title=""/> + <enums title=""/> + <functions title=""/> + <variables title=""/> + </memberdef> + <authorsection/> + </file> + + <!-- Layout definition for a group page --> + 
<group> + <briefdescription visible="yes"/> + <groupgraph visible="$GROUP_GRAPHS"/> + <memberdecl> + <nestedgroups visible="yes" title=""/> + <dirs visible="yes" title=""/> + <files visible="yes" title=""/> + <namespaces visible="yes" title=""/> + <classes visible="yes" title=""/> + <defines title=""/> + <typedefs title=""/> + <enums title=""/> + <enumvalues title=""/> + <functions title=""/> + <variables title=""/> + <signals title=""/> + <publicslots title=""/> + <protectedslots title=""/> + <privateslots title=""/> + <events title=""/> + <properties title=""/> + <friends title=""/> + <membergroups visible="yes"/> + </memberdecl> + <detaileddescription title=""/> + <memberdef> + <pagedocs/> + <inlineclasses title=""/> + <defines title=""/> + <typedefs title=""/> + <enums title=""/> + <enumvalues title=""/> + <functions title=""/> + <variables title=""/> + <signals title=""/> + <publicslots title=""/> + <protectedslots title=""/> + <privateslots title=""/> + <events title=""/> + <properties title=""/> + <friends title=""/> + </memberdef> + <authorsection visible="yes"/> + </group> + + <!-- Layout definition for a directory page --> + <directory> + <briefdescription visible="yes"/> + <directorygraph visible="yes"/> + <memberdecl> + <dirs visible="yes"/> + <files visible="yes"/> + </memberdecl> + <detaileddescription title=""/> + </directory> +</doxygenlayout> diff --git a/src/s3select/rapidjson/doc/misc/doxygenextra.css b/src/s3select/rapidjson/doc/misc/doxygenextra.css new file mode 100644 index 000000000..bd6737585 --- /dev/null +++ b/src/s3select/rapidjson/doc/misc/doxygenextra.css @@ -0,0 +1,274 @@ +body code { + margin: 0; + border: 1px solid #ddd; + background-color: #f8f8f8; + border-radius: 3px; + padding: 0; +} + +a { + color: #4183c4; +} + +a.el { + font-weight: normal; +} + +body, table, div, p, dl { + color: #333333; + font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol'; + font-size: 15px; + font-style: 
normal; + font-variant: normal; + font-weight: normal; + line-height: 25.5px; +} + +body { + background-color: #eee; +} + +div.header { + background-image: none; + background-color: white; + margin: 0px; + border: 0px; +} + +div.headertitle { + width: 858px; + margin: 30px; + padding: 0px; +} + +div.toc { + background-color: #f8f8f8; + border-color: #ddd; + margin-right: 10px; + margin-left: 20px; +} +div.toc h3 { + color: #333333; + font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol'; + font-size: 18px; + font-style: normal; + font-variant: normal; + font-weight: normal; +} +div.toc li { + color: #333333; + font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol'; + font-size: 12px; + font-style: normal; + font-variant: normal; + font-weight: normal; +} + +.title { + font-size: 2.5em; + line-height: 63.75px; + border-bottom: 1px solid #ddd; + margin-bottom: 15px; + margin-left: 0px; + margin-right: 0px; + margin-top: 0px; +} + +.summary { + float: none !important; + width: auto !important; + padding-top: 10px; + padding-right: 10px !important; +} + +.summary + .headertitle .title { + font-size: 1.5em; + line-height: 2.0em; +} + +body h1 { + font-size: 2em; + line-height: 1.7; + border-bottom: 1px solid #eee; + margin: 1em 0 15px; + padding: 0; + overflow: hidden; +} + +body h2 { + font-size: 1.5em; + line-height: 1.7; + margin: 1em 0 15px; + padding: 0; +} + +pre.fragment { + font-family: Consolas, 'Liberation Mono', Menlo, Courier, monospace; + font-size: 13px; + font-style: normal; + font-variant: normal; + font-weight: normal; + line-height: 19px; +} + +table.doxtable th { + background-color: #f8f8f8; + color: #333333; + font-size: 15px; +} + +table.doxtable td, table.doxtable th { + border: 1px solid #ddd; +} + +#doc-content { + background-color: #fff; + width: 918px; + height: auto !important; + margin-left: 270px !important; +} + +div.contents { + width: 858px; + margin: 30px; 
+} + +div.line { + font-family: Consolas, 'Liberation Mono', Menlo, Courier, monospace; + font-size: 13px; + font-style: normal; + font-variant: normal; + font-weight: normal; + line-height: 19px; +} + +tt, code, pre { + font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace; + font-size: 12px; +} + +div.fragment { + background-color: #f8f8f8; + border: 1px solid #ddd; + font-size: 13px; + line-height: 19px; + overflow: auto; + padding: 6px 10px; + border-radius: 3px; +} + +#topbanner { + position: fixed; + margin: 15px; + z-index: 101; +} + +#projectname +{ + font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol'; + font-size: 38px; + font-weight: bold; + line-height: 63.75px; + margin: 0px; + padding: 2px 0px; +} + +#projectbrief +{ + font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol'; + font-size: 16px; + line-height: 22.4px; + margin: 0px 0px 13px 0px; + padding: 2px; +} + +/* side bar and search */ + +#side-nav +{ + padding: 10px 0px 20px 20px; + border-top: 60px solid #2980b9; + background-color: #343131; + width: 250px !important; + height: 100% !important; + position: fixed; +} + +#nav-tree +{ + background-color: transparent; + background-image: none; + height: 100% !important; +} + +#nav-tree .label +{ + font-family: Helvetica, arial, freesans, clean, sans-serif, 'Segoe UI Emoji', 'Segoe UI Symbol'; + line-height: 25.5px; + font-size: 15px; +} + +#nav-tree +{ + color: #b3b3b3; +} + +#nav-tree .selected { + background-image: none; +} + +#nav-tree a +{ + color: #b3b3b3; +} + +#github +{ + position: fixed; + left: auto; + right: auto; + width: 250px; +} + +#MSearchBox +{ + margin: 20px; + left: 40px; + right: auto; + position: fixed; + width: 180px; +} + +#MSearchField +{ + width: 121px; +} + +#MSearchResultsWindow +{ + left: 45px !important; +} + +#nav-sync +{ + display: none; +} + +.ui-resizable .ui-resizable-handle +{ + width: 0px; +} + +#nav-path +{ + 
display: none; +} + +/* external link icon */ +div.contents a[href ^= "http"]:after { + content: " " url(); +} + +.githublogo { + content: url(); +}
\ No newline at end of file diff --git a/src/s3select/rapidjson/doc/misc/footer.html b/src/s3select/rapidjson/doc/misc/footer.html new file mode 100644 index 000000000..77f113118 --- /dev/null +++ b/src/s3select/rapidjson/doc/misc/footer.html @@ -0,0 +1,11 @@ +<!-- HTML footer for doxygen 1.8.7--> +<!-- start footer part --> +<!--BEGIN GENERATE_TREEVIEW--> +<div id="nav-path" class="navpath"><!-- id is needed for treeview function! --> + <ul> + $navpath + </ul> +</div> +<!--END GENERATE_TREEVIEW--> +</body> +</html> diff --git a/src/s3select/rapidjson/doc/misc/header.html b/src/s3select/rapidjson/doc/misc/header.html new file mode 100644 index 000000000..a89ba46b4 --- /dev/null +++ b/src/s3select/rapidjson/doc/misc/header.html @@ -0,0 +1,24 @@ +<!-- HTML header for doxygen 1.8.7--> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> +<meta http-equiv="X-UA-Compatible" content="IE=9"/> +<meta name="generator" content="Doxygen $doxygenversion"/> +<!--BEGIN PROJECT_NAME--><title>$projectname: $title</title><!--END PROJECT_NAME--> +<!--BEGIN !PROJECT_NAME--><title>$title</title><!--END !PROJECT_NAME--> +<link href="$relpath^tabs.css" rel="stylesheet" type="text/css"/> +<script type="text/javascript" src="$relpath^jquery.js"></script> +<script type="text/javascript" src="$relpath^dynsections.js"></script> +$treeview +$search +$mathjax +<link href="$relpath^$stylesheet" rel="stylesheet" type="text/css" /> +$extrastylesheet +</head> +<body> +<div id="top"><!-- do not remove this div, it is closed by doxygen! 
--> +<div id="topbanner"><a href="https://github.com/Tencent/rapidjson" title="RapidJSON GitHub"><i class="githublogo"></i></a></div> +$searchbox +<!--END TITLEAREA--> +<!-- end header part --> diff --git a/src/s3select/rapidjson/doc/npm.md b/src/s3select/rapidjson/doc/npm.md new file mode 100644 index 000000000..6f4e85ad9 --- /dev/null +++ b/src/s3select/rapidjson/doc/npm.md @@ -0,0 +1,31 @@ +## NPM + +# package.json {#package} + +~~~~~~~~~~js +{ + ... + "dependencies": { + ... + "rapidjson": "git@github.com:Tencent/rapidjson.git" + }, + ... + "gypfile": true +} +~~~~~~~~~~ + +# binding.gyp {#binding} + +~~~~~~~~~~js +{ + ... + 'targets': [ + { + ... + 'include_dirs': [ + '<!(node -e \'require("rapidjson")\')' + ] + } + ] +} +~~~~~~~~~~ diff --git a/src/s3select/rapidjson/doc/performance.md b/src/s3select/rapidjson/doc/performance.md new file mode 100644 index 000000000..6f9e1bf8b --- /dev/null +++ b/src/s3select/rapidjson/doc/performance.md @@ -0,0 +1,26 @@ +# Performance + +There is a [native JSON benchmark collection] [1] which evaluates speed, memory usage and code size of various operations among 37 JSON libraries. + +[1]: https://github.com/miloyip/nativejson-benchmark + +The old performance article for RapidJSON 0.1 is provided [here](https://code.google.com/p/rapidjson/wiki/Performance). + +Additionally, you may refer to the following third-party benchmarks. 
+ +## Third-party benchmarks + +* [Basic benchmarks for miscellaneous C++ JSON parsers and generators](https://github.com/mloskot/json_benchmark) by Mateusz Loskot (Jun 2013) + * [casablanca](https://casablanca.codeplex.com/) + * [json_spirit](https://github.com/cierelabs/json_spirit) + * [jsoncpp](http://jsoncpp.sourceforge.net/) + * [libjson](http://sourceforge.net/projects/libjson/) + * [rapidjson](https://github.com/Tencent/rapidjson/) + * [QJsonDocument](http://qt-project.org/doc/qt-5.0/qtcore/qjsondocument.html) + +* [JSON Parser Benchmarking](http://chadaustin.me/2013/01/json-parser-benchmarking/) by Chad Austin (Jan 2013) + * [sajson](https://github.com/chadaustin/sajson) + * [rapidjson](https://github.com/Tencent/rapidjson/) + * [vjson](https://code.google.com/p/vjson/) + * [YAJL](http://lloyd.github.com/yajl/) + * [Jansson](http://www.digip.org/jansson/) diff --git a/src/s3select/rapidjson/doc/performance.zh-cn.md b/src/s3select/rapidjson/doc/performance.zh-cn.md new file mode 100644 index 000000000..2322c9c49 --- /dev/null +++ b/src/s3select/rapidjson/doc/performance.zh-cn.md @@ -0,0 +1,26 @@ +# 性能 + +有一个 [native JSON benchmark collection][1] 项目,能评估 37 个 JSON 库在不同操作下的速度、內存用量及代码大小。 + +[1]: https://github.com/miloyip/nativejson-benchmark + +RapidJSON 0.1 版本的性能测试文章位于 [这里](https://code.google.com/p/rapidjson/wiki/Performance). 
+ +此外,你也可以参考以下这些第三方的评测。 + +## 第三方评测 + +* [Basic benchmarks for miscellaneous C++ JSON parsers and generators](https://github.com/mloskot/json_benchmark) by Mateusz Loskot (Jun 2013) + * [casablanca](https://casablanca.codeplex.com/) + * [json_spirit](https://github.com/cierelabs/json_spirit) + * [jsoncpp](http://jsoncpp.sourceforge.net/) + * [libjson](http://sourceforge.net/projects/libjson/) + * [rapidjson](https://github.com/Tencent/rapidjson/) + * [QJsonDocument](http://qt-project.org/doc/qt-5.0/qtcore/qjsondocument.html) + +* [JSON Parser Benchmarking](http://chadaustin.me/2013/01/json-parser-benchmarking/) by Chad Austin (Jan 2013) + * [sajson](https://github.com/chadaustin/sajson) + * [rapidjson](https://github.com/Tencent/rapidjson/) + * [vjson](https://code.google.com/p/vjson/) + * [YAJL](http://lloyd.github.com/yajl/) + * [Jansson](http://www.digip.org/jansson/) diff --git a/src/s3select/rapidjson/doc/pointer.md b/src/s3select/rapidjson/doc/pointer.md new file mode 100644 index 000000000..9a0e5ca03 --- /dev/null +++ b/src/s3select/rapidjson/doc/pointer.md @@ -0,0 +1,234 @@ +# Pointer + +(This feature was released in v1.1.0) + +JSON Pointer is a standardized ([RFC6901]) way to select a value inside a JSON Document (DOM). This can be analogous to XPath for XML document. However, JSON Pointer is much simpler, and a single JSON Pointer only pointed to a single value. + +Using RapidJSON's implementation of JSON Pointer can simplify some manipulations of the DOM. + +[TOC] + +# JSON Pointer {#JsonPointer} + +A JSON Pointer is a list of zero-to-many tokens, each prefixed by `/`. Each token can be a string or a number. For example, given a JSON: +~~~javascript +{ + "foo" : ["bar", "baz"], + "pi" : 3.1416 +} +~~~ + +The following JSON Pointers resolve this JSON as: + +1. `"/foo"` → `[ "bar", "baz" ]` +2. `"/foo/0"` → `"bar"` +3. `"/foo/1"` → `"baz"` +4. `"/pi"` → `3.1416` + +Note that, an empty JSON Pointer `""` (zero token) resolves to the whole JSON. 
+# Basic Usage {#BasicUsage} + +The following example code is self-explanatory. + +~~~cpp +#include "rapidjson/pointer.h" + +// ... +Document d; + +// Create DOM by Set() +Pointer("/project").Set(d, "RapidJSON"); +Pointer("/stars").Set(d, 10); + +// { "project" : "RapidJSON", "stars" : 10 } + +// Access DOM by Get(). It returns nullptr if the value does not exist. +if (Value* stars = Pointer("/stars").Get(d)) + stars->SetInt(stars->GetInt() + 1); + +// { "project" : "RapidJSON", "stars" : 11 } + +// Set() and Create() automatically generate parents if they do not exist. +Pointer("/a/b/0").Create(d); + +// { "project" : "RapidJSON", "stars" : 11, "a" : { "b" : [ null ] } } + +// GetWithDefault() returns reference. And it deep clones the default value. +Value& hello = Pointer("/hello").GetWithDefault(d, "world"); + +// { "project" : "RapidJSON", "stars" : 11, "a" : { "b" : [ null ] }, "hello" : "world" } + +// Swap() is similar to Set() +Value x("C++"); +Pointer("/hello").Swap(d, x); + +// { "project" : "RapidJSON", "stars" : 11, "a" : { "b" : [ null ] }, "hello" : "C++" } +// x becomes "world" + +// Erase a member or element, return true if the value exists +bool success = Pointer("/a").Erase(d); +assert(success); + +// { "project" : "RapidJSON", "stars" : 11, "hello" : "C++" } +~~~ + +# Helper Functions {#HelperFunctions} + +Since object-oriented calling convention may be non-intuitive, RapidJSON also provides helper functions, which just wrap the member functions with free-functions. + +The following example does exactly the same as the above one. 
+~~~cpp +Document d; + +SetValueByPointer(d, "/project", "RapidJSON"); +SetValueByPointer(d, "/stars", 10); + +if (Value* stars = GetValueByPointer(d, "/stars")) + stars->SetInt(stars->GetInt() + 1); + +CreateValueByPointer(d, "/a/b/0"); + +Value& hello = GetValueByPointerWithDefault(d, "/hello", "world"); + +Value x("C++"); +SwapValueByPointer(d, "/hello", x); + +bool success = EraseValueByPointer(d, "/a"); +assert(success); +~~~ + +The conventions are shown here for comparison: + +1. `Pointer(source).<Method>(root, ...)` +2. `<Method>ValueByPointer(root, Pointer(source), ...)` +3. `<Method>ValueByPointer(root, source, ...)` + +# Resolving Pointer {#ResolvingPointer} + +`Pointer::Get()` or `GetValueByPointer()` function does not modify the DOM. If the tokens cannot match a value in the DOM, it returns `nullptr`. User can use this to check whether a value exists. + +Note that, numerical tokens can represent an array index or member name. The resolving process will match the values according to the types of value. + +~~~javascript +{ + "0" : 123, + "1" : [456] +} +~~~ + +1. `"/0"` → `123` +2. `"/1/0"` → `456` + +The token `"0"` is treated as member name in the first pointer. It is treated as an array index in the second pointer. + +The other functions, including `Create()`, `GetWithDefault()`, `Set()` and `Swap()`, will change the DOM. These functions will always succeed. They will create the parent values if they do not exist. If the parent values do not match the tokens, they will also be forced to change their type. Changing the type also means full removal of that DOM subtree. + +Parsing the above JSON into `d`, + +~~~cpp +SetValueByPointer(d, "/1/a", 789); // { "0" : 123, "1" : { "a" : 789 } } +~~~ + +## Resolving Minus Sign Token + +Besides, [RFC6901] defines a special token `-` (single minus sign), which represents the past-the-end element of an array. `Get()` only treats this token as a member name '"-"'. 
Yet the other functions can resolve this for an array, equivalent to calling `Value::PushBack()` to the array. + +~~~cpp +Document d; +d.Parse("{\"foo\":[123]}"); +SetValueByPointer(d, "/foo/-", 456); // { "foo" : [123, 456] } +SetValueByPointer(d, "/-", 789); // { "foo" : [123, 456], "-" : 789 } +~~~ + +## Resolving Document and Value + +When using `p.Get(root)` or `GetValueByPointer(root, p)`, `root` is a (const) `Value&`. That means, it can be a subtree of the DOM. + +The other functions have two groups of signature. One group uses `Document& document` as parameter, another one uses `Value& root`. The first group uses `document.GetAllocator()` for creating values. And the second group needs the user to supply an allocator, like the functions in DOM. + +All examples above do not require an allocator parameter, because the first parameter is a `Document&`. But if you want to resolve a pointer to a subtree, you need to supply the allocator as in the following example: + +~~~cpp +class Person { +public: + Person() { + document_ = new Document(); + // CreateValueByPointer() needs no allocator here + SetLocation(CreateValueByPointer(*document_, "/residence"), ...); + SetLocation(CreateValueByPointer(*document_, "/office"), ...); + }; + +private: + void SetLocation(Value& location, const char* country, const char* addresses[2]) { + Document::AllocatorType& a = document_->GetAllocator(); + // SetValueByPointer() needs an allocator here + SetValueByPointer(location, "/country", country, a); + SetValueByPointer(location, "/address/0", addresses[0], a); + SetValueByPointer(location, "/address/1", addresses[1], a); + } + + // ... + + Document* document_; +}; +~~~ + +`Erase()` or `EraseValueByPointer()` does not need allocator. And they return `true` if the value is erased successfully. + +# Error Handling {#ErrorHandling} + +A `Pointer` parses a source string in its constructor. If there is parsing error, `Pointer::IsValid()` returns `false`. 
And you can use `Pointer::GetParseErrorCode()` and `GetParseErrorOffset()` to retrieve the error information. + +Note that, all resolving functions assume a valid pointer. Resolving with an invalid pointer causes assertion failure. + +# URI Fragment Representation {#URIFragment} + +In addition to the string representation of JSON pointer that we are using till now, [RFC6901] also defines the URI fragment representation of JSON pointer. URI fragment is specified in [RFC3986] "Uniform Resource Identifier (URI): Generic Syntax". + +The main differences are that the URI fragment always has a `#` (pound sign) in the beginning, and some characters are encoded by percent-encoding in UTF-8 sequence. For example, the following table shows different C/C++ string literals of different representations. + +String Representation | URI Fragment Representation | Pointer Tokens (UTF-8) +----------------------|-----------------------------|------------------------ +`"/foo/0"` | `"#/foo/0"` | `{"foo", 0}` +`"/a~1b"` | `"#/a~1b"` | `{"a/b"}` +`"/m~0n"` | `"#/m~0n"` | `{"m~n"}` +`"/ "` | `"#/%20"` | `{" "}` +`"/\0"` | `"#/%00"` | `{"\0"}` +`"/€"` | `"#/%E2%82%AC"` | `{"€"}` + +RapidJSON fully supports URI fragment representation. It automatically detects the pound sign during parsing. + +# Stringify + +You may also stringify a `Pointer` to a string or other output streams. This can be done by: + +~~~ +Pointer p(...); +StringBuffer sb; +p.Stringify(sb); +std::cout << sb.GetString() << std::endl; +~~~ + +It can also stringify to URI fragment representation by `StringifyUriFragment()`. + +# User-Supplied Tokens {#UserSuppliedTokens} + +If a pointer will be resolved multiple times, it should be constructed once, and then applied to different DOMs or at different times. This reduces time and memory allocation for constructing `Pointer` multiple times. 
+ +We can go one step further, to completely eliminate the parsing process and dynamic memory allocation, we can establish the token array directly: + +~~~cpp +#define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex } +#define INDEX(i) { #i, sizeof(#i) - 1, i } + +static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(123) }; +static const Pointer p(kTokens, sizeof(kTokens) / sizeof(kTokens[0])); +// Equivalent to static const Pointer p("/foo/123"); +~~~ + +This may be useful for memory constrained systems. + +[RFC3986]: https://tools.ietf.org/html/rfc3986 +[RFC6901]: https://tools.ietf.org/html/rfc6901 diff --git a/src/s3select/rapidjson/doc/pointer.zh-cn.md b/src/s3select/rapidjson/doc/pointer.zh-cn.md new file mode 100644 index 000000000..239569d4a --- /dev/null +++ b/src/s3select/rapidjson/doc/pointer.zh-cn.md @@ -0,0 +1,234 @@ +# Pointer + +(本功能于 v1.1.0 发布) + +JSON Pointer 是一个标准化([RFC6901])的方式去选取一个 JSON Document(DOM)中的值。这类似于 XML 的 XPath。然而,JSON Pointer 简单得多,而且每个 JSON Pointer 仅指向单个值。 + +使用 RapidJSON 的 JSON Pointer 实现能简化一些 DOM 的操作。 + +[TOC] + +# JSON Pointer {#JsonPointer} + +一个 JSON Pointer 由一串(零至多个)token 所组成,每个 token 都有 `/` 前缀。每个 token 可以是一个字符串或数字。例如,给定一个 JSON: +~~~javascript +{ + "foo" : ["bar", "baz"], + "pi" : 3.1416 +} +~~~ + +以下的 JSON Pointer 解析为: + +1. `"/foo"` → `[ "bar", "baz" ]` +2. `"/foo/0"` → `"bar"` +3. `"/foo/1"` → `"baz"` +4. `"/pi"` → `3.1416` + +要注意,一个空 JSON Pointer `""` (零个 token)解析为整个 JSON。 + +# 基本使用方法 {#BasicUsage} + +以下的代码范例不解自明。 + +~~~cpp +#include "rapidjson/pointer.h" + +// ... 
+Document d; + +// 使用 Set() 创建 DOM +Pointer("/project").Set(d, "RapidJSON"); +Pointer("/stars").Set(d, 10); + +// { "project" : "RapidJSON", "stars" : 10 } + +// 使用 Get() 访问 DOM。若该值不存在则返回 nullptr。 +if (Value* stars = Pointer("/stars").Get(d)) + stars->SetInt(stars->GetInt() + 1); + +// { "project" : "RapidJSON", "stars" : 11 } + +// Set() 和 Create() 自动生成父值(如果它们不存在)。 +Pointer("/a/b/0").Create(d); + +// { "project" : "RapidJSON", "stars" : 11, "a" : { "b" : [ null ] } } + +// GetWithDefault() 返回引用。若该值不存在则会深拷贝缺省值。 +Value& hello = Pointer("/hello").GetWithDefault(d, "world"); + +// { "project" : "RapidJSON", "stars" : 11, "a" : { "b" : [ null ] }, "hello" : "world" } + +// Swap() 和 Set() 相似 +Value x("C++"); +Pointer("/hello").Swap(d, x); + +// { "project" : "RapidJSON", "stars" : 11, "a" : { "b" : [ null ] }, "hello" : "C++" } +// x 变成 "world" + +// 删去一个成员或元素,若值存在返回 true +bool success = Pointer("/a").Erase(d); +assert(success); + +// { "project" : "RapidJSON", "stars" : 10 } +~~~ + +# 辅助函数 {#HelperFunctions} + +由于面向对象的调用习惯可能不符直觉,RapidJSON 也提供了一些辅助函数,它们把成员函数包装成自由函数。 + +以下的例子与上面例子所做的事情完全相同。 + +~~~cpp +Document d; + +SetValueByPointer(d, "/project", "RapidJSON"); +SetValueByPointer(d, "/stars", 10); + +if (Value* stars = GetValueByPointer(d, "/stars")) + stars->SetInt(stars->GetInt() + 1); + +CreateValueByPointer(d, "/a/b/0"); + +Value& hello = GetValueByPointerWithDefault(d, "/hello", "world"); + +Value x("C++"); +SwapValueByPointer(d, "/hello", x); + +bool success = EraseValueByPointer(d, "/a"); +assert(success); +~~~ + +以下对比 3 种调用方式: + +1. `Pointer(source).<Method>(root, ...)` +2. `<Method>ValueByPointer(root, Pointer(source), ...)` +3. `<Method>ValueByPointer(root, source, ...)` + +# 解析 Pointer {#ResolvingPointer} + +`Pointer::Get()` 或 `GetValueByPointer()` 函数并不修改 DOM。若那些 token 不能匹配 DOM 里的值,这些函数便返回 `nullptr`。使用者可利用这个方法来检查一个值是否存在。 + +注意,数值 token 可表示数组索引或成员名字。解析过程中会按值的类型来匹配。 + +~~~javascript +{ + "0" : 123, + "1" : [456] +} +~~~ + +1. `"/0"` → `123` +2. 
`"/1/0"` → `456` + +Token `"0"` 在第一个 pointer 中被当作成员名字。它在第二个 pointer 中被当作成数组索引。 + +其他函数会改变 DOM,包括 `Create()`、`GetWithDefault()`、`Set()`、`Swap()`。这些函数总是成功的。若一些父值不存在,就会创建它们。若父值类型不匹配 token,也会强行改变其类型。改变类型也意味着完全移除其 DOM 子树的内容。 + +例如,把上面的 JSON 解译至 `d` 之后, + +~~~cpp +SetValueByPointer(d, "1/a", 789); // { "0" : 123, "1" : { "a" : 789 } } +~~~ + +## 解析负号 token + +另外,[RFC6901] 定义了一个特殊 token `-` (单个负号),用于表示数组最后元素的下一个元素。 `Get()` 只会把此 token 当作成员名字 '"-"'。而其他函数则会以此解析数组,等同于对数组调用 `Value::PushBack()` 。 + +~~~cpp +Document d; +d.Parse("{\"foo\":[123]}"); +SetValueByPointer(d, "/foo/-", 456); // { "foo" : [123, 456] } +SetValueByPointer(d, "/-", 789); // { "foo" : [123, 456], "-" : 789 } +~~~ + +## 解析 Document 及 Value + +当使用 `p.Get(root)` 或 `GetValueByPointer(root, p)`,`root` 是一个(常数) `Value&`。这意味着,它也可以是 DOM 里的一个子树。 + +其他函数有两组签名。一组使用 `Document& document` 作为参数,另一组使用 `Value& root`。第一组使用 `document.GetAllocator()` 去创建值,而第二组则需要使用者提供一个 allocator,如同 DOM 里的函数。 + +以上例子都不需要 allocator 参数,因为它的第一个参数是 `Document&`。但如果你需要对一个子树进行解析,就需要如下面的例子般提供 allocator: + +~~~cpp +class Person { +public: + Person() { + document_ = new Document(); + // CreateValueByPointer() here no need allocator + SetLocation(CreateValueByPointer(*document_, "/residence"), ...); + SetLocation(CreateValueByPointer(*document_, "/office"), ...); + }; + +private: + void SetLocation(Value& location, const char* country, const char* addresses[2]) { + Value::Allocator& a = document_->GetAllocator(); + // SetValueByPointer() here need allocator + SetValueByPointer(location, "/country", country, a); + SetValueByPointer(location, "/address/0", address[0], a); + SetValueByPointer(location, "/address/1", address[1], a); + } + + // ... 
+ + Document* document_; +}; +~~~ + +`Erase()` 或 `EraseValueByPointer()` 不需要 allocator。而且它们成功删除值之后会返回 `true`。 + +# 错误处理 {#ErrorHandling} + +`Pointer` 在其建构函数里会解译源字符串。若有解析错误,`Pointer::IsValid()` 返回 `false`。你可使用 `Pointer::GetParseErrorCode()` 和 `GetParseErrorOffset()` 去获取错误信息。 + +要注意的是,所有解析函数都假设 pointer 是合法的。对一个非法 pointer 解析会造成断言失败。 + +# URI 片段表示方式 {#URIFragment} + +除了我们一直在使用的字符串方式表示 JSON pointer,[RFC6901] 也定义了一个 JSON Pointer 的 URI 片段(fragment)表示方式。URI 片段是定义于 [RFC3986] "Uniform Resource Identifier (URI): Generic Syntax"。 + +URI 片段的主要分别是必然以 `#` (pound sign)开头,而一些字符也会以百分比编码成 UTF-8 序列。例如,以下的表展示了不同表示法下的 C/C++ 字符串常数。 + +字符串表示方式 | URI 片段表示方式 | Pointer Tokens (UTF-8) +----------------------|-----------------------------|------------------------ +`"/foo/0"` | `"#/foo/0"` | `{"foo", 0}` +`"/a~1b"` | `"#/a~1b"` | `{"a/b"}` +`"/m~0n"` | `"#/m~0n"` | `{"m~n"}` +`"/ "` | `"#/%20"` | `{" "}` +`"/\0"` | `"#/%00"` | `{"\0"}` +`"/€"` | `"#/%E2%82%AC"` | `{"€"}` + +RapidJSON 完全支持 URI 片段表示方式。它在解译时会自动检测 `#` 号。 + +# 字符串化 + +你也可以把一个 `Pointer` 字符串化,储存于字符串或其他输出流。例如: + +~~~ +Pointer p(...); +StringBuffer sb; +p.Stringify(sb); +std::cout << sb.GetString() << std::endl; +~~~ + +使用 `StringifyUriFragment()` 可以把 pointer 字符串化为 URI 片段表示法。 + +# 使用者提供的 tokens {#UserSuppliedTokens} + +若一个 pointer 会用于多次解析,它应该只被创建一次,然后再施于不同的 DOM ,或在不同时间做解析。这样可以避免多次创建 `Pointer`,节省时间和内存分配。 + +我们甚至可以再更进一步,完全消去解析过程及动态内存分配。我们可以直接生成 token 数组: + +~~~cpp +#define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex } +#define INDEX(i) { #i, sizeof(#i) - 1, i } + +static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(123) }; +static const Pointer p(kTokens, sizeof(kTokens) / sizeof(kTokens[0])); +// Equivalent to static const Pointer p("/foo/123"); +~~~ + +这种做法可能适合内存受限的系统。 + +[RFC3986]: https://tools.ietf.org/html/rfc3986 +[RFC6901]: https://tools.ietf.org/html/rfc6901 diff --git a/src/s3select/rapidjson/doc/sax.md b/src/s3select/rapidjson/doc/sax.md new file mode 100644 index 000000000..d42d04388 --- /dev/null 
+++ b/src/s3select/rapidjson/doc/sax.md @@ -0,0 +1,509 @@ +# SAX + +The term "SAX" originated from [Simple API for XML](http://en.wikipedia.org/wiki/Simple_API_for_XML). We borrowed this term for JSON parsing and generation. + +In RapidJSON, `Reader` (typedef of `GenericReader<...>`) is the SAX-style parser for JSON, and `Writer` (typedef of `GenericWriter<...>`) is the SAX-style generator for JSON. + +[TOC] + +# Reader {#Reader} + +`Reader` parses a JSON from a stream. While it reads characters from the stream, it analyzes the characters according to the syntax of JSON, and publishes events to a handler. + +For example, here is a JSON. + +~~~~~~~~~~js +{ + "hello": "world", + "t": true , + "f": false, + "n": null, + "i": 123, + "pi": 3.1416, + "a": [1, 2, 3, 4] +} +~~~~~~~~~~ + +When a `Reader` parses this JSON, it publishes the following events to the handler sequentially: + +~~~~~~~~~~ +StartObject() +Key("hello", 5, true) +String("world", 5, true) +Key("t", 1, true) +Bool(true) +Key("f", 1, true) +Bool(false) +Key("n", 1, true) +Null() +Key("i") +Uint(123) +Key("pi") +Double(3.1416) +Key("a") +StartArray() +Uint(1) +Uint(2) +Uint(3) +Uint(4) +EndArray(4) +EndObject(7) +~~~~~~~~~~ + +These events can be easily matched with the JSON, but some event parameters need further explanation. 
Let's see the `simplereader` example which produces exactly the same output as above: + +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include <iostream> + +using namespace rapidjson; +using namespace std; + +struct MyHandler : public BaseReaderHandler<UTF8<>, MyHandler> { + bool Null() { cout << "Null()" << endl; return true; } + bool Bool(bool b) { cout << "Bool(" << boolalpha << b << ")" << endl; return true; } + bool Int(int i) { cout << "Int(" << i << ")" << endl; return true; } + bool Uint(unsigned u) { cout << "Uint(" << u << ")" << endl; return true; } + bool Int64(int64_t i) { cout << "Int64(" << i << ")" << endl; return true; } + bool Uint64(uint64_t u) { cout << "Uint64(" << u << ")" << endl; return true; } + bool Double(double d) { cout << "Double(" << d << ")" << endl; return true; } + bool String(const char* str, SizeType length, bool copy) { + cout << "String(" << str << ", " << length << ", " << boolalpha << copy << ")" << endl; + return true; + } + bool StartObject() { cout << "StartObject()" << endl; return true; } + bool Key(const char* str, SizeType length, bool copy) { + cout << "Key(" << str << ", " << length << ", " << boolalpha << copy << ")" << endl; + return true; + } + bool EndObject(SizeType memberCount) { cout << "EndObject(" << memberCount << ")" << endl; return true; } + bool StartArray() { cout << "StartArray()" << endl; return true; } + bool EndArray(SizeType elementCount) { cout << "EndArray(" << elementCount << ")" << endl; return true; } +}; + +void main() { + const char json[] = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } "; + + MyHandler handler; + Reader reader; + StringStream ss(json); + reader.Parse(ss, handler); +} +~~~~~~~~~~ + +Note that RapidJSON uses templates to statically bind the `Reader` type and the handler type, instead of using classes with virtual functions. This paradigm can improve performance by inlining functions. 
+ +## Handler {#Handler} + +As shown in the previous example, the user needs to implement a handler which consumes the events (via function calls) from the `Reader`. The handler must contain the following member functions. + +~~~~~~~~~~cpp +class Handler { + bool Null(); + bool Bool(bool b); + bool Int(int i); + bool Uint(unsigned i); + bool Int64(int64_t i); + bool Uint64(uint64_t i); + bool Double(double d); + bool RawNumber(const Ch* str, SizeType length, bool copy); + bool String(const Ch* str, SizeType length, bool copy); + bool StartObject(); + bool Key(const Ch* str, SizeType length, bool copy); + bool EndObject(SizeType memberCount); + bool StartArray(); + bool EndArray(SizeType elementCount); +}; +~~~~~~~~~~ + +`Null()` is called when the `Reader` encounters a JSON null value. + +`Bool(bool)` is called when the `Reader` encounters a JSON true or false value. + +When the `Reader` encounters a JSON number, it chooses a suitable C++ type mapping. And then it calls *one* function out of `Int(int)`, `Uint(unsigned)`, `Int64(int64_t)`, `Uint64(uint64_t)` and `Double(double)`. If `kParseNumbersAsStringsFlag` is enabled, `Reader` will always call `RawNumber()` instead. + +`String(const char* str, SizeType length, bool copy)` is called when the `Reader` encounters a string. The first parameter is a pointer to the string. The second parameter is the length of the string (excluding the null terminator). Note that RapidJSON supports the null character `\0` inside a string. If this happens, `strlen(str) < length`. The last parameter, `copy`, indicates whether the handler needs to make a copy of the string. For normal parsing, `copy = true`. Only when *insitu* parsing is used, `copy = false`. And be aware that the character type depends on the target encoding, which will be explained later. + +When the `Reader` encounters the beginning of an object, it calls `StartObject()`. An object in JSON is a set of name-value pairs. 
If the object contains members, it first calls `Key()` for the name of a member, and then calls functions depending on the type of the value. These calls of name-value pairs repeat until calling `EndObject(SizeType memberCount)`. Note that the `memberCount` parameter is just an aid for the handler; users who do not need this parameter may ignore it. + +Arrays are similar to objects, but simpler. At the beginning of an array, the `Reader` calls `StartArray()`. If there are elements, it calls functions according to the types of the elements. Similarly, in the last call `EndArray(SizeType elementCount)`, the parameter `elementCount` is just an aid for the handler. + +Every handler function returns a `bool`. Normally it should return `true`. If the handler encounters an error, it can return `false` to notify the event publisher to stop further processing. + +For example, when we parse a JSON with `Reader` and the handler detects that the JSON does not conform to the required schema, the handler can return `false` and let the `Reader` stop further parsing. This will place the `Reader` in an error state, with error code `kParseErrorTermination`. + +## GenericReader {#GenericReader} + +As mentioned before, `Reader` is a typedef of a template class `GenericReader`: + +~~~~~~~~~~cpp +namespace rapidjson { + +template <typename SourceEncoding, typename TargetEncoding, typename Allocator = MemoryPoolAllocator<> > +class GenericReader { + // ... +}; + +typedef GenericReader<UTF8<>, UTF8<> > Reader; + +} // namespace rapidjson +~~~~~~~~~~ + +The `Reader` uses UTF-8 as both source and target encoding. The source encoding means the encoding in the JSON stream. The target encoding means the encoding of the `str` parameter in `String()` calls. For example, to parse a UTF-8 stream and output UTF-16 string events, you can define a reader by: + +~~~~~~~~~~cpp +GenericReader<UTF8<>, UTF16<> > reader; +~~~~~~~~~~ + +Note that, the default character type of `UTF16` is `wchar_t`. 
So this `reader` needs to call `String(const wchar_t*, SizeType, bool)` of the handler. + +The third template parameter `Allocator` is the allocator type for internal data structure (actually a stack). + +## Parsing {#SaxParsing} + +The main function of `Reader` is used to parse JSON. + +~~~~~~~~~~cpp +template <unsigned parseFlags, typename InputStream, typename Handler> +bool Parse(InputStream& is, Handler& handler); + +// with parseFlags = kDefaultParseFlags +template <typename InputStream, typename Handler> +bool Parse(InputStream& is, Handler& handler); +~~~~~~~~~~ + +If an error occurs during parsing, it will return `false`. User can also call `bool HasParseError()`, `ParseErrorCode GetParseErrorCode()` and `size_t GetErrorOffset()` to obtain the error states. In fact, `Document` uses these `Reader` functions to obtain parse errors. Please refer to [DOM](doc/dom.md) for details about parse errors. + +## Token-by-Token Parsing {#TokenByTokenParsing} + +Some users may wish to parse a JSON input stream a single token at a time, instead of immediately parsing an entire document without stopping. To parse JSON this way, instead of calling `Parse`, you can use the `IterativeParse` set of functions: + +~~~~~~~~~~cpp + void IterativeParseInit(); + + template <unsigned parseFlags, typename InputStream, typename Handler> + bool IterativeParseNext(InputStream& is, Handler& handler); + + bool IterativeParseComplete(); +~~~~~~~~~~ + +Here is an example of iteratively parsing JSON, token by token: + +~~~~~~~~~~cpp + reader.IterativeParseInit(); + while (!reader.IterativeParseComplete()) { + reader.IterativeParseNext<kParseDefaultFlags>(is, handler); + // Your handler has been called once. + } +~~~~~~~~~~ + +# Writer {#Writer} + +`Reader` converts (parses) JSON into events. `Writer` does exactly the opposite. It converts events into JSON. + +`Writer` is very easy to use. 
If your application only needs to convert some data into JSON, it may be a good choice to use `Writer` directly, instead of building a `Document` and then stringifying it with a `Writer`. + +In the `simplewriter` example, we do exactly the reverse of `simplereader`. + +~~~~~~~~~~cpp +#include "rapidjson/writer.h" +#include "rapidjson/stringbuffer.h" +#include <iostream> + +using namespace rapidjson; +using namespace std; + +void main() { + StringBuffer s; + Writer<StringBuffer> writer(s); + + writer.StartObject(); + writer.Key("hello"); + writer.String("world"); + writer.Key("t"); + writer.Bool(true); + writer.Key("f"); + writer.Bool(false); + writer.Key("n"); + writer.Null(); + writer.Key("i"); + writer.Uint(123); + writer.Key("pi"); + writer.Double(3.1416); + writer.Key("a"); + writer.StartArray(); + for (unsigned i = 0; i < 4; i++) + writer.Uint(i); + writer.EndArray(); + writer.EndObject(); + + cout << s.GetString() << endl; +} +~~~~~~~~~~ + +~~~~~~~~~~ +{"hello":"world","t":true,"f":false,"n":null,"i":123,"pi":3.1416,"a":[0,1,2,3]} +~~~~~~~~~~ + +There are two `String()` and `Key()` overloads. One is the same as defined in the handler concept with 3 parameters. It can handle strings with null characters. Another one is the simpler version used in the above example. + +Note that the example code does not pass any parameters in `EndArray()` and `EndObject()`. A `SizeType` can be passed but it will be simply ignored by `Writer`. + +You may wonder why we do not just use `sprintf()` or `std::stringstream` to build a JSON. + +There are various reasons: +1. `Writer` must output a well-formed JSON. If there is an incorrect event sequence (e.g. `Int()` just after `StartObject()`), it triggers an assertion failure in debug mode. +2. `Writer::String()` can handle string escaping (e.g. converting code point `U+000A` to `\n`) and Unicode transcoding. +3. `Writer` handles number output consistently. +4. `Writer` implements the event handler concept. 
It can be used to handle events from `Reader`, `Document` or other event publisher. +5. `Writer` can be optimized for different platforms. + +Anyway, using `Writer` API is even simpler than generating a JSON by ad hoc methods. + +## Template {#WriterTemplate} + +`Writer` has a minor design difference to `Reader`. `Writer` is a template class, not a typedef. There is no `GenericWriter`. The following is the declaration. + +~~~~~~~~~~cpp +namespace rapidjson { + +template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename Allocator = CrtAllocator<>, unsigned writeFlags = kWriteDefaultFlags> +class Writer { +public: + Writer(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) +// ... +}; + +} // namespace rapidjson +~~~~~~~~~~ + +The `OutputStream` template parameter is the type of output stream. It cannot be deduced and must be specified by user. + +The `SourceEncoding` template parameter specifies the encoding to be used in `String(const Ch*, ...)`. + +The `TargetEncoding` template parameter specifies the encoding in the output stream. + +The `Allocator` is the type of allocator, which is used for allocating internal data structure (a stack). + +The `writeFlags` are combination of the following bit-flags: + +Parse flags | Meaning +------------------------------|----------------------------------- +`kWriteNoFlags` | No flag is set. +`kWriteDefaultFlags` | Default write flags. It is equal to macro `RAPIDJSON_WRITE_DEFAULT_FLAGS`, which is defined as `kWriteNoFlags`. +`kWriteValidateEncodingFlag` | Validate encoding of JSON strings. +`kWriteNanAndInfFlag` | Allow writing of `Infinity`, `-Infinity` and `NaN`. + +Besides, the constructor of `Writer` has a `levelDepth` parameter. This parameter affects the initial memory allocated for storing information per hierarchy level. 
+ +## PrettyWriter {#PrettyWriter} + +While the output of `Writer` is the most condensed JSON without white-spaces, suitable for network transfer or storage, it is not easily readable by humans. + +Therefore, RapidJSON provides a `PrettyWriter`, which adds indentation and line feeds in the output. + +The usage of `PrettyWriter` is exactly the same as `Writer`, except that `PrettyWriter` provides a `SetIndent(Ch indentChar, unsigned indentCharCount)` function. The default is 4 spaces. + +## Completeness and Reset {#CompletenessReset} + +A `Writer` can only output a single JSON, which can be any JSON type at the root. Once the singular event for root (e.g. `String()`), or the last matching `EndObject()` or `EndArray()` event, is handled, the output JSON is well-formed and complete. The user can detect this state by calling `Writer::IsComplete()`. + +When a JSON is complete, the `Writer` cannot accept any new events. Otherwise the output will be invalid (i.e. having more than one root). To reuse the `Writer` object, the user can call `Writer::Reset(OutputStream& os)` to reset all internal states of the `Writer` with a new output stream. + +# Techniques {#SaxTechniques} + +## Parsing JSON to Custom Data Structure {#CustomDataStructure} + +`Document`'s parsing capability is completely based on `Reader`. Actually `Document` is a handler which receives events from a reader to build a DOM during parsing. + +Users may use `Reader` to build other data structures directly. This eliminates the building of a DOM, thus reducing memory and improving performance. + +In the following `messagereader` example, `ParseMessages()` parses a JSON which should be an object with key-string pairs. 
+ +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include "rapidjson/error/en.h" +#include <iostream> +#include <string> +#include <map> + +using namespace std; +using namespace rapidjson; + +typedef map<string, string> MessageMap; + +struct MessageHandler + : public BaseReaderHandler<UTF8<>, MessageHandler> { + MessageHandler() : state_(kExpectObjectStart) { + } + + bool StartObject() { + switch (state_) { + case kExpectObjectStart: + state_ = kExpectNameOrObjectEnd; + return true; + default: + return false; + } + } + + bool String(const char* str, SizeType length, bool) { + switch (state_) { + case kExpectNameOrObjectEnd: + name_ = string(str, length); + state_ = kExpectValue; + return true; + case kExpectValue: + messages_.insert(MessageMap::value_type(name_, string(str, length))); + state_ = kExpectNameOrObjectEnd; + return true; + default: + return false; + } + } + + bool EndObject(SizeType) { return state_ == kExpectNameOrObjectEnd; } + + bool Default() { return false; } // All other events are invalid. + + MessageMap messages_; + enum State { + kExpectObjectStart, + kExpectNameOrObjectEnd, + kExpectValue, + }state_; + std::string name_; +}; + +void ParseMessages(const char* json, MessageMap& messages) { + Reader reader; + MessageHandler handler; + StringStream ss(json); + if (reader.Parse(ss, handler)) + messages.swap(handler.messages_); // Only change it if success. 
+ else { + ParseErrorCode e = reader.GetParseErrorCode(); + size_t o = reader.GetErrorOffset(); + cout << "Error: " << GetParseError_En(e) << endl;; + cout << " at offset " << o << " near '" << string(json).substr(o, 10) << "...'" << endl; + } +} + +int main() { + MessageMap messages; + + const char* json1 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\" }"; + cout << json1 << endl; + ParseMessages(json1, messages); + + for (MessageMap::const_iterator itr = messages.begin(); itr != messages.end(); ++itr) + cout << itr->first << ": " << itr->second << endl; + + cout << endl << "Parse a JSON with invalid schema." << endl; + const char* json2 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\", \"foo\" : {} }"; + cout << json2 << endl; + ParseMessages(json2, messages); + + return 0; +} +~~~~~~~~~~ + +~~~~~~~~~~ +{ "greeting" : "Hello!", "farewell" : "bye-bye!" } +farewell: bye-bye! +greeting: Hello! + +Parse a JSON with invalid schema. +{ "greeting" : "Hello!", "farewell" : "bye-bye!", "foo" : {} } +Error: Terminate parsing due to Handler error. + at offset 59 near '} }...' +~~~~~~~~~~ + +The first JSON (`json1`) was successfully parsed into `MessageMap`. Since `MessageMap` is a `std::map`, the printing order are sorted by the key. This order is different from the JSON's order. + +In the second JSON (`json2`), `foo`'s value is an empty object. As it is an object, `MessageHandler::StartObject()` will be called. However, at that moment `state_ = kExpectValue`, so that function returns `false` and cause the parsing process be terminated. The error code is `kParseErrorTermination`. + +## Filtering of JSON {#Filtering} + +As mentioned earlier, `Writer` can handle the events published by `Reader`. `condense` example simply set a `Writer` as handler of a `Reader`, so it can remove all white-spaces in JSON. `pretty` example uses the same relationship, but replacing `Writer` by `PrettyWriter`. 
So `pretty` can be used to reformat a JSON with indentation and line feed. + +Actually, we can add intermediate layer(s) to filter the contents of JSON via these SAX-style API. For example, `capitalize` example capitalize all strings in a JSON. + +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include "rapidjson/writer.h" +#include "rapidjson/filereadstream.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/error/en.h" +#include <vector> +#include <cctype> + +using namespace rapidjson; + +template<typename OutputHandler> +struct CapitalizeFilter { + CapitalizeFilter(OutputHandler& out) : out_(out), buffer_() { + } + + bool Null() { return out_.Null(); } + bool Bool(bool b) { return out_.Bool(b); } + bool Int(int i) { return out_.Int(i); } + bool Uint(unsigned u) { return out_.Uint(u); } + bool Int64(int64_t i) { return out_.Int64(i); } + bool Uint64(uint64_t u) { return out_.Uint64(u); } + bool Double(double d) { return out_.Double(d); } + bool RawNumber(const char* str, SizeType length, bool copy) { return out_.RawNumber(str, length, copy); } + bool String(const char* str, SizeType length, bool) { + buffer_.clear(); + for (SizeType i = 0; i < length; i++) + buffer_.push_back(std::toupper(str[i])); + return out_.String(&buffer_.front(), length, true); // true = output handler need to copy the string + } + bool StartObject() { return out_.StartObject(); } + bool Key(const char* str, SizeType length, bool copy) { return String(str, length, copy); } + bool EndObject(SizeType memberCount) { return out_.EndObject(memberCount); } + bool StartArray() { return out_.StartArray(); } + bool EndArray(SizeType elementCount) { return out_.EndArray(elementCount); } + + OutputHandler& out_; + std::vector<char> buffer_; +}; + +int main(int, char*[]) { + // Prepare JSON reader and input stream. + Reader reader; + char readBuffer[65536]; + FileReadStream is(stdin, readBuffer, sizeof(readBuffer)); + + // Prepare JSON writer and output stream. 
+ char writeBuffer[65536]; + FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer)); + Writer<FileWriteStream> writer(os); + + // JSON reader parse from the input stream and let writer generate the output. + CapitalizeFilter<Writer<FileWriteStream> > filter(writer); + if (!reader.Parse(is, filter)) { + fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), GetParseError_En(reader.GetParseErrorCode())); + return 1; + } + + return 0; +} +~~~~~~~~~~ + +Note that it is incorrect to simply capitalize the JSON as a string. For example: +~~~~~~~~~~ +["Hello\nWorld"] +~~~~~~~~~~ + +Simply capitalizing the whole JSON would produce an incorrect escape sequence: +~~~~~~~~~~ +["HELLO\NWORLD"] +~~~~~~~~~~ + +The correct result by `capitalize`: +~~~~~~~~~~ +["HELLO\nWORLD"] +~~~~~~~~~~ + +More complicated filters can be developed. However, since a SAX-style API can only provide information about a single event at a time, the user may need to keep track of the contextual information (e.g. the path from root value, storage of other related values). Some processing may be easier to implement in DOM than SAX. 
diff --git a/src/s3select/rapidjson/doc/sax.zh-cn.md b/src/s3select/rapidjson/doc/sax.zh-cn.md new file mode 100644 index 000000000..9b11e7683 --- /dev/null +++ b/src/s3select/rapidjson/doc/sax.zh-cn.md @@ -0,0 +1,487 @@ +# SAX + +"SAX" 此术语源于 [Simple API for XML](http://en.wikipedia.org/wiki/Simple_API_for_XML)。我们借了此术语去套用在 JSON 的解析及生成。 + +在 RapidJSON 中,`Reader`(`GenericReader<...>` 的 typedef)是 JSON 的 SAX 风格解析器,而 `Writer`(`GenericWriter<...>` 的 typedef)则是 JSON 的 SAX 风格生成器。 + +[TOC] + +# Reader {#Reader} + +`Reader` 从输入流解析一个 JSON。当它从流中读取字符时,它会基于 JSON 的语法去分析字符,并向处理器发送事件。 + +例如,以下是一个 JSON。 + +~~~~~~~~~~js +{ + "hello": "world", + "t": true , + "f": false, + "n": null, + "i": 123, + "pi": 3.1416, + "a": [1, 2, 3, 4] +} +~~~~~~~~~~ + +当一个 `Reader` 解析此 JSON 时,它会顺序地向处理器发送以下的事件: + +~~~~~~~~~~ +StartObject() +Key("hello", 5, true) +String("world", 5, true) +Key("t", 1, true) +Bool(true) +Key("f", 1, true) +Bool(false) +Key("n", 1, true) +Null() +Key("i") +Uint(123) +Key("pi") +Double(3.1416) +Key("a") +StartArray() +Uint(1) +Uint(2) +Uint(3) +Uint(4) +EndArray(4) +EndObject(7) +~~~~~~~~~~ + +除了一些事件参数需要再作解释,这些事件可以轻松地与 JSON 对上。我们可以看看 `simplereader` 例子怎样产生和以上完全相同的结果: + +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include <iostream> + +using namespace rapidjson; +using namespace std; + +struct MyHandler : public BaseReaderHandler<UTF8<>, MyHandler> { + bool Null() { cout << "Null()" << endl; return true; } + bool Bool(bool b) { cout << "Bool(" << boolalpha << b << ")" << endl; return true; } + bool Int(int i) { cout << "Int(" << i << ")" << endl; return true; } + bool Uint(unsigned u) { cout << "Uint(" << u << ")" << endl; return true; } + bool Int64(int64_t i) { cout << "Int64(" << i << ")" << endl; return true; } + bool Uint64(uint64_t u) { cout << "Uint64(" << u << ")" << endl; return true; } + bool Double(double d) { cout << "Double(" << d << ")" << endl; return true; } + bool String(const char* str, SizeType length, bool copy) { + cout << "String(" << str << ", " << 
length << ", " << boolalpha << copy << ")" << endl; + return true; + } + bool StartObject() { cout << "StartObject()" << endl; return true; } + bool Key(const char* str, SizeType length, bool copy) { + cout << "Key(" << str << ", " << length << ", " << boolalpha << copy << ")" << endl; + return true; + } + bool EndObject(SizeType memberCount) { cout << "EndObject(" << memberCount << ")" << endl; return true; } + bool StartArray() { cout << "StartArray()" << endl; return true; } + bool EndArray(SizeType elementCount) { cout << "EndArray(" << elementCount << ")" << endl; return true; } +}; + +void main() { + const char json[] = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } "; + + MyHandler handler; + Reader reader; + StringStream ss(json); + reader.Parse(ss, handler); +} +~~~~~~~~~~ + +注意 RapidJSON 使用模板去静态挷定 `Reader` 类型及处理器的类型,而不是使用含虚函数的类。这个范式可以通过把函数内联而改善性能。 + +## 处理器 {#Handler} + +如前例所示,使用者需要实现一个处理器(handler),用于处理来自 `Reader` 的事件(函数调用)。处理器必须包含以下的成员函数。 + +~~~~~~~~~~cpp +class Handler { + bool Null(); + bool Bool(bool b); + bool Int(int i); + bool Uint(unsigned i); + bool Int64(int64_t i); + bool Uint64(uint64_t i); + bool Double(double d); + bool RawNumber(const Ch* str, SizeType length, bool copy); + bool String(const Ch* str, SizeType length, bool copy); + bool StartObject(); + bool Key(const Ch* str, SizeType length, bool copy); + bool EndObject(SizeType memberCount); + bool StartArray(); + bool EndArray(SizeType elementCount); +}; +~~~~~~~~~~ + +当 `Reader` 遇到 JSON null 值时会调用 `Null()`。 + +当 `Reader` 遇到 JSON true 或 false 值时会调用 `Bool(bool)`。 + +当 `Reader` 遇到 JSON number,它会选择一个合适的 C++ 类型映射,然后调用 `Int(int)`、`Uint(unsigned)`、`Int64(int64_t)`、`Uint64(uint64_t)` 及 `Double(double)` 的 * 其中之一个 *。 若开启了 `kParseNumbersAsStrings` 选项,`Reader` 便会改为调用 `RawNumber()`。 + +当 `Reader` 遇到 JSON string,它会调用 `String(const char* str, SizeType length, bool copy)`。第一个参数是字符串的指针。第二个参数是字符串的长度(不包含空终止符号)。注意 RapidJSON 支持字串中含有空字符 
`\0`。若出现这种情况,便会有 `strlen(str) < length`。最后的 `copy` 参数表示处理器是否需要复制该字符串。在正常解析时,`copy = true`。仅当使用原位解析时,`copy = false`。此外,还要注意字符的类型与目标编码相关,我们稍后会再谈这一点。 + +当 `Reader` 遇到 JSON object 的开始之时,它会调用 `StartObject()`。JSON 的 object 是一个键值对(成员)的集合。若 object 包含成员,它会先为成员的名字调用 `Key()`,然后再按值的类型调用函数。它不断调用这些键值对,直至最终调用 `EndObject(SizeType memberCount)`。注意 `memberCount` 参数对处理器来说只是协助性质,使用者可能不需要此参数。 + +JSON array 与 object 相似,但更简单。在 array 开始时,`Reader` 会调用 `StartArray()`。若 array 含有元素,它会按元素的类型来调用函数。相似地,最后它会调用 `EndArray(SizeType elementCount)`,其中 `elementCount` 参数对处理器来说只是协助性质。 + +每个处理器函数都返回一个 `bool`。正常它们应返回 `true`。若处理器遇到错误,它可以返回 `false` 去通知事件发送方停止继续处理。 + +例如,当我们用 `Reader` 解析一个 JSON 时,处理器检测到该 JSON 并不符合所需的 schema,那么处理器可以返回 `false`,令 `Reader` 停止之后的解析工作。而 `Reader` 会进入一个错误状态,并以 `kParseErrorTermination` 错误码标识。 + +## GenericReader {#GenericReader} + +前面提及,`Reader` 是 `GenericReader` 模板类的 typedef: + +~~~~~~~~~~cpp +namespace rapidjson { + +template <typename SourceEncoding, typename TargetEncoding, typename Allocator = MemoryPoolAllocator<> > +class GenericReader { + // ... 
+}; + +typedef GenericReader<UTF8<>, UTF8<> > Reader; + +} // namespace rapidjson +~~~~~~~~~~ + +`Reader` 使用 UTF-8 作为来源及目标编码。来源编码是指 JSON 流的编码。目标编码是指 `String()` 的 `str` 参数所用的编码。例如,要解析一个 UTF-8 流并输出至 UTF-16 string 事件,你需要这么定义一个 reader: + +~~~~~~~~~~cpp +GenericReader<UTF8<>, UTF16<> > reader; +~~~~~~~~~~ + +注意到 `UTF16` 的缺省类型是 `wchar_t`。因此这个 `reader` 需要调用处理器的 `String(const wchar_t*, SizeType, bool)`。 + +第三个模板参数 `Allocator` 是内部数据结构(实际上是一个堆栈)的分配器类型。 + +## 解析 {#SaxParsing} + +`Reader` 的唯一功能就是解析 JSON。 + +~~~~~~~~~~cpp +template <unsigned parseFlags, typename InputStream, typename Handler> +bool Parse(InputStream& is, Handler& handler); + +// 使用 parseFlags = kParseDefaultFlags +template <typename InputStream, typename Handler> +bool Parse(InputStream& is, Handler& handler); +~~~~~~~~~~ + +若在解析中出现错误,它会返回 `false`。使用者可调用 `bool HasParseError()`, `ParseErrorCode GetParseErrorCode()` 及 `size_t GetErrorOffset()` 获取错误状态。实际上 `Document` 使用这些 `Reader` 函数去获取解析错误。请参考 [DOM](doc/dom.zh-cn.md) 去了解有关解析错误的细节。 + +# Writer {#Writer} + +`Reader` 把 JSON 转换(解析)成为事件。`Writer` 做完全相反的事情。它把事件转换成 JSON。 + +`Writer` 是非常容易使用的。若你的应用程序只需把一些数据转换成 JSON,可能直接使用 `Writer`,会比建立一个 `Document` 然后用 `Writer` 把它转换成 JSON 更加方便。 + +在 `simplewriter` 例子里,我们做 `simplereader` 完全相反的事情。 + +~~~~~~~~~~cpp +#include "rapidjson/writer.h" +#include "rapidjson/stringbuffer.h" +#include <iostream> + +using namespace rapidjson; +using namespace std; + +void main() { + StringBuffer s; + Writer<StringBuffer> writer(s); + + writer.StartObject(); + writer.Key("hello"); + writer.String("world"); + writer.Key("t"); + writer.Bool(true); + writer.Key("f"); + writer.Bool(false); + writer.Key("n"); + writer.Null(); + writer.Key("i"); + writer.Uint(123); + writer.Key("pi"); + writer.Double(3.1416); + writer.Key("a"); + writer.StartArray(); + for (unsigned i = 0; i < 4; i++) + writer.Uint(i); + writer.EndArray(); + writer.EndObject(); + + cout << s.GetString() << endl; +} +~~~~~~~~~~ + +~~~~~~~~~~ 
+{"hello":"world","t":true,"f":false,"n":null,"i":123,"pi":3.1416,"a":[0,1,2,3]} +~~~~~~~~~~ + +`String()` 及 `Key()` 各有两个重载。一个是如处理器 concept 般,有 3 个参数。它能处理含空字符的字符串。另一个是如上中使用的较简单版本。 + +注意到,例子代码中的 `EndArray()` 及 `EndObject()` 并没有参数。可以传递一个 `SizeType` 的参数,但它会被 `Writer` 忽略。 + +你可能会怀疑,为什么不使用 `sprintf()` 或 `std::stringstream` 去建立一个 JSON? + +这有几个原因: +1. `Writer` 必然会输出一个结构良好(well-formed)的 JSON。若然有错误的事件次序(如 `Int()` 紧随 `StartObject()` 出现),它会在调试模式中产生断言失败。 +2. `Writer::String()` 可处理字符串转义(如把码点 `U+000A` 转换成 `\n`)及进行 Unicode 转码。 +3. `Writer` 一致地处理 number 的输出。 +4. `Writer` 实现了事件处理器 concept。可用于处理来自 `Reader`、`Document` 或其他事件发生器。 +5. `Writer` 可对不同平台进行优化。 + +无论如何,使用 `Writer` API 去生成 JSON 甚至乎比这些临时方法更简单。 + +## 模板 {#WriterTemplate} + +`Writer` 与 `Reader` 有少许设计区别。`Writer` 是一个模板类,而不是一个 typedef。 并没有 `GenericWriter`。以下是 `Writer` 的声明。 + +~~~~~~~~~~cpp +namespace rapidjson { + +template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename Allocator = CrtAllocator<> > +class Writer { +public: + Writer(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) +// ... 
+}; + +} // namespace rapidjson +~~~~~~~~~~ + +`OutputStream` 模板参数是输出流的类型。它的类型不可以被自动推断,必须由使用者提供。 + +`SourceEncoding` 模板参数指定了 `String(const Ch*, ...)` 的编码。 + +`TargetEncoding` 模板参数指定输出流的编码。 + +`Allocator` 是分配器的类型,用于分配内部数据结构(一个堆栈)。 + +`writeFlags` 是以下位标志的组合: + +写入位标志 | 意义 +------------------------------|----------------------------------- +`kWriteNoFlags` | 没有任何标志。 +`kWriteDefaultFlags` | 缺省的写入选项。它等于 `RAPIDJSON_WRITE_DEFAULT_FLAGS` 宏,此宏定义为 `kWriteNoFlags`。 +`kWriteValidateEncodingFlag` | 校验 JSON 字符串的编码。 +`kWriteNanAndInfFlag` | 容许写入 `Infinity`, `-Infinity` 及 `NaN`。 + +此外,`Writer` 的构造函数有一 `levelDepth` 参数。存储每层阶信息的初始内存分配量受此参数影响。 + +## PrettyWriter {#PrettyWriter} + +`Writer` 所输出的是没有空格字符的最紧凑 JSON,适合网络传输或储存,但不适合人类阅读。 + +因此,RapidJSON 提供了一个 `PrettyWriter`,它在输出中加入缩进及换行。 + +`PrettyWriter` 的用法与 `Writer` 几乎一样,不同之处是 `PrettyWriter` 提供了一个 `SetIndent(Ch indentChar, unsigned indentCharCount)` 函数。缺省的缩进是 4 个空格。 + +## 完整性及重置 {#CompletenessReset} + +一个 `Writer` 只可输出单个 JSON,其根节点可以是任何 JSON 类型。当处理完单个根节点事件(如 `String()`),或匹配的最后 `EndObject()` 或 `EndArray()` 事件,输出的 JSON 是结构完整(well-formed)及完整的。使用者可调用 `Writer::IsComplete()` 去检测完整性。 + +当 JSON 完整时,`Writer` 不能再接受新的事件。不然其输出便会是不合法的(例如有超过一个根节点)。为了重新利用 `Writer` 对象,使用者可调用 `Writer::Reset(OutputStream& os)` 去重置其所有内部状态及设置新的输出流。 + +# 技巧 {#SaxTechniques} + +## 解析 JSON 至自定义结构 {#CustomDataStructure} + +`Document` 的解析功能完全依靠 `Reader`。实际上 `Document` 是一个处理器,在解析 JSON 时接收事件去建立一个 DOM。 + +使用者可以直接使用 `Reader` 去建立其他数据结构。这消除了建立 DOM 的步骤,从而减少了内存开销并改善性能。 + +在以下的 `messagereader` 例子中,`ParseMessages()` 解析一个 JSON,该 JSON 应该是一个含键值对的 object。 + +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include "rapidjson/error/en.h" +#include <iostream> +#include <string> +#include <map> + +using namespace std; +using namespace rapidjson; + +typedef map<string, string> MessageMap; + +struct MessageHandler + : public BaseReaderHandler<UTF8<>, MessageHandler> { + MessageHandler() : state_(kExpectObjectStart) { + } + + bool StartObject() { + switch (state_) { + case kExpectObjectStart: + state_ = 
kExpectNameOrObjectEnd; + return true; + default: + return false; + } + } + + bool String(const char* str, SizeType length, bool) { + switch (state_) { + case kExpectNameOrObjectEnd: + name_ = string(str, length); + state_ = kExpectValue; + return true; + case kExpectValue: + messages_.insert(MessageMap::value_type(name_, string(str, length))); + state_ = kExpectNameOrObjectEnd; + return true; + default: + return false; + } + } + + bool EndObject(SizeType) { return state_ == kExpectNameOrObjectEnd; } + + bool Default() { return false; } // All other events are invalid. + + MessageMap messages_; + enum State { + kExpectObjectStart, + kExpectNameOrObjectEnd, + kExpectValue, + }state_; + std::string name_; +}; + +void ParseMessages(const char* json, MessageMap& messages) { + Reader reader; + MessageHandler handler; + StringStream ss(json); + if (reader.Parse(ss, handler)) + messages.swap(handler.messages_); // Only change it if success. + else { + ParseErrorCode e = reader.GetParseErrorCode(); + size_t o = reader.GetErrorOffset(); + cout << "Error: " << GetParseError_En(e) << endl;; + cout << " at offset " << o << " near '" << string(json).substr(o, 10) << "...'" << endl; + } +} + +int main() { + MessageMap messages; + + const char* json1 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\" }"; + cout << json1 << endl; + ParseMessages(json1, messages); + + for (MessageMap::const_iterator itr = messages.begin(); itr != messages.end(); ++itr) + cout << itr->first << ": " << itr->second << endl; + + cout << endl << "Parse a JSON with invalid schema." << endl; + const char* json2 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\", \"foo\" : {} }"; + cout << json2 << endl; + ParseMessages(json2, messages); + + return 0; +} +~~~~~~~~~~ + +~~~~~~~~~~ +{ "greeting" : "Hello!", "farewell" : "bye-bye!" } +farewell: bye-bye! +greeting: Hello! + +Parse a JSON with invalid schema. 
+{ "greeting" : "Hello!", "farewell" : "bye-bye!", "foo" : {} } +Error: Terminate parsing due to Handler error. + at offset 59 near '} }...' +~~~~~~~~~~ + +第一个 JSON(`json1`)被成功地解析至 `MessageMap`。由于 `MessageMap` 是一个 `std::map`,打印次序按键值排序。此次序与 JSON 中的次序不同。 + +在第二个 JSON(`json2`)中,`foo` 的值是一个空 object。由于它是一个 object,`MessageHandler::StartObject()` 会被调用。然而,在 `state_ = kExpectValue` 的情况下,该函数会返回 `false`,并导致解析过程终止。错误代码是 `kParseErrorTermination`。 + +## 过滤 JSON {#Filtering} + +如前面提及过,`Writer` 可处理 `Reader` 发出的事件。`example/condense/condense.cpp` 例子简单地设置 `Writer` 作为一个 `Reader` 的处理器,因此它能移除 JSON 中的所有空白字符。`example/pretty/pretty.cpp` 例子使用同样的关系,只是以 `PrettyWriter` 取代 `Writer`。因此 `pretty` 能够重新格式化 JSON,加入缩进及换行。 + +实际上,我们可以使用 SAX 风格 API 去加入(多个)中间层去过滤 JSON 的内容。例如 `capitalize` 例子可以把所有 JSON string 改为大写。 + +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include "rapidjson/writer.h" +#include "rapidjson/filereadstream.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/error/en.h" +#include <vector> +#include <cctype> + +using namespace rapidjson; + +template<typename OutputHandler> +struct CapitalizeFilter { + CapitalizeFilter(OutputHandler& out) : out_(out), buffer_() { + } + + bool Null() { return out_.Null(); } + bool Bool(bool b) { return out_.Bool(b); } + bool Int(int i) { return out_.Int(i); } + bool Uint(unsigned u) { return out_.Uint(u); } + bool Int64(int64_t i) { return out_.Int64(i); } + bool Uint64(uint64_t u) { return out_.Uint64(u); } + bool Double(double d) { return out_.Double(d); } + bool RawNumber(const char* str, SizeType length, bool copy) { return out_.RawNumber(str, length, copy); } + bool String(const char* str, SizeType length, bool) { + buffer_.clear(); + for (SizeType i = 0; i < length; i++) + buffer_.push_back(std::toupper(str[i])); + return out_.String(&buffer_.front(), length, true); // true = output handler need to copy the string + } + bool StartObject() { return out_.StartObject(); } + bool Key(const char* str, SizeType length, bool copy) { return 
String(str, length, copy); } + bool EndObject(SizeType memberCount) { return out_.EndObject(memberCount); } + bool StartArray() { return out_.StartArray(); } + bool EndArray(SizeType elementCount) { return out_.EndArray(elementCount); } + + OutputHandler& out_; + std::vector<char> buffer_; +}; + +int main(int, char*[]) { + // Prepare JSON reader and input stream. + Reader reader; + char readBuffer[65536]; + FileReadStream is(stdin, readBuffer, sizeof(readBuffer)); + + // Prepare JSON writer and output stream. + char writeBuffer[65536]; + FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer)); + Writer<FileWriteStream> writer(os); + + // JSON reader parse from the input stream and let writer generate the output. + CapitalizeFilter<Writer<FileWriteStream> > filter(writer); + if (!reader.Parse(is, filter)) { + fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), GetParseError_En(reader.GetParseErrorCode())); + return 1; + } + + return 0; +} +~~~~~~~~~~ + +注意到,不可简单地把 JSON 当作字符串去改为大写。例如: +~~~~~~~~~~ +["Hello\nWorld"] +~~~~~~~~~~ + +简单地把整个 JSON 转为大写的话会产生错误的转义符: +~~~~~~~~~~ +["HELLO\NWORLD"] +~~~~~~~~~~ + +而 `capitalize` 就会产生正确的结果: +~~~~~~~~~~ +["HELLO\nWORLD"] +~~~~~~~~~~ + +我们还可以开发更复杂的过滤器。然而,由于 SAX 风格 API 在某一时间点只能提供单一事件的信息,使用者需要自行记录一些上下文信息(例如从根节点起的路径、储存其他相关值)。对于处理某些情况,用 DOM 会比 SAX 更容易实现。 + diff --git a/src/s3select/rapidjson/doc/schema.md b/src/s3select/rapidjson/doc/schema.md new file mode 100644 index 000000000..238d7a56a --- /dev/null +++ b/src/s3select/rapidjson/doc/schema.md @@ -0,0 +1,505 @@ +# Schema + +(This feature was released in v1.1.0) + +JSON Schema is a draft standard for describing the format of JSON data. The schema itself is also JSON data. By validating a JSON structure with JSON Schema, your code can safely access the DOM without manually checking types, or whether a key exists, etc. It can also ensure that the serialized JSON conform to a specified schema. 
+ +RapidJSON implemented a JSON Schema validator for [JSON Schema Draft v4](http://json-schema.org/documentation.html). If you are not familiar with JSON Schema, you may refer to [Understanding JSON Schema](http://spacetelescope.github.io/understanding-json-schema/). + +[TOC] + +# Basic Usage {#Basic} + +First of all, you need to parse a JSON Schema into `Document`, and then compile the `Document` into a `SchemaDocument`. + +Secondly, construct a `SchemaValidator` with the `SchemaDocument`. It is similar to a `Writer` in the sense of handling SAX events. So, you can use `document.Accept(validator)` to validate a document, and then check the validity. + +~~~cpp +#include "rapidjson/schema.h" + +// ... + +Document sd; +if (sd.Parse(schemaJson).HasParseError()) { + // the schema is not a valid JSON. + // ... +} +SchemaDocument schema(sd); // Compile a Document to SchemaDocument +// sd is no longer needed here. + +Document d; +if (d.Parse(inputJson).HasParseError()) { + // the input is not a valid JSON. + // ... +} + +SchemaValidator validator(schema); +if (!d.Accept(validator)) { + // Input JSON is invalid according to the schema + // Output diagnostic information + StringBuffer sb; + validator.GetInvalidSchemaPointer().StringifyUriFragment(sb); + printf("Invalid schema: %s\n", sb.GetString()); + printf("Invalid keyword: %s\n", validator.GetInvalidSchemaKeyword()); + sb.Clear(); + validator.GetInvalidDocumentPointer().StringifyUriFragment(sb); + printf("Invalid document: %s\n", sb.GetString()); +} +~~~ + +Some notes: + +* One `SchemaDocument` can be referenced by multiple `SchemaValidator`s. It will not be modified by `SchemaValidator`s. +* A `SchemaValidator` may be reused to validate multiple documents. To run it for other documents, call `validator.Reset()` first. + +# Validation during parsing/serialization {#Fused} + +Unlike most JSON Schema validator implementations, RapidJSON provides a SAX-based schema validator. 
Therefore, you can parse a JSON from a stream while validating it on the fly. If the validator encounters a JSON value that violates the supplied schema, the parsing will be terminated immediately. This design is especially useful for parsing large JSON files. + +## DOM parsing {#DOM} + +For using DOM in parsing, `Document` needs some preparation and finalizing tasks, in addition to receiving SAX events, thus it needs some work to route the reader, validator and the document. `SchemaValidatingReader` is a helper class that does such work. + +~~~cpp +#include "rapidjson/filereadstream.h" + +// ... +SchemaDocument schema(sd); // Compile a Document to SchemaDocument + +// Use reader to parse the JSON +FILE* fp = fopen("big.json", "r"); +FileReadStream is(fp, buffer, sizeof(buffer)); + +// Parse JSON from reader, validate the SAX events, and store in d. +Document d; +SchemaValidatingReader<kParseDefaultFlags, FileReadStream, UTF8<> > reader(is, schema); +d.Populate(reader); + +if (!reader.GetParseResult()) { + // Not a valid JSON + // When reader.GetParseResult().Code() == kParseErrorTermination, + // it may be terminated by: + // (1) the validator found that the JSON is invalid according to schema; or + // (2) the input stream has I/O error. + + // Check the validation result + if (!reader.IsValid()) { + // Input JSON is invalid according to the schema + // Output diagnostic information + StringBuffer sb; + reader.GetInvalidSchemaPointer().StringifyUriFragment(sb); + printf("Invalid schema: %s\n", sb.GetString()); + printf("Invalid keyword: %s\n", reader.GetInvalidSchemaKeyword()); + sb.Clear(); + reader.GetInvalidDocumentPointer().StringifyUriFragment(sb); + printf("Invalid document: %s\n", sb.GetString()); + } +} +~~~ + +## SAX parsing {#SAX} + +For using SAX in parsing, it is much simpler. 
If you only need to validate the JSON without further processing, it is simply: + +~~~ +SchemaValidator validator(schema); +Reader reader; +if (!reader.Parse(stream, validator)) { + if (!validator.IsValid()) { + // ... + } +} +~~~ + +This is exactly the method used in the [schemavalidator](example/schemavalidator/schemavalidator.cpp) example. The distinct advantage is low memory usage, no matter how big the JSON is (the memory usage depends on the complexity of the schema). + +If you need to handle the SAX events further, then you need to use the template class `GenericSchemaValidator` to set the output handler of the validator: + +~~~ +MyHandler handler; +GenericSchemaValidator<SchemaDocument, MyHandler> validator(schema, handler); +Reader reader; +if (!reader.Parse(ss, validator)) { + if (!validator.IsValid()) { + // ... + } +} +~~~ + +## Serialization {#Serialization} + +It is also possible to do validation during serializing. This can ensure the resulting JSON is valid according to the JSON schema. + +~~~ +StringBuffer sb; +Writer<StringBuffer> writer(sb); +GenericSchemaValidator<SchemaDocument, Writer<StringBuffer> > validator(s, writer); +if (!d.Accept(validator)) { + // Some problem during Accept(), it may be validation or encoding issues. + if (!validator.IsValid()) { + // ... + } +} +~~~ + +Of course, if your application only needs SAX-style serialization, it can simply send SAX events to `SchemaValidator` instead of `Writer`. + +# Remote Schema {#Remote} + +JSON Schema supports the [`$ref` keyword](http://spacetelescope.github.io/understanding-json-schema/structuring.html), which is a [JSON pointer](doc/pointer.md) referencing a local or remote schema. A local pointer is prefixed with `#`, while a remote pointer is a relative or absolute URI. For example: + +~~~js +{ "$ref": "definitions.json#/address" } +~~~ + +As `SchemaDocument` does not know how to resolve such a URI, it needs a user-provided `IRemoteSchemaDocumentProvider` instance to do so. 
+ +~~~ +class MyRemoteSchemaDocumentProvider : public IRemoteSchemaDocumentProvider { +public: + virtual const SchemaDocument* GetRemoteDocument(const char* uri, SizeType length) { + // Resolve the uri and return a pointer to that schema. + } +}; + +// ... + +MyRemoteSchemaDocumentProvider provider; +SchemaDocument schema(sd, &provider); +~~~ + +# Conformance {#Conformance} + +RapidJSON passed 262 out of 263 tests in [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite) (JSON Schema draft 4). + +The failed test is "changed scope ref invalid" of "change resolution scope" in `refRemote.json`. This is because the `id` schema keyword and the URI combining function are not implemented. + +Besides, the `format` schema keyword for string values is ignored, since it is not required by the specification. + +## Regular Expression {#Regex} + +The schema keywords `pattern` and `patternProperties` use regular expressions to match the required pattern. + +RapidJSON implemented a simple NFA regular expression engine, which is used by default. It supports the following syntax. + +|Syntax|Description| +|------|-----------| +|`ab` | Concatenation | +|<code>a|b</code> | Alternation | +|`a?` | Zero or one | +|`a*` | Zero or more | +|`a+` | One or more | +|`a{3}` | Exactly 3 times | +|`a{3,}` | At least 3 times | +|`a{3,5}`| 3 to 5 times | +|`(ab)` | Grouping | +|`^a` | At the beginning | +|`a$` | At the end | +|`.` | Any character | +|`[abc]` | Character classes | +|`[a-c]` | Character class range | +|`[a-z0-9_]` | Character class combination | +|`[^abc]` | Negated character classes | +|`[^a-c]` | Negated character class range | +|`[\b]` | Backspace (U+0008) | +|<code>\\|</code>, `\\`, ... 
| Escape characters | +|`\f` | Form feed (U+000C) | +|`\n` | Line feed (U+000A) | +|`\r` | Carriage return (U+000D) | +|`\t` | Tab (U+0009) | +|`\v` | Vertical tab (U+000B) | + +For C++11 compiler, it is also possible to use the `std::regex` by defining `RAPIDJSON_SCHEMA_USE_INTERNALREGEX=0` and `RAPIDJSON_SCHEMA_USE_STDREGEX=1`. If your schemas do not need `pattern` and `patternProperties`, you can set both macros to zero to disable this feature, which will reduce some code size. + +# Performance {#Performance} + +Most C++ JSON libraries do not yet support JSON Schema. So we tried to evaluate the performance of RapidJSON's JSON Schema validator according to [json-schema-benchmark](https://github.com/ebdrup/json-schema-benchmark), which tests 11 JavaScript libraries running on Node.js. + +That benchmark runs validations on [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite), in which some test suites and tests are excluded. We made the same benchmarking procedure in [`schematest.cpp`](test/perftest/schematest.cpp). + +On a Mac Book Pro (2.8 GHz Intel Core i7), the following results are collected. 
+ +|Validator|Relative speed|Number of test runs per second| +|---------|:------------:|:----------------------------:| +|RapidJSON|155%|30682| +|[`ajv`](https://github.com/epoberezkin/ajv)|100%|19770 (± 1.31%)| +|[`is-my-json-valid`](https://github.com/mafintosh/is-my-json-valid)|70%|13835 (± 2.84%)| +|[`jsen`](https://github.com/bugventure/jsen)|57.7%|11411 (± 1.27%)| +|[`schemasaurus`](https://github.com/AlexeyGrishin/schemasaurus)|26%|5145 (± 1.62%)| +|[`themis`](https://github.com/playlyfe/themis)|19.9%|3935 (± 2.69%)| +|[`z-schema`](https://github.com/zaggino/z-schema)|7%|1388 (± 0.84%)| +|[`jsck`](https://github.com/pandastrike/jsck#readme)|3.1%|606 (± 2.84%)| +|[`jsonschema`](https://github.com/tdegrunt/jsonschema#readme)|0.9%|185 (± 1.01%)| +|[`skeemas`](https://github.com/Prestaul/skeemas#readme)|0.8%|154 (± 0.79%)| +|tv4|0.5%|93 (± 0.94%)| +|[`jayschema`](https://github.com/natesilva/jayschema)|0.1%|21 (± 1.14%)| + +That is, RapidJSON is about 1.5x faster than the fastest JavaScript library (ajv). And 1400x faster than the slowest one. + +# Schema violation reporting {#Reporting} + +(Unreleased as of 2017-09-20) + +When validating an instance against a JSON Schema, +it is often desirable to report not only whether the instance is valid, +but also the ways in which it violates the schema. + +The `SchemaValidator` class +collects errors encountered during validation +into a JSON `Value`. +This error object can then be accessed as `validator.GetError()`. + +The structure of the error object is subject to change +in future versions of RapidJSON, +as there is no standard schema for violations. +The details below this point are provisional only. + +## General provisions {#ReportingGeneral} + +Validation of an instance value against a schema +produces an error value. +The error value is always an object. +An empty object `{}` indicates the instance is valid. + +* The name of each member + corresponds to the JSON Schema keyword that is violated. 
+* The value is either an object describing a single violation, + or an array of such objects. + +Each violation object contains two string-valued members +named `instanceRef` and `schemaRef`. +`instanceRef` contains the URI fragment serialization +of a JSON Pointer to the instance subobject +in which the violation was detected. +`schemaRef` contains the URI of the schema +and the fragment serialization of a JSON Pointer +to the subschema that was violated. + +Individual violation objects can contain other keyword-specific members. +These are detailed further. + +For example, validating this instance: + +~~~json +{"numbers": [1, 2, "3", 4, 5]} +~~~ + +against this schema: + +~~~json +{ + "type": "object", + "properties": { + "numbers": {"$ref": "numbers.schema.json"} + } +} +~~~ + +where `numbers.schema.json` refers +(via a suitable `IRemoteSchemaDocumentProvider`) +to this schema: + +~~~json +{ + "type": "array", + "items": {"type": "number"} +} +~~~ + +produces the following error object: + +~~~json +{ + "type": { + "instanceRef": "#/numbers/2", + "schemaRef": "numbers.schema.json#/items", + "expected": ["number"], + "actual": "string" + } +} +~~~ + +## Validation keywords for numbers {#Numbers} + +### multipleOf {#multipleof} + +* `expected`: required number strictly greater than 0. + The value of the `multipleOf` keyword specified in the schema. +* `actual`: required number. + The instance value. + +### maximum {#maximum} + +* `expected`: required number. + The value of the `maximum` keyword specified in the schema. +* `exclusiveMaximum`: optional boolean. + This will be true if the schema specified `"exclusiveMaximum": true`, + and will be omitted otherwise. +* `actual`: required number. + The instance value. + +### minimum {#minimum} + +* `expected`: required number. + The value of the `minimum` keyword specified in the schema. +* `exclusiveMinimum`: optional boolean. 
+ This will be true if the schema specified `"exclusiveMinimum": true`, + and will be omitted otherwise. +* `actual`: required number. + The instance value. + +## Validation keywords for strings {#Strings} + +### maxLength {#maxLength} + +* `expected`: required number greater than or equal to 0. + The value of the `maxLength` keyword specified in the schema. +* `actual`: required string. + The instance value. + +### minLength {#minLength} + +* `expected`: required number greater than or equal to 0. + The value of the `minLength` keyword specified in the schema. +* `actual`: required string. + The instance value. + +### pattern {#pattern} + +* `actual`: required string. + The instance value. + +(The expected pattern is not reported +because the internal representation in `SchemaDocument` +does not store the pattern in original string form.) + +## Validation keywords for arrays {#Arrays} + +### additionalItems {#additionalItems} + +This keyword is reported +when the value of `items` schema keyword is an array, +the value of `additionalItems` is `false`, +and the instance is an array +with more items than specified in the `items` array. + +* `disallowed`: required integer greater than or equal to 0. + The index of the first item that has no corresponding schema. + +### maxItems and minItems {#maxItems-minItems} + +* `expected`: required integer greater than or equal to 0. + The value of `maxItems` (respectively, `minItems`) + specified in the schema. +* `actual`: required integer greater than or equal to 0. + Number of items in the instance array. + +### uniqueItems {#uniqueItems} + +* `duplicates`: required array + whose items are integers greater than or equal to 0. + Indices of items of the instance that are equal. + +(RapidJSON only reports the first two equal items, +for performance reasons.) + +## Validation keywords for objects + +### maxProperties and minProperties {#maxProperties-minProperties} + +* `expected`: required integer greater than or equal to 0. 
+ The value of `maxProperties` (respectively, `minProperties`) + specified in the schema. +* `actual`: required integer greater than or equal to 0. + Number of properties in the instance object. + +### required {#required} + +* `missing`: required array of one or more unique strings. + The names of properties + that are listed in the value of the `required` schema keyword + but not present in the instance object. + +### additionalProperties {#additionalProperties} + +This keyword is reported +when the schema specifies `additionalProperties: false` +and the name of a property of the instance is +neither listed in the `properties` keyword +nor matches any regular expression in the `patternProperties` keyword. + +* `disallowed`: required string. + Name of the offending property of the instance. + +(For performance reasons, +RapidJSON only reports the first such property encountered.) + +### dependencies {#dependencies} + +* `errors`: required object with one or more properties. + Names and values of its properties are described below. + +Recall that JSON Schema Draft 04 supports +*schema dependencies*, +where presence of a named *controlling* property +requires the instance object to be valid against a subschema, +and *property dependencies*, +where presence of a controlling property +requires other *dependent* properties to be also present. + +For a violated schema dependency, +`errors` will contain a property +with the name of the controlling property +and its value will be the error object +produced by validating the instance object +against the dependent schema. + +For a violated property dependency, +`errors` will contain a property +with the name of the controlling property +and its value will be an array of one or more unique strings +listing the missing dependent properties. + +## Validation keywords for any instance type {#AnyTypes} + +### enum {#enum} + +This keyword has no additional properties +beyond `instanceRef` and `schemaRef`. 
+ +* The allowed values are not listed + because `SchemaDocument` does not store them in original form. +* The violating value is not reported + because it might be unwieldy. + +If you need to report these details to your users, +you can access the necessary information +by following `instanceRef` and `schemaRef`. + +### type {#type} + +* `expected`: required array of one or more unique strings, + each of which is one of the seven primitive types + defined by the JSON Schema Draft 04 Core specification. + Lists the types allowed by the `type` schema keyword. +* `actual`: required string, also one of seven primitive types. + The primitive type of the instance. + +### allOf, anyOf, and oneOf {#allOf-anyOf-oneOf} + +* `errors`: required array of at least one object. + There will be as many items as there are subschemas + in the `allOf`, `anyOf` or `oneOf` schema keyword, respectively. + Each item will be the error value + produced by validating the instance + against the corresponding subschema. + +For `allOf`, at least one error value will be non-empty. +For `anyOf`, all error values will be non-empty. +For `oneOf`, either all error values will be non-empty, +or more than one will be empty. + +### not {#not} + +This keyword has no additional properties +apart from `instanceRef` and `schemaRef`. 
diff --git a/src/s3select/rapidjson/doc/schema.zh-cn.md b/src/s3select/rapidjson/doc/schema.zh-cn.md new file mode 100644 index 000000000..c85177f9f --- /dev/null +++ b/src/s3select/rapidjson/doc/schema.zh-cn.md @@ -0,0 +1,237 @@ +# Schema + +(本功能于 v1.1.0 发布) + +JSON Schema 是描述 JSON 格式的一个标准草案。一个 schema 本身也是一个 JSON。使用 JSON Schema 去校验 JSON,可以让你的代码安全地访问 DOM,而无须检查类型或键值是否存在等。这也能确保输出的 JSON 是符合指定的 schema。 + +RapidJSON 实现了一个 [JSON Schema Draft v4](http://json-schema.org/documentation.html) 的校验器。若你不熟悉 JSON Schema,可以参考 [Understanding JSON Schema](http://spacetelescope.github.io/understanding-json-schema/)。 + +[TOC] + +# 基本用法 {#BasicUsage} + +首先,你要把 JSON Schema 解析成 `Document`,再把它编译成一个 `SchemaDocument`。 + +然后,利用该 `SchemaDocument` 创建一个 `SchemaValidator`。它与 `Writer` 相似,都是能够处理 SAX 事件的。因此,你可以用 `document.Accept(validator)` 去校验一个 JSON,然后再获取校验结果。 + +~~~cpp +#include "rapidjson/schema.h" + +// ... + +Document sd; +if (sd.Parse(schemaJson).HasParseError()) { + // 此 schema 不是合法的 JSON + // ... +} +SchemaDocument schema(sd); // 把一个 Document 编译至 SchemaDocument +// 之后不再需要 sd + +Document d; +if (d.Parse(inputJson).HasParseError()) { + // 输入不是一个合法的 JSON + // ... 
+} + +SchemaValidator validator(schema); +if (!d.Accept(validator)) { + // 输入的 JSON 不合乎 schema + // 打印诊断信息 + StringBuffer sb; + validator.GetInvalidSchemaPointer().StringifyUriFragment(sb); + printf("Invalid schema: %s\n", sb.GetString()); + printf("Invalid keyword: %s\n", validator.GetInvalidSchemaKeyword()); + sb.Clear(); + validator.GetInvalidDocumentPointer().StringifyUriFragment(sb); + printf("Invalid document: %s\n", sb.GetString()); +} +~~~ + +一些注意点: + +* 一个 `SchemaDocument` 能被多个 `SchemaValidator` 引用。它不会被 `SchemaValidator` 修改。 +* 可以重复使用一个 `SchemaValidator` 来校验多个文件。在校验其他文件前,须先调用 `validator.Reset()`。 + +# 在解析/生成时进行校验 {#ParsingSerialization} + +与大部分 JSON Schema 校验器有所不同,RapidJSON 提供了一个基于 SAX 的 schema 校验器实现。因此,你可以在输入流解析 JSON 的同时进行校验。若校验器遇到一个与 schema 不符的值,就会立即终止解析。这设计对于解析大型 JSON 文件时特别有用。 + +## DOM 解析 {#DomParsing} + +在使用 DOM 进行解析时,`Document` 除了接收 SAX 事件外,还需做一些准备及结束工作,因此,为了连接 `Reader`、`SchemaValidator` 和 `Document` 要做多一点事情。`SchemaValidatingReader` 是一个辅助类去做那些工作。 + +~~~cpp +#include "rapidjson/filereadstream.h" + +// ... 
+SchemaDocument schema(sd); // 把一个 Document 编译至 SchemaDocument + +// 使用 reader 解析 JSON +FILE* fp = fopen("big.json", "r"); +FileReadStream is(fp, buffer, sizeof(buffer)); + +// 用 reader 解析 JSON,校验它的 SAX 事件,并存储至 d +Document d; +SchemaValidatingReader<kParseDefaultFlags, FileReadStream, UTF8<> > reader(is, schema); +d.Populate(reader); + +if (!reader.GetParseResult()) { + // 不是一个合法的 JSON + // 当 reader.GetParseResult().Code() == kParseErrorTermination, + // 它可能是被以下原因中止: + // (1) 校验器发现 JSON 不合乎 schema;或 + // (2) 输入流有 I/O 错误。 + + // 检查校验结果 + if (!reader.IsValid()) { + // 输入的 JSON 不合乎 schema + // 打印诊断信息 + StringBuffer sb; + reader.GetInvalidSchemaPointer().StringifyUriFragment(sb); + printf("Invalid schema: %s\n", sb.GetString()); + printf("Invalid keyword: %s\n", reader.GetInvalidSchemaKeyword()); + sb.Clear(); + reader.GetInvalidDocumentPointer().StringifyUriFragment(sb); + printf("Invalid document: %s\n", sb.GetString()); + } +} +~~~ + +## SAX 解析 {#SaxParsing} + +使用 SAX 解析时,情况就简单得多。若只需要校验 JSON 而无需进一步处理,那么仅需要: + +~~~ +SchemaValidator validator(schema); +Reader reader; +if (!reader.Parse(stream, validator)) { + if (!validator.IsValid()) { + // ... + } +} +~~~ + +这种方式和 [schemavalidator](example/schemavalidator/schemavalidator.cpp) 例子完全相同。这带来的独特优势是,无论 JSON 多巨大,永远维持低内存用量(内存用量只与 Schema 的复杂度相关)。 + +若你需要进一步处理 SAX 事件,便可使用模板类 `GenericSchemaValidator` 去设置校验器的输出 `Handler`: + +~~~ +MyHandler handler; +GenericSchemaValidator<SchemaDocument, MyHandler> validator(schema, handler); +Reader reader; +if (!reader.Parse(ss, validator)) { + if (!validator.IsValid()) { + // ... + } +} +~~~ + +## 生成 {#Serialization} + +我们也可以在生成(serialization)的时候进行校验。这能确保输出的 JSON 符合一个 JSON Schema。 + +~~~ +StringBuffer sb; +Writer<StringBuffer> writer(sb); +GenericSchemaValidator<SchemaDocument, Writer<StringBuffer> > validator(s, writer); +if (!d.Accept(validator)) { + // Some problem during Accept(), it may be validation or encoding issues. + if (!validator.IsValid()) { + // ... 
+ } +} +~~~ + +当然,如果你的应用仅需要 SAX 风格的生成,那么只需要把 SAX 事件由原来发送到 `Writer`,改为发送到 `SchemaValidator`。 + +# 远程 Schema {#RemoteSchema} + +JSON Schema 支持 [`$ref` 关键字](http://spacetelescope.github.io/understanding-json-schema/structuring.html),它是一个 [JSON pointer](doc/pointer.zh-cn.md) 引用至一个本地(local)或远程(remote) schema。本地指针的首字符是 `#`,而远程指针是一个相对或绝对 URI。例如: + +~~~js +{ "$ref": "definitions.json#/address" } +~~~ + +由于 `SchemaDocument` 并不知道如何处理那些 URI,它需要使用者提供一个 `IRemoteSchemaDocumentProvider` 的实例去处理。 + +~~~ +class MyRemoteSchemaDocumentProvider : public IRemoteSchemaDocumentProvider { +public: + virtual const SchemaDocument* GetRemoteDocument(const char* uri, SizeType length) { + // Resolve the uri and returns a pointer to that schema. + } +}; + +// ... + +MyRemoteSchemaDocumentProvider provider; +SchemaDocument schema(sd, &provider); +~~~ + +# 标准的符合程度 {#Conformance} + +RapidJSON 通过了 [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite) (Json Schema draft 4) 中 263 个测试的 262 个。 + +没通过的测试是 `refRemote.json` 中的 "change resolution scope" - "changed scope ref invalid"。这是由于未实现 `id` schema 关键字及 URI 合并功能。 + +除此以外,关于字符串类型的 `format` schema 关键字也会被忽略,因为标准中并没需求必须实现。 + +## 正则表达式 {#RegEx} + +`pattern` 及 `patternProperties` 这两个 schema 关键字使用了正则表达式去匹配所需的模式。 + +RapidJSON 实现了一个简单的 NFA 正则表达式引擎,并预设使用。它支持以下语法。 + +|语法|描述| +|------|-----------| +|`ab` | 串联 | +|<code>a|b</code> | 交替 | +|`a?` | 零或一次 | +|`a*` | 零或多次 | +|`a+` | 一或多次 | +|`a{3}` | 刚好 3 次 | +|`a{3,}` | 至少 3 次 | +|`a{3,5}`| 3 至 5 次 | +|`(ab)` | 分组 | +|`^a` | 在开始处 | +|`a$` | 在结束处 | +|`.` | 任何字符 | +|`[abc]` | 字符组 | +|`[a-c]` | 字符组范围 | +|`[a-z0-9_]` | 字符组组合 | +|`[^abc]` | 字符组取反 | +|`[^a-c]` | 字符组范围取反 | +|`[\b]` | 退格符 (U+0008) | +|<code>\\|</code>, `\\`, ... 
| 转义字符 | +|`\f` | 馈页 (U+000C) | +|`\n` | 馈行 (U+000A) | +|`\r` | 回车 (U+000D) | +|`\t` | 制表 (U+0009) | +|`\v` | 垂直制表 (U+000B) | + +对于使用 C++11 编译器的使用者,也可使用 `std::regex`,只需定义 `RAPIDJSON_SCHEMA_USE_INTERNALREGEX=0` 及 `RAPIDJSON_SCHEMA_USE_STDREGEX=1`。若你的 schema 无需使用 `pattern` 或 `patternProperties`,可以把两个宏都设为零,以禁用此功能,这样做可节省一些代码体积。 + +# 性能 {#Performance} + +大部分 C++ JSON 库都未支持 JSON Schema。因此我们尝试按照 [json-schema-benchmark](https://github.com/ebdrup/json-schema-benchmark) 去评估 RapidJSON 的 JSON Schema 校验器。该评测测试了 11 个运行在 node.js 上的 JavaScript 库。 + +该评测校验 [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite) 中的测试,当中排除了一些测试套件及个别测试。我们在 [`schematest.cpp`](test/perftest/schematest.cpp) 实现了相同的评测。 + +在 MacBook Pro (2.8 GHz Intel Core i7) 上收集到以下结果。 + +|校验器|相对速度|每秒执行的测试数目| +|---------|:------------:|:----------------------------:| +|RapidJSON|155%|30682| +|[`ajv`](https://github.com/epoberezkin/ajv)|100%|19770 (± 1.31%)| +|[`is-my-json-valid`](https://github.com/mafintosh/is-my-json-valid)|70%|13835 (± 2.84%)| +|[`jsen`](https://github.com/bugventure/jsen)|57.7%|11411 (± 1.27%)| +|[`schemasaurus`](https://github.com/AlexeyGrishin/schemasaurus)|26%|5145 (± 1.62%)| +|[`themis`](https://github.com/playlyfe/themis)|19.9%|3935 (± 2.69%)| +|[`z-schema`](https://github.com/zaggino/z-schema)|7%|1388 (± 0.84%)| +|[`jsck`](https://github.com/pandastrike/jsck#readme)|3.1%|606 (± 2.84%)| +|[`jsonschema`](https://github.com/tdegrunt/jsonschema#readme)|0.9%|185 (± 1.01%)| +|[`skeemas`](https://github.com/Prestaul/skeemas#readme)|0.8%|154 (± 0.79%)| +|tv4|0.5%|93 (± 0.94%)| +|[`jayschema`](https://github.com/natesilva/jayschema)|0.1%|21 (± 1.14%)| + +换言之,RapidJSON 比最快的 JavaScript 库(ajv)快约 1.5x。比最慢的快 1400x。 diff --git a/src/s3select/rapidjson/doc/stream.md b/src/s3select/rapidjson/doc/stream.md new file mode 100644 index 000000000..5d0b0f35e --- /dev/null +++ b/src/s3select/rapidjson/doc/stream.md @@ -0,0 +1,429 @@ +# Stream + +In RapidJSON, `rapidjson::Stream` is a concept for 
reading/writing JSON. Here we'll first show you how to use provided streams. And then see how to create a custom stream. + +[TOC] + +# Memory Streams {#MemoryStreams} + +Memory streams store JSON in memory. + +## StringStream (Input) {#StringStream} + +`StringStream` is the most basic input stream. It represents a complete, read-only JSON stored in memory. It is defined in `rapidjson/rapidjson.h`. + +~~~~~~~~~~cpp +#include "rapidjson/document.h" // will include "rapidjson/rapidjson.h" + +using namespace rapidjson; + +// ... +const char json[] = "[1, 2, 3, 4]"; +StringStream s(json); + +Document d; +d.ParseStream(s); +~~~~~~~~~~ + +Since this is very common usage, `Document::Parse(const char*)` is provided to do exactly the same as above: + +~~~~~~~~~~cpp +// ... +const char json[] = "[1, 2, 3, 4]"; +Document d; +d.Parse(json); +~~~~~~~~~~ + +Note that, `StringStream` is a typedef of `GenericStringStream<UTF8<> >`, users may use other encodings to represent the character set of the stream. + +## StringBuffer (Output) {#StringBuffer} + +`StringBuffer` is a simple output stream. It allocates a memory buffer for writing the whole JSON. Use `GetString()` to obtain the buffer. + +~~~~~~~~~~cpp +#include "rapidjson/stringbuffer.h" +#include <rapidjson/writer.h> + +StringBuffer buffer; +Writer<StringBuffer> writer(buffer); +d.Accept(writer); + +const char* output = buffer.GetString(); +~~~~~~~~~~ + +When the buffer is full, it will increase the capacity automatically. The default capacity is 256 characters (256 bytes for UTF8, 512 bytes for UTF16, etc.). User can provide an allocator and an initial capacity. + +~~~~~~~~~~cpp +StringBuffer buffer1(0, 1024); // Use its allocator, initial size = 1024 +StringBuffer buffer2(allocator, 1024); +~~~~~~~~~~ + +By default, `StringBuffer` will instantiate an internal allocator. + +Similarly, `StringBuffer` is a typedef of `GenericStringBuffer<UTF8<> >`. 
+ +# File Streams {#FileStreams} + +When parsing a JSON from file, you may read the whole JSON into memory and use ``StringStream`` above. + +However, if the JSON is big, or memory is limited, you can use `FileReadStream`. It only read a part of JSON from file into buffer, and then let the part be parsed. If it runs out of characters in the buffer, it will read the next part from file. + +## FileReadStream (Input) {#FileReadStream} + +`FileReadStream` reads the file via a `FILE` pointer. And user need to provide a buffer. + +~~~~~~~~~~cpp +#include "rapidjson/filereadstream.h" +#include <cstdio> + +using namespace rapidjson; + +FILE* fp = fopen("big.json", "rb"); // non-Windows use "r" + +char readBuffer[65536]; +FileReadStream is(fp, readBuffer, sizeof(readBuffer)); + +Document d; +d.ParseStream(is); + +fclose(fp); +~~~~~~~~~~ + +Different from string streams, `FileReadStream` is byte stream. It does not handle encodings. If the file is not UTF-8, the byte stream can be wrapped in a `EncodedInputStream`. We will discuss more about this later in this tutorial. + +Apart from reading file, user can also use `FileReadStream` to read `stdin`. + +## FileWriteStream (Output) {#FileWriteStream} + +`FileWriteStream` is buffered output stream. Its usage is very similar to `FileReadStream`. + +~~~~~~~~~~cpp +#include "rapidjson/filewritestream.h" +#include <rapidjson/writer.h> +#include <cstdio> + +using namespace rapidjson; + +Document d; +d.Parse(json); +// ... + +FILE* fp = fopen("output.json", "wb"); // non-Windows use "w" + +char writeBuffer[65536]; +FileWriteStream os(fp, writeBuffer, sizeof(writeBuffer)); + +Writer<FileWriteStream> writer(os); +d.Accept(writer); + +fclose(fp); +~~~~~~~~~~ + +It can also redirect the output to `stdout`. + +# iostream Wrapper {#iostreamWrapper} + +Due to users' requests, RapidJSON also provides official wrappers for `std::basic_istream` and `std::basic_ostream`. 
However, please note that the performance will be much lower than the other streams above. + +## IStreamWrapper {#IStreamWrapper} + +`IStreamWrapper` wraps any class derived from `std::istream`, such as `std::istringstream`, `std::stringstream`, `std::ifstream`, `std::fstream`, into RapidJSON's input stream. + +~~~cpp +#include <rapidjson/document.h> +#include <rapidjson/istreamwrapper.h> +#include <fstream> + +using namespace rapidjson; +using namespace std; + +ifstream ifs("test.json"); +IStreamWrapper isw(ifs); + +Document d; +d.ParseStream(isw); +~~~ + +For classes derived from `std::wistream`, use `WIStreamWrapper`. + +## OStreamWrapper {#OStreamWrapper} + +Similarly, `OStreamWrapper` wraps any class derived from `std::ostream`, such as `std::ostringstream`, `std::stringstream`, `std::ofstream`, `std::fstream`, into RapidJSON's output stream. + +~~~cpp +#include <rapidjson/document.h> +#include <rapidjson/ostreamwrapper.h> +#include <rapidjson/writer.h> +#include <fstream> + +using namespace rapidjson; +using namespace std; + +Document d; +d.Parse(json); + +// ... + +ofstream ofs("output.json"); +OStreamWrapper osw(ofs); + +Writer<OStreamWrapper> writer(osw); +d.Accept(writer); +~~~ + +For classes derived from `std::wostream`, use `WOStreamWrapper`. + +# Encoded Streams {#EncodedStreams} + +Encoded streams do not contain JSON itself, but they wrap byte streams to provide basic encoding/decoding function. + +As mentioned above, UTF-8 byte streams can be read directly. However, UTF-16 and UTF-32 have endian issue. To handle endian correctly, it needs to convert bytes into characters (e.g. `wchar_t` for UTF-16) while reading, and characters into bytes while writing. + +Besides, it also needs to handle [byte order mark (BOM)](http://en.wikipedia.org/wiki/Byte_order_mark). When reading from a byte stream, it is needed to detect or just consume the BOM if exists. When writing to a byte stream, it can optionally write BOM. 
+ +If the encoding of stream is known during compile-time, you may use `EncodedInputStream` and `EncodedOutputStream`. If the stream can be UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE JSON, and it is only known in runtime, you may use `AutoUTFInputStream` and `AutoUTFOutputStream`. These streams are defined in `rapidjson/encodedstream.h`. + +Note that, these encoded streams can be applied to streams other than file. For example, you may have a file in memory, or a custom byte stream, be wrapped in encoded streams. + +## EncodedInputStream {#EncodedInputStream} + +`EncodedInputStream` has two template parameters. The first one is a `Encoding` class, such as `UTF8`, `UTF16LE`, defined in `rapidjson/encodings.h`. The second one is the class of stream to be wrapped. + +~~~~~~~~~~cpp +#include "rapidjson/document.h" +#include "rapidjson/filereadstream.h" // FileReadStream +#include "rapidjson/encodedstream.h" // EncodedInputStream +#include <cstdio> + +using namespace rapidjson; + +FILE* fp = fopen("utf16le.json", "rb"); // non-Windows use "r" + +char readBuffer[256]; +FileReadStream bis(fp, readBuffer, sizeof(readBuffer)); + +EncodedInputStream<UTF16LE<>, FileReadStream> eis(bis); // wraps bis into eis + +Document d; // Document is GenericDocument<UTF8<> > +d.ParseStream<0, UTF16LE<> >(eis); // Parses UTF-16LE file into UTF-8 in memory + +fclose(fp); +~~~~~~~~~~ + +## EncodedOutputStream {#EncodedOutputStream} + +`EncodedOutputStream` is similar but it has a `bool putBOM` parameter in the constructor, controlling whether to write BOM into output byte stream. + +~~~~~~~~~~cpp +#include "rapidjson/filewritestream.h" // FileWriteStream +#include "rapidjson/encodedstream.h" // EncodedOutputStream +#include <rapidjson/writer.h> +#include <cstdio> + +Document d; // Document is GenericDocument<UTF8<> > +// ... 
+ +FILE* fp = fopen("output_utf32le.json", "wb"); // non-Windows use "w" + +char writeBuffer[256]; +FileWriteStream bos(fp, writeBuffer, sizeof(writeBuffer)); + +typedef EncodedOutputStream<UTF32LE<>, FileWriteStream> OutputStream; +OutputStream eos(bos, true); // Write BOM + +Writer<OutputStream, UTF8<>, UTF32LE<>> writer(eos); +d.Accept(writer); // This generates UTF32-LE file from UTF-8 in memory + +fclose(fp); +~~~~~~~~~~ + +## AutoUTFInputStream {#AutoUTFInputStream} + +Sometimes an application may want to handle all supported JSON encodings. `AutoUTFInputStream` will detect the encoding by BOM first. If BOM is unavailable, it will use characteristics of valid JSON to make detection. If neither method succeeds, it falls back to the UTF type provided in constructor. + +Since the characters (code units) may be 8-bit, 16-bit or 32-bit, `AutoUTFInputStream` requires a character type which can hold at least 32-bit. We may use `unsigned`, as in the template parameter: + +~~~~~~~~~~cpp +#include "rapidjson/document.h" +#include "rapidjson/filereadstream.h" // FileReadStream +#include "rapidjson/encodedstream.h" // AutoUTFInputStream +#include <cstdio> + +using namespace rapidjson; + +FILE* fp = fopen("any.json", "rb"); // non-Windows use "r" + +char readBuffer[256]; +FileReadStream bis(fp, readBuffer, sizeof(readBuffer)); + +AutoUTFInputStream<unsigned, FileReadStream> eis(bis); // wraps bis into eis + +Document d; // Document is GenericDocument<UTF8<> > +d.ParseStream<0, AutoUTF<unsigned> >(eis); // This parses any UTF file into UTF-8 in memory + +fclose(fp); +~~~~~~~~~~ + +When specifying the encoding of stream, use `AutoUTF<CharType>` as in `ParseStream()` above. + +You can obtain the type of UTF via `UTFType GetType()`. And check whether a BOM is found by `HasBOM()` + +## AutoUTFOutputStream {#AutoUTFOutputStream} + +Similarly, to choose encoding for output during runtime, we can use `AutoUTFOutputStream`. This class is not automatic *per se*. 
You need to specify the UTF type and whether to write BOM in runtime. + +~~~~~~~~~~cpp +using namespace rapidjson; + +void WriteJSONFile(FILE* fp, UTFType type, bool putBOM, const Document& d) { + char writeBuffer[256]; + FileWriteStream bos(fp, writeBuffer, sizeof(writeBuffer)); + + typedef AutoUTFOutputStream<unsigned, FileWriteStream> OutputStream; + OutputStream eos(bos, type, putBOM); + + Writer<OutputStream, UTF8<>, AutoUTF<unsigned> > writer(eos); + d.Accept(writer); +} +~~~~~~~~~~ + +`AutoUTFInputStream` and `AutoUTFOutputStream` are more convenient than `EncodedInputStream` and `EncodedOutputStream`. They just incur a little runtime overhead. + +# Custom Stream {#CustomStream} + +In addition to memory/file streams, user can create their own stream classes which fits RapidJSON's API. For example, you may create network stream, stream from compressed file, etc. + +RapidJSON combines different types using templates. A class containing all required interface can be a stream. The Stream interface is defined in comments of `rapidjson/rapidjson.h`: + +~~~~~~~~~~cpp +concept Stream { + typename Ch; //!< Character type of the stream. + + //! Read the current character from stream without moving the read cursor. + Ch Peek() const; + + //! Read the current character from stream and moving the read cursor to next character. + Ch Take(); + + //! Get the current read cursor. + //! \return Number of characters read from start. + size_t Tell(); + + //! Begin writing operation at the current read pointer. + //! \return The begin writer pointer. + Ch* PutBegin(); + + //! Write a character. + void Put(Ch c); + + //! Flush the buffer. + void Flush(); + + //! End the writing operation. + //! \param begin The begin write pointer returned by PutBegin(). + //! \return Number of characters written. + size_t PutEnd(Ch* begin); +} +~~~~~~~~~~ + +For input stream, they must implement `Peek()`, `Take()` and `Tell()`. +For output stream, they must implement `Put()` and `Flush()`. 
+There are two special interfaces, `PutBegin()` and `PutEnd()`, which are only for *in situ* parsing. Normal streams do not implement them. However, even if the interface is not needed for a particular stream, it still needs a dummy implementation, otherwise a compilation error will be generated. + +## Example: istream wrapper {#ExampleIStreamWrapper} + +The following example is a simple wrapper of `std::istream`, which only implements 3 functions. + +~~~~~~~~~~cpp +class MyIStreamWrapper { +public: + typedef char Ch; + + MyIStreamWrapper(std::istream& is) : is_(is) { + } + + Ch Peek() const { // 1 + int c = is_.peek(); + return c == std::char_traits<char>::eof() ? '\0' : (Ch)c; + } + + Ch Take() { // 2 + int c = is_.get(); + return c == std::char_traits<char>::eof() ? '\0' : (Ch)c; + } + + size_t Tell() const { return (size_t)is_.tellg(); } // 3 + + Ch* PutBegin() { assert(false); return 0; } + void Put(Ch) { assert(false); } + void Flush() { assert(false); } + size_t PutEnd(Ch*) { assert(false); return 0; } + +private: + MyIStreamWrapper(const MyIStreamWrapper&); + MyIStreamWrapper& operator=(const MyIStreamWrapper&); + + std::istream& is_; +}; +~~~~~~~~~~ + +User can use it to wrap instances of `std::stringstream`, `std::ifstream`. + +~~~~~~~~~~cpp +const char* json = "[1,2,3,4]"; +std::stringstream ss(json); +MyIStreamWrapper is(ss); + +Document d; +d.ParseStream(is); +~~~~~~~~~~ + +Note that, this implementation may not be as efficient as RapidJSON's memory or file streams, due to internal overheads of the standard library. + +## Example: ostream wrapper {#ExampleOStreamWrapper} + +The following example is a simple wrapper of `std::ostream`, which only implements 2 functions. 
+ +~~~~~~~~~~cpp +class MyOStreamWrapper { +public: + typedef char Ch; + + MyOStreamWrapper(std::ostream& os) : os_(os) { + } + + Ch Peek() const { assert(false); return '\0'; } + Ch Take() { assert(false); return '\0'; } + size_t Tell() const { assert(false); return 0; } + + Ch* PutBegin() { assert(false); return 0; } + void Put(Ch c) { os_.put(c); } // 1 + void Flush() { os_.flush(); } // 2 + size_t PutEnd(Ch*) { assert(false); return 0; } + +private: + MyOStreamWrapper(const MyOStreamWrapper&); + MyOStreamWrapper& operator=(const MyOStreamWrapper&); + + std::ostream& os_; +}; +~~~~~~~~~~ + +User can use it to wrap instances of `std::stringstream`, `std::ofstream`. + +~~~~~~~~~~cpp +Document d; +// ... + +std::stringstream ss; +MyOStreamWrapper os(ss); + +Writer<MyOStreamWrapper> writer(os); +d.Accept(writer); +~~~~~~~~~~ + +Note that, this implementation may not be as efficient as RapidJSON's memory or file streams, due to internal overheads of the standard library. + +# Summary {#Summary} + +This section describes stream classes available in RapidJSON. Memory streams are simple. File stream can reduce the memory required during JSON parsing and generation, if the JSON is stored in file system. Encoded streams convert between byte streams and character streams. Finally, user may create custom streams using a simple interface. diff --git a/src/s3select/rapidjson/doc/stream.zh-cn.md b/src/s3select/rapidjson/doc/stream.zh-cn.md new file mode 100644 index 000000000..6e379bbd6 --- /dev/null +++ b/src/s3select/rapidjson/doc/stream.zh-cn.md @@ -0,0 +1,429 @@ +# 流 + +在 RapidJSON 中,`rapidjson::Stream` 是用于读写 JSON 的概念(概念是指 C++ 的 concept)。在这里我们先介绍如何使用 RapidJSON 提供的各种流。然后再看看如何自行定义流。 + +[TOC] + +# 内存流 {#MemoryStreams} + +内存流把 JSON 存储在内存之中。 + +## StringStream(输入){#StringStream} + +`StringStream` 是最基本的输入流,它表示一个完整的、只读的、存储于内存的 JSON。它在 `rapidjson/rapidjson.h` 中定义。 + +~~~~~~~~~~cpp +#include "rapidjson/document.h" // 会包含 "rapidjson/rapidjson.h" + +using namespace rapidjson; + +// ... 
+const char json[] = "[1, 2, 3, 4]"; +StringStream s(json); + +Document d; +d.ParseStream(s); +~~~~~~~~~~ + +由于这是非常常用的用法,RapidJSON 提供 `Document::Parse(const char*)` 去做完全相同的事情: + +~~~~~~~~~~cpp +// ... +const char json[] = "[1, 2, 3, 4]"; +Document d; +d.Parse(json); +~~~~~~~~~~ + +需要注意,`StringStream` 是 `GenericStringStream<UTF8<> >` 的 typedef,使用者可用其他编码类去代表流所使用的字符集。 + +## StringBuffer(输出){#StringBuffer} + +`StringBuffer` 是一个简单的输出流。它分配一个内存缓冲区,供写入整个 JSON。可使用 `GetString()` 来获取该缓冲区。 + +~~~~~~~~~~cpp +#include "rapidjson/stringbuffer.h" +#include <rapidjson/writer.h> + +StringBuffer buffer; +Writer<StringBuffer> writer(buffer); +d.Accept(writer); + +const char* output = buffer.GetString(); +~~~~~~~~~~ + +当缓冲区满溢,它将自动增加容量。缺省容量是 256 个字符(UTF8 是 256 字节,UTF16 是 512 字节等)。使用者能自行提供分配器及初始容量。 + +~~~~~~~~~~cpp +StringBuffer buffer1(0, 1024); // 使用它的分配器,初始大小 = 1024 +StringBuffer buffer2(allocator, 1024); +~~~~~~~~~~ + +如无设置分配器,`StringBuffer` 会自行实例化一个内部分配器。 + +相似地,`StringBuffer` 是 `GenericStringBuffer<UTF8<> >` 的 typedef。 + +# 文件流 {#FileStreams} + +当要从文件解析一个 JSON,你可以把整个 JSON 读入内存并使用上述的 `StringStream`。 + +然而,若 JSON 很大,或是内存有限,你可以改用 `FileReadStream`。它只会从文件读取一部分至缓冲区,然后让那部分被解析。若缓冲区的字符都被读完,它会再从文件读取下一部分。 + +## FileReadStream(输入) {#FileReadStream} + +`FileReadStream` 通过 `FILE` 指针读取文件。使用者需要提供一个缓冲区。 + +~~~~~~~~~~cpp +#include "rapidjson/filereadstream.h" +#include <cstdio> + +using namespace rapidjson; + +FILE* fp = fopen("big.json", "rb"); // 非 Windows 平台使用 "r" + +char readBuffer[65536]; +FileReadStream is(fp, readBuffer, sizeof(readBuffer)); + +Document d; +d.ParseStream(is); + +fclose(fp); +~~~~~~~~~~ + +与 `StringStreams` 不一样,`FileReadStream` 是一个字节流。它不处理编码。若文件并非 UTF-8 编码,可以把字节流用 `EncodedInputStream` 包装。我们很快会讨论这个问题。 + +除了读取文件,使用者也可以使用 `FileReadStream` 来读取 `stdin`。 + +## FileWriteStream(输出){#FileWriteStream} + +`FileWriteStream` 是一个含缓冲功能的输出流。它的用法与 `FileReadStream` 非常相似。 + +~~~~~~~~~~cpp +#include "rapidjson/filewritestream.h" +#include <rapidjson/writer.h> +#include <cstdio> + +using namespace 
rapidjson; + +Document d; +d.Parse(json); +// ... + +FILE* fp = fopen("output.json", "wb"); // 非 Windows 平台使用 "w" + +char writeBuffer[65536]; +FileWriteStream os(fp, writeBuffer, sizeof(writeBuffer)); + +Writer<FileWriteStream> writer(os); +d.Accept(writer); + +fclose(fp); +~~~~~~~~~~ + +它也可以把输出导向 `stdout`。 + +# iostream 包装类 {#iostreamWrapper} + +基于用户的要求,RapidJSON 提供了正式的 `std::basic_istream` 和 `std::basic_ostream` 包装类。然而,请注意其性能会大大低于以上的其他流。 + +## IStreamWrapper {#IStreamWrapper} + +`IStreamWrapper` 把任何继承自 `std::istream` 的类(如 `std::istringstream`、`std::stringstream`、`std::ifstream`、`std::fstream`)包装成 RapidJSON 的输入流。 + +~~~cpp +#include <rapidjson/document.h> +#include <rapidjson/istreamwrapper.h> +#include <fstream> + +using namespace rapidjson; +using namespace std; + +ifstream ifs("test.json"); +IStreamWrapper isw(ifs); + +Document d; +d.ParseStream(isw); +~~~ + +对于继承自 `std::wistream` 的类,则使用 `WIStreamWrapper`。 + +## OStreamWrapper {#OStreamWrapper} + +相似地,`OStreamWrapper` 把任何继承自 `std::ostream` 的类(如 `std::ostringstream`、`std::stringstream`、`std::ofstream`、`std::fstream`)包装成 RapidJSON 的输出流。 + +~~~cpp +#include <rapidjson/document.h> +#include <rapidjson/ostreamwrapper.h> +#include <rapidjson/writer.h> +#include <fstream> + +using namespace rapidjson; +using namespace std; + +Document d; +d.Parse(json); + +// ... 
+ +ofstream ofs("output.json"); +OStreamWrapper osw(ofs); + +Writer<OStreamWrapper> writer(osw); +d.Accept(writer); +~~~ + +对于继承自 `std::wostream` 的类,则使用 `WOStreamWrapper`。 + +# 编码流 {#EncodedStreams} + +编码流(encoded streams)本身不存储 JSON,它们是通过包装字节流来提供基本的编码/解码功能。 + +如上所述,我们可以直接读入 UTF-8 字节流。然而,UTF-16 及 UTF-32 有字节序(endian)问题。要正确地处理字节序,需要在读取时把字节转换成字符(如对 UTF-16 使用 `wchar_t`),以及在写入时把字符转换为字节。 + +除此以外,我们也需要处理 [字节顺序标记(byte order mark, BOM)](http://en.wikipedia.org/wiki/Byte_order_mark)。当从一个字节流读取时,需要检测 BOM,或者仅仅是把存在的 BOM 消去。当把 JSON 写入字节流时,也可选择写入 BOM。 + +若一个流的编码在编译期已知,你可使用 `EncodedInputStream` 及 `EncodedOutputStream`。若一个流可能存储 UTF-8、UTF-16LE、UTF-16BE、UTF-32LE、UTF-32BE 的 JSON,并且编码只能在运行时得知,你便可以使用 `AutoUTFInputStream` 及 `AutoUTFOutputStream`。这些流定义在 `rapidjson/encodedstream.h`。 + +注意到,这些编码流可以施于文件以外的流。例如,你可以用编码流包装内存中的文件或自定义的字节流。 + +## EncodedInputStream {#EncodedInputStream} + +`EncodedInputStream` 含两个模板参数。第一个是 `Encoding` 类型,例如定义于 `rapidjson/encodings.h` 的 `UTF8`、`UTF16LE`。第二个参数是被包装的流的类型。 + +~~~~~~~~~~cpp +#include "rapidjson/document.h" +#include "rapidjson/filereadstream.h" // FileReadStream +#include "rapidjson/encodedstream.h" // EncodedInputStream +#include <cstdio> + +using namespace rapidjson; + +FILE* fp = fopen("utf16le.json", "rb"); // 非 Windows 平台使用 "r" + +char readBuffer[256]; +FileReadStream bis(fp, readBuffer, sizeof(readBuffer)); + +EncodedInputStream<UTF16LE<>, FileReadStream> eis(bis); // 用 eis 包装 bis + +Document d; // Document 为 GenericDocument<UTF8<> > +d.ParseStream<0, UTF16LE<> >(eis); // 把 UTF-16LE 文件解析至内存中的 UTF-8 + +fclose(fp); +~~~~~~~~~~ + +## EncodedOutputStream {#EncodedOutputStream} + +`EncodedOutputStream` 也是相似的,但它的构造函数有一个 `bool putBOM` 参数,用于控制是否在输出字节流写入 BOM。 + +~~~~~~~~~~cpp +#include "rapidjson/filewritestream.h" // FileWriteStream +#include "rapidjson/encodedstream.h" // EncodedOutputStream +#include <rapidjson/writer.h> +#include <cstdio> + +Document d; // Document 为 GenericDocument<UTF8<> > +// ... 
+ +FILE* fp = fopen("output_utf32le.json", "wb"); // 非 Windows 平台使用 "w" + +char writeBuffer[256]; +FileWriteStream bos(fp, writeBuffer, sizeof(writeBuffer)); + +typedef EncodedOutputStream<UTF32LE<>, FileWriteStream> OutputStream; +OutputStream eos(bos, true); // 写入 BOM + +Writer<OutputStream, UTF8<>, UTF32LE<>> writer(eos); +d.Accept(writer); // 这里从内存的 UTF-8 生成 UTF32-LE 文件 + +fclose(fp); +~~~~~~~~~~ + +## AutoUTFInputStream {#AutoUTFInputStream} + +有时候,应用软件可能需要处理所有可支持的 JSON 编码。`AutoUTFInputStream` 会先使用 BOM 来检测编码。若 BOM 不存在,它便会使用合法 JSON 的特性来检测。若两种方法都失败,它就会倒退至构造函数提供的 UTF 类型。 + +由于字符(编码单元/code unit)可能是 8 位、16 位或 32 位,`AutoUTFInputStream` 需要一个能至少储存 32 位的字符类型。我们可以使用 `unsigned` 作为模板参数: + +~~~~~~~~~~cpp +#include "rapidjson/document.h" +#include "rapidjson/filereadstream.h" // FileReadStream +#include "rapidjson/encodedstream.h" // AutoUTFInputStream +#include <cstdio> + +using namespace rapidjson; + +FILE* fp = fopen("any.json", "rb"); // 非 Windows 平台使用 "r" + +char readBuffer[256]; +FileReadStream bis(fp, readBuffer, sizeof(readBuffer)); + +AutoUTFInputStream<unsigned, FileReadStream> eis(bis); // 用 eis 包装 bis + +Document d; // Document 为 GenericDocument<UTF8<> > +d.ParseStream<0, AutoUTF<unsigned> >(eis); // 把任何 UTF 编码的文件解析至内存中的 UTF-8 + +fclose(fp); +~~~~~~~~~~ + +当要指定流的编码,可使用上面例子中 `ParseStream()` 的参数 `AutoUTF<CharType>`。 + +你可以使用 `UTFType GetType()` 去获取 UTF 类型,并且用 `HasBOM()` 检测输入流是否含有 BOM。 + +## AutoUTFOutputStream {#AutoUTFOutputStream} + +相似地,要在运行时选择输出的编码,我们可使用 `AutoUTFOutputStream`。这个类本身并非「自动」。你需要在运行时指定 UTF 类型,以及是否写入 BOM。 + +~~~~~~~~~~cpp +using namespace rapidjson; + +void WriteJSONFile(FILE* fp, UTFType type, bool putBOM, const Document& d) { + char writeBuffer[256]; + FileWriteStream bos(fp, writeBuffer, sizeof(writeBuffer)); + + typedef AutoUTFOutputStream<unsigned, FileWriteStream> OutputStream; + OutputStream eos(bos, type, putBOM); + + Writer<OutputStream, UTF8<>, AutoUTF<unsigned> > writer(eos); + d.Accept(writer); +} +~~~~~~~~~~ + 
+`AutoUTFInputStream`/`AutoUTFOutputStream` 是比 `EncodedInputStream`/`EncodedOutputStream` 方便。但前者会产生一点运行期额外开销。 + +# 自定义流 {#CustomStream} + +除了内存/文件流,使用者可创建自行定义适配 RapidJSON API 的流类。例如,你可以创建网络流、从压缩文件读取的流等等。 + +RapidJSON 利用模板结合不同的类型。只要一个类包含所有所需的接口,就可以作为一个流。流的接口定义在 `rapidjson/rapidjson.h` 的注释里: + +~~~~~~~~~~cpp +concept Stream { + typename Ch; //!< 流的字符类型 + + //! 从流读取当前字符,不移动读取指针(read cursor) + Ch Peek() const; + + //! 从流读取当前字符,移动读取指针至下一字符。 + Ch Take(); + + //! 获取读取指针。 + //! \return 从开始以来所读过的字符数量。 + size_t Tell(); + + //! 从当前读取指针开始写入操作。 + //! \return 返回开始写入的指针。 + Ch* PutBegin(); + + //! 写入一个字符。 + void Put(Ch c); + + //! 清空缓冲区。 + void Flush(); + + //! 完成写入操作。 + //! \param begin PutBegin() 返回的开始写入指针。 + //! \return 已写入的字符数量。 + size_t PutEnd(Ch* begin); +} +~~~~~~~~~~ + +输入流必须实现 `Peek()`、`Take()` 及 `Tell()`。 +输出流必须实现 `Put()` 及 `Flush()`。 +`PutBegin()` 及 `PutEnd()` 是特殊的接口,仅用于原位(*in situ*)解析。一般的流不需实现它们。然而,即使接口不需用于某些流,仍然需要提供空实现,否则会产生编译错误。 + +## 例子:istream 的包装类 {#ExampleIStreamWrapper} + +以下的简单例子是 `std::istream` 的包装类,它只需实现 3 个函数。 + +~~~~~~~~~~cpp +class MyIStreamWrapper { +public: + typedef char Ch; + + MyIStreamWrapper(std::istream& is) : is_(is) { + } + + Ch Peek() const { // 1 + int c = is_.peek(); + return c == std::char_traits<char>::eof() ? '\0' : (Ch)c; + } + + Ch Take() { // 2 + int c = is_.get(); + return c == std::char_traits<char>::eof() ? 
'\0' : (Ch)c; + } + + size_t Tell() const { return (size_t)is_.tellg(); } // 3 + + Ch* PutBegin() { assert(false); return 0; } + void Put(Ch) { assert(false); } + void Flush() { assert(false); } + size_t PutEnd(Ch*) { assert(false); return 0; } + +private: + MyIStreamWrapper(const MyIStreamWrapper&); + MyIStreamWrapper& operator=(const MyIStreamWrapper&); + + std::istream& is_; +}; +~~~~~~~~~~ + +使用者能用它来包装 `std::stringstream`、`std::ifstream` 的实例。 + +~~~~~~~~~~cpp +const char* json = "[1,2,3,4]"; +std::stringstream ss(json); +MyIStreamWrapper is(ss); + +Document d; +d.ParseStream(is); +~~~~~~~~~~ + +但要注意,由于标准库的内部开销问题,此实现的性能可能不如 RapidJSON 的内存/文件流。 + +## 例子:ostream 的包装类 {#ExampleOStreamWrapper} + +以下的例子是 `std::ostream` 的包装类,它只需实现 2 个函数。 + +~~~~~~~~~~cpp +class MyOStreamWrapper { +public: + typedef char Ch; + + MyOStreamWrapper(std::ostream& os) : os_(os) { + } + + Ch Peek() const { assert(false); return '\0'; } + Ch Take() { assert(false); return '\0'; } + size_t Tell() const { assert(false); return 0; } + + Ch* PutBegin() { assert(false); return 0; } + void Put(Ch c) { os_.put(c); } // 1 + void Flush() { os_.flush(); } // 2 + size_t PutEnd(Ch*) { assert(false); return 0; } + +private: + MyOStreamWrapper(const MyOStreamWrapper&); + MyOStreamWrapper& operator=(const MyOStreamWrapper&); + + std::ostream& os_; +}; +~~~~~~~~~~ + +使用者能用它来包装 `std::stringstream`、`std::ofstream` 的实例。 + +~~~~~~~~~~cpp +Document d; +// ... 
+ +std::stringstream ss; +MyOStreamWrapper os(ss); + +Writer<MyOStreamWrapper> writer(os); +d.Accept(writer); +~~~~~~~~~~ + +但要注意,由于标准库的内部开销问题,此实现的性能可能不如 RapidJSON 的内存/文件流。 + +# 总结 {#Summary} + +本节描述了 RapidJSON 提供的各种流的类。内存流很简单。若 JSON 存储在文件中,文件流可减少 JSON 解析及生成所需的内存量。编码流在字节流和字符流之间作转换。最后,使用者可使用一个简单接口创建自定义的流。 diff --git a/src/s3select/rapidjson/doc/tutorial.md b/src/s3select/rapidjson/doc/tutorial.md new file mode 100644 index 000000000..a86aafdfc --- /dev/null +++ b/src/s3select/rapidjson/doc/tutorial.md @@ -0,0 +1,536 @@ +# Tutorial + +This tutorial introduces the basics of the Document Object Model (DOM) API. + +As shown in [Usage at a glance](@ref index), JSON can be parsed into a DOM, and then the DOM can be queried and modified easily, and finally be converted back to JSON. + +[TOC] + +# Value & Document {#ValueDocument} + +Each JSON value is stored in a type called `Value`. A `Document`, representing the DOM, contains the root `Value` of the DOM tree. All public types and functions of RapidJSON are defined in the `rapidjson` namespace. + +# Query Value {#QueryValue} + +In this section, we will use excerpts from `example/tutorial/tutorial.cpp`. + +Assume we have the following JSON stored in a C string (`const char* json`): +~~~~~~~~~~js +{ + "hello": "world", + "t": true , + "f": false, + "n": null, + "i": 123, + "pi": 3.1416, + "a": [1, 2, 3, 4] +} +~~~~~~~~~~ + +Parse it into a `Document`: +~~~~~~~~~~cpp +#include "rapidjson/document.h" + +using namespace rapidjson; + +// ... +Document document; +document.Parse(json); +~~~~~~~~~~ + +The JSON is now parsed into `document` as a *DOM tree*: + +![DOM in the tutorial](diagram/tutorial.png) + +Since the update to RFC 7159, the root of a conforming JSON document can be any JSON value. In earlier RFC 4627, only objects or arrays were allowed as root values. In this case, the root is an object. +~~~~~~~~~~cpp +assert(document.IsObject()); +~~~~~~~~~~ + +Let's query whether a `"hello"` member exists in the root object. 
Since a `Value` can contain different types of value, we may need to verify its type and use suitable API to obtain the value. In this example, `"hello"` member associates with a JSON string. +~~~~~~~~~~cpp +assert(document.HasMember("hello")); +assert(document["hello"].IsString()); +printf("hello = %s\n", document["hello"].GetString()); +~~~~~~~~~~ + +~~~~~~~~~~ +hello = world +~~~~~~~~~~ + +JSON true/false values are represented as `bool`. +~~~~~~~~~~cpp +assert(document["t"].IsBool()); +printf("t = %s\n", document["t"].GetBool() ? "true" : "false"); +~~~~~~~~~~ + +~~~~~~~~~~ +t = true +~~~~~~~~~~ + +JSON null can be queried with `IsNull()`. +~~~~~~~~~~cpp +printf("n = %s\n", document["n"].IsNull() ? "null" : "?"); +~~~~~~~~~~ + +~~~~~~~~~~ +n = null +~~~~~~~~~~ + +JSON number type represents all numeric values. However, C++ needs more specific type for manipulation. + +~~~~~~~~~~cpp +assert(document["i"].IsNumber()); + +// In this case, IsUint()/IsInt64()/IsUint64() also return true. +assert(document["i"].IsInt()); +printf("i = %d\n", document["i"].GetInt()); +// Alternatively (int)document["i"] + +assert(document["pi"].IsNumber()); +assert(document["pi"].IsDouble()); +printf("pi = %g\n", document["pi"].GetDouble()); +~~~~~~~~~~ + +~~~~~~~~~~ +i = 123 +pi = 3.1416 +~~~~~~~~~~ + +JSON array contains a number of elements. +~~~~~~~~~~cpp +// Using a reference for consecutive access is handy and faster. +const Value& a = document["a"]; +assert(a.IsArray()); +for (SizeType i = 0; i < a.Size(); i++) // Uses SizeType instead of size_t + printf("a[%d] = %d\n", i, a[i].GetInt()); +~~~~~~~~~~ + +~~~~~~~~~~ +a[0] = 1 +a[1] = 2 +a[2] = 3 +a[3] = 4 +~~~~~~~~~~ + +Note that, RapidJSON does not automatically convert values between JSON types. For example, if a value is a string, it is invalid to call `GetInt()`. In debug mode it will fail on assertion. In release mode, the behavior is undefined. + +In the following sections we discuss details about querying individual types. 
+ +## Query Array {#QueryArray} + +By default, `SizeType` is typedef of `unsigned`. In most systems, an array is limited to store up to 2^32-1 elements. + +You may access the elements in an array by integer literal, for example, `a[0]`, `a[1]`, `a[2]`. + +Array is similar to `std::vector`: instead of using indices, you may also use iterator to access all the elements. +~~~~~~~~~~cpp +for (Value::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr) + printf("%d ", itr->GetInt()); +~~~~~~~~~~ + +And other familiar query functions: +* `SizeType Capacity() const` +* `bool Empty() const` + +### Range-based For Loop (New in v1.1.0) + +When C++11 is enabled, you can use range-based for loop to access all elements in an array. + +~~~~~~~~~~cpp +for (auto& v : a.GetArray()) + printf("%d ", v.GetInt()); +~~~~~~~~~~ + +## Query Object {#QueryObject} + +Similar to Array, we can access all object members by iterator: + +~~~~~~~~~~cpp +static const char* kTypeNames[] = + { "Null", "False", "True", "Object", "Array", "String", "Number" }; + +for (Value::ConstMemberIterator itr = document.MemberBegin(); + itr != document.MemberEnd(); ++itr) +{ + printf("Type of member %s is %s\n", + itr->name.GetString(), kTypeNames[itr->value.GetType()]); +} +~~~~~~~~~~ + +~~~~~~~~~~ +Type of member hello is String +Type of member t is True +Type of member f is False +Type of member n is Null +Type of member i is Number +Type of member pi is Number +Type of member a is Array +~~~~~~~~~~ + +Note that, when `operator[](const char*)` cannot find the member, it will fail on assertion. + +If we are unsure whether a member exists, we need to call `HasMember()` before calling `operator[](const char*)`. However, this incurs two lookup. 
A better way is to call `FindMember()`, which can check the existence of a member and obtain its value at once: + +~~~~~~~~~~cpp +Value::ConstMemberIterator itr = document.FindMember("hello"); +if (itr != document.MemberEnd()) + printf("%s\n", itr->value.GetString()); +~~~~~~~~~~ + +### Range-based For Loop (New in v1.1.0) + +When C++11 is enabled, you can use range-based for loop to access all members in an object. + +~~~~~~~~~~cpp +for (auto& m : document.GetObject()) + printf("Type of member %s is %s\n", + m.name.GetString(), kTypeNames[m.value.GetType()]); +~~~~~~~~~~ + +## Querying Number {#QueryNumber} + +JSON provides a single numerical type called Number. Number can be an integer or a real number. RFC 4627 says the range of Number is specified by the parser implementation. + +As C++ provides several integer and floating point number types, the DOM tries to handle these with the widest possible range and good performance. + +When a Number is parsed, it is stored in the DOM as one of the following types: + +Type | Description +-----------|--------------------------------------- +`unsigned` | 32-bit unsigned integer +`int` | 32-bit signed integer +`uint64_t` | 64-bit unsigned integer +`int64_t` | 64-bit signed integer +`double` | 64-bit double precision floating point + +When querying a number, you can check whether the number can be obtained as the target type: + +Checking | Obtaining +------------------|--------------------- +`bool IsNumber()` | N/A +`bool IsUint()` | `unsigned GetUint()` +`bool IsInt()` | `int GetInt()` +`bool IsUint64()` | `uint64_t GetUint64()` +`bool IsInt64()` | `int64_t GetInt64()` +`bool IsDouble()` | `double GetDouble()` + +Note that, an integer value may be obtained in various ways without conversion. For example, A value `x` containing 123 will make `x.IsInt() == x.IsUint() == x.IsInt64() == x.IsUint64() == true`. But a value `y` containing -3000000000 will only make `x.IsInt64() == true`. 
When obtaining numeric values, `GetDouble()` converts the internal integer representation to a `double`. Note that `int` and `unsigned` can be safely converted to `double`, but `int64_t` and `uint64_t` may lose precision (since the mantissa of a `double` has only 52 bits).
+~~~~~~~~~~ + +Array/object compares their elements/members in order. They are equal if and only if their whole subtrees are equal. + +Note that, currently if an object contains duplicated named member, comparing equality with any object is always `false`. + +# Create/Modify Values {#CreateModifyValues} + +There are several ways to create values. After a DOM tree is created and/or modified, it can be saved as JSON again using `Writer`. + +## Change Value Type {#ChangeValueType} +When creating a `Value` or `Document` by default constructor, its type is Null. To change its type, call `SetXXX()` or assignment operator, for example: + +~~~~~~~~~~cpp +Document d; // Null +d.SetObject(); + +Value v; // Null +v.SetInt(10); +v = 10; // Shortcut, same as above +~~~~~~~~~~ + +### Overloaded Constructors +There are also overloaded constructors for several types: + +~~~~~~~~~~cpp +Value b(true); // calls Value(bool) +Value i(-123); // calls Value(int) +Value u(123u); // calls Value(unsigned) +Value d(1.5); // calls Value(double) +~~~~~~~~~~ + +To create empty object or array, you may use `SetObject()`/`SetArray()` after default constructor, or using the `Value(Type)` in one call: + +~~~~~~~~~~cpp +Value o(kObjectType); +Value a(kArrayType); +~~~~~~~~~~ + +## Move Semantics {#MoveSemantics} + +A very special decision during design of RapidJSON is that, assignment of value does not copy the source value to destination value. Instead, the value from source is moved to the destination. For example, + +~~~~~~~~~~cpp +Value a(123); +Value b(456); +a = b; // b becomes a Null value, a becomes number 456. +~~~~~~~~~~ + +![Assignment with move semantics.](diagram/move1.png) + +Why? What is the advantage of this semantics? + +The simple answer is performance. For fixed size JSON types (Number, True, False, Null), copying them is fast and easy. However, For variable size JSON types (String, Array, Object), copying them will incur a lot of overheads. 
And these overheads are often unnoticed. Especially when we need to create temporary object, copy it to another variable, and then destruct it. + +For example, if normal *copy* semantics was used: + +~~~~~~~~~~cpp +Document d; +Value o(kObjectType); +{ + Value contacts(kArrayType); + // adding elements to contacts array. + // ... + o.AddMember("contacts", contacts, d.GetAllocator()); // deep clone contacts (may be with lots of allocations) + // destruct contacts. +} +~~~~~~~~~~ + +![Copy semantics makes a lots of copy operations.](diagram/move2.png) + +The object `o` needs to allocate a buffer of same size as contacts, makes a deep clone of it, and then finally contacts is destructed. This will incur a lot of unnecessary allocations/deallocations and memory copying. + +There are solutions to prevent actual copying these data, such as reference counting and garbage collection(GC). + +To make RapidJSON simple and fast, we chose to use *move* semantics for assignment. It is similar to `std::auto_ptr` which transfer ownership during assignment. Move is much faster and simpler, it just destructs the original value, `memcpy()` the source to destination, and finally sets the source as Null type. + +So, with move semantics, the above example becomes: + +~~~~~~~~~~cpp +Document d; +Value o(kObjectType); +{ + Value contacts(kArrayType); + // adding elements to contacts array. + o.AddMember("contacts", contacts, d.GetAllocator()); // just memcpy() of contacts itself to the value of new member (16 bytes) + // contacts became Null here. Its destruction is trivial. +} +~~~~~~~~~~ + +![Move semantics makes no copying.](diagram/move3.png) + +This is called move assignment operator in C++11. As RapidJSON supports C++03, it adopts move semantics using assignment operator, and all other modifying function like `AddMember()`, `PushBack()`. 
+ +### Move semantics and temporary values {#TemporaryValues} + +Sometimes, it is convenient to construct a Value in place, before passing it to one of the "moving" functions, like `PushBack()` or `AddMember()`. As temporary objects can't be converted to proper Value references, the convenience function `Move()` is available: + +~~~~~~~~~~cpp +Value a(kArrayType); +Document::AllocatorType& allocator = document.GetAllocator(); +// a.PushBack(Value(42), allocator); // will not compile +a.PushBack(Value().SetInt(42), allocator); // fluent API +a.PushBack(Value(42).Move(), allocator); // same as above +~~~~~~~~~~ + +## Create String {#CreateString} +RapidJSON provides two strategies for storing string. + +1. copy-string: allocates a buffer, and then copy the source data into it. +2. const-string: simply store a pointer of string. + +Copy-string is always safe because it owns a copy of the data. Const-string can be used for storing a string literal, and for in-situ parsing which will be mentioned in the DOM section. + +To make memory allocation customizable, RapidJSON requires users to pass an instance of allocator, whenever an operation may require allocation. This design is needed to prevent storing an allocator (or Document) pointer per Value. + +Therefore, when we assign a copy-string, we call this overloaded `SetString()` with allocator: + +~~~~~~~~~~cpp +Document document; +Value author; +char buffer[10]; +int len = sprintf(buffer, "%s %s", "Milo", "Yip"); // dynamically created string. +author.SetString(buffer, len, document.GetAllocator()); +memset(buffer, 0, sizeof(buffer)); +// author.GetString() still contains "Milo Yip" after buffer is destroyed +~~~~~~~~~~ + +In this example, we get the allocator from a `Document` instance. This is a common idiom when using RapidJSON. But you may use other instances of allocator. + +Besides, the above `SetString()` requires length. This can handle null characters within a string. 
There is another `SetString()` overload without the length parameter; it assumes the input is null-terminated and calls a `strlen()`-like function to obtain the length.
+ +Here is an example of `PushBack()`: + +~~~~~~~~~~cpp +Value a(kArrayType); +Document::AllocatorType& allocator = document.GetAllocator(); + +for (int i = 5; i <= 10; i++) + a.PushBack(i, allocator); // allocator is needed for potential realloc(). + +// Fluent interface +a.PushBack("Lua", allocator).PushBack("Mio", allocator); +~~~~~~~~~~ + +This API differs from STL in that `PushBack()`/`PopBack()` return the array reference itself. This is called _fluent interface_. + +If you want to add a non-constant string or a string without sufficient lifetime (see [Create String](#CreateString)) to the array, you need to create a string Value by using the copy-string API. To avoid the need for an intermediate variable, you can use a [temporary value](#TemporaryValues) in place: + +~~~~~~~~~~cpp +// in-place Value parameter +contact.PushBack(Value("copy", document.GetAllocator()).Move(), // copy string + document.GetAllocator()); + +// explicit parameters +Value val("key", document.GetAllocator()); // copy string +contact.PushBack(val, document.GetAllocator()); +~~~~~~~~~~ + +## Modify Object {#ModifyObject} +The Object class is a collection of key-value pairs (members). Each key must be a string value. To modify an object, either add or remove members. The following API is for adding members: + +* `Value& AddMember(Value&, Value&, Allocator& allocator)` +* `Value& AddMember(StringRefType, Value&, Allocator&)` +* `template <typename T> Value& AddMember(StringRefType, T value, Allocator&)` + +Here is an example. + +~~~~~~~~~~cpp +Value contact(kObject); +contact.AddMember("name", "Milo", document.GetAllocator()); +contact.AddMember("married", true, document.GetAllocator()); +~~~~~~~~~~ + +The name parameter with `StringRefType` is similar to the interface of the `SetString` function for string values. These overloads are used to avoid the need for copying the `name` string, since constant key names are very common in JSON objects. 
+ +If you need to create a name from a non-constant string or a string without sufficient lifetime (see [Create String](#CreateString)), you need to create a string Value by using the copy-string API. To avoid the need for an intermediate variable, you can use a [temporary value](#TemporaryValues) in place: + +~~~~~~~~~~cpp +// in-place Value parameter +contact.AddMember(Value("copy", document.GetAllocator()).Move(), // copy string + Value().Move(), // null value + document.GetAllocator()); + +// explicit parameters +Value key("key", document.GetAllocator()); // copy string name +Value val(42); // some value +contact.AddMember(key, val, document.GetAllocator()); +~~~~~~~~~~ + +For removing members, there are several choices: + +* `bool RemoveMember(const Ch* name)`: Remove a member by search its name (linear time complexity). +* `bool RemoveMember(const Value& name)`: same as above but `name` is a Value. +* `MemberIterator RemoveMember(MemberIterator)`: Remove a member by iterator (_constant_ time complexity). +* `MemberIterator EraseMember(MemberIterator)`: similar to the above but it preserves order of members (linear time complexity). +* `MemberIterator EraseMember(MemberIterator first, MemberIterator last)`: remove a range of members, preserves order (linear time complexity). + +`MemberIterator RemoveMember(MemberIterator)` uses a "move-last" trick to achieve constant time complexity. Basically the member at iterator is destructed, and then the last element is moved to that position. So the order of the remaining members are changed. + +## Deep Copy Value {#DeepCopyValue} +If we really need to copy a DOM tree, we can use two APIs for deep copy: constructor with allocator, and `CopyFrom()`. 
+ +~~~~~~~~~~cpp +Document d; +Document::AllocatorType& a = d.GetAllocator(); +Value v1("foo"); +// Value v2(v1); // not allowed + +Value v2(v1, a); // make a copy +assert(v1.IsString()); // v1 untouched +d.SetArray().PushBack(v1, a).PushBack(v2, a); +assert(v1.IsNull() && v2.IsNull()); // both moved to d + +v2.CopyFrom(d, a); // copy whole document to v2 +assert(d.IsArray() && d.Size() == 2); // d untouched +v1.SetObject().AddMember("array", v2, a); +d.PushBack(v1, a); +~~~~~~~~~~ + +## Swap Values {#SwapValues} + +`Swap()` is also provided. + +~~~~~~~~~~cpp +Value a(123); +Value b("Hello"); +a.Swap(b); +assert(a.IsString()); +assert(b.IsInt()); +~~~~~~~~~~ + +Swapping two DOM trees is fast (constant time), despite the complexity of the trees. + +# What's next {#WhatsNext} + +This tutorial shows the basics of DOM tree query and manipulation. There are several important concepts in RapidJSON: + +1. [Streams](doc/stream.md) are channels for reading/writing JSON, which can be a in-memory string, or file stream, etc. User can also create their streams. +2. [Encoding](doc/encoding.md) defines which character encoding is used in streams and memory. RapidJSON also provide Unicode conversion/validation internally. +3. [DOM](doc/dom.md)'s basics are already covered in this tutorial. Uncover more advanced features such as *in situ* parsing, other parsing options and advanced usages. +4. [SAX](doc/sax.md) is the foundation of parsing/generating facility in RapidJSON. Learn how to use `Reader`/`Writer` to implement even faster applications. Also try `PrettyWriter` to format the JSON. +5. [Performance](doc/performance.md) shows some in-house and third-party benchmarks. +6. [Internals](doc/internals.md) describes some internal designs and techniques of RapidJSON. + +You may also refer to the [FAQ](doc/faq.md), API documentation, examples and unit tests. 
diff --git a/src/s3select/rapidjson/doc/tutorial.zh-cn.md b/src/s3select/rapidjson/doc/tutorial.zh-cn.md new file mode 100644 index 000000000..8b24ff11f --- /dev/null +++ b/src/s3select/rapidjson/doc/tutorial.zh-cn.md @@ -0,0 +1,535 @@ +# 教程 + +本教程简介文件对象模型(Document Object Model, DOM)API。 + +如 [用法一览](../readme.zh-cn.md#用法一览) 中所示,可以解析一个 JSON 至 DOM,然后就可以轻松查询及修改 DOM,并最终转换回 JSON。 + +[TOC] + +# Value 及 Document {#ValueDocument} + +每个 JSON 值都储存为 `Value` 类,而 `Document` 类则表示整个 DOM,它存储了一个 DOM 树的根 `Value`。RapidJSON 的所有公开类型及函数都在 `rapidjson` 命名空间中。 + +# 查询 Value {#QueryValue} + +在本节中,我们会使用到 `example/tutorial/tutorial.cpp` 中的代码片段。 + +假设我们用 C 语言的字符串储存一个 JSON(`const char* json`): +~~~~~~~~~~js +{ + "hello": "world", + "t": true , + "f": false, + "n": null, + "i": 123, + "pi": 3.1416, + "a": [1, 2, 3, 4] +} +~~~~~~~~~~ + +把它解析至一个 `Document`: +~~~~~~~~~~cpp +#include "rapidjson/document.h" + +using namespace rapidjson; + +// ... +Document document; +document.Parse(json); +~~~~~~~~~~ + +那么现在该 JSON 就会被解析至 `document` 中,成为一棵 *DOM 树 *: + +![教程中的 DOM](diagram/tutorial.png) + +自从 RFC 7159 作出更新,合法 JSON 文件的根可以是任何类型的 JSON 值。而在较早的 RFC 4627 中,根值只允许是 Object 或 Array。而在上述例子中,根是一个 Object。 +~~~~~~~~~~cpp +assert(document.IsObject()); +~~~~~~~~~~ + +让我们查询一下根 Object 中有没有 `"hello"` 成员。由于一个 `Value` 可包含不同类型的值,我们可能需要验证它的类型,并使用合适的 API 去获取其值。在此例中,`"hello"` 成员关联到一个 JSON String。 +~~~~~~~~~~cpp +assert(document.HasMember("hello")); +assert(document["hello"].IsString()); +printf("hello = %s\n", document["hello"].GetString()); +~~~~~~~~~~ + +~~~~~~~~~~ +world +~~~~~~~~~~ + +JSON True/False 值是以 `bool` 表示的。 +~~~~~~~~~~cpp +assert(document["t"].IsBool()); +printf("t = %s\n", document["t"].GetBool() ? "true" : "false"); +~~~~~~~~~~ + +~~~~~~~~~~ +true +~~~~~~~~~~ + +JSON Null 值可用 `IsNull()` 查询。 +~~~~~~~~~~cpp +printf("n = %s\n", document["n"].IsNull() ? 
"null" : "?"); +~~~~~~~~~~ + +~~~~~~~~~~ +null +~~~~~~~~~~ + +JSON Number 类型表示所有数值。然而,C++ 需要使用更专门的类型。 + +~~~~~~~~~~cpp +assert(document["i"].IsNumber()); + +// 在此情况下,IsUint()/IsInt64()/IsUint64() 也会返回 true +assert(document["i"].IsInt()); +printf("i = %d\n", document["i"].GetInt()); +// 另一种用法: (int)document["i"] + +assert(document["pi"].IsNumber()); +assert(document["pi"].IsDouble()); +printf("pi = %g\n", document["pi"].GetDouble()); +~~~~~~~~~~ + +~~~~~~~~~~ +i = 123 +pi = 3.1416 +~~~~~~~~~~ + +JSON Array 包含一些元素。 +~~~~~~~~~~cpp +// 使用引用来连续访问,方便之余还更高效。 +const Value& a = document["a"]; +assert(a.IsArray()); +for (SizeType i = 0; i < a.Size(); i++) // 使用 SizeType 而不是 size_t + printf("a[%d] = %d\n", i, a[i].GetInt()); +~~~~~~~~~~ + +~~~~~~~~~~ +a[0] = 1 +a[1] = 2 +a[2] = 3 +a[3] = 4 +~~~~~~~~~~ + +注意,RapidJSON 并不自动转换各种 JSON 类型。例如,对一个 String 的 Value 调用 `GetInt()` 是非法的。在调试模式下,它会被断言失败。在发布模式下,其行为是未定义的。 + +以下将会讨论有关查询各类型的细节。 + +## 查询 Array {#QueryArray} + +缺省情况下,`SizeType` 是 `unsigned` 的 typedef。在多数系统中,Array 最多能存储 2^32-1 个元素。 + +你可以用整数字面量访问元素,如 `a[0]`、`a[1]`、`a[2]`。 + +Array 与 `std::vector` 相似,除了使用索引,也可使用迭代器来访问所有元素。 +~~~~~~~~~~cpp +for (Value::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr) + printf("%d ", itr->GetInt()); +~~~~~~~~~~ + +还有一些熟悉的查询函数: +* `SizeType Capacity() const` +* `bool Empty() const` + +### 范围 for 循环 (v1.1.0 中的新功能) + +当使用 C++11 功能时,你可使用范围 for 循环去访问 Array 内的所有元素。 + +~~~~~~~~~~cpp +for (auto& v : a.GetArray()) + printf("%d ", v.GetInt()); +~~~~~~~~~~ + +## 查询 Object {#QueryObject} + +和 Array 相似,我们可以用迭代器去访问所有 Object 成员: + +~~~~~~~~~~cpp +static const char* kTypeNames[] = + { "Null", "False", "True", "Object", "Array", "String", "Number" }; + +for (Value::ConstMemberIterator itr = document.MemberBegin(); + itr != document.MemberEnd(); ++itr) +{ + printf("Type of member %s is %s\n", + itr->name.GetString(), kTypeNames[itr->value.GetType()]); +} +~~~~~~~~~~ + +~~~~~~~~~~ +Type of member hello is String +Type of member t is True +Type of member f is 
False +Type of member n is Null +Type of member i is Number +Type of member pi is Number +Type of member a is Array +~~~~~~~~~~ + +注意,当 `operator[](const char*)` 找不到成员,它会断言失败。 + +若我们不确定一个成员是否存在,便需要在调用 `operator[](const char*)` 前先调用 `HasMember()`。然而,这会导致两次查找。更好的做法是调用 `FindMember()`,它能同时检查成员是否存在并返回它的 Value: + +~~~~~~~~~~cpp +Value::ConstMemberIterator itr = document.FindMember("hello"); +if (itr != document.MemberEnd()) + printf("%s\n", itr->value.GetString()); +~~~~~~~~~~ + +### 范围 for 循环 (v1.1.0 中的新功能) + +当使用 C++11 功能时,你可使用范围 for 循环去访问 Object 内的所有成员。 + +~~~~~~~~~~cpp +for (auto& m : document.GetObject()) + printf("Type of member %s is %s\n", + m.name.GetString(), kTypeNames[m.value.GetType()]); +~~~~~~~~~~ + +## 查询 Number {#QueryNumber} + +JSON 只提供一种数值类型──Number。数字可以是整数或实数。RFC 4627 规定数字的范围由解析器指定。 + +由于 C++ 提供多种整数及浮点数类型,DOM 尝试尽量提供最广的范围及良好性能。 + +当解析一个 Number 时, 它会被存储在 DOM 之中,成为下列其中一个类型: + +类型 | 描述 +-----------|--------------------------------------- +`unsigned` | 32 位无号整数 +`int` | 32 位有号整数 +`uint64_t` | 64 位无号整数 +`int64_t` | 64 位有号整数 +`double` | 64 位双精度浮点数 + +当查询一个 Number 时, 你可以检查该数字是否能以目标类型来提取: + +查检 | 提取 +------------------|--------------------- +`bool IsNumber()` | 不适用 +`bool IsUint()` | `unsigned GetUint()` +`bool IsInt()` | `int GetInt()` +`bool IsUint64()` | `uint64_t GetUint64()` +`bool IsInt64()` | `int64_t GetInt64()` +`bool IsDouble()` | `double GetDouble()` + +注意,一个整数可能用几种类型来提取,而无需转换。例如,一个名为 `x` 的 Value 包含 123,那么 `x.IsInt() == x.IsUint() == x.IsInt64() == x.IsUint64() == true`。但如果一个名为 `y` 的 Value 包含 -3000000000,那么仅会令 `x.IsInt64() == true`。 + +当要提取 Number 类型,`GetDouble()` 是会把内部整数的表示转换成 `double`。注意 `int` 和 `unsigned` 可以安全地转换至 `double`,但 `int64_t` 及 `uint64_t` 可能会丧失精度(因为 `double` 的尾数只有 52 位)。 + +## 查询 String {#QueryString} + +除了 `GetString()`,`Value` 类也有一个 `GetStringLength()`。这里会解释个中原因。 + +根据 RFC 4627,JSON String 可包含 Unicode 字符 `U+0000`,在 JSON 中会表示为 `"\u0000"`。问题是,C/C++ 通常使用空字符结尾字符串(null-terminated string),这种字符串把 ``\0'` 作为结束符号。 + +为了符合 RFC 4627,RapidJSON 支持包含 
`U+0000` 的 String。若你需要处理这些 String,便可使用 `GetStringLength()` 去获得正确的字符串长度。 + +例如,当解析以下的 JSON 至 `Document d` 之后: + +~~~~~~~~~~js +{ "s" : "a\u0000b" } +~~~~~~~~~~ +`"a\u0000b"` 值的正确长度应该是 3。但 `strlen()` 会返回 1。 + +`GetStringLength()` 也可以提高性能,因为用户可能需要调用 `strlen()` 去分配缓冲。 + +此外,`std::string` 也支持这个构造函数: + +~~~~~~~~~~cpp +string(const char* s, size_t count); +~~~~~~~~~~ + +此构造函数接受字符串长度作为参数。它支持在字符串中存储空字符,也应该会有更好的性能。 + +## 比较两个 Value + +你可使用 `==` 及 `!=` 去比较两个 Value。当且仅当两个 Value 的类型及内容相同,它们才当作相等。你也可以比较 Value 和它的原生类型值。以下是一个例子。 + +~~~~~~~~~~cpp +if (document["hello"] == document["n"]) /*...*/; // 比较两个值 +if (document["hello"] == "world") /*...*/; // 与字符串字面量作比较 +if (document["i"] != 123) /*...*/; // 与整数作比较 +if (document["pi"] != 3.14) /*...*/; // 与 double 作比较 +~~~~~~~~~~ + +Array/Object 顺序以它们的元素/成员作比较。当且仅当它们的整个子树相等,它们才当作相等。 + +注意,现时若一个 Object 含有重复命名的成员,它与任何 Object 作比较都总会返回 `false`。 + +# 创建/修改值 {#CreateModifyValues} + +有多种方法去创建值。 当一个 DOM 树被创建或修改后,可使用 `Writer` 再次存储为 JSON。 + +## 改变 Value 类型 {#ChangeValueType} +当使用默认构造函数创建一个 Value 或 Document,它的类型便会是 Null。要改变其类型,需调用 `SetXXX()` 或赋值操作,例如: + +~~~~~~~~~~cpp +Document d; // Null +d.SetObject(); + +Value v; // Null +v.SetInt(10); +v = 10; // 简写,和上面的相同 +~~~~~~~~~~ + +### 构造函数的各个重载 +几个类型也有重载构造函数: + +~~~~~~~~~~cpp +Value b(true); // 调用 Value(bool) +Value i(-123); // 调用 Value(int) +Value u(123u); // 调用 Value(unsigned) +Value d(1.5); // 调用 Value(double) +~~~~~~~~~~ + +要重建空 Object 或 Array,可在默认构造函数后使用 `SetObject()`/`SetArray()`,或一次性使用 `Value(Type)`: + +~~~~~~~~~~cpp +Value o(kObjectType); +Value a(kArrayType); +~~~~~~~~~~ + +## 转移语义(Move Semantics) {#MoveSemantics} + +在设计 RapidJSON 时有一个非常特别的决定,就是 Value 赋值并不是把来源 Value 复制至目的 Value,而是把来源 Value 转移(move)至目的 Value。例如: + +~~~~~~~~~~cpp +Value a(123); +Value b(456); +b = a; // a 变成 Null,b 变成数字 123。 +~~~~~~~~~~ + +![使用移动语义赋值。](diagram/move1.png) + +为什么?此语义有何优点? 
+ +最简单的答案就是性能。对于固定大小的 JSON 类型(Number、True、False、Null),复制它们是简单快捷。然而,对于可变大小的 JSON 类型(String、Array、Object),复制它们会产生大量开销,而且这些开销常常不被察觉。尤其是当我们需要创建临时 Object,把它复制至另一变量,然后再析构它。 + +例如,若使用正常 * 复制 * 语义: + +~~~~~~~~~~cpp +Value o(kObjectType); +{ + Value contacts(kArrayType); + // 把元素加进 contacts 数组。 + // ... + o.AddMember("contacts", contacts, d.GetAllocator()); // 深度复制 contacts (可能有大量内存分配) + // 析构 contacts。 +} +~~~~~~~~~~ + +![复制语义产生大量的复制操作。](diagram/move2.png) + +那个 `o` Object 需要分配一个和 contacts 相同大小的缓冲区,对 conacts 做深度复制,并最终要析构 contacts。这样会产生大量无必要的内存分配/释放,以及内存复制。 + +有一些方案可避免实质地复制这些数据,例如引用计数(reference counting)、垃圾回收(garbage collection, GC)。 + +为了使 RapidJSON 简单及快速,我们选择了对赋值采用 * 转移 * 语义。这方法与 `std::auto_ptr` 相似,都是在赋值时转移拥有权。转移快得多简单得多,只需要析构原来的 Value,把来源 `memcpy()` 至目标,最后把来源设置为 Null 类型。 + +因此,使用转移语义后,上面的例子变成: + +~~~~~~~~~~cpp +Value o(kObjectType); +{ + Value contacts(kArrayType); + // adding elements to contacts array. + o.AddMember("contacts", contacts, d.GetAllocator()); // 只需 memcpy() contacts 本身至新成员的 Value(16 字节) + // contacts 在这里变成 Null。它的析构是平凡的。 +} +~~~~~~~~~~ + +![转移语义不需复制。](diagram/move3.png) + +在 C++11 中这称为转移赋值操作(move assignment operator)。由于 RapidJSON 支持 C++03,它在赋值操作采用转移语义,其它修改型函数如 `AddMember()`, `PushBack()` 也采用转移语义。 + +### 转移语义及临时值 {#TemporaryValues} + +有时候,我们想直接构造一个 Value 并传递给一个“转移”函数(如 `PushBack()`、`AddMember()`)。由于临时对象是不能转换为正常的 Value 引用,我们加入了一个方便的 `Move()` 函数: + +~~~~~~~~~~cpp +Value a(kArrayType); +Document::AllocatorType& allocator = document.GetAllocator(); +// a.PushBack(Value(42), allocator); // 不能通过编译 +a.PushBack(Value().SetInt(42), allocator); // fluent API +a.PushBack(Value(42).Move(), allocator); // 和上一行相同 +~~~~~~~~~~ + +## 创建 String {#CreateString} +RapidJSON 提供两个 String 的存储策略。 + +1. copy-string: 分配缓冲区,然后把来源数据复制至它。 +2. 
const-string: 简单地储存字符串的指针。 + +Copy-string 总是安全的,因为它拥有数据的克隆。Const-string 可用于存储字符串字面量,以及用于在 DOM 一节中将会提到的 in-situ 解析中。 + +为了让用户自定义内存分配方式,当一个操作可能需要内存分配时,RapidJSON 要求用户传递一个 allocator 实例作为 API 参数。此设计避免了在每个 Value 存储 allocator(或 document)的指针。 + +因此,当我们把一个 copy-string 赋值时, 调用含有 allocator 的 `SetString()` 重载函数: + +~~~~~~~~~~cpp +Document document; +Value author; +char buffer[10]; +int len = sprintf(buffer, "%s %s", "Milo", "Yip"); // 动态创建的字符串。 +author.SetString(buffer, len, document.GetAllocator()); +memset(buffer, 0, sizeof(buffer)); +// 清空 buffer 后 author.GetString() 仍然包含 "Milo Yip" +~~~~~~~~~~ + +在此例子中,我们使用 `Document` 实例的 allocator。这是使用 RapidJSON 时常用的惯用法。但你也可以用其他 allocator 实例。 + +另外,上面的 `SetString()` 需要长度参数。这个 API 能处理含有空字符的字符串。另一个 `SetString()` 重载函数没有长度参数,它假设输入是空字符结尾的,并会调用类似 `strlen()` 的函数去获取长度。 + +最后,对于字符串字面量或有安全生命周期的字符串,可以使用 const-string 版本的 `SetString()`,它没有 +allocator 参数。对于字符串字面量(或字符数组常量),只需简单地传递字面量,又安全又高效: + +~~~~~~~~~~cpp +Value s; +s.SetString("rapidjson"); // 可包含空字符,长度在编译期推导 +s = "rapidjson"; // 上行的缩写 +~~~~~~~~~~ + +对于字符指针,RapidJSON 需要作一个标记,代表它不复制也是安全的。可以使用 `StringRef` 函数: + +~~~~~~~~~cpp +const char * cstr = getenv("USER"); +size_t cstr_len = ...; // 如果有长度 +Value s; +// s.SetString(cstr); // 这不能通过编译 +s.SetString(StringRef(cstr)); // 可以,假设它的生命周期安全,并且是以空字符结尾的 +s = StringRef(cstr); // 上行的缩写 +s.SetString(StringRef(cstr, cstr_len));// 更快,可处理空字符 +s = StringRef(cstr, cstr_len); // 上行的缩写 + +~~~~~~~~~ + +## 修改 Array {#ModifyArray} +Array 类型的 Value 提供与 `std::vector` 相似的 API。 + +* `Clear()` +* `Reserve(SizeType, Allocator&)` +* `Value& PushBack(Value&, Allocator&)` +* `template <typename T> GenericValue& PushBack(T, Allocator&)` +* `Value& PopBack()` +* `ValueIterator Erase(ConstValueIterator pos)` +* `ValueIterator Erase(ConstValueIterator first, ConstValueIterator last)` + +注意,`Reserve(...)` 及 `PushBack(...)` 可能会为数组元素分配内存,所以需要一个 allocator。 + +以下是 `PushBack()` 的例子: + +~~~~~~~~~~cpp +Value a(kArrayType); +Document::AllocatorType& allocator = document.GetAllocator(); + +for 
(int i = 5; i <= 10; i++) + a.PushBack(i, allocator); // 可能需要调用 realloc() 所以需要 allocator + +// 流畅接口(Fluent interface) +a.PushBack("Lua", allocator).PushBack("Mio", allocator); +~~~~~~~~~~ + +与 STL 不一样的是,`PushBack()`/`PopBack()` 返回 Array 本身的引用。这称为流畅接口(_fluent interface_)。 + +如果你想在 Array 中加入一个非常量字符串,或是一个没有足够生命周期的字符串(见 [Create String](#CreateString)),你需要使用 copy-string API 去创建一个 String。为了避免加入中间变量,可以就地使用一个 [临时值](#TemporaryValues): + +~~~~~~~~~~cpp +// 就地 Value 参数 +contact.PushBack(Value("copy", document.GetAllocator()).Move(), // copy string + document.GetAllocator()); + +// 显式 Value 参数 +Value val("key", document.GetAllocator()); // copy string +contact.PushBack(val, document.GetAllocator()); +~~~~~~~~~~ + +## 修改 Object {#ModifyObject} +Object 是键值对的集合。每个键必须为 String。要修改 Object,方法是增加或移除成员。以下的 API 用来增加成员: + +* `Value& AddMember(Value&, Value&, Allocator& allocator)` +* `Value& AddMember(StringRefType, Value&, Allocator&)` +* `template <typename T> Value& AddMember(StringRefType, T value, Allocator&)` + +以下是一个例子。 + +~~~~~~~~~~cpp +Value contact(kObject); +contact.AddMember("name", "Milo", document.GetAllocator()); +contact.AddMember("married", true, document.GetAllocator()); +~~~~~~~~~~ + +使用 `StringRefType` 作为 name 参数的重载版本与字符串的 `SetString` 的接口相似。 这些重载是为了避免复制 `name` 字符串,因为 JSON object 中经常会使用常数键名。 + +如果你需要从非常数字符串或生命周期不足的字符串创建键名(见 [创建 String](#CreateString)),你需要使用 copy-string API。为了避免中间变量,可以就地使用 [临时值](#TemporaryValues): + +~~~~~~~~~~cpp +// 就地 Value 参数 +contact.AddMember(Value("copy", document.GetAllocator()).Move(), // copy string + Value().Move(), // null value + document.GetAllocator()); + +// 显式参数 +Value key("key", document.GetAllocator()); // copy string name +Value val(42); // 某 Value +contact.AddMember(key, val, document.GetAllocator()); +~~~~~~~~~~ + +移除成员有几个选择: + +* `bool RemoveMember(const Ch* name)`:使用键名来移除成员(线性时间复杂度)。 +* `bool RemoveMember(const Value& name)`:除了 `name` 是一个 Value,和上一行相同。 +* `MemberIterator RemoveMember(MemberIterator)`:使用迭代器移除成员(_ 常数 _ 时间复杂度)。 +* 
`MemberIterator EraseMember(MemberIterator)`:和上行相似但维持成员次序(线性时间复杂度)。 +* `MemberIterator EraseMember(MemberIterator first, MemberIterator last)`:移除一个范围内的成员,维持次序(线性时间复杂度)。 + +`MemberIterator RemoveMember(MemberIterator)` 使用了“转移最后”手法来达成常数时间复杂度。基本上就是析构迭代器位置的成员,然后把最后的成员转移至迭代器位置。因此,成员的次序会被改变。 + +## 深复制 Value {#DeepCopyValue} +若我们真的要复制一个 DOM 树,我们可使用两个 APIs 作深复制:含 allocator 的构造函数及 `CopyFrom()`。 + +~~~~~~~~~~cpp +Document d; +Document::AllocatorType& a = d.GetAllocator(); +Value v1("foo"); +// Value v2(v1); // 不容许 + +Value v2(v1, a); // 制造一个克隆 +assert(v1.IsString()); // v1 不变 +d.SetArray().PushBack(v1, a).PushBack(v2, a); +assert(v1.IsNull() && v2.IsNull()); // 两个都转移到 d + +v2.CopyFrom(d, a); // 把整个 document 复制至 v2 +assert(d.IsArray() && d.Size() == 2); // d 不变 +v1.SetObject().AddMember("array", v2, a); +d.PushBack(v1, a); +~~~~~~~~~~ + +## 交换 Value {#SwapValues} + +RapidJSON 也提供 `Swap()`。 + +~~~~~~~~~~cpp +Value a(123); +Value b("Hello"); +a.Swap(b); +assert(a.IsString()); +assert(b.IsInt()); +~~~~~~~~~~ + +无论两棵 DOM 树有多复杂,交换是很快的(常数时间)。 + +# 下一部分 {#WhatsNext} + +本教程展示了如何查询及修改 DOM 树。RapidJSON 还有一些重要概念: + +1. [流](doc/stream.zh-cn.md) 是读写 JSON 的通道。流可以是内存字符串、文件流等。用户也可以自定义流。 +2. [编码](doc/encoding.zh-cn.md) 定义在流或内存中使用的字符编码。RapidJSON 也在内部提供 Unicode 转换及校验功能。 +3. [DOM](doc/dom.zh-cn.md) 的基本功能已在本教程里介绍。还有更高级的功能,如原位(*in situ*)解析、其他解析选项及高级用法。 +4. [SAX](doc/sax.zh-cn.md) 是 RapidJSON 解析/生成功能的基础。学习使用 `Reader`/`Writer` 去实现更高性能的应用程序。也可以使用 `PrettyWriter` 去格式化 JSON。 +5. [性能](doc/performance.zh-cn.md) 展示一些我们做的及第三方的性能测试。 +6. [技术内幕](doc/internals.md) 讲述一些 RapidJSON 内部的设计及技术。 + +你也可以参考 [常见问题](doc/faq.zh-cn.md)、API 文档、例子及单元测试。 diff --git a/src/s3select/rapidjson/docker/debian/Dockerfile b/src/s3select/rapidjson/docker/debian/Dockerfile new file mode 100644 index 000000000..76f0235e5 --- /dev/null +++ b/src/s3select/rapidjson/docker/debian/Dockerfile @@ -0,0 +1,8 @@ +# BUILD: docker build -t rapidjson-debian . 
+# RUN: docker run -it -v "$PWD"/../..:/rapidjson rapidjson-debian + +FROM debian:jessie + +RUN apt-get update && apt-get install -y g++ cmake doxygen valgrind + +ENTRYPOINT ["/bin/bash"] |