diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..b8f8fd65 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,73 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# top-most EditorConfig file +root = true + +[*] +indent_style = space +indent_size = 4 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[CMakeLists.txt] +indent_size = 2 + +[cmake/**] +indent_size = 2 + +[Makefile] +indent_style = tab + +[*.md] +indent_size = 2 +trim_trailing_whitespace = false + +[docs/mkdocs/docs/**.md] +indent_size = 4 + +[*.{yml,yaml}] +indent_size = 2 + +[{.clang-format,.clang-tidy}] +indent_size = 2 + +# Test data must not be auto-changed. +[test/unit_test/test_data/**] +indent_style = unset +indent_size = unset +tab_width = unset +charset = unset +end_of_line = unset +trim_trailing_whitespace = false +insert_final_newline = false + +# Leave the endings of the file header template files untouched +[.reuse/templates/*.jinja2] +insert_final_newline = false + +# Leave the endings of the example output files untouched +[docs/examples/*.output] +trim_trailing_whitespace = false +insert_final_newline = false + +# Unset all the settings for build directories +[build*/**] +indent_style = unset +indent_size = unset +tab_width = unset +end_of_line = unset +charset = unset +trim_trailing_whitespace = unset +insert_final_newline = unset + +# Unset all the settings for third party files +[thirdparty/**] +indent_style = unset +indent_size = unset +tab_width = unset +end_of_line = unset +charset = unset +trim_trailing_whitespace = unset +insert_final_newline = unset diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 595408bf..9b2abe74 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -10,7 +10,7 @@ body: Make sure you give it a short and specific **title** so that the report is searchable and uniquely identifiable for 
developers. Note that this form is for bug reports only. Please [open a discussion](https://github.com/fktn-k/fkYAML/discussions/new) - for questions, feature requests, or support requests. + for questions, feature requests, or support requests. - type: textarea id: summary attributes: @@ -26,7 +26,7 @@ body: attributes: label: Reproduction steps description: > - How do you trigger the bug? + How do you trigger the bug? Let the developers who see this report reproduce your situation as you do. Please describe your reproduction steps as specifically as possible. validations: @@ -37,7 +37,7 @@ body: label: Expected vs. actual results description: > Please describe what you expected to happen after the steps above and - what actually happened. + what actually happened. validations: required: true - type: textarea @@ -73,9 +73,9 @@ body: attributes: label: Library version description: > - Which version of the library did you use? - If it is a released version, please enter the version number. - Otherwise, please enter the commit hash. + Which version of the library did you use? + If it is a released version, please enter the version number. + Otherwise, please enter the commit hash. If you got the library from another source as the GitHub repository (e.g., via a package manager), please also state the source as well. validations: diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index e41a8c67..6e00abd8 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -2,4 +2,4 @@ blank_issues_enabled: false contact_links: - name: Ask a question url: https://github.com/fktn-k/fkYAML/discussions - about: Ask questions and discuss with other community members. \ No newline at end of file + about: Ask questions and discuss with other community members. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 84bf1631..f0641715 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,4 +15,4 @@ Read the [CONTRIBUTING.md](https://github.com/fktn-k/fkYAML/blob/develop/CONTRIB - The C++11 support varies between different **compilers** and versions. Please note the [list of supported compilers](https://github.com/fktn-k/fkYAML/blob/develop/README.md#supported-compilers). Some compilers like GCC 4.7 (and earlier), Clang 3.3 (and earlier), or Microsoft Visual Studio 13.0 and earlier are known not to work due to missing or incomplete C++11 support. Please refrain from proposing changes that work around these compiler's limitations with `#ifdef`s or other means. - Please refrain from proposing changes that would **break [YAML](https://yaml.org/) specifications**. If you propose a conformant extension of YAML to be supported by the library, please motivate this extension. -- Please do not open pull requests that address **multiple issues**. \ No newline at end of file +- Please do not open pull requests that address **multiple issues**. diff --git a/.github/workflows/amalgamation_check.yml b/.github/workflows/amalgamation_check.yml index 841d008b..f75f6315 100644 --- a/.github/workflows/amalgamation_check.yml +++ b/.github/workflows/amalgamation_check.yml @@ -28,7 +28,7 @@ jobs: timeout-minutes: 10 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check if the amalgamated header file is up-to-date. 
run: make check-amalgamate diff --git a/.github/workflows/clang_format_check.yml b/.github/workflows/clang_format_check.yml index 0f6c1cab..2bd25b78 100644 --- a/.github/workflows/clang_format_check.yml +++ b/.github/workflows/clang_format_check.yml @@ -33,7 +33,7 @@ jobs: - test/unit_test steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index f8e246c4..4f929e1d 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -36,13 +36,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -56,7 +56,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). # If this step fails, then you should remove it and run the build manually (see below) # - name: Autobuild - # uses: github/codeql-action/autobuild@v2 + # uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -69,6 +69,6 @@ jobs: cmake --build ${{github.workspace}}/build --config Release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index eaca5005..1b44749b 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -26,9 +26,11 @@ jobs: coverage: timeout-minutes: 10 runs-on: ubuntu-latest + permissions: + pull-requests: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -39,11 +41,46 @@ jobs: lcov -v - name: Generate coverage - run: make lcov-coverage + run: make html-coverage - name: Upload coverage to Coveralls uses: coverallsapp/github-action@v2 with: github-token: ${{secrets.GITHUB_TOKEN}} file: ${{github.workspace}}/build_coverage/coverage/fkYAML.info - format: lcov \ No newline at end of file + format: lcov + + - name: Decide the artifact name + id: create_zip + if: github.event_name == 'pull_request' + env: + PR_NUMBER: ${{github.event.number}} + run: | + echo "artifact name: fkYAML_coverage.pr${{env.PR_NUMBER}}" + echo "artifact_name=fkYAML_coverage.pr${{env.PR_NUMBER}}" >> $GITHUB_OUTPUT + + - name: Upload coverage as an artifact + id: upload_artifact_step + if: steps.create_zip.conclusion == 'success' + uses: actions/upload-artifact@v4 + with: + name: ${{steps.create_zip.outputs.artifact_name}} + path: | + ${{github.workspace}}/build_coverage/coverage/* + ${{github.workspace}}/build_coverage/html/** + overwrite: true + + - name: Notify the artifact URL + if: steps.upload_artifact_step.conclusion == 'success' + uses: thollander/actions-comment-pull-request@v2 + with: + message: | + ## :octocat: Upload Coverage Event Notification + Coverage data has been uploaded for the commit 
[${{github.event.pull_request.head.sha}}](https://github.com/${{github.repository}}/commit/${{github.event.pull_request.head.sha}}). + You can download the artifact which contains the same file uploaded to the Coveralls and its HTML version. + + | Name | ${{steps.create_zip.outputs.artifact_name}}.zip | + |:-----|:-----------------------------------------------------| + | ID | ${{steps.upload_artifact_step.outputs.artifact-id}} | + | URL | ${{steps.upload_artifact_step.outputs.artifact-url}} | + comment_tag: notification diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 5cdb775c..7d5c8555 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -39,7 +39,7 @@ jobs: single_header: ["ON", "OFF"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -64,7 +64,7 @@ jobs: DEVELOPER_DIR: /Applications/Xcode_${{matrix.xcode}}.app/Contents/Developer steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -89,7 +89,7 @@ jobs: DEVELOPER_DIR: /Applications/Xcode_${{matrix.xcode}}.app/Contents/Developer steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -114,7 +114,7 @@ jobs: DEVELOPER_DIR: /Applications/Xcode_${{matrix.xcode}}.app/Contents/Developer steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive diff --git a/.github/workflows/publish_docs.yml b/.github/workflows/publish_docs.yml index dca8ac02..3bc0dfcb 100644 --- a/.github/workflows/publish_docs.yml +++ b/.github/workflows/publish_docs.yml @@ -27,7 +27,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -40,10 +40,10 @@ jobs: run: make -C docs/mkdocs build - name: Setup Pages - uses: actions/configure-pages@v3 + uses: actions/configure-pages@v4 - name: Upload API documents - uses: actions/upload-pages-artifact@v2 + uses: 
actions/upload-pages-artifact@v3 with: path: ${{github.workspace}}/docs/mkdocs/site @@ -58,4 +58,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v2 + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/release_package.yml b/.github/workflows/release_package.yml index 6220820f..73a3de5b 100644 --- a/.github/workflows/release_package.yml +++ b/.github/workflows/release_package.yml @@ -2,7 +2,7 @@ name: ReleasePackage on: push: - branches: + branches: - main workflow_dispatch: @@ -17,19 +17,19 @@ jobs: include: - os: ubuntu-latest artifact_name: fkYAML.tgz - single_header: "ON" + single_header: "OFF" - os: ubuntu-latest artifact_name: fkYAML_single_header.tgz - single_header: "OFF" + single_header: "ON" - os: windows-latest artifact_name: fkYAML.zip - single_header: "ON" + single_header: "OFF" - os: windows-latest artifact_name: fkYAML_single_header.zip - single_header: "OFF" + single_header: "ON" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -54,7 +54,7 @@ jobs: ls - name: Upload Artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: ${{matrix.artifact_name}} path: ${{matrix.artifact_name}} diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 57d9485e..03dc18a5 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -45,7 +45,7 @@ jobs: use_single_header: ["ON", "OFF"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -64,7 +64,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -76,7 +76,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -93,7 +93,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: 
recursive @@ -118,7 +118,7 @@ jobs: apt-get install -y git unzip git --version - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -143,7 +143,7 @@ jobs: apt-get install -y git unzip git --version - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -174,7 +174,7 @@ jobs: build_type: [ Debug, Release ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -211,7 +211,7 @@ jobs: apt-get install -y git unzip git --version - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -248,7 +248,7 @@ jobs: apt-get install -y git unzip git --version - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index f5828fbd..fc22059f 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -39,7 +39,7 @@ jobs: single_header: ["ON", "OFF"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -62,7 +62,7 @@ jobs: single_header: ["ON", "OFF"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -85,7 +85,7 @@ jobs: build_type: [ Debug, Release ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive @@ -97,4 +97,4 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -j ${{env.JOBS}} \ No newline at end of file + run: ctest -C ${{matrix.build_type}} --output-on-failure -j ${{env.JOBS}} diff --git a/.reuse/README.md b/.reuse/README.md index ef505136..de6a8ce7 100644 --- a/.reuse/README.md +++ b/.reuse/README.md @@ -8,4 +8,4 @@ $ cd path/to/fkYAML $ bash reuse.sh ``` -See for more information. \ No newline at end of file +See for more information. 
diff --git a/.reuse/templates/fkYAML.commented.jinja2 b/.reuse/templates/fkYAML.commented.jinja2 index 70af554a..07e2d07b 100644 --- a/.reuse/templates/fkYAML.commented.jinja2 +++ b/.reuse/templates/fkYAML.commented.jinja2 @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// {% for copyright_line in copyright_lines %} diff --git a/.reuse/templates/fkYAML_support.jinja2 b/.reuse/templates/fkYAML_support.jinja2 index 93f49a9a..616082b1 100644 --- a/.reuse/templates/fkYAML_support.jinja2 +++ b/.reuse/templates/fkYAML_support.jinja2 @@ -1,6 +1,6 @@ _______ __ __ __ _____ __ __ __ | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -| __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +| __| _ < \_ _/| ___ | _ | |___ version 0.3.3 |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML {% for copyright_line in copyright_lines %} diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cd5191f..c39e7fdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,44 +1,62 @@ # Changelog +## [v0.3.3](https://github.com/fktn-k/fkYAML/releases/tag/v0.3.3) (2024-03-31) + +[Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.3.2...v0.3.3) + +- \#302 Fixed parse error on alias mapping keys [\#303](https://github.com/fktn-k/fkYAML/pull/303) ([fktn-k](https://github.com/fktn-k)) +- \#292 Better handling for flow indicators in permitted scalar contexts [\#293](https://github.com/fktn-k/fkYAML/pull/293) ([stephenwhittle](https://github.com/stephenwhittle)) +- \#288 Fixed incorrect parse results from mapping entries split across newlines [\#289](https://github.com/fktn-k/fkYAML/pull/289) ([fktn-k](https://github.com/fktn-k)) + +- Further improvements of input handlings 
[\#301](https://github.com/fktn-k/fkYAML/pull/301) ([fktn-k](https://github.com/fktn-k)) +- Fixed warnings and made future warnings as errors [\#300](https://github.com/fktn-k/fkYAML/pull/300) ([fktn-k](https://github.com/fktn-k)) +- \#298 Add at\(\) API to the basic\_node class [\#299](https://github.com/fktn-k/fkYAML/pull/299) ([fktn-k](https://github.com/fktn-k)) +- Improve handling UTF encoded inputs [\#296](https://github.com/fktn-k/fkYAML/pull/296) ([fktn-k](https://github.com/fktn-k)) +- \#297 Add a note for checking the coverage & upload the same as an artifact [\#295](https://github.com/fktn-k/fkYAML/pull/295) ([fktn-k](https://github.com/fktn-k)) +- modified the way of formatting error messages for exception objects [\#291](https://github.com/fktn-k/fkYAML/pull/291) ([fktn-k](https://github.com/fktn-k)) +- Resolve warnings against using Node.js 16 [\#290](https://github.com/fktn-k/fkYAML/pull/290) ([fktn-k](https://github.com/fktn-k)) +- Add .editorconfig file [\#287](https://github.com/fktn-k/fkYAML/pull/287) ([fktn-k](https://github.com/fktn-k)) + ## [v0.3.2](https://github.com/fktn-k/fkYAML/releases/tag/v0.3.2) (2024-03-17) [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.3.1...v0.3.2) -- Clean up CMakeLists files [\#280](https://github.com/fktn-k/fkYAML/pull/280) ([fktn-k](https://github.com/fktn-k)) -- Add workflow jobs with the macos-13 runner image [\#274](https://github.com/fktn-k/fkYAML/pull/274) ([fktn-k](https://github.com/fktn-k)) -- Modify handling node properties [\#270](https://github.com/fktn-k/fkYAML/pull/270) ([fktn-k](https://github.com/fktn-k)) -- \#237 Support char8\_t in deserialization [\#269](https://github.com/fktn-k/fkYAML/pull/269) ([fktn-k](https://github.com/fktn-k)) - - \#281 Fixed the parser crash due to comments right after a sequence block key [\#284](https://github.com/fktn-k/fkYAML/pull/284) ([fktn-k](https://github.com/fktn-k)) - \#282 Don't traverse up to the parent node immediately after parsing a 
flow-mapping value [\#283](https://github.com/fktn-k/fkYAML/pull/283) ([stephenwhittle](https://github.com/stephenwhittle)) - \#277 Fixed incorrect parse result from plain scalars starting with special values [\#278](https://github.com/fktn-k/fkYAML/pull/278) ([fktn-k](https://github.com/fktn-k)) - \#275 Fixed parse error on plain scalars containing flow indicators [\#276](https://github.com/fktn-k/fkYAML/pull/276) ([fktn-k](https://github.com/fktn-k)) - \#272 Fix parse error on a block sequence containing a comment within [\#273](https://github.com/fktn-k/fkYAML/pull/273) ([fktn-k](https://github.com/fktn-k)) +- Clean up CMakeLists files [\#280](https://github.com/fktn-k/fkYAML/pull/280) ([fktn-k](https://github.com/fktn-k)) - Updated documents [\#279](https://github.com/fktn-k/fkYAML/pull/279) ([fktn-k](https://github.com/fktn-k)) +- Add workflow jobs with the macos-13 runner image [\#274](https://github.com/fktn-k/fkYAML/pull/274) ([fktn-k](https://github.com/fktn-k)) - Updated copyright year [\#271](https://github.com/fktn-k/fkYAML/pull/271) ([fktn-k](https://github.com/fktn-k)) +- Modify handling node properties [\#270](https://github.com/fktn-k/fkYAML/pull/270) ([fktn-k](https://github.com/fktn-k)) +- \#237 Support char8\_t in deserialization [\#269](https://github.com/fktn-k/fkYAML/pull/269) ([fktn-k](https://github.com/fktn-k)) ## [v0.3.1](https://github.com/fktn-k/fkYAML/releases/tag/v0.3.1) (2023-12-21) [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.3.0...v0.3.1) -- \#229 build example code and use output [\#264](https://github.com/fktn-k/fkYAML/pull/264) ([fktn-k](https://github.com/fktn-k)) -- Added paths filter to regulate workflow executions [\#263](https://github.com/fktn-k/fkYAML/pull/263) ([fktn-k](https://github.com/fktn-k)) -- \#255 serve single header file [\#261](https://github.com/fktn-k/fkYAML/pull/261) ([fktn-k](https://github.com/fktn-k)) -- \#256 Added specialization of from\_node\(\) for std::map with compatible key/value 
types [\#260](https://github.com/fktn-k/fkYAML/pull/260) ([fktn-k](https://github.com/fktn-k)) -- Allow double quotation marks in plain scalars [\#253](https://github.com/fktn-k/fkYAML/pull/253) ([fktn-k](https://github.com/fktn-k)) - - Avoid using unintended C++14 features [\#266](https://github.com/fktn-k/fkYAML/pull/266) ([fktn-k](https://github.com/fktn-k)) - Allow a dash\(-\) at the first character of a plain scalar [\#254](https://github.com/fktn-k/fkYAML/pull/254) ([fktn-k](https://github.com/fktn-k)) - \#242 Fix error in parsing string consisting only of numbers and multiple dots [\#251](https://github.com/fktn-k/fkYAML/pull/251) ([fktn-k](https://github.com/fktn-k)) - fixed typos which needs to be target\_link\_libraries\(\) in tutorials [\#267](https://github.com/fktn-k/fkYAML/pull/267) ([fktn-k](https://github.com/fktn-k)) - \#229 Fix the documentation build errors [\#265](https://github.com/fktn-k/fkYAML/pull/265) ([fktn-k](https://github.com/fktn-k)) +- \#229 build example code and use output [\#264](https://github.com/fktn-k/fkYAML/pull/264) ([fktn-k](https://github.com/fktn-k)) +- Added paths filter to regulate workflow executions [\#263](https://github.com/fktn-k/fkYAML/pull/263) ([fktn-k](https://github.com/fktn-k)) +- \#255 serve single header file [\#261](https://github.com/fktn-k/fkYAML/pull/261) ([fktn-k](https://github.com/fktn-k)) +- \#256 Added specialization of from\_node\(\) for std::map with compatible key/value types [\#260](https://github.com/fktn-k/fkYAML/pull/260) ([fktn-k](https://github.com/fktn-k)) +- Allow double quotation marks in plain scalars [\#253](https://github.com/fktn-k/fkYAML/pull/253) ([fktn-k](https://github.com/fktn-k)) ## [v0.3.0](https://github.com/fktn-k/fkYAML/releases/tag/v0.3.0) (2023-12-10) [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.2.3...v0.3.0) +- Fixed missing callings of `fclose()` in the unit tests [\#246](https://github.com/fktn-k/fkYAML/pull/246) ([fktn-k](https://github.com/fktn-k)) + 
+- Update copyright description [\#249](https://github.com/fktn-k/fkYAML/pull/249) ([fktn-k](https://github.com/fktn-k)) - Add GCC compiler versions tested in GA workflows [\#248](https://github.com/fktn-k/fkYAML/pull/248) ([fktn-k](https://github.com/fktn-k)) - \#240 Support non-string-scalar node keys in basic\_node ctor with std::initializer\_list [\#247](https://github.com/fktn-k/fkYAML/pull/247) ([fktn-k](https://github.com/fktn-k)) - \#240 Support explicit block mappings & non-scalar nodes as mapping keys [\#245](https://github.com/fktn-k/fkYAML/pull/245) ([fktn-k](https://github.com/fktn-k)) @@ -46,63 +64,50 @@ - \#240 implement node comparison [\#243](https://github.com/fktn-k/fkYAML/pull/243) ([fktn-k](https://github.com/fktn-k)) - Feature/238 apply correct noexcept [\#241](https://github.com/fktn-k/fkYAML/pull/241) ([fktn-k](https://github.com/fktn-k)) -- Fixed missing callings of `fclose()` in the unit tests [\#246](https://github.com/fktn-k/fkYAML/pull/246) ([fktn-k](https://github.com/fktn-k)) - -- Update copyright description [\#249](https://github.com/fktn-k/fkYAML/pull/249) ([fktn-k](https://github.com/fktn-k)) - ## [v0.2.3](https://github.com/fktn-k/fkYAML/releases/tag/v0.2.3) (2023-12-03) [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.2.2...v0.2.3) -- Feature/232 user defined literal for deserialization [\#235](https://github.com/fktn-k/fkYAML/pull/235) ([fktn-k](https://github.com/fktn-k)) -- \#159 Support block scalar styles [\#228](https://github.com/fktn-k/fkYAML/pull/228) ([fktn-k](https://github.com/fktn-k)) -- Feature/226 add natvis file [\#227](https://github.com/fktn-k/fkYAML/pull/227) ([fktn-k](https://github.com/fktn-k)) - - fixed parsing UTF-16 encoded inputs which do not end with a surrogate… [\#234](https://github.com/fktn-k/fkYAML/pull/234) ([fktn-k](https://github.com/fktn-k)) +- Feature/232 user defined literal for deserialization [\#235](https://github.com/fktn-k/fkYAML/pull/235) 
([fktn-k](https://github.com/fktn-k)) - Improved contribution guide [\#231](https://github.com/fktn-k/fkYAML/pull/231) ([fktn-k](https://github.com/fktn-k)) - Removed dependency on the cmake-format tool [\#230](https://github.com/fktn-k/fkYAML/pull/230) ([fktn-k](https://github.com/fktn-k)) +- \#159 Support block scalar styles [\#228](https://github.com/fktn-k/fkYAML/pull/228) ([fktn-k](https://github.com/fktn-k)) +- Feature/226 add natvis file [\#227](https://github.com/fktn-k/fkYAML/pull/227) ([fktn-k](https://github.com/fktn-k)) - Fixed wrong URLs in the releases page of the documentation [\#225](https://github.com/fktn-k/fkYAML/pull/225) ([fktn-k](https://github.com/fktn-k)) ## [v0.2.2](https://github.com/fktn-k/fkYAML/releases/tag/v0.2.2) (2023-11-27) [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.2.1...v0.2.2) +- \#218 Fixed lacking quotes & escapes during serialization [\#220](https://github.com/fktn-k/fkYAML/pull/220) ([fktn-k](https://github.com/fktn-k)) +- \#217 Fix parse errors on strings containing single quotes [\#219](https://github.com/fktn-k/fkYAML/pull/219) ([fktn-k](https://github.com/fktn-k)) + - \#209 Support auto-detection of Unicode encoding types including BOMs [\#223](https://github.com/fktn-k/fkYAML/pull/223) ([fktn-k](https://github.com/fktn-k)) - \#214 Emit more descriptive errors [\#222](https://github.com/fktn-k/fkYAML/pull/222) ([fktn-k](https://github.com/fktn-k)) - \#209 support UTF-16 / UTF-32 for deserialization input characters [\#221](https://github.com/fktn-k/fkYAML/pull/221) ([fktn-k](https://github.com/fktn-k)) -- \#218 Fixed lacking quotes & escapes during serializtion [\#220](https://github.com/fktn-k/fkYAML/pull/220) ([fktn-k](https://github.com/fktn-k)) -- \#217 Fix parse errors on strings containing single quotes [\#219](https://github.com/fktn-k/fkYAML/pull/219) ([fktn-k](https://github.com/fktn-k)) - ## [v0.2.1](https://github.com/fktn-k/fkYAML/releases/tag/v0.2.1) (2023-11-19) [Full 
Changelog](https://github.com/fktn-k/fkYAML/compare/v0.2.0...v0.2.1) -- \#209 support escaped unicode [\#215](https://github.com/fktn-k/fkYAML/pull/215) ([fktn-k](https://github.com/fktn-k)) -- \#209 Support unescaped UTF-8 input characters in deserialization [\#210](https://github.com/fktn-k/fkYAML/pull/210) ([fktn-k](https://github.com/fktn-k)) -- \#207 Support markers for the end of directives/documents [\#208](https://github.com/fktn-k/fkYAML/pull/208) ([fktn-k](https://github.com/fktn-k)) -- \#190 Support specialization of deserialization for a vector of user-defined type objects [\#203](https://github.com/fktn-k/fkYAML/pull/203) ([fktn-k](https://github.com/fktn-k)) -- \#195 Implement insertion/extraction operators for basic\_node template class [\#201](https://github.com/fktn-k/fkYAML/pull/201) ([fktn-k](https://github.com/fktn-k)) - - \#211 Fix indentation handling [\#213](https://github.com/fktn-k/fkYAML/pull/213) ([fktn-k](https://github.com/fktn-k)) - \#211 fixed bug in parsing single quoted strings [\#212](https://github.com/fktn-k/fkYAML/pull/212) ([fktn-k](https://github.com/fktn-k)) - \#205 Detect duplicate keys as an error [\#206](https://github.com/fktn-k/fkYAML/pull/206) ([fktn-k](https://github.com/fktn-k)) - \#200 Allow a space in unquoted strings [\#202](https://github.com/fktn-k/fkYAML/pull/202) ([fktn-k](https://github.com/fktn-k)) +- \#209 support escaped unicode [\#215](https://github.com/fktn-k/fkYAML/pull/215) ([fktn-k](https://github.com/fktn-k)) +- \#209 Support unescaped UTF-8 input characters in deserialization [\#210](https://github.com/fktn-k/fkYAML/pull/210) ([fktn-k](https://github.com/fktn-k)) +- \#207 Support markers for the end of directives/documents [\#208](https://github.com/fktn-k/fkYAML/pull/208) ([fktn-k](https://github.com/fktn-k)) - Unified doxygen comment style [\#204](https://github.com/fktn-k/fkYAML/pull/204) ([fktn-k](https://github.com/fktn-k)) +- \#190 Support specialization of deserialization for a vector of 
user-defined type objects [\#203](https://github.com/fktn-k/fkYAML/pull/203) ([fktn-k](https://github.com/fktn-k)) +- \#195 Implement insertion/extraction operators for basic\_node template class [\#201](https://github.com/fktn-k/fkYAML/pull/201) ([fktn-k](https://github.com/fktn-k)) ## [v0.2.0](https://github.com/fktn-k/fkYAML/releases/tag/v0.2.0) (2023-11-06) [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.1.3...v0.2.0) -- \#187 support compatible strings as an argument in basic\_node ctors [\#188](https://github.com/fktn-k/fkYAML/pull/188) ([fktn-k](https://github.com/fktn-k)) -- \#180 Generalize getter APIs for node value references [\#184](https://github.com/fktn-k/fkYAML/pull/184) ([fktn-k](https://github.com/fktn-k)) -- \#180 clean up public apis [\#183](https://github.com/fktn-k/fkYAML/pull/183) ([fktn-k](https://github.com/fktn-k)) -- \#177 Add test for input adapters only [\#178](https://github.com/fktn-k/fkYAML/pull/178) ([fktn-k](https://github.com/fktn-k)) -- \#167 Use std::map as the default mapping node type [\#174](https://github.com/fktn-k/fkYAML/pull/174) ([fktn-k](https://github.com/fktn-k)) -- \#105 Support std::initializer\_list in basic\_node constructors [\#173](https://github.com/fktn-k/fkYAML/pull/173) ([fktn-k](https://github.com/fktn-k)) - - \#185 fixed missing calls for fclose\(\) in input adapter tests [\#186](https://github.com/fktn-k/fkYAML/pull/186) ([fktn-k](https://github.com/fktn-k)) - \#175 support detecting indentation in deserialization [\#176](https://github.com/fktn-k/fkYAML/pull/176) ([fktn-k](https://github.com/fktn-k)) @@ -112,7 +117,13 @@ - \#179 Migrate the gh-page content with MkDocs [\#193](https://github.com/fktn-k/fkYAML/pull/193) ([fktn-k](https://github.com/fktn-k)) - \#179 Created tutorial pages [\#192](https://github.com/fktn-k/fkYAML/pull/192) ([fktn-k](https://github.com/fktn-k)) - \#179 Migrate API docs with MkDocs [\#189](https://github.com/fktn-k/fkYAML/pull/189) 
([fktn-k](https://github.com/fktn-k)) +- \#187 support compatible strings as an argument in basic\_node ctors [\#188](https://github.com/fktn-k/fkYAML/pull/188) ([fktn-k](https://github.com/fktn-k)) +- \#180 Generalize getter APIs for node value references [\#184](https://github.com/fktn-k/fkYAML/pull/184) ([fktn-k](https://github.com/fktn-k)) +- \#180 clean up public apis [\#183](https://github.com/fktn-k/fkYAML/pull/183) ([fktn-k](https://github.com/fktn-k)) - \#179 introduce mkdocs for documentation [\#182](https://github.com/fktn-k/fkYAML/pull/182) ([fktn-k](https://github.com/fktn-k)) +- \#177 Add test for input adapters only [\#178](https://github.com/fktn-k/fkYAML/pull/178) ([fktn-k](https://github.com/fktn-k)) +- \#167 Use std::map as the default mapping node type [\#174](https://github.com/fktn-k/fkYAML/pull/174) ([fktn-k](https://github.com/fktn-k)) +- \#105 Support std::initializer\_list in basic\_node constructors [\#173](https://github.com/fktn-k/fkYAML/pull/173) ([fktn-k](https://github.com/fktn-k)) ## [v0.1.3](https://github.com/fktn-k/fkYAML/releases/tag/v0.1.3) (2023-10-21) @@ -123,13 +134,15 @@ - \#164 Use default initial values for class member variables [\#168](https://github.com/fktn-k/fkYAML/pull/168) ([fktn-k](https://github.com/fktn-k)) - \#18 Resolve warnings while building library/tests [\#165](https://github.com/fktn-k/fkYAML/pull/165) ([fktn-k](https://github.com/fktn-k)) - \#145 Expand swap\(\) support for basic node [\#163](https://github.com/fktn-k/fkYAML/pull/163) ([fktn-k](https://github.com/fktn-k)) - - \#160 Added the Codacy badge to README.md [\#162](https://github.com/fktn-k/fkYAML/pull/162) ([fktn-k](https://github.com/fktn-k)) ## [v0.1.2](https://github.com/fktn-k/fkYAML/releases/tag/v0.1.2) (2023-10-18) [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.1.1...v0.1.2) +- Fix version\_macros.hpp file path in Makefile [\#147](https://github.com/fktn-k/fkYAML/pull/147) ([fktn-k](https://github.com/fktn-k)) + +- Updated 
documents [\#156](https://github.com/fktn-k/fkYAML/pull/156) ([fktn-k](https://github.com/fktn-k)) - \#150 Added tests for basic\_deserializer class [\#155](https://github.com/fktn-k/fkYAML/pull/155) ([fktn-k](https://github.com/fktn-k)) - \#150 added/modified unit tests for lexical\_analyzer test [\#154](https://github.com/fktn-k/fkYAML/pull/154) ([fktn-k](https://github.com/fktn-k)) - \#150 Covered a missing branch in ordered\_map class [\#153](https://github.com/fktn-k/fkYAML/pull/153) ([fktn-k](https://github.com/fktn-k)) @@ -137,10 +150,6 @@ - \#150 Add unit tests for input\_handler class [\#151](https://github.com/fktn-k/fkYAML/pull/151) ([fktn-k](https://github.com/fktn-k)) - \#133 refactor lexer [\#146](https://github.com/fktn-k/fkYAML/pull/146) ([fktn-k](https://github.com/fktn-k)) -- Fix version\_macros.hpp file path in Makefile [\#147](https://github.com/fktn-k/fkYAML/pull/147) ([fktn-k](https://github.com/fktn-k)) - -- Updated documents [\#156](https://github.com/fktn-k/fkYAML/pull/156) ([fktn-k](https://github.com/fktn-k)) - ## [v0.1.1](https://github.com/fktn-k/fkYAML/releases/tag/v0.1.1) (2023-10-15) [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.1.0...v0.1.1) @@ -149,16 +158,15 @@ [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.0.1...v0.1.0) +- \#128 fixed filtering source files to generate full-spec coverage [\#140](https://github.com/fktn-k/fkYAML/pull/140) ([fktn-k](https://github.com/fktn-k)) + +- \#127 Update examples in readme [\#141](https://github.com/fktn-k/fkYAML/pull/141) ([fktn-k](https://github.com/fktn-k)) - \#136 Auto-generate a helper source file for the clang-tidy tool [\#139](https://github.com/fktn-k/fkYAML/pull/139) ([fktn-k](https://github.com/fktn-k)) - \#135 optimize build scripts [\#138](https://github.com/fktn-k/fkYAML/pull/138) ([fktn-k](https://github.com/fktn-k)) - \#132 Classify source files in detail directory [\#137](https://github.com/fktn-k/fkYAML/pull/137) 
([fktn-k](https://github.com/fktn-k)) - \#126 Generalize serialization/deserialization features [\#134](https://github.com/fktn-k/fkYAML/pull/134) ([fktn-k](https://github.com/fktn-k)) -- \#127 Generalize conversions between nodes and native data [\#129](https://github.com/fktn-k/fkYAML/pull/129) ([fktn-k](https://github.com/fktn-k)) - -- \#128 fixed filtering source files to generate full-spec coverage [\#140](https://github.com/fktn-k/fkYAML/pull/140) ([fktn-k](https://github.com/fktn-k)) - -- \#127 Update examples in readme [\#141](https://github.com/fktn-k/fkYAML/pull/141) ([fktn-k](https://github.com/fktn-k)) - \#128 moved internal impl to detail dir/namespace [\#131](https://github.com/fktn-k/fkYAML/pull/131) ([fktn-k](https://github.com/fktn-k)) +- \#127 Generalize conversions between nodes and native data [\#129](https://github.com/fktn-k/fkYAML/pull/129) ([fktn-k](https://github.com/fktn-k)) - \#123 Changed file naming convention [\#125](https://github.com/fktn-k/fkYAML/pull/125) ([fktn-k](https://github.com/fktn-k)) - \#123 Change naming conventions to lower\_snake\_case [\#124](https://github.com/fktn-k/fkYAML/pull/124) ([fktn-k](https://github.com/fktn-k)) @@ -166,14 +174,13 @@ [Full Changelog](https://github.com/fktn-k/fkYAML/compare/v0.0.0...v0.0.1) +- \#120 added target to use github-changelog-generator tool [\#121](https://github.com/fktn-k/fkYAML/pull/121) ([fktn-k](https://github.com/fktn-k)) - \#118 introduce cmake-format [\#119](https://github.com/fktn-k/fkYAML/pull/119) ([fktn-k](https://github.com/fktn-k)) +- \#116 use reuse software for file headers [\#117](https://github.com/fktn-k/fkYAML/pull/117) ([fktn-k](https://github.com/fktn-k)) - Expand usable key types [\#115](https://github.com/fktn-k/fkYAML/pull/115) ([fktn-k](https://github.com/fktn-k)) - \#113 Generate API documentation only for public members [\#114](https://github.com/fktn-k/fkYAML/pull/114) ([fktn-k](https://github.com/fktn-k)) - \#111 Use docker images for Clang compilers 
during CI jobs [\#112](https://github.com/fktn-k/fkYAML/pull/112) ([fktn-k](https://github.com/fktn-k)) - \#109 Use official docker image for GCC during CI jobs [\#110](https://github.com/fktn-k/fkYAML/pull/110) ([fktn-k](https://github.com/fktn-k)) - -- \#120 added target to use github-changelog-generator tool [\#121](https://github.com/fktn-k/fkYAML/pull/121) ([fktn-k](https://github.com/fktn-k)) -- \#116 use reuse software for file headers [\#117](https://github.com/fktn-k/fkYAML/pull/117) ([fktn-k](https://github.com/fktn-k)) - \#101 added memory leak check with Valgrind [\#108](https://github.com/fktn-k/fkYAML/pull/108) ([fktn-k](https://github.com/fktn-k)) - \#21 Update issue templates [\#100](https://github.com/fktn-k/fkYAML/pull/100) ([fktn-k](https://github.com/fktn-k)) - \#34 add unit tests for deserializer [\#97](https://github.com/fktn-k/fkYAML/pull/97) ([fktn-k](https://github.com/fktn-k)) diff --git a/CMakeLists.txt b/CMakeLists.txt index 22ef6927..fa2e6d66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 3.8) project( fkYAML - VERSION 0.3.2 + VERSION 0.3.3 LANGUAGES CXX) ############################################################# diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a719c720..8fbb448f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ For questions, feature or support requests, please [open a discussion](https://g ## Contribution Steps -Basically, follow [the contribution guideline](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) on the GitHub Docs. +Basically, follow [the contribution guideline](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) in the GitHub Docs. To make changes to the fkYAML project, you need to edit the following files: ### 1. 
[`include/fkYAML/**.hpp`](https://github.com/fktn-k/fkYAML/tree/develop/include/fkYAML) @@ -41,16 +41,38 @@ Note that the amalgamation tool assumes that your current directory is at the ro ### 2. [`test/unit_test/*.cpp`](https://github.com/fktn-k/fkYAML/tree/develop/test/unit_test) -These files contain the [Catch2](https://github.com/catchorg/Catch2) unit tests from which current coverage data is generated. (Click [here](https://coveralls.io/github/fktn-k/fkYAML) to see the current coverage of the library's code.) If you have added or changed a feature, please also modify a unit test to the associated file(s) to keep covering 100% of the lines/branches in the fkYAML library. The unit tests can be compiled and executed with: +These files contain the [Catch2](https://github.com/catchorg/Catch2) unit tests from which current coverage data is generated. (Click [here](https://coveralls.io/github/fktn-k/fkYAML) to see the current coverage of the library's code.) If you have added or changed a feature, please also modify a unit test to the associated file(s) to keep covering 100% of the lines/branches in the fkYAML library. + +The unit tests can be compiled and executed with the following commands: ```bash $ cd path/to/fkYAML -$ cmake -B build -S . -DCMAKE_BUILD_TYPE=Debug -DFK_YAML_BUILD_TEST=ON +$ cmake -B build -S . -DCMAKE_BUILD_TYPE=Debug -DFK_YAML_BUILD_TEST=ON [-DFK_YAML_USE_SINGLE_HEADER=ON|OFF] $ cmake --build build --config Debug $ ctest -C Debug --test-dir build --output-on-failure ``` -Furthermore, you can test the single-header version of fkYAML by passing `-DFK_YAML_USE_SINGLE_HEADER=ON` when you configure CMake. +Furthermore, you can test the single-header version of fkYAML by passing `-DFK_YAML_USE_SINGLE_HEADER=ON` when you configure CMake. (disabled by default) + +#### Check the coverage locally + +In the GitHub Action workflows, test coverage information is generated with the [LCOV](https://github.com/linux-test-project/lcov) tool. 
+The same file that is uploaded to Coveralls and its HTML version (generated by `genhtml`) can be downloaded from the link in the autogenerated message from the bot per push event. + +You might want to check the coverage locally before creating a PR, for example, to ensure that the coverage does not drop after you change the library sources. +In such cases, please run the following commands and open a generated `index.html` file in the `/build_coverage/html` directory to check the result. +Make sure `lcov` and `genhtml` commands are available before execution. + +```bash +$ cd /path/to/fkYAML +$ make html-coverage +``` + +##### For Windows users (help needed) + +I currently don't know how to run the dependent commands, `lcov` and `genhtml`, on the "pure" Windows platform, i.e., without using either [WSL2](https://learn.microsoft.com/en-us/windows/wsl/) or [Cygwin](https://www.cygwin.com/). +Until such a way is found, I recommend you use WSL2 or other similar means where you can run a Linux environment on Windows. +If you have an idea to run those tools on the "pure" Windows platform, please open a discussion or a PR to share it with the other Windows users. ### 3. [`docs/mkdocs/docs/**.md`](https://github.com/fktn-k/fkYAML/tree/develop/docs/mkdocs/docs) @@ -84,7 +106,7 @@ As a policy of this project, however, all the workflow checks must be passed bef ## Please don't -- The C++11 support varies between different **compilers** and versions. Please note the [list of supported compilers](https://github.com/fktn-k/fkYAML/blob/develop/README.md#supported-compilers). Some compilers like GCC 4.7 (and earlier), Clang 3.3 (and earlier), or Microsoft Visual Studio 13.0 and earlier are known not to work due to missing or incomplete C++11 support. Please refrain from proposing changes that work around these compiler's limitations with `#ifdef`s or other means. +- The C++11 support varies between different **compilers** and versions. 
Please note the [list of supported compilers](https://github.com/fktn-k/fkYAML/blob/develop/README.md#supported-compilers). Some compilers like GCC 4.7 (and earlier), Clang 3.3 (and earlier), or Microsoft Visual Studio 13.0 (and earlier) are known not to work due to missing or incomplete C++11 support. Please refrain from proposing changes that work around these compiler's limitations with `#ifdef`s or other means. - Please refrain from proposing changes that would **break [YAML](https://yaml.org/) specifications**. If you propose a conformant extension of YAML to be supported by the library, please motivate this extension. - Please do not open pull requests that address **multiple issues**. diff --git a/LICENSES/BSD-3-Clause.txt b/LICENSES/BSD-3-Clause.txt index ea890afb..086d3992 100644 --- a/LICENSES/BSD-3-Clause.txt +++ b/LICENSES/BSD-3-Clause.txt @@ -1,4 +1,4 @@ -Copyright (c) . +Copyright (c) . Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/Makefile b/Makefile index 65391136..84e2931a 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ TEST_SRCS = $(shell find test -type f \( -name '*.hpp' -o -name '*.cpp' \) | sor # target version definition TARGET_MAJOR_VERSION := 0 TARGET_MINOR_VERSION := 3 -TARGET_PATCH_VERSION := 2 +TARGET_PATCH_VERSION := 3 TARGET_VERSION_FULL := $(TARGET_MAJOR_VERSION).$(TARGET_MINOR_VERSION).$(TARGET_PATCH_VERSION) VERSION_MACRO_FILE := include/fkYAML/detail/macros/version_macros.hpp diff --git a/README.md b/README.md index 324ea3d1..c5e164d8 100644 --- a/README.md +++ b/README.md @@ -175,7 +175,7 @@ Thanks a lot! - [**Catch2**](https://github.com/catchorg/Catch2) as a unit-test framework. - [**github-changelog-generator**](https://github.com/github-changelog-generator/github-changelog-generator) to generate the [CHANGELOG.md](https://github.com/fktn-k/fkYAML/tree/develop/CHANGELOG.md) file. 
- [**include-what-you-use**](https://github.com/include-what-you-use/include-what-you-use) to check the fkYAML library source files are each self-contained. -- [**lcov**](https://ltp.sourceforge.net/coverage/lcov.php) to process coverage information and generate an HTML view. +- [**lcov**](https://github.com/linux-test-project/lcov) to process coverage information and generate an HTML view. - [**Material for MkDocs**](https://squidfunk.github.io/mkdocs-material/) for the style of the documentation site. - [**MkDocs**](https://www.mkdocs.org/) as the documentation site generator. - [**reuse-tool**](https://github.com/fsfe/reuse-tool) to generate license/copyright headers in source files to meet [REUSE software](https://reuse.software/) recommendations. diff --git a/cmake/config.cmake.in b/cmake/config.cmake.in index f3a26328..355a610a 100644 --- a/cmake/config.cmake.in +++ b/cmake/config.cmake.in @@ -4,4 +4,4 @@ find_package_handle_standard_args(@PROJECT_NAME@ CONFIG_MODE) if(NOT TARGET @PROJECT_NAME@::@FK_YAML_TARGET_NAME@) include("${CMAKE_CURRENT_LIST_DIR}/@FK_YAML_TARGETS_EXPORT_NAME@.cmake") -endif() \ No newline at end of file +endif() diff --git a/cmake/fkYAMLConfigVersion.cmake.in b/cmake/fkYAMLConfigVersion.cmake.in index 553782da..dd64d2f8 100644 --- a/cmake/fkYAMLConfigVersion.cmake.in +++ b/cmake/fkYAMLConfigVersion.cmake.in @@ -16,4 +16,3 @@ else() set(PACKAGE_VERSION_EXACT TRUE) endif() endif() - \ No newline at end of file diff --git a/cmake/pkg-config.pc.in b/cmake/pkg-config.pc.in index a56e2a27..f04e7303 100644 --- a/cmake/pkg-config.pc.in +++ b/cmake/pkg-config.pc.in @@ -1,4 +1,4 @@ NAME: ${PROJECT_NAME} Description: A header-only C++ library for YAML Version: ${PROJECT_VERSION} -Cflags: -I"${CMAKE_INSTALL_FULL_INCLUDEDIR}" \ No newline at end of file +Cflags: -I"${CMAKE_INSTALL_FULL_INCLUDEDIR}" diff --git a/docs/examples/ex_basic_node_at_basic_node.cpp b/docs/examples/ex_basic_node_at_basic_node.cpp new file mode 100644 index 00000000..8a48bcae 
--- /dev/null +++ b/docs/examples/ex_basic_node_at_basic_node.cpp @@ -0,0 +1,51 @@ +#include <iostream> +#include <fkYAML/node.hpp> + +int main() +{ + // create a YAML sequence node. + fkyaml::node n1 = {123, 234, 345, 456}; + + // print YAML nodes at the following indexes. + fkyaml::node index_zero = 0; + fkyaml::node index_one = 1; + fkyaml::node index_two = 2; + fkyaml::node index_three = 3; + std::cout << n1[index_zero] << std::endl; + std::cout << n1[index_one] << std::endl; + std::cout << n1[index_two] << std::endl; + std::cout << n1[index_three] << std::endl; + + // try to print a YAML node with an index which exceeds the size. + try + { + fkyaml::node index_four = 4; + std::cout << n1.at(index_four) << std::endl; + } + catch (const fkyaml::out_of_range& e) + { + std::cout << e.what() << std::endl; + } + + // create a YAML node. + fkyaml::node n2 = {{"foo", true}, {"bar", 123}}; + + // print YAML nodes associated with the following keys. + fkyaml::node foo_key = "foo"; + fkyaml::node bar_key = "bar"; + std::cout << std::boolalpha << n2[foo_key] << std::endl; + std::cout << n2[bar_key] << std::endl; + + // try to print a YAML node with a key which does not exist. + try + { + fkyaml::node true_key = true; + std::cout << n2.at(true_key) << std::endl; + } + catch (const fkyaml::out_of_range& e) + { + std::cout << e.what() << std::endl; + } + + return 0; +} diff --git a/docs/examples/ex_basic_node_at_basic_node.output b/docs/examples/ex_basic_node_at_basic_node.output new file mode 100644 index 00000000..e348080f --- /dev/null +++ b/docs/examples/ex_basic_node_at_basic_node.output @@ -0,0 +1,8 @@ +123 +234 +345 +456 +out_of_range: index 4 is out of range +true +123 +out_of_range: key 'true' is not found. 
diff --git a/docs/examples/ex_basic_node_at_compatible_type.cpp b/docs/examples/ex_basic_node_at_compatible_type.cpp new file mode 100644 index 00000000..971fb790 --- /dev/null +++ b/docs/examples/ex_basic_node_at_compatible_type.cpp @@ -0,0 +1,43 @@ +#include <iostream> +#include <fkYAML/node.hpp> + +int main() +{ + // create a YAML sequence node. + fkyaml::node n1 = {123, 234, 345, 456}; + + // print YAML nodes at the following indexes. + std::cout << n1.at(0) << std::endl; + std::cout << n1.at(1) << std::endl; + std::cout << n1.at(2) << std::endl; + std::cout << n1.at(3) << std::endl; + + // try to print a YAML node with an index which exceeds the size. + try + { + std::cout << n1.at(4) << std::endl; + } + catch (const fkyaml::out_of_range& e) + { + std::cout << e.what() << std::endl; + } + + // create a YAML mapping node. + fkyaml::node n2 = {{"foo", true}, {"bar", 123}}; + + // print YAML nodes associated with the following keys. + std::cout << std::boolalpha << n2.at("foo") << std::endl; + std::cout << n2.at("bar") << std::endl; + + // try to print a YAML node with a key which does not exist. + try + { + std::cout << n2.at(true) << std::endl; + } + catch (const fkyaml::out_of_range& e) + { + std::cout << e.what() << std::endl; + } + + return 0; +} diff --git a/docs/examples/ex_basic_node_at_compatible_type.output b/docs/examples/ex_basic_node_at_compatible_type.output new file mode 100644 index 00000000..e348080f --- /dev/null +++ b/docs/examples/ex_basic_node_at_compatible_type.output @@ -0,0 +1,8 @@ +123 +234 +345 +456 +out_of_range: index 4 is out of range +true +123 +out_of_range: key 'true' is not found. 
diff --git a/docs/examples/ex_basic_node_operator_gt.cpp b/docs/examples/ex_basic_node_operator_gt.cpp index 68f81816..cd1bf995 100644 --- a/docs/examples/ex_basic_node_operator_gt.cpp +++ b/docs/examples/ex_basic_node_operator_gt.cpp @@ -35,4 +35,4 @@ int main() std::cout << (float_2 > seq_2) << std::endl; return 0; -} \ No newline at end of file +} diff --git a/docs/examples/ex_basic_node_operator_lt.cpp b/docs/examples/ex_basic_node_operator_lt.cpp index 55d33ef6..9413b826 100644 --- a/docs/examples/ex_basic_node_operator_lt.cpp +++ b/docs/examples/ex_basic_node_operator_lt.cpp @@ -35,4 +35,4 @@ int main() std::cout << (float_2 < seq_2) << std::endl; return 0; -} \ No newline at end of file +} diff --git a/docs/examples/ex_basic_node_subscript_operator_basic_node.cpp b/docs/examples/ex_basic_node_subscript_operator_basic_node.cpp index ac6d4fbb..d4450570 100644 --- a/docs/examples/ex_basic_node_subscript_operator_basic_node.cpp +++ b/docs/examples/ex_basic_node_subscript_operator_basic_node.cpp @@ -7,17 +7,31 @@ int main() fkyaml::node n1 = {123, 234, 345, 456}; // print YAML nodes at the following indexes. - std::cout << n1[0] << std::endl; - std::cout << n1[1] << std::endl; - std::cout << n1[2] << std::endl; - std::cout << n1[3] << std::endl; + fkyaml::node index_zero = 0; + fkyaml::node index_one = 1; + fkyaml::node index_two = 2; + fkyaml::node index_three = 3; + std::cout << n1[index_zero] << std::endl; + std::cout << n1[index_one] << std::endl; + std::cout << n1[index_two] << std::endl; + std::cout << n1[index_three] << std::endl; + + // this will cause an undefined behavior! + // fkyaml::node index_four = 4; + // std::cout << n1[index_four] << std::endl; // create a YAML node. fkyaml::node n2 = {{"foo", true}, {"bar", 123}}; // print YAML nodes associated with the following keys. 
- std::cout << std::boolalpha << n2[fkyaml::node("foo")] << std::endl; - std::cout << n2[fkyaml::node("bar")] << std::endl; + fkyaml::node foo_key = "foo"; + fkyaml::node bar_key = "bar"; + std::cout << std::boolalpha << n2[foo_key] << std::endl; + std::cout << n2[bar_key] << std::endl; + + // try to access a YAML node with a key which does not exist. + fkyaml::node true_key = true; + std::cout << n2[true_key] << std::endl; return 0; } diff --git a/docs/examples/ex_basic_node_subscript_operator_basic_node.output b/docs/examples/ex_basic_node_subscript_operator_basic_node.output index 58f755cc..4c9a43f9 100644 --- a/docs/examples/ex_basic_node_subscript_operator_basic_node.output +++ b/docs/examples/ex_basic_node_subscript_operator_basic_node.output @@ -4,3 +4,4 @@ 456 true 123 +null diff --git a/docs/examples/ex_basic_node_subscript_operator_compatible_type.cpp b/docs/examples/ex_basic_node_subscript_operator_compatible_type.cpp index 6f1f7903..f2dd7c6a 100644 --- a/docs/examples/ex_basic_node_subscript_operator_compatible_type.cpp +++ b/docs/examples/ex_basic_node_subscript_operator_compatible_type.cpp @@ -12,11 +12,18 @@ int main() std::cout << n1[2] << std::endl; std::cout << n1[3] << std::endl; + // this will cause an undefined behavior! + // std::cout << n1[4] << std::endl; + // create a YAML mapping node. fkyaml::node n2 = {{"foo", true}, {"bar", 123}}; // print YAML nodes associated with the following keys. std::cout << std::boolalpha << n2["foo"] << std::endl; std::cout << n2["bar"] << std::endl; + + // try to access a YAML node with a key which does not exist. 
+ std::cout << n2[true] << std::endl; + return 0; } diff --git a/docs/examples/ex_basic_node_subscript_operator_compatible_type.output b/docs/examples/ex_basic_node_subscript_operator_compatible_type.output index 58f755cc..4c9a43f9 100644 --- a/docs/examples/ex_basic_node_subscript_operator_compatible_type.output +++ b/docs/examples/ex_basic_node_subscript_operator_compatible_type.output @@ -4,3 +4,4 @@ 456 true 123 +null diff --git a/docs/examples/ex_exception_constructor_msg.cpp b/docs/examples/ex_exception_constructor_msg.cpp index d4a2c45e..4a6496ca 100644 --- a/docs/examples/ex_exception_constructor_msg.cpp +++ b/docs/examples/ex_exception_constructor_msg.cpp @@ -12,4 +12,4 @@ int main() std::cout << e.what() << std::endl; } return 0; -} \ No newline at end of file +} diff --git a/docs/examples/ex_macros_versions.output b/docs/examples/ex_macros_versions.output index f22bac64..0fd06d0c 100644 --- a/docs/examples/ex_macros_versions.output +++ b/docs/examples/ex_macros_versions.output @@ -1 +1 @@ -fkYAML version 0.3.1 +fkYAML version 0.3.3 diff --git a/docs/examples/example.yaml b/docs/examples/example.yaml index dd3be4b9..300baf5f 100644 --- a/docs/examples/example.yaml +++ b/docs/examples/example.yaml @@ -13,4 +13,4 @@ novels: year: 1932 - title: Never Let Me Go author: Kazuo Ishiguro - year: 2005 \ No newline at end of file + year: 2005 diff --git a/docs/examples/input.yaml b/docs/examples/input.yaml index e79b98e4..bfe18daf 100644 --- a/docs/examples/input.yaml +++ b/docs/examples/input.yaml @@ -1,3 +1,3 @@ foo: true bar: 123 -baz: 3.14 \ No newline at end of file +baz: 3.14 diff --git a/docs/mkdocs/docs/api/basic_node/at.md b/docs/mkdocs/docs/api/basic_node/at.md new file mode 100644 index 00000000..5e05aadf --- /dev/null +++ b/docs/mkdocs/docs/api/basic_node/at.md @@ -0,0 +1,136 @@ +Defined in header [``](https://github.com/fktn-k/fkYAML/blob/develop/include/fkYAML/node.hpp) + +# fkyaml::basic_node::at + +```cpp +template < + typename KeyType, 
detail::enable_if_t< + detail::conjunction< + detail::negation<detail::is_basic_node<KeyType>>, + detail::is_node_compatible_type<basic_node, KeyType>>::value, + int> = 0> +basic_node& at(KeyType&& key); // (1) + +template < + typename KeyType, detail::enable_if_t< + detail::conjunction< + detail::negation<detail::is_basic_node<KeyType>>, + detail::is_node_compatible_type<basic_node, KeyType>>::value, + int> = 0> +const basic_node& at(KeyType&& key) const; // (2) + +template < + typename KeyType, detail::enable_if_t<detail::is_basic_node<KeyType>::value, int> = 0> +basic_node& at(KeyType&& key); // (3) + +template < + typename KeyType, detail::enable_if_t<detail::is_basic_node<KeyType>::value, int> = 0> +const basic_node& at(KeyType&& key) const; // (4) +``` + +Access to a YAML node element with either an index (for sequences) or a key (for mappings). +Before accessing the element, this function checks the bounds in the case of a sequence or the existence of a key in the case of a mapping. +This function therefore costs a bit more than the [`basic_node::operator[]()`](operator[].md) function due to the extra checks. +Furthermore, this function may throw the following exceptions: + +* [`fkyaml::type_error`](../exception/type_error.md) + * if the queried node is neither a sequence nor a mapping, or + * if the queried node is a sequence but the given `key` is not an integer. +* [`fkyaml::out_of_range`](../exception/out_of_range.md) + * if the given key does not exist in the queried mapping, or + * if the given index is greater than or equal to the size of the queried sequence. 
+ +## Overload (1), (2) + +```cpp +template < + typename KeyType, detail::enable_if_t< + detail::conjunction< + detail::negation<detail::is_basic_node<KeyType>>, + detail::is_node_compatible_type<basic_node, KeyType>>::value, + int> = 0> +basic_node& at(KeyType&& key); // (1) + +template < + typename KeyType, detail::enable_if_t< + detail::conjunction< + detail::negation<detail::is_basic_node<KeyType>>, + detail::is_node_compatible_type<basic_node, KeyType>>::value, + int> = 0> +const basic_node& at(KeyType&& key) const; // (2) +``` + +Accesses an element in the YAML sequence/mapping node with the given key object of a type compatible with the [`basic_node`](index.md) class, i.e., a type with which a [`basic_node`](index.md) object is constructible. +These overloads internally construct a [`basic_node`](index.md) object with `key`. + + +### **Parameters** + +***`key`*** [in] +: An index/key for an element in the YAML sequence/mapping node. + +### **Return Value** + +Reference, or constant reference, to the YAML node object associated with the given index/key. + +???+ Example + + ```cpp + --8<-- "examples/ex_basic_node_at_compatible_type.cpp" + ``` + + output: + ```bash + --8<-- "examples/ex_basic_node_at_compatible_type.output" + ``` + +## Overload (3), (4) + +```cpp +template < + typename KeyType, detail::enable_if_t<detail::is_basic_node<KeyType>::value, int> = 0> +basic_node& at(KeyType&& key); // (3) + +template < + typename KeyType, detail::enable_if_t<detail::is_basic_node<KeyType>::value, int> = 0> +const basic_node& at(KeyType&& key) const; // (4) +``` + +Accesses an element in the YAML sequence/mapping node with the given [`basic_node`](index.md) key object. +Unlike the overloads (1) and (2) above, these overloads do not internally construct a [`basic_node`](index.md) object. +So, these overloads work more efficiently when some key objects are used multiple times, for instance, in a for-loop. + +### **Template Parameters** + +***KeyType*** +: A key type which is a kind of the [`basic_node`](index.md) template class. 
+ +### **Parameters** + +***`key`*** [in] +: An index/key for an element in the YAML sequence/mapping node. + +### **Return Value** + +Reference, or constant reference, to the YAML node object associated with the given index/key. + +???+ Example + + ```cpp + --8<-- "examples/ex_basic_node_at_basic_node.cpp" + ``` + + output: + ```bash + --8<-- "examples/ex_basic_node_at_basic_node.output" + ``` + +## **See Also** + +* [basic_node](index.md) +* [size](size.md) +* [contains](contains.md) +* [operator[]](operator[].md) +* [operator<<](insertion_operator.md) +* [out_of_range](../exception/out_of_range.md) +* [type_error](../exception/type_error.md) diff --git a/docs/mkdocs/docs/api/basic_node/boolean_type.md b/docs/mkdocs/docs/api/basic_node/boolean_type.md index a778bbd8..368e8c01 100644 --- a/docs/mkdocs/docs/api/basic_node/boolean_type.md +++ b/docs/mkdocs/docs/api/basic_node/boolean_type.md @@ -29,4 +29,4 @@ With the decided type, boolean objects are stored directly inside a [`basic_node ### **See Also** -* [basic_node](index.md) \ No newline at end of file +* [basic_node](index.md) diff --git a/docs/mkdocs/docs/api/basic_node/end.md b/docs/mkdocs/docs/api/basic_node/end.md index 4e97f39c..92375a22 100644 --- a/docs/mkdocs/docs/api/basic_node/end.md +++ b/docs/mkdocs/docs/api/basic_node/end.md @@ -34,4 +34,4 @@ An iterator to the past-the-last element of a container node value (either seque * [iterator](iterator.md) * [const_iterator](const_iterator.md) * [begin](begin.md) -* [operator<<](insertion_operator.md) \ No newline at end of file +* [operator<<](insertion_operator.md) diff --git a/docs/mkdocs/docs/api/basic_node/index.md b/docs/mkdocs/docs/api/basic_node/index.md index e2fecaec..df4e820e 100644 --- a/docs/mkdocs/docs/api/basic_node/index.md +++ b/docs/mkdocs/docs/api/basic_node/index.md @@ -36,16 +36,16 @@ This class provides features to handle YAML nodes. 
| Name | Description | |-------------------------------------------------|------------------------------------------------------------| +| [sequence_type](sequence_type.md) | The type used to store sequence node value containers. | +| [mapping_type](mapping_type.md) | The type used to store mapping node value containers. | | [boolean_type](boolean_type.md) | The type used to store boolean node values. | -| [const_iterator](const_iterator.md) | The type for constant iterators. | -| [float_number_type](float_number_type.md) | The type used to store float number node values. | | [integer_type](integer_type.md) | The type used to store integer node values. | -| [iterator](iterator.md) | The type for non-constant iterators. | -| [mapping_type](mapping_type.md) | The type used to store mapping node value containers. | -| [node_t](node_t.md) | The type used to store the internal value type. | -| [sequence_type](sequence_type.md) | The type used to store sequence node value containers. | +| [float_number_type](float_number_type.md) | The type used to store float number node values. | | [string_type](string_type.md) | The type used to store string node values. | | [value_converter_type](value_converter_type.md) | The type used to convert between node and native data. | +| [iterator](iterator.md) | The type for non-constant iterators. | +| [const_iterator](const_iterator.md) | The type for constant iterators. | +| [node_t](node_t.md) | The type used to store the internal value type. | | [yaml_version_t](yaml_version_t.md) | The type used to store the enable version of YAML. | ## Member Functions @@ -96,10 +96,11 @@ This class provides features to handle YAML nodes. | [empty](empty.md) | checks if a basic_node has an empty container. | | [size](size.md) | returns the size of a container value of a basic_node. 
| -### Access Elements in Containers -| Name | Description | -|-----------------------------|---------------------------------------------| -| [operator[]](operator[].md) | accesses an item specified by the key/index | +### Accessors for Container Elements +| Name | Description | +| --------------------------- | ----------------------------------------------------------------- | +| [operator[]](operator[].md) | accesses an item specified by the key/index. | +| [at](at.md) | accesses an item specified by the key/index with bounds checking. | ### Lexicographical Comparison Operators | Name | Description | diff --git a/docs/mkdocs/docs/api/basic_node/node_t.md b/docs/mkdocs/docs/api/basic_node/node_t.md index 9d64679a..e10c8bc0 100644 --- a/docs/mkdocs/docs/api/basic_node/node_t.md +++ b/docs/mkdocs/docs/api/basic_node/node_t.md @@ -47,4 +47,4 @@ This enumeration collects the different YAML value types. They are internally us * [is_boolean](is_boolean.md) * [is_integer](is_integer.md) * [is_float_number](is_float_number.md) -* [is_string](is_string.md) \ No newline at end of file +* [is_string](is_string.md) diff --git a/docs/mkdocs/docs/api/basic_node/operator[].md b/docs/mkdocs/docs/api/basic_node/operator[].md index 0f055291..ee911d69 100644 --- a/docs/mkdocs/docs/api/basic_node/operator[].md +++ b/docs/mkdocs/docs/api/basic_node/operator[].md @@ -28,19 +28,21 @@ template < const basic_node& operator[](KeyType&& key) const; // (4) ``` -Access to a YAML node element with either a key or an index. -If the node is neither a mapping nor a sequence, a [`fkyaml::type_error`](../exception/type_error.md) will be thrown. +Access to a YAML node element with either an index (for sequences) or a key (for mappings). +If the node is neither a sequence nor a mapping, or if the node is a sequence but the given `key` is not an integer, a [`fkyaml::type_error`](../exception/type_error.md) will be thrown. !!! 
Danger This API does not check the size of a sequence node before accessing the element. - To avoid undefined behaviors, please make sure the argument `index` is smaller than the actual sequence size with a return value of the [`basic_node::size()`](size.md) function. + To avoid undefined behaviors, please make sure the argument `key` is smaller than the actual sequence size with a return value of the [`basic_node::size()`](size.md) function. + If bounds check is necessary, prefer [`basic_node::at()`](at.md) function. !!! Warning - This API does not check the existence of the given key in the YAML mapping node. - If the given key does not exist, a default [basic_node](index.md) object will be created. + This API does not check the existence of the given `key` in a mapping node. + If the key does not exist, a default [basic_node](index.md) object will be created. Please make sure that the node has the given key in advance by calling the [`basic_node::contains()`](contains.md) function. + If such a behavior is unwanted, prefer [`basic_node::at()`](at.md) function. ## Overload (1), (2) @@ -64,7 +66,6 @@ const basic_node& operator[](KeyType&& key) const; // (2) Accesses to an element in the YAML sequence/mapping node with the given key object of a compatible type with the [`basic_node`](index.md) class, i.e., a type with which a [`basic_node`](index.md) object is constructible. These overloads internally construct a [`basic_node`](index.md) object with `key`. -If the node is a scalar, or if it is a sequence but the key is not an integer, a [`fkyaml::type_error`](../exception/type_error.md) will be thrown. ### **Parameters** @@ -101,7 +102,6 @@ const basic_node& operator[](KeyType&& key) const; // (4) Accesses to an element in the YAML sequence/mapping node with the given [`basic_node`](index.md) key object. Unlike the overloads (1) and (2) above, these overloads do not internally construct a [`basic_node`](index.md) object. 
So, these overloads works more effectively when some key objects are used multiple times, for instance, in a for-loop. -If the node is a scalar, or if it is a sequence but the key is not an integer, a [`fkyaml::type_error`](../exception/type_error.md) will be thrown. ### **Template Parameters** @@ -133,5 +133,6 @@ Reference, or constant reference, to the YAML node object associated with the gi * [basic_node](index.md) * [size](size.md) * [contains](contains.md) +* [at](at.md) * [operator<<](insertion_operator.md) * [type_error](../exception/type_error.md) diff --git a/docs/mkdocs/docs/api/basic_node/set_yaml_version.md b/docs/mkdocs/docs/api/basic_node/set_yaml_version.md index f6e547e7..e02e2f9a 100644 --- a/docs/mkdocs/docs/api/basic_node/set_yaml_version.md +++ b/docs/mkdocs/docs/api/basic_node/set_yaml_version.md @@ -28,4 +28,4 @@ Sets the version of the YAML format to the `basic_node` object. * [basic_node](index.md) * [yaml_verion_t](yaml_version_t.md) -* [set_yaml_version](set_yaml_version.md) \ No newline at end of file +* [set_yaml_version](set_yaml_version.md) diff --git a/docs/mkdocs/docs/api/basic_node/type.md b/docs/mkdocs/docs/api/basic_node/type.md index 97160ccc..b4a3ba69 100644 --- a/docs/mkdocs/docs/api/basic_node/type.md +++ b/docs/mkdocs/docs/api/basic_node/type.md @@ -35,4 +35,4 @@ The type of the YAML node value. ### **See Also** -* [node_t](node_t.md) \ No newline at end of file +* [node_t](node_t.md) diff --git a/docs/mkdocs/docs/api/exception/out_of_range.md b/docs/mkdocs/docs/api/exception/out_of_range.md new file mode 100644 index 00000000..db5169ee --- /dev/null +++ b/docs/mkdocs/docs/api/exception/out_of_range.md @@ -0,0 +1,24 @@ +Defined in header [`<fkYAML/exception.hpp>`](https://github.com/fktn-k/fkYAML/blob/develop/include/fkYAML/exception.hpp) + +# fkyaml::out_of_range + +```cpp +class out_of_range : public exception; +``` + +An exception class indicating an out-of-range access to a container node element.
+This class extends the [`fkyaml::exception`](index.md) class and the [`what()`](what.md) function emits an error message in either of the following formats: + +* if an index which is used to access an element is out of range. + ``` + out_of_range: index [index value] is out of range. + ``` +* if a key which is used to access an element is not found. + ``` + out_of_range: key '[key name]' is not found. + ``` + +## **See Also** + +* [exception](index.md) +* [what](what.md) diff --git a/docs/mkdocs/docs/home/releases.md b/docs/mkdocs/docs/home/releases.md index 5c42e628..16c9ff46 100644 --- a/docs/mkdocs/docs/home/releases.md +++ b/docs/mkdocs/docs/home/releases.md @@ -1,11 +1,61 @@ # Releases +## **fkYAML version 0.3.3** + +!!! abstract "Release Packages" + + * [fkYAML.tgz](https://github.com/fktn-k/fkYAML/releases/download/v0.3.3/fkYAML.tgz) + * [fkYAML.zip](https://github.com/fktn-k/fkYAML/releases/download/v0.3.3/fkYAML.zip) + * [fkYAML_single_header.zip](https://github.com/fktn-k/fkYAML/releases/download/v0.3.3/fkYAML_single_header.zip) + * [fkYAML_single_header.tgz](https://github.com/fktn-k/fkYAML/releases/download/v0.3.3/fkYAML_single_header.tgz) + * [node.hpp](https://github.com/fktn-k/fkYAML/releases/download/v0.3.3/node.hpp) (single header) + +### Summary + +This release adds a new basic_node API, at(), which validates either (1) that the specified key exists in the mapping or (2) that the specified index is less than the current size of the sequence before accessing the target node. +Several bugs in the deserialization feature have also been fixed. +In addition, refactoring of the deserializer has improved the handling of input characters.
+ +### What's Changed + +#### :sparkles: New Features + +- \#298 Add at\(\) API to the basic\_node class by [fktn-k](https://github.com/fktn-k) in [\#299](https://github.com/fktn-k/fkYAML/pull/299) + +#### :zap: Improvements + +- Further improvements of input handlings by [fktn-k](https://github.com/fktn-k) in [\#301](https://github.com/fktn-k/fkYAML/pull/301) +- Fixed warnings and made future warnings as errors by [fktn-k](https://github.com/fktn-k) in [\#300](https://github.com/fktn-k/fkYAML/pull/300) +- Improve handling UTF encoded inputs by [fktn-k](https://github.com/fktn-k) in [\#296](https://github.com/fktn-k/fkYAML/pull/296) +- Modified the way of formatting error messages for exception objects by [fktn-k](https://github.com/fktn-k) in [\#291](https://github.com/fktn-k/fkYAML/pull/291) + +#### :bug: Bug Fixes + +- \#302 Fixed parse error on alias mapping keys by [fktn-k](https://github.com/fktn-k) in [\#303](https://github.com/fktn-k/fkYAML/pull/303) +- \#292 Better handling for flow indicators in permitted scalar contexts by [stephenwhittle](https://github.com/stephenwhittle) in [\#293](https://github.com/fktn-k/fkYAML/pull/293) +- \#288 Fixed incorrect parse results from mapping entries split across newlines by [fktn-k](https://github.com/fktn-k) in [\#289](https://github.com/fktn-k/fkYAML/pull/289) (reported by [stephenwhittle](https://github.com/stephenwhittle)) + +#### :robot: CI + +- Resolve warnings against using Node.js 16 by [fktn-k](https://github.com/fktn-k) in [\#290](https://github.com/fktn-k/fkYAML/pull/290) + +#### :people_holding_hands: Community + +- \#297 Add a note for checking the coverage & upload the same as an artifact by [fktn-k](https://github.com/fktn-k) in [\#295](https://github.com/fktn-k/fkYAML/pull/295) (suggested by [stephenwhittle](https://github.com/stephenwhittle)) +- Add .editorconfig file by [fktn-k](https://github.com/fktn-k) in [\#287](https://github.com/fktn-k/fkYAML/pull/287) + +**Full Changelog**:
https://github.com/fktn-k/fkYAML/compare/v0.3.2...v0.3.3 + +--- + ## **fkYAML version 0.3.2** !!! abstract "Release Packages" * [fkYAML.tgz](https://github.com/fktn-k/fkYAML/releases/download/v0.3.2/fkYAML.tgz) * [fkYAML.zip](https://github.com/fktn-k/fkYAML/releases/download/v0.3.2/fkYAML.zip) + * [fkYAML_single_header.zip](https://github.com/fktn-k/fkYAML/releases/download/v0.3.2/fkYAML_single_header.zip) + * [fkYAML_single_header.tgz](https://github.com/fktn-k/fkYAML/releases/download/v0.3.2/fkYAML_single_header.tgz) * [node.hpp](https://github.com/fktn-k/fkYAML/releases/download/v0.3.2/node.hpp) (single header) ### Summary @@ -51,6 +101,8 @@ Furthermore, the list of the supported compilers are expanded since the GitHub A * [fkYAML.tgz](https://github.com/fktn-k/fkYAML/releases/download/v0.3.1/fkYAML.tgz) * [fkYAML.zip](https://github.com/fktn-k/fkYAML/releases/download/v0.3.1/fkYAML.zip) + * [fkYAML_single_header.zip](https://github.com/fktn-k/fkYAML/releases/download/v0.3.1/fkYAML_single_header.zip) + * [fkYAML_single_header.tgz](https://github.com/fktn-k/fkYAML/releases/download/v0.3.1/fkYAML_single_header.tgz) * [node.hpp](https://github.com/fktn-k/fkYAML/releases/download/v0.3.1/node.hpp) (single header) ### Summary @@ -460,4 +512,4 @@ Major known issues and limitaions are as follows: - Backward compatibilities for YAML 1.1 or older have not yet been implemented. The other issues and limitations will also be fixed in later versions. -Wait for such a version, or create a PR to fix one. \ No newline at end of file +Wait for such a version, or create a PR to fix one.
diff --git a/docs/mkdocs/docs/tutorials/cmake_integration.md b/docs/mkdocs/docs/tutorials/cmake_integration.md index 1beb43cf..8836ff4e 100644 --- a/docs/mkdocs/docs/tutorials/cmake_integration.md +++ b/docs/mkdocs/docs/tutorials/cmake_integration.md @@ -57,7 +57,7 @@ Since CMake v3.11, [`FetchContent`](https://cmake.org/cmake/help/latest/module/F FetchContent_Declare( fkYAML GIT_REPOSITORY https://github.com/fktn-k/fkYAML.git - GIT_TAG v0.3.2 + GIT_TAG v0.3.3 ) FetchContent_MakeAvailable(fkYAML) diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml index f3cb41dd..bde4fcda 100644 --- a/docs/mkdocs/mkdocs.yml +++ b/docs/mkdocs/mkdocs.yml @@ -96,7 +96,7 @@ nav: - Releases: home/releases.md - Supported Compilers: home/supported_compilers.md - Community Support: home/community_support.md - - Tutorials: + - Tutorials: - The First Steps: tutorials/index.md - CMake Integration: tutorials/cmake_integration.md - API References: @@ -107,6 +107,7 @@ nav: - operator=: api/basic_node/operator=.md - add_anchor_name: api/basic_node/add_anchor_name.md - alias_of: api/basic_node/alias_of.md + - at: api/basic_node/at.md - begin: api/basic_node/begin.md - boolean_type: api/basic_node/boolean_type.md - const_iterator: api/basic_node/const_iterator.md @@ -159,6 +160,7 @@ nav: - (destructor): api/exception/destructor.md - what: api/exception/what.md - invalid_encoding: api/exception/invalid_encoding.md + - out_of_range: api/exception/out_of_range.md - parse_error: api/exception/parse_error.md - type_error: api/exception/type_error.md - macros: api/macros.md diff --git a/fkYAML.natvis b/fkYAML.natvis index 394229fe..60d895de 100644 --- a/fkYAML.natvis +++ b/fkYAML.natvis @@ -4,26 +4,26 @@ - - - {*(m_node_value.p_sequence)} - {*(m_node_value.p_mapping)} - nullptr - {m_node_value.boolean} - {m_node_value.integer} - {m_node_value.float_val} - {*(m_node_value.p_string)} + + + {*(m_node_value.p_sequence)} + {*(m_node_value.p_mapping)} + nullptr + {m_node_value.boolean} + 
{m_node_value.integer} + {m_node_value.float_val} + {*(m_node_value.p_string)} - + *(m_node_value.p_sequence),view(simple) - + *(m_node_value.p_mapping),view(simple) - + {second} second diff --git a/include/fkYAML/detail/assert.hpp b/include/fkYAML/detail/assert.hpp index 83e9c5f3..7bbfe205 100644 --- a/include/fkYAML/detail/assert.hpp +++ b/include/fkYAML/detail/assert.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/include/fkYAML/detail/conversions/from_node.hpp b/include/fkYAML/detail/conversions/from_node.hpp index 485b8196..438b04ef 100644 --- a/include/fkYAML/detail/conversions/from_node.hpp +++ b/include/fkYAML/detail/conversions/from_node.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -294,4 +294,4 @@ FK_YAML_INLINE_VAR constexpr const auto& from_node = detail::static_const @@ -280,4 +280,4 @@ inline double from_string(const std::string& s, type_tag /*unused*/) FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_CONVERSIONS_FROM_STRING_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_CONVERSIONS_FROM_STRING_HPP_ */ diff --git a/include/fkYAML/detail/conversions/to_node.hpp b/include/fkYAML/detail/conversions/to_node.hpp index 660f096d..fd06a5c3 100644 --- a/include/fkYAML/detail/conversions/to_node.hpp +++ b/include/fkYAML/detail/conversions/to_node.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ 
__ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -369,4 +369,4 @@ FK_YAML_INLINE_VAR constexpr const auto& to_node = detail::static_const diff --git a/include/fkYAML/detail/encodings/encode_detector.hpp b/include/fkYAML/detail/encodings/encode_detector.hpp index c07447c9..7ddfafdd 100644 --- a/include/fkYAML/detail/encodings/encode_detector.hpp +++ b/include/fkYAML/detail/encodings/encode_detector.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -12,9 +12,10 @@ #define FK_YAML_DETAIL_ENCODINGS_ENCODE_DETECTOR_HPP_ #include +#include #include -#include +#include #include /// @brief namespace for fkYAML library. @@ -26,63 +27,68 @@ namespace detail /// @brief Detect an encoding type for UTF-8 expected inputs. /// @note This function doesn't support the case where the first character is null. -/// @param b0 The 1st byte of an input character sequence. -/// @param b1 The 2nd byte of an input character sequence. -/// @param b2 The 3rd byte of an input character sequence. -/// @param b3 The 4th byte of an input character sequence. +/// @param[in] bytes 4 bytes of an input character sequence. +/// @param[out] has_bom Whether or not the input contains a BOM. /// @return A detected encoding type. 
-inline encode_t detect_encoding_type(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3) noexcept +inline utf_encode_t detect_encoding_type(const std::array& bytes, bool& has_bom) noexcept { + has_bom = false; + // Check if a BOM exists. - if (b0 == uint8_t(0xEFu) && b1 == uint8_t(0xBBu) && b2 == uint8_t(0xBFu)) + if (bytes[0] == uint8_t(0xEFu) && bytes[1] == uint8_t(0xBBu) && bytes[2] == uint8_t(0xBFu)) { - return encode_t::UTF_8_BOM; + has_bom = true; + return utf_encode_t::UTF_8; } - if (b0 == 0 && b1 == 0 && b2 == uint8_t(0xFEu) && b3 == uint8_t(0xFFu)) + if (bytes[0] == 0 && bytes[1] == 0 && bytes[2] == uint8_t(0xFEu) && bytes[3] == uint8_t(0xFFu)) { - return encode_t::UTF_32BE_BOM; + has_bom = true; + return utf_encode_t::UTF_32BE; } - if (b0 == uint8_t(0xFFu) && b1 == uint8_t(0xFEu) && b2 == 0 && b3 == 0) + if (bytes[0] == uint8_t(0xFFu) && bytes[1] == uint8_t(0xFEu) && bytes[2] == 0 && bytes[3] == 0) { - return encode_t::UTF_32LE_BOM; + has_bom = true; + return utf_encode_t::UTF_32LE; } - if (b0 == uint8_t(0xFEu) && b1 == uint8_t(0xFFu)) + if (bytes[0] == uint8_t(0xFEu) && bytes[1] == uint8_t(0xFFu)) { - return encode_t::UTF_16BE_BOM; + has_bom = true; + return utf_encode_t::UTF_16BE; } - if (b0 == uint8_t(0xFFu) && b1 == uint8_t(0xFEu)) + if (bytes[0] == uint8_t(0xFFu) && bytes[1] == uint8_t(0xFEu)) { - return encode_t::UTF_16LE_BOM; + has_bom = true; + return utf_encode_t::UTF_16LE; } // Test the first character assuming it's an ASCII character. 
- if (b0 == 0 && b1 == 0 && b2 == 0 && 0 < b3 && b3 < uint8_t(0x80u)) + if (bytes[0] == 0 && bytes[1] == 0 && bytes[2] == 0 && 0 < bytes[3] && bytes[3] < uint8_t(0x80u)) { - return encode_t::UTF_32BE_N; + return utf_encode_t::UTF_32BE; } - if (0 < b0 && b0 < uint8_t(0x80u) && b1 == 0 && b2 == 0 && b3 == 0) + if (0 < bytes[0] && bytes[0] < uint8_t(0x80u) && bytes[1] == 0 && bytes[2] == 0 && bytes[3] == 0) { - return encode_t::UTF_32LE_N; + return utf_encode_t::UTF_32LE; } - if (b0 == 0 && 0 < b1 && b1 < uint8_t(0x80u)) + if (bytes[0] == 0 && 0 < bytes[1] && bytes[1] < uint8_t(0x80u)) { - return encode_t::UTF_16BE_N; + return utf_encode_t::UTF_16BE; } - if (0 < b0 && b0 < uint8_t(0x80u) && b1 == 0) + if (0 < bytes[0] && bytes[0] < uint8_t(0x80u) && bytes[1] == 0) { - return encode_t::UTF_16LE_N; + return utf_encode_t::UTF_16LE; } - return encode_t::UTF_8_N; + return utf_encode_t::UTF_8; } /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. @@ -92,9 +98,9 @@ inline encode_t detect_encoding_type(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t /// @param end The end of input iterators. /// @return A detected encoding type. template ())))> -inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) +inline utf_encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {{0xFFu, 0xFFu, 0xFFu, 0xFFu}}; switch (ElemSize) { case sizeof(char): { // this case covers char8_t as well when compiled with C++20 features. 
@@ -103,30 +109,34 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) bytes[i] = uint8_t(begin[i]); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (has_bom) { - case encode_t::UTF_8_BOM: - std::advance(begin, 3); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - std::advance(begin, 2); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - std::advance(begin, 4); - break; - default: - // Do nothing if a BOM doesn't exist. - break; + // skip reading the BOM. + switch (encode_type) + { + case utf_encode_t::UTF_8: + std::advance(begin, 3); + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + std::advance(begin, 2); + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + std::advance(begin, 4); + break; + } } + return encode_type; } case sizeof(char16_t): { if (begin == end) { - return encode_t::UTF_16BE_N; + return utf_encode_t::UTF_16BE; } for (int i = 0; i < 2 && begin + i != end; i++) { @@ -134,45 +144,47 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) bytes[i * 2 + 1] = uint8_t(begin[i] & 0xFFu); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (encode_type != utf_encode_t::UTF_16BE && encode_type != utf_encode_t::UTF_16LE) { - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - std::advance(begin, 1); - break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16LE_N: - // Do nothing if a BOM doesn't exist. - break; - default: throw exception("char16_t characters must be encoded in the UTF-16 format."); } + + if (has_bom) + { + // skip reading the BOM. 
+ std::advance(begin, 1); + } + return encode_type; } case sizeof(char32_t): { if (begin == end) { - return encode_t::UTF_32BE_N; + return utf_encode_t::UTF_32BE; } + bytes[0] = uint8_t((*begin & 0xFF000000u) >> 24); bytes[1] = uint8_t((*begin & 0x00FF0000u) >> 16); bytes[2] = uint8_t((*begin & 0x0000FF00u) >> 8); bytes[3] = uint8_t(*begin & 0x000000FFu); - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (encode_type != utf_encode_t::UTF_32BE && encode_type != utf_encode_t::UTF_32LE) { - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - std::advance(begin, 1); - break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32LE_N: - // Do nothing if a BOM doesn't exist. - break; - default: throw exception("char32_t characters must be encoded in the UTF-32 format."); } + + if (has_bom) + { + // skip reading the BOM. + std::advance(begin, 1); + } + return encode_type; } default: @@ -180,9 +192,9 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) } } -inline encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept +inline utf_encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {{0xFFu, 0xFFu, 0xFFu, 0xFFu}}; for (std::size_t i = 0; i < 4; i++) { char byte = 0; @@ -194,32 +206,36 @@ inline encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept bytes[i] = uint8_t(byte & 0xFF); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + // move back to the beginning if a BOM doesn't exist. 
+ long offset = 0; + if (has_bom) { - case encode_t::UTF_8_BOM: - fseek(file, 3, SEEK_SET); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - fseek(file, 2, SEEK_SET); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - fseek(file, 4, SEEK_SET); - break; - default: - // Move back to the beginning of the file contents if a BOM doesn't exist. - fseek(file, 0, SEEK_SET); - break; + switch (encode_type) + { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; + } } + fseek(file, offset, SEEK_SET); return encode_type; } -inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept +inline utf_encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {{0xFFu, 0xFFu, 0xFFu, 0xFFu}}; for (std::size_t i = 0; i < 4; i++) { char ch = 0; @@ -234,25 +250,29 @@ inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept bytes[i] = uint8_t(ch & 0xFF); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + // move back to the beginning if a BOM doesn't exist. + std::streamoff offset = 0; + if (has_bom) { - case encode_t::UTF_8_BOM: - is.seekg(3, std::ios_base::beg); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - is.seekg(2, std::ios_base::beg); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - is.seekg(4, std::ios_base::beg); - break; - default: - // Move back to the beginning of the file contents if a BOM doesn't exist. 
- is.seekg(0, std::ios_base::beg); - break; + switch (encode_type) + { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; + } } + is.seekg(offset, std::ios_base::beg); return encode_type; } @@ -261,4 +281,4 @@ inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_ENCODINGS_ENCODE_DETECTOR_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_ENCODINGS_ENCODE_DETECTOR_HPP_ */ diff --git a/include/fkYAML/detail/encodings/utf8_encoding.hpp b/include/fkYAML/detail/encodings/utf8_encoding.hpp index 76a4b11d..bf7b54d8 100644 --- a/include/fkYAML/detail/encodings/utf8_encoding.hpp +++ b/include/fkYAML/detail/encodings/utf8_encoding.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -324,4 +324,4 @@ class utf8_encoding FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODING_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODING_HPP_ */ diff --git a/include/fkYAML/detail/encodings/encode_t.hpp b/include/fkYAML/detail/encodings/utf_encode_t.hpp similarity index 53% rename from include/fkYAML/detail/encodings/encode_t.hpp rename to include/fkYAML/detail/encodings/utf_encode_t.hpp index 280f76d3..854da33a 100644 --- a/include/fkYAML/detail/encodings/encode_t.hpp +++ b/include/fkYAML/detail/encodings/utf_encode_t.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | 
|___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -8,8 +8,8 @@ /// /// @file -#ifndef FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ -#define FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ +#ifndef FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ +#define FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ #include @@ -22,22 +22,17 @@ namespace detail /// @brief Definition of Unicode encoding types /// @note Since fkYAML doesn't treat UTF-16/UTF-32 encoded characters per byte, endians do not matter. -enum class encode_t +enum class utf_encode_t { - UTF_8_N, //!< UTF-8 without BOM - UTF_8_BOM, //!< UTF-8 with BOM - UTF_16BE_N, //!< UTF-16BE without BOM - UTF_16BE_BOM, //!< UTF-16BE with BOM - UTF_16LE_N, //!< UTF-16LE without BOM - UTF_16LE_BOM, //!< UTF-16LE with BOM - UTF_32BE_N, //!< UTF-32BE without BOM - UTF_32BE_BOM, //!< UTF-32BE with BOM - UTF_32LE_N, //!< UTF-32LE without BOM - UTF_32LE_BOM, //!< UTF-32LE with BOM + UTF_8, //!< UTF-8 + UTF_16BE, //!< UTF-16 Big Endian + UTF_16LE, //!< UTF-16 Little Endian + UTF_32BE, //!< UTF-32 Big Endian + UTF_32LE, //!< UTF-32 Little Endian }; } // namespace detail FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ */ diff --git a/include/fkYAML/detail/input/deserializer.hpp b/include/fkYAML/detail/input/deserializer.hpp index 8d122f6e..7bbe3bce 100644 --- a/include/fkYAML/detail/input/deserializer.hpp +++ b/include/fkYAML/detail/input/deserializer.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 
2023-2024 Kensuke Fukutani @@ -62,7 +62,7 @@ class basic_deserializer template ::value, int> = 0> BasicNodeType deserialize(InputAdapterType&& input_adapter) { - lexical_analyzer lexer(std::forward(input_adapter)); + lexical_analyzer lexer(std::forward(input_adapter)); BasicNodeType root = BasicNodeType::mapping(); m_current_node = &root; @@ -128,12 +128,62 @@ class basic_deserializer throw parse_error("A key separator found without key.", cur_line, cur_indent); } - bool is_implicit = m_indent_stack.empty() || cur_indent > m_indent_stack.back().first; - if (is_implicit) + // hold the line count of the key separator for later use. + std::size_t old_indent = cur_indent; + std::size_t old_line = cur_line; + + type = lexer.get_next_token(); + if (type == lexical_token_t::COMMENT_PREFIX) { - break; + // just skip the comment and get the next token. + type = lexer.get_next_token(); + } + + cur_indent = lexer.get_last_token_begin_pos(); + cur_line = lexer.get_lines_processed(); + + bool is_implicit_same_line = + (cur_line == old_line) && (m_indent_stack.empty() || old_indent > m_indent_stack.back().first); + if (is_implicit_same_line) + { + // a key separator for an implicit key with its value on the same line. + continue; + } + + if (cur_line > old_line) + { + switch (type) + { + case lexical_token_t::SEQUENCE_BLOCK_PREFIX: + // a key separator preceding block sequence entries + *m_current_node = BasicNodeType::sequence(); + set_yaml_version(*m_current_node); + break; + case lexical_token_t::EXPLICIT_KEY_PREFIX: + // a key separator for an explicit block mapping key. + *m_current_node = BasicNodeType::mapping(); + set_yaml_version(*m_current_node); + break; + // defer checking the existence of a key separator after the scalar until a deserialize_scalar() + // call.
+ case lexical_token_t::NULL_VALUE: + case lexical_token_t::BOOLEAN_VALUE: + case lexical_token_t::INTEGER_VALUE: + case lexical_token_t::FLOAT_NUMBER_VALUE: + case lexical_token_t::STRING_VALUE: + // defer handling these tokens until the next loop. + case lexical_token_t::MAPPING_FLOW_BEGIN: + case lexical_token_t::SEQUENCE_FLOW_BEGIN: + break; + default: // LCOV_EXCL_LINE + break; // LCOV_EXCL_LINE + } + + continue; } + // handle explicit mapping key separators. + while (!m_indent_stack.back().second) { m_current_node = m_node_stack.back(); @@ -165,7 +215,6 @@ class basic_deserializer m_node_stack.push_back(m_node_stack.back()); m_indent_stack.back().second = false; - type = lexer.get_next_token(); if (type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) { *m_current_node = BasicNodeType::sequence(); @@ -180,18 +229,16 @@ class basic_deserializer case lexical_token_t::ANCHOR_PREFIX: { m_anchor_name = lexer.get_string(); m_needs_anchor_impl = true; - break; - } - case lexical_token_t::ALIAS_PREFIX: { - const string_type& alias_name = lexer.get_string(); - auto itr = m_anchor_table.find(alias_name); - if (itr == m_anchor_table.end()) - { - throw parse_error( - "The given anchor name must appear prior to the alias node.", cur_line, cur_indent); - } - assign_node_value(BasicNodeType::alias_of(m_anchor_table.at(alias_name))); - break; + + // Skip updating the current indent to avoid stacking a wrong indentation. + // + // &foo bar: baz + // ^ + // the correct indent width for the "bar" node key. 
+ + type = lexer.get_next_token(); + cur_line = lexer.get_lines_processed(); + continue; } case lexical_token_t::COMMENT_PREFIX: break; @@ -254,26 +301,6 @@ class basic_deserializer m_current_node = m_node_stack.back(); m_node_stack.pop_back(); break; - case lexical_token_t::MAPPING_BLOCK_PREFIX: - type = lexer.get_next_token(); - if (type == lexical_token_t::COMMENT_PREFIX) - { - type = lexer.get_next_token(); - } - if (type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) - { - *m_current_node = BasicNodeType::sequence(); - set_yaml_version(*m_current_node); - cur_indent = lexer.get_last_token_begin_pos(); - cur_line = lexer.get_lines_processed(); - continue; - } - - *m_current_node = BasicNodeType::mapping(); - set_yaml_version(*m_current_node); - cur_indent = lexer.get_last_token_begin_pos(); - cur_line = lexer.get_lines_processed(); - continue; case lexical_token_t::MAPPING_FLOW_BEGIN: *m_current_node = BasicNodeType::mapping(); set_yaml_version(*m_current_node); @@ -281,45 +308,13 @@ class basic_deserializer case lexical_token_t::MAPPING_FLOW_END: m_current_node = m_node_stack.back(); break; - case lexical_token_t::NULL_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_null()), cur_indent, cur_line, type); - if (do_continue) - { - continue; - } - break; - } - case lexical_token_t::BOOLEAN_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_boolean()), cur_indent, cur_line, type); - if (do_continue) - { - continue; - } - break; - } - case lexical_token_t::INTEGER_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_integer()), cur_indent, cur_line, type); - if (do_continue) - { - continue; - } - break; - } - case lexical_token_t::FLOAT_NUMBER_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_float_number()), cur_indent, cur_line, type); - if (do_continue) - { - continue; - } - break; - } + case lexical_token_t::ALIAS_PREFIX: + case 
lexical_token_t::NULL_VALUE: + case lexical_token_t::BOOLEAN_VALUE: + case lexical_token_t::INTEGER_VALUE: + case lexical_token_t::FLOAT_NUMBER_VALUE: case lexical_token_t::STRING_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_string()), cur_indent, cur_line, type); + bool do_continue = deserialize_scalar(lexer, cur_indent, cur_line, type); if (do_continue) { continue; @@ -333,8 +328,10 @@ class basic_deserializer break; } + lexical_token_t prev_type = type; type = lexer.get_next_token(); - cur_indent = lexer.get_last_token_begin_pos(); + // + cur_indent = (prev_type == lexical_token_t::ANCHOR_PREFIX) ? cur_indent : lexer.get_last_token_begin_pos(); cur_line = lexer.get_lines_processed(); } while (type != lexical_token_t::END_OF_BUFFER); @@ -350,7 +347,7 @@ class basic_deserializer private: /// @brief Add new key string to the current YAML node. /// @param key a key string to be added to the current YAML node. - void add_new_key(const BasicNodeType& key, const std::size_t indent, const std::size_t line) + void add_new_key(BasicNodeType&& key, const std::size_t indent, const std::size_t line) { if (!m_indent_stack.empty() && indent < m_indent_stack.back().first) { @@ -409,27 +406,11 @@ class basic_deserializer if (m_current_node->is_sequence()) { m_current_node->template get_value_ref().emplace_back(std::move(node_value)); - set_yaml_version(m_current_node->template get_value_ref().back()); - if (m_needs_anchor_impl) - { - m_current_node->template get_value_ref().back().add_anchor_name(m_anchor_name); - m_anchor_table[m_anchor_name] = m_current_node->template get_value_ref().back(); - m_needs_anchor_impl = false; - m_anchor_name.clear(); - } return; } // a scalar node *m_current_node = std::move(node_value); - set_yaml_version(*m_current_node); - if (m_needs_anchor_impl) - { - m_current_node->add_anchor_name(m_anchor_name); - m_anchor_table[m_anchor_name] = *m_current_node; - m_needs_anchor_impl = false; - m_anchor_name.clear(); - 
} if (!m_indent_stack.back().second) { m_current_node = m_node_stack.back(); @@ -437,23 +418,77 @@ class basic_deserializer } } + template + BasicNodeType create_scalar_node(LexerType& lexer, lexical_token_t type, std::size_t indent, std::size_t line) + { + FK_YAML_ASSERT( + type == lexical_token_t::NULL_VALUE || type == lexical_token_t::BOOLEAN_VALUE || + type == lexical_token_t::INTEGER_VALUE || type == lexical_token_t::FLOAT_NUMBER_VALUE || + type == lexical_token_t::STRING_VALUE || type == lexical_token_t::ALIAS_PREFIX); + + BasicNodeType node {}; + switch (type) + { + case lexical_token_t::NULL_VALUE: + node = BasicNodeType(lexer.get_null()); + break; + case lexical_token_t::BOOLEAN_VALUE: + node = BasicNodeType(lexer.get_boolean()); + break; + case lexical_token_t::INTEGER_VALUE: + node = BasicNodeType(lexer.get_integer()); + break; + case lexical_token_t::FLOAT_NUMBER_VALUE: + node = BasicNodeType(lexer.get_float_number()); + break; + case lexical_token_t::STRING_VALUE: + node = BasicNodeType(lexer.get_string()); + break; + case lexical_token_t::ALIAS_PREFIX: { + const string_type& alias_name = lexer.get_string(); + auto itr = m_anchor_table.find(alias_name); + if (itr == m_anchor_table.end()) + { + throw parse_error("The given anchor name must appear prior to the alias node.", line, indent); + } + node = BasicNodeType::alias_of(m_anchor_table[alias_name]); + break; + } + default: // LCOV_EXCL_LINE + break; // LCOV_EXCL_LINE + } + + set_yaml_version(node); + + if (m_needs_anchor_impl) + { + node.add_anchor_name(m_anchor_name); + m_anchor_table[m_anchor_name] = node; + m_needs_anchor_impl = false; + m_anchor_name.clear(); + } + + return node; + } + /// @brief Deserialize a detected scalar node. /// @param node A detected scalar node by a lexer. /// @param indent The current indentation width. Can be updated in this function. /// @param line The number of processed lines. Can be updated in this function. 
/// @return true if next token has already been got, false otherwise. template - bool deserialize_scalar( - LexerType& lexer, BasicNodeType&& node, std::size_t& indent, std::size_t& line, lexical_token_t& type) + bool deserialize_scalar(LexerType& lexer, std::size_t& indent, std::size_t& line, lexical_token_t& type) { + BasicNodeType node = create_scalar_node(lexer, type, indent, line); + if (m_current_node->is_mapping()) { - add_new_key(node, indent, line); + add_new_key(std::move(node), indent, line); return false; } type = lexer.get_next_token(); - if (type == lexical_token_t::KEY_SEPARATOR || type == lexical_token_t::MAPPING_BLOCK_PREFIX) + if (type == lexical_token_t::KEY_SEPARATOR) { if (m_current_node->is_scalar()) { @@ -470,8 +505,9 @@ class basic_deserializer return true; } *m_current_node = BasicNodeType::mapping(); + set_yaml_version(*m_current_node); } - add_new_key(node, indent, line); + add_new_key(std::move(node), indent, line); } else { diff --git a/include/fkYAML/detail/input/input_adapter.hpp b/include/fkYAML/detail/input/input_adapter.hpp index ad63fdc3..be112db3 100644 --- a/include/fkYAML/detail/input/input_adapter.hpp +++ b/include/fkYAML/detail/input/input_adapter.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -11,6 +11,7 @@ #ifndef FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP_ #define FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP_ +#include #include #include #include @@ -18,8 +19,9 @@ #include #include +#include #include -#include +#include #include #include #include @@ -45,9 +47,6 @@ class iterator_input_adapter< IterType, enable_if_t::value_type>, char>::value>> { public: - /// A type for characters used in this input adapter. 
- using char_type = char; - /// @brief Construct a new iterator_input_adapter object. iterator_input_adapter() = default; @@ -55,7 +54,7 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) @@ -71,150 +70,158 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. - typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - typename std::char_traits::int_type ret = 0; + buffer.clear(); + switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); + case utf_encode_t::UTF_8: + fill_buffer_utf8(buffer); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: - ret = get_character_for_utf16(); + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + fill_buffer_utf16(buffer); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: - ret = get_character_for_utf32(); + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + fill_buffer_utf32(buffer); break; } - return ret; } private: - /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf8() noexcept + /// @brief The concrete implementation of fill_buffer() for UTF-8 encoded inputs. + /// @param buffer A buffer to be filled with the input. 
+ void fill_buffer_utf8(std::string& buffer) { - if (m_current != m_end) - { - auto ret = std::char_traits::to_int_type(*m_current); - ++m_current; - return ret; - } - return std::char_traits::eof(); - } + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); - /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf16() - { - if (m_utf8_buf_index == m_utf8_buf_size) + IterType current = m_current; + while (current != m_end) { - if (m_current == m_end) + char first = *current++; + + // The first byte starts with 0b0XXX'XXXX -> 1-byte character + if ((first & 0xC0) == 0x80) + { + // The first byte must not start with 0b10XX'XXXX + std::array bytes {{first}}; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + // The first byte starts with 0b110X'XXXX -> 2-byte character + else if ((first & 0xE0) == 0xC0) { - if (m_encoded_buf_size == 0) + std::array bytes {{uint8_t(first), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) { - return std::char_traits::eof(); + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } } - - while (m_current != m_end && m_encoded_buf_size < 2) + // The first byte starts with 0b1110'XXXX -> 3-byte character + else if ((first & 0xF0) == 0xE0) { - switch (m_encode_type) + std::array bytes {{uint8_t(first), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(*m_current) << 8); - ++m_current; - m_encoded_buffer[m_encoded_buf_size] |= char16_t(*m_current); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { - m_encoded_buffer[m_encoded_buf_size] = char16_t(*m_current); - ++m_current; - m_encoded_buffer[m_encoded_buf_size] 
|= char16_t(uint8_t(*m_current) << 8); - break; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE + } + // The first byte starts with 0x1111'0XXX -> 4-byte character + else if ((first & 0xF8) == 0xF0) + { + std::array bytes { + {uint8_t(first), uint8_t(*current++), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } - ++m_current; - ++m_encoded_buf_size; + } + } + + buffer.assign(m_current, m_end); + } + + /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. + /// @param buffer A buffer to be filled with the input. + void fill_buffer_utf16(std::string& buffer) + { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + + int shift_bits[2] {0, 0}; + if (m_encode_type == utf_encode_t::UTF_16BE) + { + shift_bits[0] = 8; + } + else // m_encode_type == utf_encode_t::UTF_16LE + { + shift_bits[1] = 8; + } + + std::array encoded_buffer {{0, 0}}; + std::size_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + while (m_current != m_end || encoded_buf_size != 0) + { + while (m_current != m_end && encoded_buf_size < 2) + { + encoded_buffer[encoded_buf_size] = char16_t(uint8_t(*m_current++) << shift_bits[0]); + encoded_buffer[encoded_buf_size++] |= char16_t(uint8_t(*m_current++) << shift_bits[1]); } std::size_t consumed_size = 0; - utf8_encoding::from_utf16(m_encoded_buffer, m_utf8_buffer, consumed_size, m_utf8_buf_size); + utf8_encoding::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); if (consumed_size == 1) { - m_encoded_buffer[0] = m_encoded_buffer[1]; - m_encoded_buffer[1] = 0; + encoded_buffer[0] = encoded_buffer[1]; + encoded_buffer[1] = 0; } - m_encoded_buf_size -= consumed_size; + 
encoded_buf_size -= consumed_size; - m_utf8_buf_index = 0; + buffer.append(utf8_buffer.data(), utf8_buf_size); } - - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; } /// @brief The concrete implementation of get_character() for UTF-32 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf32() + void fill_buffer_utf32(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32BE) { - if (m_current == m_end) - { - return std::char_traits::eof(); - } + shift_bits[0] = 24; + shift_bits[1] = 16; + shift_bits[2] = 8; + } + else // m_encode_type == utf_encode_t::UTF_32LE + { + shift_bits[1] = 8; + shift_bits[2] = 16; + shift_bits[3] = 24; + } - char32_t utf32 = 0; - switch (m_encode_type) - { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - utf32 = char32_t(*m_current << 24); - ++m_current; - utf32 |= char32_t(*m_current << 16); - ++m_current; - utf32 |= char32_t(*m_current << 8); - ++m_current; - utf32 |= char32_t(*m_current); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { - utf32 = char32_t(*m_current); - ++m_current; - utf32 |= char32_t(*m_current << 8); - ++m_current; - utf32 |= char32_t(*m_current << 16); - ++m_current; - utf32 |= char32_t(*m_current << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE - } + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; - utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); - ++m_current; - m_utf8_buf_index = 0; - } + while (m_current != m_end) + { + char32_t utf32 = char32_t(*m_current++ << shift_bits[0]); + utf32 |= char32_t(*m_current++ << shift_bits[1]); + utf32 |= char32_t(*m_current++ << shift_bits[2]); + utf32 |= char32_t(*m_current++ << shift_bits[3]); + + utf8_encoding::from_utf32(utf32, utf8_buffer, utf8_buf_size); - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + buffer.append(utf8_buffer.data(), utf8_buf_size); + } } private: @@ -223,17 +230,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; - /// The buffer for decoding characters read from the input. - std::array m_encoded_buffer {{0, 0}}; - /// The number of elements in `m_encoded_buffer`. - std::size_t m_encoded_buf_size {0}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. - std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; #ifdef FK_YAML_HAS_CHAR8_T @@ -246,9 +243,6 @@ class iterator_input_adapter< enable_if_t::value_type>, char8_t>::value>> { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new iterator_input_adapter object. iterator_input_adapter() = default; @@ -256,11 +250,14 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. 
- iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + // char8_t characters must be encoded in the UTF-8 format. + // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); } // allow only move construct/assignment like other input adapters. @@ -272,35 +269,57 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. - typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - typename std::char_traits::int_type ret = 0; - switch (m_encode_type) + IterType current = m_current; + while (current != m_end) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); - break; - default: // LCOV_EXCL_LINE - // char8_t characters must be encoded in the UTF-8 format. - // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. 
- break; // LCOV_EXCL_LINE + char first = *current++; + + // The first byte starts with 0b0XXX'XXXX -> 1-byte character + if ((first & 0xC0) == 0x80) + { + // The first byte must not start with 0b10XX'XXXX + std::array bytes {{first}}; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + // The first byte starts with 0b110X'XXXX -> 2-byte character + else if ((first & 0xE0) == 0xC0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0b1110'XXXX -> 3-byte character + else if ((first & 0xF0) == 0xE0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0x1111'0XXX -> 4-byte character + else if ((first & 0xF8) == 0xF0) + { + std::array bytes { + {uint8_t(first), uint8_t(*current++), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } } - return ret; - } -private: - /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf8() noexcept - { - if (m_current != m_end) + while (m_current != m_end) { - auto ret = std::char_traits::to_int_type(*m_current); - ++m_current; - return ret; + buffer.push_back(char(*m_current++)); } - return std::char_traits::eof(); } private: @@ -309,7 +328,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. 
- encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; #endif // defined(FK_YAML_HAS_CHAR8_T) @@ -322,9 +341,6 @@ class iterator_input_adapter< enable_if_t::value_type>, char16_t>::value>> { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new iterator_input_adapter object. iterator_input_adapter() = default; @@ -332,11 +348,12 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); } // allow only move construct/assignment like other input adapters. @@ -348,57 +365,36 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. - typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + int shift_bits = (m_encode_type == utf_encode_t::UTF_16BE) ? 
0 : 8; + + std::array encoded_buffer {{0, 0}}; + std::size_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + while (m_current != m_end || encoded_buf_size != 0) { - if (m_current == m_end) + while (m_current != m_end && encoded_buf_size < 2) { - if (m_encoded_buf_size == 0) - { - return std::char_traits::eof(); - } - } - - while (m_current != m_end && m_encoded_buf_size < 2) - { - switch (m_encode_type) - { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - m_encoded_buffer[m_encoded_buf_size] = *m_current; - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { - char16_t tmp = *m_current; - m_encoded_buffer[m_encoded_buf_size] = char16_t((tmp & 0x00FFu) << 8); - m_encoded_buffer[m_encoded_buf_size] |= char16_t((tmp & 0xFF00u) >> 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE - } - ++m_current; - ++m_encoded_buf_size; + char16_t tmp = *m_current++; + encoded_buffer[encoded_buf_size] = char16_t((tmp & 0x00FFu) << shift_bits); + encoded_buffer[encoded_buf_size++] |= char16_t((tmp & 0xFF00u) >> shift_bits); } std::size_t consumed_size = 0; - utf8_encoding::from_utf16(m_encoded_buffer, m_utf8_buffer, consumed_size, m_utf8_buf_size); + utf8_encoding::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); if (consumed_size == 1) { - m_encoded_buffer[0] = m_encoded_buffer[1]; - m_encoded_buffer[1] = 0; + encoded_buffer[0] = encoded_buffer[1]; + encoded_buffer[1] = 0; } - m_encoded_buf_size -= consumed_size; + encoded_buf_size -= consumed_size; - m_utf8_buf_index = 0; + buffer.append(utf8_buffer.data(), utf8_buf_size); } - - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; } private: @@ -407,17 +403,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. 
- encode_t m_encode_type {encode_t::UTF_16BE_N}; - /// The buffer for decoding characters read from the input. - std::array m_encoded_buffer {{0, 0}}; - /// The number of elements in `m_encoded_buffer`. - std::size_t m_encoded_buf_size {0}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. - std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_16BE}; }; /// @brief An input adapter for iterators of type char32_t. @@ -428,9 +414,6 @@ class iterator_input_adapter< enable_if_t::value_type>, char32_t>::value>> { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new iterator_input_adapter object. iterator_input_adapter() = default; @@ -438,11 +421,12 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); } // allow only move construct/assignment like other input adapters. @@ -454,44 +438,32 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. 
- typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32LE) { - if (m_current == m_end) - { - return std::char_traits::eof(); - } + shift_bits[0] = 24; + shift_bits[1] = 8; + shift_bits[2] = 8; + shift_bits[3] = 24; + } - char32_t utf32 = 0; - switch (m_encode_type) - { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - utf32 = *m_current; - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { - char32_t tmp = *m_current; - utf32 |= char32_t((tmp & 0xFF000000u) >> 24); - utf32 |= char32_t((tmp & 0x00FF0000u) >> 8); - utf32 |= char32_t((tmp & 0x0000FF00u) << 8); - utf32 |= char32_t((tmp & 0x000000FFu) << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE - } + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; - utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); - ++m_current; - m_utf8_buf_index = 0; - } + while (m_current != m_end) + { + char32_t tmp = *m_current++; + char32_t utf32 = char32_t((tmp & 0xFF000000u) >> shift_bits[0]); + utf32 |= char32_t((tmp & 0x00FF0000u) >> shift_bits[1]); + utf32 |= char32_t((tmp & 0x0000FF00u) << shift_bits[2]); + utf32 |= char32_t((tmp & 0x000000FFu) << shift_bits[3]); - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + utf8_encoding::from_utf32(utf32, utf8_buffer, utf8_buf_size); + + buffer.append(utf8_buffer.data(), utf8_buf_size); + } } private: @@ -500,22 +472,13 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_32BE_N}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. 
- std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_32BE}; }; /// @brief An input adapter for C-style file handles. class file_input_adapter { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new file_input_adapter object. file_input_adapter() = default; @@ -525,7 +488,7 @@ class file_input_adapter /// It's user's responsibility to call those functions. /// @param file A file handle for this adapter. (A non-null pointer is assumed.) /// @param encode_type The encoding type for this input adapter. - explicit file_input_adapter(std::FILE* file, encode_t encode_type) noexcept + explicit file_input_adapter(std::FILE* file, utf_encode_t encode_type) noexcept : m_file(file), m_encode_type(encode_type) { @@ -540,173 +503,189 @@ class file_input_adapter /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. - typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); + case utf_encode_t::UTF_8: + fill_buffer_utf8(buffer); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: - ret = get_character_for_utf16(); + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + fill_buffer_utf16(buffer); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: - ret = get_character_for_utf32(); + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + fill_buffer_utf32(buffer); break; } - return ret; } private: /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. 
/// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf8() noexcept + void fill_buffer_utf8(std::string& buffer) { - char ch = 0; - size_t size = std::fread(&ch, sizeof(char), 1, m_file); - if (size == 1) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); + + char tmp_buf[256] {}; + std::size_t read_size = 0; + while ((read_size = std::fread(&tmp_buf[0], sizeof(char), sizeof(tmp_buf) / sizeof(tmp_buf[0]), m_file)) > 0) { - return std::char_traits::to_int_type(ch); + buffer.append(tmp_buf, read_size); } - return std::char_traits::eof(); - } - /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf16() - { - if (m_utf8_buf_index == m_utf8_buf_size) + auto current = buffer.begin(); + auto end = buffer.end(); + while (current != end) { - char chars[2] = {0, 0}; - while (m_encoded_buf_size < 2 && std::fread(&chars[0], sizeof(char), 2, m_file) == 2) + char first = *current++; + + // The first byte starts with 0b0XXX'XXXX -> 1-byte character + if ((first & 0xC0) == 0x80) { - switch (m_encode_type) + // The first byte must not start with 0b10XX'XXXX + std::array bytes {{first}}; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + // The first byte starts with 0b110X'XXXX -> 2-byte character + else if ((first & 0xE0) == 0xC0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0]) << 8); - m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1])); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0])); - 
m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1]) << 8); - break; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE + } + // The first byte starts with 0b1110'XXXX -> 3-byte character + else if ((first & 0xF0) == 0xE0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0x1111'0XXX -> 4-byte character + else if ((first & 0xF8) == 0xF0) + { + std::array bytes { + {uint8_t(first), uint8_t(*current++), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } - - ++m_encoded_buf_size; } + } + } - if (m_encoded_buf_size == 0) + /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. + /// @return A UTF-8 encoded byte at the current position, or EOF. 
+ void fill_buffer_utf16(std::string& buffer) + { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + + int shift_bits[2] {0, 0}; + if (m_encode_type == utf_encode_t::UTF_16BE) + { + shift_bits[0] = 8; + } + else // m_encode_type == utf_encode_t::UTF_16LE + { + shift_bits[1] = 8; + } + + char chars[2] = {0, 0}; + std::array encoded_buffer {{0, 0}}; + std::size_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + while (std::feof(m_file) == 0) + { + while (encoded_buf_size < 2 && std::fread(&chars[0], sizeof(char), 2, m_file) == 2) { - return std::char_traits::eof(); + encoded_buffer[encoded_buf_size] = char16_t(uint8_t(chars[0]) << shift_bits[0]); + encoded_buffer[encoded_buf_size++] |= char16_t(uint8_t(chars[1]) << shift_bits[1]); } std::size_t consumed_size = 0; - utf8_encoding::from_utf16(m_encoded_buffer, m_utf8_buffer, consumed_size, m_utf8_buf_size); + utf8_encoding::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); if (consumed_size == 1) { - m_encoded_buffer[0] = m_encoded_buffer[1]; - m_encoded_buffer[1] = 0; + encoded_buffer[0] = encoded_buffer[1]; + encoded_buffer[1] = 0; } - m_encoded_buf_size -= consumed_size; + encoded_buf_size -= consumed_size; - m_utf8_buf_index = 0; + buffer.append(utf8_buffer.data(), utf8_buf_size); } - - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; } /// @brief The concrete implementation of get_character() for UTF-32 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf32() + void fill_buffer_utf32(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32BE) + { + shift_bits[0] = 24; + shift_bits[1] = 16; + shift_bits[2] = 8; + } + else // m_encode_type == utf_encode_t::UTF_32LE + { + shift_bits[1] = 8; + shift_bits[2] = 16; + shift_bits[3] = 24; + } + + char chars[4] = {0, 0, 0, 0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + while (std::feof(m_file) == 0) { - char chars[4] = {0, 0, 0, 0}; std::size_t size = std::fread(&chars[0], sizeof(char), 4, m_file); if (size != 4) { - return std::char_traits::eof(); + return; } - char32_t utf32 = 0; - switch (m_encode_type) - { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - utf32 = char32_t(uint8_t(chars[0]) << 24); - utf32 |= char32_t(uint8_t(chars[1]) << 16); - utf32 |= char32_t(uint8_t(chars[2]) << 8); - utf32 |= char32_t(uint8_t(chars[3])); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { - utf32 = char32_t(uint8_t(chars[0])); - utf32 |= char32_t(uint8_t(chars[1]) << 8); - utf32 |= char32_t(uint8_t(chars[2]) << 16); - utf32 |= char32_t(uint8_t(chars[3]) << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE - } + char32_t utf32 = char32_t(uint8_t(chars[0]) << shift_bits[0]); + utf32 |= char32_t(uint8_t(chars[1]) << shift_bits[1]); + utf32 |= char32_t(uint8_t(chars[2]) << shift_bits[2]); + utf32 |= char32_t(uint8_t(chars[3]) << shift_bits[3]); - utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); - m_utf8_buf_index = 0; - } + utf8_encoding::from_utf32(utf32, utf8_buffer, utf8_buf_size); - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + buffer.append(utf8_buffer.data(), utf8_buf_size); + } } private: /// A pointer to the input file handle. std::FILE* m_file {nullptr}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; - /// The buffer for decoding characters read from the input. - std::array m_encoded_buffer {{0, 0}}; - /// The number of elements in `m_encoded_buffer`. - std::size_t m_encoded_buf_size {0}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. - std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; /// @brief An input adapter for streams class stream_input_adapter { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new stream_input_adapter object. stream_input_adapter() = default; /// @brief Construct a new stream_input_adapter object. /// @param is A reference to the target input stream. - explicit stream_input_adapter(std::istream& is, encode_t encode_type) noexcept + explicit stream_input_adapter(std::istream& is, utf_encode_t encode_type) noexcept : m_istream(&is), m_encode_type(encode_type) { @@ -721,165 +700,186 @@ class stream_input_adapter /// @brief Get a character at the current position and move forward. 
/// @return std::char_traits::int_type A character or EOF. - typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); + case utf_encode_t::UTF_8: + fill_buffer_utf8(buffer); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: - ret = get_character_for_utf16(); + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + fill_buffer_utf16(buffer); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: - ret = get_character_for_utf32(); + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + fill_buffer_utf32(buffer); break; } - return ret; } private: /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf8() noexcept + void fill_buffer_utf8(std::string& buffer) { - return m_istream->get(); + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); + + char tmp_buf[256] {}; + do + { + m_istream->read(&tmp_buf[0], 256); + std::size_t read_size = m_istream->gcount(); + buffer.append(tmp_buf, read_size); + } while (!m_istream->eof()); + + auto current = buffer.begin(); + auto end = buffer.end(); + while (current != end) + { + char first = *current++; + + // The first byte starts with 0b0XXX'XXXX -> 1-byte character + if ((first & 0xC0) == 0x80) + { + // The first byte must not start with 0b10XX'XXXX + std::array bytes {{first}}; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + // The first byte starts with 0b110X'XXXX -> 2-byte character + else if ((first & 0xE0) == 0xC0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0b1110'XXXX -> 3-byte character + else if ((first & 0xF0) == 0xE0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0x1111'0XXX -> 4-byte character + else if ((first & 0xF8) == 0xF0) + { + std::array bytes { + {uint8_t(first), uint8_t(*current++), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + } } /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf16() + void fill_buffer_utf16(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + + int shift_bits[2] {0, 0}; + if (m_encode_type == utf_encode_t::UTF_16BE) { - while (m_encoded_buf_size < 2) + shift_bits[0] = 8; + } + else // m_encode_type == utf_encode_t::UTF_16LE + { + shift_bits[1] = 8; + } + + char chars[2] = {0, 0}; + std::array encoded_buffer {{0, 0}}; + std::size_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + do + { + while (encoded_buf_size < 2) { - char chars[2] = {0, 0}; m_istream->read(&chars[0], 2); std::streamsize size = m_istream->gcount(); if (size != 2) { - if (m_encoded_buf_size == 0) - { - return std::char_traits::eof(); - } - break; - } - - switch (m_encode_type) - { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0]) << 8); - m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1])); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0])); - m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1]) << 8); break; } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE - } - ++m_encoded_buf_size; + encoded_buffer[encoded_buf_size] = char16_t(uint8_t(chars[0]) << shift_bits[0]); + encoded_buffer[encoded_buf_size++] |= char16_t(uint8_t(chars[1]) << shift_bits[1]); }; std::size_t consumed_size = 0; - utf8_encoding::from_utf16(m_encoded_buffer, m_utf8_buffer, consumed_size, m_utf8_buf_size); + utf8_encoding::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); if (consumed_size == 1) { - m_encoded_buffer[0] = m_encoded_buffer[1]; - m_encoded_buffer[1] = 0; + encoded_buffer[0] = encoded_buffer[1]; + encoded_buffer[1] = 0; } - m_encoded_buf_size -= consumed_size; + encoded_buf_size -= consumed_size; - m_utf8_buf_index = 0; - } - - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + buffer.append(utf8_buffer.data(), utf8_buf_size); + } while (!m_istream->eof()); } /// @brief The concrete implementation of get_character() for UTF-32 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf32() + void fill_buffer_utf32(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32BE) { - char ch = 0; - m_istream->read(&ch, 1); + shift_bits[0] = 24; + shift_bits[1] = 16; + shift_bits[2] = 8; + } + else // m_encode_type == utf_encode_t::UTF_32LE + { + shift_bits[1] = 8; + shift_bits[2] = 16; + shift_bits[3] = 24; + } + + char chars[4] = {0, 0, 0, 0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + do + { + m_istream->read(&chars[0], 4); std::streamsize size = m_istream->gcount(); - if (size != 1) + if (size != 4) { - return std::char_traits::eof(); + return; } - char32_t utf32 = 0; - switch (m_encode_type) - { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - utf32 = char32_t(ch << 24); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 16); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 8); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { - utf32 = char32_t(ch); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 8); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 16); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE - } + char32_t utf32 = char32_t(uint8_t(chars[0]) << shift_bits[0]); + utf32 |= char32_t(uint8_t(chars[1]) << shift_bits[1]); + utf32 |= char32_t(uint8_t(chars[2]) << shift_bits[2]); + utf32 |= char32_t(uint8_t(chars[3]) << shift_bits[3]); - utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); - m_utf8_buf_index = 0; - } + utf8_encoding::from_utf32(utf32, utf8_buffer, utf8_buf_size); - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + buffer.append(utf8_buffer.data(), utf8_buf_size); + } while (!m_istream->eof()); } private: /// A pointer to the input stream object. std::istream* m_istream {nullptr}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; - /// The buffer for decoding characters read from the input. - std::array m_encoded_buffer {{0, 0}}; - /// The number of elements in `m_encoded_buffer`. - std::size_t m_encoded_buf_size {0}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. - std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; ///////////////////////////////// @@ -894,7 +894,7 @@ class stream_input_adapter template ())))> inline iterator_input_adapter input_adapter(ItrType begin, ItrType end) { - encode_t encode_type = detect_encoding_and_skip_bom(begin, end); + utf_encode_t encode_type = detect_encoding_and_skip_bom(begin, end); return iterator_input_adapter(begin, end, encode_type); } @@ -909,7 +909,7 @@ inline auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array } /// @brief A namespace to implement container_input_adapter_factory for internal use. 
-namespace container_input_adapter_factory_impl +namespace input_adapter_factory { using std::begin; @@ -934,25 +934,25 @@ struct container_input_adapter_factory< decltype(input_adapter(begin(std::declval()), end(std::declval()))); /// @brief A factory method of input adapter objects for the target container objects. - /// @param container - /// @return adapter_type + /// @param container A container-like input object. + /// @return adapter_type An iterator_input_adapter object. static adapter_type create(const ContainerType& container) { return input_adapter(begin(container), end(container)); } }; -} // namespace container_input_adapter_factory_impl +} // namespace input_adapter_factory /// @brief A factory method for iterator_input_adapter objects with containers. /// @tparam ContainerType A container type. /// @param container A container object. -/// @return container_input_adapter_factory_impl::container_input_adapter_factory::adapter_type +/// @return input_adapter_factory::container_input_adapter_factory::adapter_type template -inline typename container_input_adapter_factory_impl::container_input_adapter_factory::adapter_type -input_adapter(ContainerType&& container) +inline typename input_adapter_factory::container_input_adapter_factory::adapter_type input_adapter( + ContainerType&& container) { - return container_input_adapter_factory_impl::container_input_adapter_factory::create(container); + return input_adapter_factory::container_input_adapter_factory::create(container); } /// @brief A factory method for file_input_adapter objects with C-style file handles. 
@@ -964,16 +964,16 @@ inline file_input_adapter input_adapter(std::FILE* file) { throw fkyaml::exception("Invalid FILE object pointer."); } - encode_t encode_type = detect_encoding_and_skip_bom(file); + utf_encode_t encode_type = detect_encoding_and_skip_bom(file); return file_input_adapter(file, encode_type); } -/// @brief -/// @param stream -/// @return stream_input_adapter +/// @brief A factory method for stream_input_adapter objects with std::istream objects. +/// @param stream An input stream. +/// @return stream_input_adapter A stream_input_adapter object. inline stream_input_adapter input_adapter(std::istream& stream) noexcept { - encode_t encode_type = detect_encoding_and_skip_bom(stream); + utf_encode_t encode_type = detect_encoding_and_skip_bom(stream); return stream_input_adapter(stream, encode_type); } @@ -981,4 +981,4 @@ inline stream_input_adapter input_adapter(std::istream& stream) noexcept FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP_ */ diff --git a/include/fkYAML/detail/input/input_handler.hpp b/include/fkYAML/detail/input/input_handler.hpp index 562f387a..549842f0 100644 --- a/include/fkYAML/detail/input/input_handler.hpp +++ b/include/fkYAML/detail/input/input_handler.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -27,21 +27,12 @@ namespace detail { /// @brief An input buffer handler. -/// @tparam InputAdapterType The type of the input adapter. -template ::value, int> = 0> class input_handler { -public: +private: /// The type of character traits of the input buffer. 
- using char_traits_type = std::char_traits; - /// The type of characters of the input buffer. - using char_type = typename char_traits_type::char_type; - /// The type of integers for the input buffer. - using int_type = typename char_traits_type::int_type; - /// The type of strings of the input buffer. - using string_type = std::basic_string; + using char_traits_type = std::char_traits; -private: /// @brief A set of information on the current position in an input buffer. struct position { @@ -55,102 +46,113 @@ class input_handler public: /// @brief Construct a new input_handler object. + /// @tparam InputAdapterType The type of the input adapter. /// @param input_adapter An input adapter object + template ::value, int> = 0> explicit input_handler(InputAdapterType&& input_adapter) - : m_input_adapter(std::move(input_adapter)) + : m_buffer_size(0) { - get_next(); - m_position.cur_pos = m_position.cur_pos_in_line = m_position.lines_read = 0; + input_adapter.fill_buffer(m_buffer); + m_buffer_size = m_buffer.size(); } /// @brief Get the character at the current position. - /// @return int_type A character or EOF. - int_type get_current() const noexcept + /// @return int A character or EOF. + int get_current() const noexcept { - return m_cache[m_position.cur_pos]; + if (m_position.cur_pos == m_buffer_size) + { + return s_end_of_input; + } + return char_traits_type::to_int_type(m_buffer[m_position.cur_pos]); } /// @brief Get the character at next position. - /// @return int_type A character or EOF. - int_type get_next() + /// @return int A character or EOF. + int get_next() { - int_type ret = end_of_input; - - // if already cached, return the cached value. - if (m_position.cur_pos + 1 < m_cache.size()) + // if all the input has already been consumed, return the EOF. 
+ if (m_position.cur_pos == m_buffer_size - 1) { - ret = m_cache[++m_position.cur_pos]; - ++m_position.cur_pos_in_line; + m_position.cur_pos++; + m_position.cur_pos_in_line++; + return s_end_of_input; } - else + + if (m_position.cur_pos == m_buffer_size) { - ret = m_input_adapter.get_character(); - if (ret != end_of_input || m_cache[m_position.cur_pos] != end_of_input) - { - // cache the return value for possible later use. - m_cache.push_back(ret); - ++m_position.cur_pos; - ++m_position.cur_pos_in_line; - } + return s_end_of_input; } - if (m_cache[m_position.cur_pos - 1] == '\n') + if (m_buffer[m_position.cur_pos] == '\n') { m_position.cur_pos_in_line = 0; ++m_position.lines_read; } + else + { + m_position.cur_pos_in_line++; + } - return ret; + return char_traits_type::to_int_type(m_buffer[++m_position.cur_pos]); } /// @brief Get the characters in the given range. /// @param length The length of characters retrieved from the current position. /// @param str A string which will contain the resulting characters. - /// @return int_type 0 (for success) or EOF (for error). - int_type get_range(std::size_t length, string_type& str) + /// @return int 0 (for success) or EOF (for error). + int get_range(std::size_t length, std::string& str) { str.clear(); - if (get_current() == end_of_input) + if (length == 0) { - return end_of_input; + // regard this case as successful in getting zero characters. 
+ return 0; } - str += char_traits_type::to_char_type(get_current()); + if (m_position.cur_pos + length - 1 >= m_buffer_size) + { + return s_end_of_input; + } + + str += m_buffer[m_position.cur_pos]; for (std::size_t i = 1; i < length; i++) { - if (get_next() == end_of_input) - { - // m_cur_pos -= i; - for (std::size_t j = i; j > 0; j--) - { - unget(); - } - str.clear(); - return end_of_input; - } - str += char_traits_type::to_char_type(get_current()); + str += char_traits_type::to_char_type(get_next()); } return 0; } + /// @brief Get the next character without changing the current position. + /// @return int A character if not already at the end of the input buffer, an EOF otherwise. + int peek_next() + { + if (m_position.cur_pos >= m_buffer_size - 1) + { + // there is no input character left. + return s_end_of_input; + } + + return char_traits_type::to_int_type(m_buffer[m_position.cur_pos + 1]); + } + /// @brief Move backward the current position. void unget() { if (m_position.cur_pos > 0) { - // just move back the cursor. (no action for adapter) --m_position.cur_pos; --m_position.cur_pos_in_line; - if (m_cache[m_position.cur_pos] == '\n') + if (m_buffer[m_position.cur_pos] == '\n') { --m_position.lines_read; m_position.cur_pos_in_line = 0; if (m_position.cur_pos > 0) { - for (std::size_t i = m_position.cur_pos - 1; m_cache[i] != '\n'; i--) + for (std::size_t i = m_position.cur_pos - 1; m_buffer[i] != '\n'; i--) { if (i == 0) { @@ -168,33 +170,11 @@ class input_handler /// @param length The length of moving backward. void unget_range(std::size_t length) { - for (std::size_t i = 0; i < length; i++) - { - unget(); - } - } - - /// @brief Check if the next character is the expected one. - /// @param expected An expected next character. - /// @return true The next character is the expected one. - /// @return false The next character is not the expected one. 
- bool test_next_char(char_type expected) - { - if (get_current() == end_of_input) - { - return false; - } - - int_type next = get_next(); - if (next == end_of_input) + size_t unget_num = (m_position.cur_pos < length) ? m_position.cur_pos : length; + for (std::size_t i = 0; i < unget_num; i++) { unget(); - return false; } - - bool ret = char_traits_type::eq(char_traits_type::to_char_type(next), expected); - unget(); - return ret; } /// @brief Get the current position in the current line. @@ -213,12 +193,12 @@ class input_handler private: /// The value of EOF for the target character type. - static constexpr int_type end_of_input = char_traits_type::eof(); + static constexpr int s_end_of_input = char_traits_type::eof(); - /// An input adapter object. - InputAdapterType m_input_adapter {}; - /// Cached characters retrieved from an input adapter object. - std::vector m_cache {}; + /// The input buffer retrieved from an input adapter object. + std::string m_buffer {}; + /// The size of the buffer. + std::size_t m_buffer_size {0}; /// The current position in an input buffer. 
position m_position {}; }; @@ -227,4 +207,4 @@ class input_handler FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_INPUT_INPUT_HANDLER_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_INPUT_INPUT_HANDLER_HPP_ */ diff --git a/include/fkYAML/detail/input/lexical_analyzer.hpp b/include/fkYAML/detail/input/lexical_analyzer.hpp index ddcf0407..d966c7b9 100644 --- a/include/fkYAML/detail/input/lexical_analyzer.hpp +++ b/include/fkYAML/detail/input/lexical_analyzer.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -45,17 +45,11 @@ namespace detail /// @brief A class which lexically analizes YAML formatted inputs. /// @tparam BasicNodeType A type of the container for YAML values. -template < - typename BasicNodeType, typename InputAdapterType, - enable_if_t, is_input_adapter>::value, int> = 0> +template ::value, int> = 0> class lexical_analyzer { private: - using input_handler_type = input_handler; - using char_traits_type = typename input_handler_type::char_traits_type; - using char_type = typename char_traits_type::char_type; - using char_int_type = typename char_traits_type::int_type; - using input_string_type = typename input_handler_type::string_type; + using char_traits_type = typename std::char_traits; enum class block_style_indicator_t { @@ -77,7 +71,9 @@ class lexical_analyzer using string_type = typename BasicNodeType::string_type; /// @brief Construct a new lexical_analyzer object. + /// @tparam InputAdapterType The type of the input adapter. /// @param input_adapter An input adapter object. 
+ template ::value, int> = 0> explicit lexical_analyzer(InputAdapterType&& input_adapter) : m_input_handler(std::move(input_adapter)) { @@ -89,8 +85,9 @@ class lexical_analyzer { skip_white_spaces_and_newline_codes(); - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); m_last_token_begin_pos = m_input_handler.get_cur_pos_in_line(); + m_last_token_begin_line = m_input_handler.get_lines_read(); if (0x00 <= current && current <= 0x7F && isdigit(current)) { @@ -99,7 +96,7 @@ class lexical_analyzer switch (current) { - case end_of_input: // end of input buffer + case s_end_of_input: // end of input buffer return m_last_token_type = lexical_token_t::END_OF_BUFFER; case '?': switch (m_input_handler.get_next()) @@ -110,74 +107,56 @@ class lexical_analyzer m_value_buffer = "?"; return m_last_token_type = scan_string(false); } - case ':': // key separater - switch (m_input_handler.get_next()) + case ':': { // key separater + current = m_input_handler.get_next(); + switch (current) { - case ' ': { - size_t prev_pos = m_input_handler.get_lines_read(); - skip_white_spaces_and_comments(); - size_t cur_pos = m_input_handler.get_lines_read(); - if (prev_pos == cur_pos) - { - current = m_input_handler.get_current(); - if (current != '\r' && current != '\n') - { - return m_last_token_type = lexical_token_t::KEY_SEPARATOR; - } - } - return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - } - case '\r': { - char_int_type next = m_input_handler.get_next(); - if (next == '\n') + case ' ': + case '\t': + case '\r': + case '\n': + case s_end_of_input: + break; + case ',': + case '[': + case ']': + case '{': + case '}': + if (m_flow_context_depth > 0) { - m_input_handler.get_next(); + // the above characters are not "safe" to be followed in a flow context. + // See https://yaml.org/spec/1.2.2/#733-plain-style for more details. 
+ break; } - return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - } - case '\n': - m_input_handler.get_next(); - return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; + m_value_buffer = ":"; + return scan_string(false); default: - emit_error("Half-width spaces or newline codes are required after a key separater(:)."); + m_value_buffer = ":"; + return scan_string(false); } + + return m_last_token_type = lexical_token_t::KEY_SEPARATOR; + } case ',': // value separater m_input_handler.get_next(); return m_last_token_type = lexical_token_t::VALUE_SEPARATOR; case '&': { // anchor prefix - m_value_buffer.clear(); - while (true) + extract_anchor_name(); + bool is_empty = m_value_buffer.empty(); + if (is_empty) { - char_int_type next = m_input_handler.get_next(); - if (next == end_of_input || next == '\r' || next == '\n') - { - emit_error("An anchor label must be followed by some value."); - } - if (next == ' ') - { - m_input_handler.get_next(); - break; - } - m_value_buffer.push_back(char_traits_type::to_char_type(next)); + emit_error("anchor name must not be empty."); } return m_last_token_type = lexical_token_t::ANCHOR_PREFIX; } case '*': { // alias prefix - m_value_buffer.clear(); - while (true) + extract_anchor_name(); + bool is_empty = m_value_buffer.empty(); + if (is_empty) { - char_int_type next = m_input_handler.get_next(); - if (next == ' ' || next == '\r' || next == '\n' || next == end_of_input) - { - if (m_value_buffer.empty()) - { - emit_error("An alias prefix must be followed by some anchor name."); - } - m_input_handler.get_next(); - break; - } - m_value_buffer.push_back(char_traits_type::to_char_type(next)); + emit_error("anchor name must not be empty."); } + return m_last_token_type = lexical_token_t::ALIAS_PREFIX; } case '#': // comment prefix @@ -186,7 +165,7 @@ class lexical_analyzer case '%': // directive prefix return m_last_token_type = scan_directive(); case '-': { - char_int_type next = m_input_handler.get_next(); + int 
next = m_input_handler.get_next(); if (next == ' ') { // Move a cursor to the beginning of the next token. @@ -200,8 +179,8 @@ class lexical_analyzer return m_last_token_type = scan_number(); } - char_int_type ret = m_input_handler.get_range(3, m_value_buffer); - if (ret != end_of_input) + int ret = m_input_handler.get_range(3, m_value_buffer); + if (ret != s_end_of_input) { if (m_value_buffer == "---") { @@ -209,10 +188,10 @@ class lexical_analyzer return m_last_token_type = lexical_token_t::END_OF_DIRECTIVES; } - m_input_handler.unget_range(2); + m_input_handler.get_next(); } - return m_last_token_type = scan_string(); + return m_last_token_type = scan_string(ret == s_end_of_input); } case '[': // sequence flow begin m_flow_context_depth++; @@ -248,8 +227,8 @@ class lexical_analyzer case '+': return m_last_token_type = scan_number(); case '.': { - char_int_type ret = m_input_handler.get_range(3, m_value_buffer); - if (ret != end_of_input) + int ret = m_input_handler.get_range(3, m_value_buffer); + if (ret != s_end_of_input) { if (m_value_buffer == "...") { @@ -293,7 +272,7 @@ class lexical_analyzer /// @return std::size_t The number of lines already processed. std::size_t get_lines_processed() const noexcept { - return m_input_handler.get_lines_read(); + return m_last_token_begin_line; } /// @brief Convert from string to null and get the converted value. @@ -346,7 +325,7 @@ class lexical_analyzer const string_type& get_string() const noexcept { // TODO: Provide support for different string types between nodes & inputs. - static_assert(std::is_same::value, "Unsupported, different string types."); + static_assert(std::is_same::value, "Unsupported, different string types."); return m_value_buffer; } @@ -364,7 +343,7 @@ class lexical_analyzer /// @brief A utility function to convert a hexadecimal character to an integer. /// @param source A hexadecimal character ('0'~'9', 'A'~'F', 'a'~'f') /// @return char A integer converted from @a source. 
- char convert_hex_char_to_byte(char_int_type source) const + char convert_hex_char_to_byte(int source) const { if ('0' <= source && source <= '9') { @@ -406,7 +385,7 @@ class lexical_analyzer switch (m_input_handler.get_next()) { - case end_of_input: + case s_end_of_input: emit_error("invalid eof in a directive."); case 'T': { if (m_input_handler.get_next() != 'A' || m_input_handler.get_next() != 'G') @@ -486,6 +465,48 @@ class lexical_analyzer return lexical_token_t::YAML_VER_DIRECTIVE; } + /// @brief Extracts an anchor name from the input and assigns the result to `m_value_buffer`. + void extract_anchor_name() + { + int current = m_input_handler.get_current(); + FK_YAML_ASSERT(current == '&' || current == '*'); + + m_value_buffer.clear(); + + while ((current = m_input_handler.get_next()) != s_end_of_input) + { + switch (current) + { + case s_end_of_input: + // anchor name must not contain white spaces, newline codes and flow indicators. + // See https://yaml.org/spec/1.2.2/#692-node-anchors for more details. + case ' ': + case '\t': + case '\r': + case '\n': + case '{': + case '}': + case '[': + case ']': + case ',': + return; + case ':': { + int peeked = m_input_handler.peek_next(); + if (peeked == ' ') + { + // Stop the extraction at the key separator. + return; + } + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + break; + } + default: + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + break; + } + } + } + /// @brief Scan and determine a number type(integer/float). This method is the entrypoint for all number /// tokens. /// @return lexical_token_t A lexical token type for a determined number type. 
@@ -493,7 +514,7 @@ class lexical_analyzer { m_value_buffer.clear(); - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); FK_YAML_ASSERT(std::isdigit(current) || current == '-' || current == '+'); lexical_token_t ret = lexical_token_t::END_OF_BUFFER; @@ -545,7 +566,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for either integer or float numbers. lexical_token_t scan_negative_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); // The value of `next` must be guranteed to be a digit in the get_next_token() function. FK_YAML_ASSERT(std::isdigit(next)); @@ -557,16 +578,16 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float). lexical_token_t scan_number_after_zero_at_first() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); switch (next) { case '.': m_value_buffer.push_back(char_traits_type::to_char_type(next)); return scan_decimal_number_after_decimal_point(); case 'o': - // Do not store 'o' since std::strtoull does not support "0o" but "0" as the prefix for octal numbers. + // Do not store 'o' since std::stoXXX does not support "0o" but "0" as the prefix for octal numbers. // YAML specifies octal values start with the prefix "0o". - // See "10.3.2 Tag Resolution" section in https://yaml.org/spec/1.2.2/ + // See https://yaml.org/spec/1.2.2/#1032-tag-resolution for more details. return scan_octal_number(); case 'x': m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -580,7 +601,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for float numbers. 
lexical_token_t scan_decimal_number_after_decimal_point() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -596,7 +617,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for float numbers. lexical_token_t scan_decimal_number_after_exponent() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == '+' || next == '-') { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -618,7 +639,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float) lexical_token_t scan_decimal_number_after_sign() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -633,7 +654,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float) lexical_token_t scan_decimal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -666,7 +687,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for integers. lexical_token_t scan_octal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if ('0' <= next && next <= '7') { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -679,7 +700,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for integers. lexical_token_t scan_hexadecimal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isxdigit(next)) { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -765,12 +786,12 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for strings. 
lexical_token_t extract_string_token(bool needs_last_single_quote, bool needs_last_double_quote) { - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); for (;; current = m_input_handler.get_next()) { // Handle the end of input buffer. - if (current == end_of_input) + if (current == s_end_of_input) { if (needs_last_double_quote) { @@ -790,25 +811,44 @@ class lexical_analyzer if (!needs_last_double_quote && !needs_last_single_quote) { // Allow a space in an unquoted string only if the space is surrounded by non-space characters. - // See "7.3.3 Plain Style" section in https://yaml.org/spec/1.2.2/ - current = m_input_handler.get_next(); - switch (current) + // See https://yaml.org/spec/1.2.2/#733-plain-style for more details. + int next = m_input_handler.get_next(); + + // These characters are permitted when not inside a flow collection, and not inside an implicit key. + // TODO: Support detection of implicit key context for this check. + if (m_flow_context_depth > 0) + { + switch (next) + { + case '{': + case '}': + case '[': + case ']': + case ',': + return lexical_token_t::STRING_VALUE; + } + } + + // " :" is permitted in a plain style string token, but not when followed by a space. + if (next == ':') + { + int peeked = m_input_handler.peek_next(); + if (peeked == ' ') + { + return lexical_token_t::STRING_VALUE; + } + } + + switch (next) { case ' ': case '\r': case '\n': - case '{': - case '}': - case '[': - case ']': - case ',': - case ':': case '#': case '\\': return lexical_token_t::STRING_VALUE; } m_input_handler.unget(); - current = m_input_handler.get_current(); } m_value_buffer.push_back(char_traits_type::to_char_type(current)); continue; @@ -855,8 +895,7 @@ class lexical_analyzer continue; } - char_int_type next = m_input_handler.get_next(); - m_input_handler.unget(); + int next = m_input_handler.peek_next(); // A colon as a key separator must be followed by a space or a newline code. 
if (next != ' ' && next != '\r' && next != '\n') @@ -920,7 +959,7 @@ class lexical_analyzer } // Handle escaped characters. - // See "5.7 Escaped Characters" section in https://yaml.org/spec/1.2.2/ + // See https://yaml.org/spec/1.2.2/#57-escaped-characters for more details. if (current == '\\') { if (!needs_last_double_quote) @@ -953,7 +992,7 @@ class lexical_analyzer m_value_buffer.push_back('\r'); break; case 'e': - m_value_buffer.push_back(char_type(0x1B)); + m_value_buffer.push_back(char(0x1B)); break; case ' ': m_value_buffer.push_back(' '); @@ -969,31 +1008,19 @@ class lexical_analyzer break; case 'N': // next line utf8_encoding::from_utf32(0x85u, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); break; case '_': // non-breaking space utf8_encoding::from_utf32(0xA0u, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); break; case 'L': // line separator utf8_encoding::from_utf32(0x2028u, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); break; case 'P': // paragraph separator utf8_encoding::from_utf32(0x2029u, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); break; case 'x': handle_escaped_unicode(1); @@ -1017,6 +1044,8 @@ class lexical_analyzer continue; } + // The other characters are already checked while creating an input handler. + // Handle ASCII characters except control characters. 
if (current <= 0x7E) { @@ -1027,46 +1056,26 @@ class lexical_analyzer // Handle 2-byte characters encoded in UTF-8. (U+0080..U+07FF) if (current <= 0xDF) { - std::array byte_array = {{current, m_input_handler.get_next()}}; - if (!utf8_encoding::validate(byte_array)) - { - throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); - } - - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[0])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[1])); + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); continue; } // Handle 3-byte characters encoded in UTF-8. (U+1000..U+D7FF,U+E000..U+FFFF) if (current <= 0xEF) { - std::array byte_array = { - {current, m_input_handler.get_next(), m_input_handler.get_next()}}; - if (!utf8_encoding::validate(byte_array)) - { - throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); - } - - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[0])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[1])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[2])); + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); continue; } // Handle 4-byte characters encoded in UTF-8. 
(U+10000..U+FFFFF,U+100000..U+10FFFF) - std::array byte_array = { - {current, m_input_handler.get_next(), m_input_handler.get_next(), m_input_handler.get_next()}}; - if (!utf8_encoding::validate(byte_array)) - { - throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); - } - - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[0])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[1])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[2])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[3])); + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); } } @@ -1076,7 +1085,7 @@ class lexical_analyzer m_value_buffer.clear(); // Handle leading all-space lines. 
- char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); for (;; current = m_input_handler.get_next()) { if (current == ' ') @@ -1094,7 +1103,7 @@ class lexical_analyzer continue; } - if (current == end_of_input) + if (current == s_end_of_input) { if (chomp != chomping_indicator_t::KEEP) { @@ -1136,7 +1145,7 @@ class lexical_analyzer } } - for (; current != end_of_input; current = m_input_handler.get_next()) + for (; current != s_end_of_input; current = m_input_handler.get_next()) { if (current == '\r') { @@ -1206,24 +1215,24 @@ class lexical_analyzer } else { - switch (m_input_handler.get_next()) + switch (int next = m_input_handler.peek_next()) { case '\r': { m_input_handler.get_next(); - FK_YAML_ASSERT(m_input_handler.get_current() == '\n'); - m_value_buffer.push_back(char_traits_type::to_char_type('\n')); + next = m_input_handler.get_next(); + FK_YAML_ASSERT(next == '\n'); + m_value_buffer.push_back(char_traits_type::to_char_type(next)); break; } case '\n': - m_value_buffer.push_back(char_traits_type::to_char_type('\n')); + m_input_handler.get_next(); + m_value_buffer.push_back(char_traits_type::to_char_type(next)); break; case ' ': // The next line is more indented, so a newline will be appended in the next loop. - m_input_handler.unget(); break; default: m_value_buffer.push_back(char_traits_type::to_char_type(' ')); - m_input_handler.unget(); break; } } @@ -1302,7 +1311,7 @@ class lexical_analyzer /// @brief Handle unescaped control characters. /// @param c A target character. - void handle_unescaped_control_char(char_int_type c) + void handle_unescaped_control_char(int c) { FK_YAML_ASSERT(0x00 <= c && c <= 0x1F); @@ -1386,26 +1395,26 @@ class lexical_analyzer // Treats the code point as a UTF-32 encoded character. 
utf8_encoding::from_utf32(code_point, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); } void get_block_style_metadata(chomping_indicator_t& chomp_type, std::size_t& indent) { - char_int_type ch = m_input_handler.get_next(); + int ch = m_input_handler.get_next(); chomp_type = chomping_indicator_t::CLIP; - if (ch == '-') + switch (ch) { + case '-': chomp_type = chomping_indicator_t::STRIP; ch = m_input_handler.get_next(); - } - else if (ch == '+') - { + break; + case '+': chomp_type = chomping_indicator_t::KEEP; ch = m_input_handler.get_next(); + break; + default: + break; } if (ch == '0') @@ -1437,7 +1446,7 @@ class lexical_analyzer default: return; } - } while (m_input_handler.get_next() != end_of_input); + } while (m_input_handler.get_next() != s_end_of_input); } /// @brief Skip white spaces and newline codes (CR/LF) from the current position. @@ -1455,7 +1464,7 @@ class lexical_analyzer default: return; } - } while (m_input_handler.get_next() != end_of_input); + } while (m_input_handler.get_next() != s_end_of_input); } /// @brief Skip white spaces and comments from the current position. @@ -1474,7 +1483,7 @@ class lexical_analyzer default: return; } - } while (m_input_handler.get_next() != end_of_input); + } while (m_input_handler.get_next() != s_end_of_input); } /// @brief Skip the rest in the current line. @@ -1496,7 +1505,7 @@ class lexical_analyzer default: break; } - } while (m_input_handler.get_next() != end_of_input); + } while (m_input_handler.get_next() != s_end_of_input); } [[noreturn]] void emit_error(const char* msg) const @@ -1506,15 +1515,20 @@ class lexical_analyzer private: /// The value of EOF for the target characters. 
- static constexpr char_int_type end_of_input = char_traits_type::eof(); + static constexpr int s_end_of_input = char_traits_type::eof(); /// An input buffer adapter to be analyzed. - input_handler_type m_input_handler; + input_handler m_input_handler; /// A temporal buffer to store a string to be parsed to an actual datum. - input_string_type m_value_buffer {}; + std::string m_value_buffer {}; + /// A temporal buffer to store a UTF-8 encoded char sequence. std::array m_encode_buffer {}; + /// The actual size of a UTF-8 encoded char sequence. std::size_t m_encoded_size {0}; + /// The beginning position of the last lexical token. (zero origin) std::size_t m_last_token_begin_pos {0}; + /// The beginning line of the last lexical token. (zero origin) + std::size_t m_last_token_begin_line {0}; /// The current depth of flow context. uint32_t m_flow_context_depth {0}; /// The last found token type. diff --git a/include/fkYAML/detail/iterator.hpp b/include/fkYAML/detail/iterator.hpp index 38cc838e..ffc56793 100644 --- a/include/fkYAML/detail/iterator.hpp +++ b/include/fkYAML/detail/iterator.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/include/fkYAML/detail/macros/cpp_config_macros.hpp b/include/fkYAML/detail/macros/cpp_config_macros.hpp index 10c5d6f7..f0fb33f0 100644 --- a/include/fkYAML/detail/macros/cpp_config_macros.hpp +++ b/include/fkYAML/detail/macros/cpp_config_macros.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| 
|_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -56,4 +56,4 @@ #endif #endif -#endif /* FK_YAML_DETAIL_MACROS_CPP_CONFIG_MACROS_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_MACROS_CPP_CONFIG_MACROS_HPP_ */ diff --git a/include/fkYAML/detail/macros/version_macros.hpp b/include/fkYAML/detail/macros/version_macros.hpp index b16001dd..764769e4 100644 --- a/include/fkYAML/detail/macros/version_macros.hpp +++ b/include/fkYAML/detail/macros/version_macros.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -10,7 +10,7 @@ // Check version definitions if already defined. #if defined(FK_YAML_MAJOR_VERSION) && defined(FK_YAML_MINOR_VERSION) && defined(FK_YAML_PATCH_VERSION) - #if FK_YAML_MAJOR_VERSION != 0 || FK_YAML_MINOR_VERSION != 3 || FK_YAML_PATCH_VERSION != 2 + #if FK_YAML_MAJOR_VERSION != 0 || FK_YAML_MINOR_VERSION != 3 || FK_YAML_PATCH_VERSION != 3 #warning Already included a different version of the fkYAML library! #else // define macros to skip defining macros down below. 
@@ -22,7 +22,7 @@ #define FK_YAML_MAJOR_VERSION 0 #define FK_YAML_MINOR_VERSION 3 - #define FK_YAML_PATCH_VERSION 2 + #define FK_YAML_PATCH_VERSION 3 #define FK_YAML_NAMESPACE_VERSION_CONCAT_IMPL(major, minor, patch) v##major##_##minor##_##patch diff --git a/include/fkYAML/detail/meta/detect.hpp b/include/fkYAML/detail/meta/detect.hpp index 8a6eac7d..57b773bd 100644 --- a/include/fkYAML/detail/meta/detect.hpp +++ b/include/fkYAML/detail/meta/detect.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/include/fkYAML/detail/meta/input_adapter_traits.hpp b/include/fkYAML/detail/meta/input_adapter_traits.hpp index 3b160e3b..d8cdd0be 100644 --- a/include/fkYAML/detail/meta/input_adapter_traits.hpp +++ b/include/fkYAML/detail/meta/input_adapter_traits.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -11,6 +11,7 @@ #ifndef FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP_ #define FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP_ +#include #include #include @@ -24,51 +25,27 @@ FK_YAML_NAMESPACE_BEGIN namespace detail { -///////////////////////////////// -// API representative types -///////////////////////////////// - -/// @brief A type which represents T::char_type; -/// @tparam T A target type to check if it has char_type; -template -using detect_char_type_helper_t = typename T::char_type; - -/// @brief A type which represents 
get_character function. -/// @tparam T A target type. -template -using get_character_fn_t = decltype(std::declval().get_character()); - -/// @brief Type traits to check if T has char_type as its member. -/// @tparam T A target type. -/// @tparam typename N/A -template -struct has_char_type : std::false_type -{ -}; - /////////////////////////////////////////// // Input Adapter API detection traits /////////////////////////////////////////// -/// @brief A partial specialization of has_char_type if T has char_type as its member. +/// @brief A type which represents fill_buffer function. /// @tparam T A target type. template -struct has_char_type::value>> : std::true_type -{ -}; +using fill_buffer_fn_t = decltype(std::declval().fill_buffer(std::declval())); /// @brief Type traits to check if InputAdapterType has get_character member function. /// @tparam InputAdapterType An input adapter type to check if it has get_character function. /// @tparam typename N/A template -struct has_get_character : std::false_type +struct has_fill_buffer : std::false_type { }; -/// @brief A partial specialization of has_get_character if InputAdapterType has get_character member function. +/// @brief A partial specialization of has_fill_buffer if InputAdapterType has fill_buffer member function. /// @tparam InputAdapterType A type of a target input adapter. template -struct has_get_character::value>> +struct has_fill_buffer::value>> : std::true_type { }; @@ -88,10 +65,7 @@ struct is_input_adapter : std::false_type /// @brief A partial specialization of is_input_adapter if T is an input adapter type.
/// @tparam InputAdapterType template -struct is_input_adapter< - InputAdapterType, - enable_if_t, has_get_character>::value>> - : std::true_type +struct is_input_adapter::value>> : std::true_type { }; @@ -99,4 +73,4 @@ struct is_input_adapter< FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP_ */ diff --git a/include/fkYAML/detail/meta/node_traits.hpp b/include/fkYAML/detail/meta/node_traits.hpp index a5f54683..a10eaeac 100644 --- a/include/fkYAML/detail/meta/node_traits.hpp +++ b/include/fkYAML/detail/meta/node_traits.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -183,4 +183,4 @@ struct is_node_compatible_type : is_node_compatible_type_impl @@ -205,4 +205,4 @@ using std::remove_cvref_t; FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_META_STL_SUPPLEMENT_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_META_STL_SUPPLEMENT_HPP_ */ diff --git a/include/fkYAML/detail/meta/type_traits.hpp b/include/fkYAML/detail/meta/type_traits.hpp index 28441f98..e69435c5 100644 --- a/include/fkYAML/detail/meta/type_traits.hpp +++ b/include/fkYAML/detail/meta/type_traits.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/include/fkYAML/detail/node_property.hpp b/include/fkYAML/detail/node_property.hpp 
index 41e5004c..56929c7e 100644 --- a/include/fkYAML/detail/node_property.hpp +++ b/include/fkYAML/detail/node_property.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -40,4 +40,4 @@ struct node_property FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_NODE_PROPERTY_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_NODE_PROPERTY_HPP_ */ diff --git a/include/fkYAML/detail/node_ref_storage.hpp b/include/fkYAML/detail/node_ref_storage.hpp index 077dc9ed..81013b88 100644 --- a/include/fkYAML/detail/node_ref_storage.hpp +++ b/include/fkYAML/detail/node_ref_storage.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -99,4 +99,4 @@ class node_ref_storage FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_NODE_REF_STORAGE_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_NODE_REF_STORAGE_HPP_ */ diff --git a/include/fkYAML/detail/output/serializer.hpp b/include/fkYAML/detail/output/serializer.hpp index 51bdc7ac..0185e292 100644 --- a/include/fkYAML/detail/output/serializer.hpp +++ b/include/fkYAML/detail/output/serializer.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| 
https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -340,7 +340,7 @@ class basic_serializer } auto adapter = input_adapter(s); - lexical_analyzer lexer(std::move(adapter)); + lexical_analyzer lexer(std::move(adapter)); lexical_token_t token_type = lexer.get_next_token(); if (token_type != lexical_token_t::STRING_VALUE) diff --git a/include/fkYAML/detail/string_formatter.hpp b/include/fkYAML/detail/string_formatter.hpp new file mode 100644 index 00000000..b76c924d --- /dev/null +++ b/include/fkYAML/detail/string_formatter.hpp @@ -0,0 +1,55 @@ +/// _______ __ __ __ _____ __ __ __ +/// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 +/// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +/// +/// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani +/// SPDX-License-Identifier: MIT +/// +/// @file + +#ifndef FK_YAML_DETAIL_STRING_FORMATTER_HPP_ +#define FK_YAML_DETAIL_STRING_FORMATTER_HPP_ + +#include +#include +#include +#include + +#include + +/// @namespace namespace for fkYAML library. +FK_YAML_NAMESPACE_BEGIN + +/// @namespace namespace for internal implementation of fkYAML library. +namespace detail +{ + +inline std::string format(const char* fmt, ...) 
+{ + va_list vl; + va_start(vl, fmt); + int size = std::vsnprintf(nullptr, 0, fmt, vl); + va_end(vl); + + // LCOV_EXCL_START + if (size < 0) + { + return ""; + } + // LCOV_EXCL_STOP + + std::unique_ptr buffer {new char[size + 1] {}}; + + va_start(vl, fmt); + size = std::vsnprintf(buffer.get(), size + 1, fmt, vl); + va_end(vl); + + return std::string(buffer.get(), size); +} + +} // namespace detail + +FK_YAML_NAMESPACE_END + +#endif /* FK_YAML_DETAIL_STRING_FORMATTER_HPP_ */ diff --git a/include/fkYAML/detail/types/lexical_token_t.hpp b/include/fkYAML/detail/types/lexical_token_t.hpp index e90e5156..e25f868e 100644 --- a/include/fkYAML/detail/types/lexical_token_t.hpp +++ b/include/fkYAML/detail/types/lexical_token_t.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -36,7 +36,6 @@ enum class lexical_token_t SEQUENCE_BLOCK_PREFIX, //!< the character for sequence block prefix `- ` SEQUENCE_FLOW_BEGIN, //!< the character for sequence flow begin `[` SEQUENCE_FLOW_END, //!< the character for sequence flow end `]` - MAPPING_BLOCK_PREFIX, //!< the character for mapping block prefix `:` MAPPING_FLOW_BEGIN, //!< the character for mapping begin `{` MAPPING_FLOW_END, //!< the character for mapping end `}` NULL_VALUE, //!< a null value found. use get_null() to get a value. 
@@ -52,4 +51,4 @@ enum class lexical_token_t FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_TYPES_LEXICAL_TOKEN_T_HPP_ */ \ No newline at end of file +#endif /* FK_YAML_DETAIL_TYPES_LEXICAL_TOKEN_T_HPP_ */ diff --git a/include/fkYAML/detail/types/node_t.hpp b/include/fkYAML/detail/types/node_t.hpp index 2c912b1c..3bc2b219 100644 --- a/include/fkYAML/detail/types/node_t.hpp +++ b/include/fkYAML/detail/types/node_t.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -35,7 +35,7 @@ enum class node_t : std::uint32_t STRING, //!< string value type }; -inline std::string to_string(node_t t) noexcept +inline const char* to_string(node_t t) noexcept { switch (t) { diff --git a/include/fkYAML/detail/types/yaml_version_t.hpp b/include/fkYAML/detail/types/yaml_version_t.hpp index 1d2725c3..1a90e64a 100644 --- a/include/fkYAML/detail/types/yaml_version_t.hpp +++ b/include/fkYAML/detail/types/yaml_version_t.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/include/fkYAML/exception.hpp b/include/fkYAML/exception.hpp index 5afdf113..7d2c51dc 100644 --- a/include/fkYAML/exception.hpp +++ b/include/fkYAML/exception.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ 
version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -14,9 +14,9 @@ #include #include #include -#include #include +#include #include /// @brief namespace for fkYAML library. @@ -88,14 +88,13 @@ class invalid_encoding : public exception template std::string generate_error_message(const char* msg, std::array u8) const noexcept { - std::stringstream ss; - ss << "invalid_encoding: " << msg << " in=[ 0x" << std::hex << u8[0]; + std::string formatted = detail::format("invalid_encoding: %s in=[ 0x%02x", msg, u8[0]); for (std::size_t i = 1; i < N; i++) { - ss << ", 0x" << std::hex << u8[i]; + formatted += detail::format(", 0x%02x", u8[i]); } - ss << " ]"; - return ss.str(); + formatted += " ]"; + return formatted; } /// @brief Generate an error message from the given parameters for the UTF-16 encoding. @@ -105,11 +104,8 @@ class invalid_encoding : public exception /// @return A generated error message. std::string generate_error_message(const char* msg, std::array u16) const noexcept { - std::stringstream ss; - ss << "invalid_encoding: " << msg; // uint16_t is large enough for UTF-16 encoded elements. - ss << " in=[ 0x" << std::hex << uint16_t(u16[0]) << ", 0x" << std::hex << uint16_t(u16[1]) << " ]"; - return ss.str(); + return detail::format("invalid_encoding: %s in=[ 0x%04x, 0x%04x ]", msg, uint16_t(u16[0]), uint16_t(u16[1])); } /// @brief Generate an error message from the given parameters for the UTF-32 encoding. @@ -118,10 +114,8 @@ class invalid_encoding : public exception /// @return A genereated error message. std::string generate_error_message(const char* msg, char32_t u32) const noexcept { - std::stringstream ss; // uint32_t is large enough for UTF-32 encoded elements. 
- ss << "invalid_encoding: " << msg << " in=0x" << std::hex << uint32_t(u32); - return ss.str(); + return detail::format("invalid_encoding: %s in=0x%08x", msg, uint32_t(u32)); } }; @@ -137,9 +131,7 @@ class parse_error : public exception private: std::string generate_error_message(const char* msg, std::size_t lines, std::size_t cols_in_line) const noexcept { - std::stringstream ss; - ss << "parse_error: " << msg << " (at line " << lines << ", column " << cols_in_line << ")"; - return ss.str(); + return detail::format("parse_error: %s (at line %zu, column %zu)", msg, lines, cols_in_line); } }; @@ -163,9 +155,32 @@ class type_error : public exception /// @return A generated error message. std::string generate_error_message(const char* msg, detail::node_t type) const noexcept { - std::stringstream ss; - ss << "type_error: " << msg << " type=" << detail::to_string(type); - return ss.str(); + return detail::format("type_error: %s type=%s", msg, detail::to_string(type)); + } +}; + +class out_of_range : public exception +{ +public: + explicit out_of_range(int index) noexcept + : exception(generate_error_message(index).c_str()) + { + } + + explicit out_of_range(const char* key) noexcept + : exception(generate_error_message(key).c_str()) + { + } + +private: + std::string generate_error_message(int index) + { + return detail::format("out_of_range: index %d is out of range", index); + } + + std::string generate_error_message(const char* key) + { + return detail::format("out_of_range: key \'%s\' is not found.", key); } }; diff --git a/include/fkYAML/node.hpp b/include/fkYAML/node.hpp index 564123a8..d9420ab1 100644 --- a/include/fkYAML/node.hpp +++ b/include/fkYAML/node.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML 
/// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -1018,6 +1018,178 @@ class basic_node } } + /// @brief Get a basic_node object with a key of a compatible type. + /// @tparam KeyType A key type compatible with basic_node + /// @param key A key to the target basic_node object in a sequence/mapping node. + /// @return Reference to the basic_node object associated with the given key. + /// @sa https://fktn-k.github.io/fkYAML/api/basic_node/at/ + template < + typename KeyType, detail::enable_if_t< + detail::conjunction< + detail::negation>, + detail::is_node_compatible_type>::value, + int> = 0> + basic_node& at(KeyType&& key) + { + if (is_scalar()) + { + throw fkyaml::type_error("at() is unavailable for a scalar node.", m_node_type); + } + + basic_node node_key = std::forward(key); + + if (is_sequence()) + { + if (!node_key.is_integer()) + { + throw fkyaml::type_error("An argument of at() for sequence nodes must be an integer.", m_node_type); + } + + FK_YAML_ASSERT(m_node_value.p_sequence != nullptr); + int index = node_key.template get_value(); + int size = static_cast(m_node_value.p_sequence->size()); + if (index >= size) + { + throw fkyaml::out_of_range(index); + } + return m_node_value.p_sequence->at(index); + } + + FK_YAML_ASSERT(m_node_value.p_mapping != nullptr); + bool is_found = m_node_value.p_mapping->find(node_key) != m_node_value.p_mapping->end(); + if (!is_found) + { + throw fkyaml::out_of_range(serialize(node_key).c_str()); + } + return m_node_value.p_mapping->at(node_key); + } + + /// @brief Get a basic_node object with a key of a compatible type. + /// @tparam KeyType A key type compatible with basic_node + /// @param key A key to the target basic_node object in a sequence/mapping node. + /// @return Constant reference to the basic_node object associated with the given key. 
+ /// @sa https://fktn-k.github.io/fkYAML/api/basic_node/at/ + template < + typename KeyType, detail::enable_if_t< + detail::conjunction< + detail::negation>, + detail::is_node_compatible_type>::value, + int> = 0> + const basic_node& at(KeyType&& key) const + { + if (is_scalar()) + { + throw fkyaml::type_error("at() is unavailable for a scalar node.", m_node_type); + } + + basic_node node_key = std::forward(key); + + if (is_sequence()) + { + if (!node_key.is_integer()) + { + throw fkyaml::type_error("An argument of at() for sequence nodes must be an integer.", m_node_type); + } + + FK_YAML_ASSERT(m_node_value.p_sequence != nullptr); + int index = node_key.template get_value(); + int size = static_cast(m_node_value.p_sequence->size()); + if (index >= size) + { + throw fkyaml::out_of_range(index); + } + return m_node_value.p_sequence->at(index); + } + + FK_YAML_ASSERT(m_node_value.p_mapping != nullptr); + bool is_found = m_node_value.p_mapping->find(node_key) != m_node_value.p_mapping->end(); + if (!is_found) + { + throw fkyaml::out_of_range(serialize(node_key).c_str()); + } + return m_node_value.p_mapping->at(node_key); + } + + /// @brief Get a basic_node object with a basic_node key object. + /// @tparam KeyType A key type which is a kind of the basic_node template class. + /// @param key A key to the target basic_node object in a sequence/mapping node. + /// @return Reference to the basic_node object associated with the given key. 
+ /// @sa https://fktn-k.github.io/fkYAML/api/basic_node/at/ + template < + typename KeyType, detail::enable_if_t>::value, int> = 0> + basic_node& at(KeyType&& key) + { + if (is_scalar()) + { + throw fkyaml::type_error("at() is unavailable for a scalar node.", m_node_type); + } + + if (is_sequence()) + { + if (!key.is_integer()) + { + throw fkyaml::type_error("An argument of at() for sequence nodes must be an integer.", m_node_type); + } + + FK_YAML_ASSERT(m_node_value.p_sequence != nullptr); + int index = std::forward(key).template get_value(); + int size = static_cast(m_node_value.p_sequence->size()); + if (index >= size) + { + throw fkyaml::out_of_range(index); + } + return m_node_value.p_sequence->at(index); + } + + FK_YAML_ASSERT(m_node_value.p_mapping != nullptr); + bool is_found = m_node_value.p_mapping->find(key) != m_node_value.p_mapping->end(); + if (!is_found) + { + throw fkyaml::out_of_range(serialize(key).c_str()); + } + return m_node_value.p_mapping->at(key); + } + + /// @brief Get a basic_node object with a basic_node key object. + /// @tparam KeyType A key type which is a kind of the basic_node template class. + /// @param key A key to the target basic_node object in a sequence/mapping node. + /// @return Constant reference to the basic_node object associated with the given key. 
+ /// @sa https://fktn-k.github.io/fkYAML/api/basic_node/at/ + template < + typename KeyType, detail::enable_if_t>::value, int> = 0> + const basic_node& at(KeyType&& key) const + { + if (is_scalar()) + { + throw fkyaml::type_error("at() is unavailable for a scalar node.", m_node_type); + } + + if (is_sequence()) + { + if (!key.is_integer()) + { + throw fkyaml::type_error("An argument of at() for sequence nodes must be an integer.", m_node_type); + } + + FK_YAML_ASSERT(m_node_value.p_sequence != nullptr); + int index = std::forward(key).template get_value(); + int size = static_cast(m_node_value.p_sequence->size()); + if (index >= size) + { + throw fkyaml::out_of_range(index); + } + return m_node_value.p_sequence->at(index); + } + + FK_YAML_ASSERT(m_node_value.p_mapping != nullptr); + bool is_found = m_node_value.p_mapping->find(key) != m_node_value.p_mapping->end(); + if (!is_found) + { + throw fkyaml::out_of_range(serialize(key).c_str()); + } + return m_node_value.p_mapping->at(key); + } + /// @brief Get the YAML version specification for this basic_node object. /// @return The version of the YAML format applied to the basic_node object. 
/// @sa https://fktn-k.github.io/fkYAML/api/basic_node/get_yaml_version/ diff --git a/include/fkYAML/node_value_converter.hpp b/include/fkYAML/node_value_converter.hpp index 4d1b7681..80a2fc4f 100644 --- a/include/fkYAML/node_value_converter.hpp +++ b/include/fkYAML/node_value_converter.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/include/fkYAML/ordered_map.hpp b/include/fkYAML/ordered_map.hpp index 906b9464..98f2cba7 100644 --- a/include/fkYAML/ordered_map.hpp +++ b/include/fkYAML/ordered_map.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/single_include/fkYAML/node.hpp b/single_include/fkYAML/node.hpp index 0544736a..f85eee36 100644 --- a/single_include/fkYAML/node.hpp +++ b/single_include/fkYAML/node.hpp @@ -1,6 +1,6 @@ /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -24,7 +24,7 @@ // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 
0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -34,7 +34,7 @@ // Check version definitions if already defined. #if defined(FK_YAML_MAJOR_VERSION) && defined(FK_YAML_MINOR_VERSION) && defined(FK_YAML_PATCH_VERSION) - #if FK_YAML_MAJOR_VERSION != 0 || FK_YAML_MINOR_VERSION != 3 || FK_YAML_PATCH_VERSION != 2 + #if FK_YAML_MAJOR_VERSION != 0 || FK_YAML_MINOR_VERSION != 3 || FK_YAML_PATCH_VERSION != 3 #warning Already included a different version of the fkYAML library! #else // define macros to skip defining macros down below. @@ -46,7 +46,7 @@ #define FK_YAML_MAJOR_VERSION 0 #define FK_YAML_MINOR_VERSION 3 - #define FK_YAML_PATCH_VERSION 2 + #define FK_YAML_PATCH_VERSION 3 #define FK_YAML_NAMESPACE_VERSION_CONCAT_IMPL(major, minor, patch) v##major##_##minor##_##patch @@ -69,7 +69,7 @@ // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -127,12 +127,13 @@ #endif /* FK_YAML_DETAIL_MACROS_CPP_CONFIG_MACROS_HPP_ */ + #endif // !defined(FK_YAML_VERCHECK_SUCCEEDED) // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -158,7 +159,7 @@ // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// 
|__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -178,7 +179,7 @@ // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -204,7 +205,7 @@ // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -226,7 +227,7 @@ // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -433,10 +434,11 @@ using std::remove_cvref_t; FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_META_STL_SUPPLEMENT_HPP_ */ + // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -455,7 +457,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | 
__| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -721,7 +723,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -735,14 +737,71 @@ FK_YAML_NAMESPACE_END #include #include #include -#include // #include +// #include +/// _______ __ __ __ _____ __ __ __ +/// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 +/// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +/// +/// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani +/// SPDX-License-Identifier: MIT +/// +/// @file + +#ifndef FK_YAML_DETAIL_STRING_FORMATTER_HPP_ +#define FK_YAML_DETAIL_STRING_FORMATTER_HPP_ + +#include +#include +#include +#include + +// #include + + +/// @namespace namespace for fkYAML library. +FK_YAML_NAMESPACE_BEGIN + +/// @namespace namespace for internal implementation of fkYAML library. +namespace detail +{ + +inline std::string format(const char* fmt, ...) 
+{ + va_list vl; + va_start(vl, fmt); + int size = std::vsnprintf(nullptr, 0, fmt, vl); + va_end(vl); + + // LCOV_EXCL_START + if (size < 0) + { + return ""; + } + // LCOV_EXCL_STOP + + std::unique_ptr buffer {new char[size + 1] {}}; + + va_start(vl, fmt); + size = std::vsnprintf(buffer.get(), size + 1, fmt, vl); + va_end(vl); + + return std::string(buffer.get(), size); +} + +} // namespace detail + +FK_YAML_NAMESPACE_END + +#endif /* FK_YAML_DETAIL_STRING_FORMATTER_HPP_ */ + // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -778,7 +837,7 @@ enum class node_t : std::uint32_t STRING, //!< string value type }; -inline std::string to_string(node_t t) noexcept +inline const char* to_string(node_t t) noexcept { switch (t) { @@ -877,14 +936,13 @@ class invalid_encoding : public exception template std::string generate_error_message(const char* msg, std::array u8) const noexcept { - std::stringstream ss; - ss << "invalid_encoding: " << msg << " in=[ 0x" << std::hex << u8[0]; + std::string formatted = detail::format("invalid_encoding: %s in=[ 0x%02x", msg, u8[0]); for (std::size_t i = 1; i < N; i++) { - ss << ", 0x" << std::hex << u8[i]; + formatted += detail::format(", 0x%02x", u8[i]); } - ss << " ]"; - return ss.str(); + formatted += " ]"; + return formatted; } /// @brief Generate an error message from the given parameters for the UTF-16 encoding. @@ -894,11 +952,8 @@ class invalid_encoding : public exception /// @return A generated error message. std::string generate_error_message(const char* msg, std::array u16) const noexcept { - std::stringstream ss; - ss << "invalid_encoding: " << msg; // uint16_t is large enough for UTF-16 encoded elements. 
- ss << " in=[ 0x" << std::hex << uint16_t(u16[0]) << ", 0x" << std::hex << uint16_t(u16[1]) << " ]"; - return ss.str(); + return detail::format("invalid_encoding: %s in=[ 0x%04x, 0x%04x ]", msg, uint16_t(u16[0]), uint16_t(u16[1])); } /// @brief Generate an error message from the given parameters for the UTF-32 encoding. @@ -907,10 +962,8 @@ class invalid_encoding : public exception /// @return A genereated error message. std::string generate_error_message(const char* msg, char32_t u32) const noexcept { - std::stringstream ss; // uint32_t is large enough for UTF-32 encoded elements. - ss << "invalid_encoding: " << msg << " in=0x" << std::hex << uint32_t(u32); - return ss.str(); + return detail::format("invalid_encoding: %s in=0x%08x", msg, uint32_t(u32)); } }; @@ -926,9 +979,7 @@ class parse_error : public exception private: std::string generate_error_message(const char* msg, std::size_t lines, std::size_t cols_in_line) const noexcept { - std::stringstream ss; - ss << "parse_error: " << msg << " (at line " << lines << ", column " << cols_in_line << ")"; - return ss.str(); + return detail::format("parse_error: %s (at line %zu, column %zu)", msg, lines, cols_in_line); } }; @@ -952,9 +1003,32 @@ class type_error : public exception /// @return A generated error message. 
std::string generate_error_message(const char* msg, detail::node_t type) const noexcept { - std::stringstream ss; - ss << "type_error: " << msg << " type=" << detail::to_string(type); - return ss.str(); + return detail::format("type_error: %s type=%s", msg, detail::to_string(type)); + } +}; + +class out_of_range : public exception +{ +public: + explicit out_of_range(int index) noexcept + : exception(generate_error_message(index).c_str()) + { + } + + explicit out_of_range(const char* key) noexcept + : exception(generate_error_message(key).c_str()) + { + } + +private: + std::string generate_error_message(int index) + { + return detail::format("out_of_range: index %d is out of range", index); + } + + std::string generate_error_message(const char* key) + { + return detail::format("out_of_range: key \'%s\' is not found.", key); } }; @@ -1222,10 +1296,11 @@ inline double from_string(const std::string& s, type_tag /*unused*/) FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_CONVERSIONS_FROM_STRING_HPP_ */ + // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -1552,10 +1627,11 @@ class utf8_encoding FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODING_HPP_ */ + // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -1575,7 +1651,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | 
fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -1586,6 +1662,7 @@ FK_YAML_NAMESPACE_END #ifndef FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP_ #define FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP_ +#include #include // #include @@ -1602,51 +1679,27 @@ FK_YAML_NAMESPACE_BEGIN namespace detail { -///////////////////////////////// -// API representative types -///////////////////////////////// - -/// @brief A type which represents T::char_type; -/// @tparam T A target type to check if it has char_type; -template -using detect_char_type_helper_t = typename T::char_type; - -/// @brief A type which represents get_character function. -/// @tparam T A target type. -template -using get_character_fn_t = decltype(std::declval().get_character()); - -/// @brief Type traits to check if T has char_type as its member. -/// @tparam T A target type. -/// @tparam typename N/A -template -struct has_char_type : std::false_type -{ -}; - /////////////////////////////////////////// // Input Adapter API detection traits /////////////////////////////////////////// -/// @brief A partial specialization of has_char_type if T has char_type as its member. +/// @brief A type which represents get_character function. /// @tparam T A target type. template -struct has_char_type::value>> : std::true_type -{ -}; +using fill_buffer_fn_t = decltype(std::declval().fill_buffer(std::declval())); /// @brief Type traits to check if InputAdapterType has get_character member function. /// @tparam InputAdapterType An input adapter type to check if it has get_character function. 
/// @tparam typename N/A template -struct has_get_character : std::false_type +struct has_fill_buffer : std::false_type { }; -/// @brief A partial specialization of has_get_character if InputAdapterType has get_character member function. +/// @brief A partial specialization of has_fill_buffer if InputAdapterType has get_character member function. /// @tparam InputAdapterType A type of a target input adapter. template -struct has_get_character::value>> +struct has_fill_buffer::value>> : std::true_type { }; @@ -1666,10 +1719,7 @@ struct is_input_adapter : std::false_type /// @brief A partial specialization of is_input_adapter if T is an input adapter type. /// @tparam InputAdapterType template -struct is_input_adapter< - InputAdapterType, - enable_if_t, has_get_character>::value>> - : std::true_type +struct is_input_adapter::value>> : std::true_type { }; @@ -1678,6 +1728,7 @@ struct is_input_adapter< FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP_ */ + // #include @@ -1689,21 +1740,12 @@ namespace detail { /// @brief An input buffer handler. -/// @tparam InputAdapterType The type of the input adapter. -template ::value, int> = 0> class input_handler { -public: +private: /// The type of character traits of the input buffer. - using char_traits_type = std::char_traits; - /// The type of characters of the input buffer. - using char_type = typename char_traits_type::char_type; - /// The type of integers for the input buffer. - using int_type = typename char_traits_type::int_type; - /// The type of strings of the input buffer. - using string_type = std::basic_string; + using char_traits_type = std::char_traits; -private: /// @brief A set of information on the current position in an input buffer. struct position { @@ -1717,102 +1759,113 @@ class input_handler public: /// @brief Construct a new input_handler object. + /// @tparam InputAdapterType The type of the input adapter. 
/// @param input_adapter An input adapter object + template ::value, int> = 0> explicit input_handler(InputAdapterType&& input_adapter) - : m_input_adapter(std::move(input_adapter)) + : m_buffer_size(0) { - get_next(); - m_position.cur_pos = m_position.cur_pos_in_line = m_position.lines_read = 0; + input_adapter.fill_buffer(m_buffer); + m_buffer_size = m_buffer.size(); } /// @brief Get the character at the current position. - /// @return int_type A character or EOF. - int_type get_current() const noexcept + /// @return int A character or EOF. + int get_current() const noexcept { - return m_cache[m_position.cur_pos]; + if (m_position.cur_pos == m_buffer_size) + { + return s_end_of_input; + } + return char_traits_type::to_int_type(m_buffer[m_position.cur_pos]); } /// @brief Get the character at next position. - /// @return int_type A character or EOF. - int_type get_next() + /// @return int A character or EOF. + int get_next() { - int_type ret = end_of_input; - - // if already cached, return the cached value. - if (m_position.cur_pos + 1 < m_cache.size()) + // if all the input has already been consumed, return the EOF. + if (m_position.cur_pos == m_buffer_size - 1) { - ret = m_cache[++m_position.cur_pos]; - ++m_position.cur_pos_in_line; + m_position.cur_pos++; + m_position.cur_pos_in_line++; + return s_end_of_input; } - else + + if (m_position.cur_pos == m_buffer_size) { - ret = m_input_adapter.get_character(); - if (ret != end_of_input || m_cache[m_position.cur_pos] != end_of_input) - { - // cache the return value for possible later use. 
- m_cache.push_back(ret); - ++m_position.cur_pos; - ++m_position.cur_pos_in_line; - } + return s_end_of_input; } - if (m_cache[m_position.cur_pos - 1] == '\n') + if (m_buffer[m_position.cur_pos] == '\n') { m_position.cur_pos_in_line = 0; ++m_position.lines_read; } + else + { + m_position.cur_pos_in_line++; + } - return ret; + return char_traits_type::to_int_type(m_buffer[++m_position.cur_pos]); } /// @brief Get the characters in the given range. /// @param length The length of characters retrieved from the current position. /// @param str A string which will contain the resulting characters. - /// @return int_type 0 (for success) or EOF (for error). - int_type get_range(std::size_t length, string_type& str) + /// @return int 0 (for success) or EOF (for error). + int get_range(std::size_t length, std::string& str) { str.clear(); - if (get_current() == end_of_input) + if (length == 0) + { + // regard this case as successful in getting zero characters. + return 0; + } + + if (m_position.cur_pos + length - 1 >= m_buffer_size) { - return end_of_input; + return s_end_of_input; } - str += char_traits_type::to_char_type(get_current()); + str += m_buffer[m_position.cur_pos]; for (std::size_t i = 1; i < length; i++) { - if (get_next() == end_of_input) - { - // m_cur_pos -= i; - for (std::size_t j = i; j > 0; j--) - { - unget(); - } - str.clear(); - return end_of_input; - } - str += char_traits_type::to_char_type(get_current()); + str += char_traits_type::to_char_type(get_next()); } return 0; } + /// @brief Get the next character without changing the current position. + /// @return int A character if not already at the end of the input buffer, an EOF otherwise. + int peek_next() + { + if (m_position.cur_pos >= m_buffer_size - 1) + { + // there is no input character left. + return s_end_of_input; + } + + return char_traits_type::to_int_type(m_buffer[m_position.cur_pos + 1]); + } + /// @brief Move backward the current position. 
void unget() { if (m_position.cur_pos > 0) { - // just move back the cursor. (no action for adapter) --m_position.cur_pos; --m_position.cur_pos_in_line; - if (m_cache[m_position.cur_pos] == '\n') + if (m_buffer[m_position.cur_pos] == '\n') { --m_position.lines_read; m_position.cur_pos_in_line = 0; if (m_position.cur_pos > 0) { - for (std::size_t i = m_position.cur_pos - 1; m_cache[i] != '\n'; i--) + for (std::size_t i = m_position.cur_pos - 1; m_buffer[i] != '\n'; i--) { if (i == 0) { @@ -1830,33 +1883,11 @@ class input_handler /// @param length The length of moving backward. void unget_range(std::size_t length) { - for (std::size_t i = 0; i < length; i++) - { - unget(); - } - } - - /// @brief Check if the next character is the expected one. - /// @param expected An expected next character. - /// @return true The next character is the expected one. - /// @return false The next character is not the expected one. - bool test_next_char(char_type expected) - { - if (get_current() == end_of_input) - { - return false; - } - - int_type next = get_next(); - if (next == end_of_input) + size_t unget_num = (m_position.cur_pos < length) ? m_position.cur_pos : length; + for (std::size_t i = 0; i < unget_num; i++) { unget(); - return false; } - - bool ret = char_traits_type::eq(char_traits_type::to_char_type(next), expected); - unget(); - return ret; } /// @brief Get the current position in the current line. @@ -1875,12 +1906,12 @@ class input_handler private: /// The value of EOF for the target character type. - static constexpr int_type end_of_input = char_traits_type::eof(); + static constexpr int s_end_of_input = char_traits_type::eof(); - /// An input adapter object. - InputAdapterType m_input_adapter {}; - /// Cached characters retrieved from an input adapter object. - std::vector m_cache {}; + /// The input buffer retrieved from an input adapter object. + std::string m_buffer {}; + /// The size of the buffer. 
+ std::size_t m_buffer_size {0}; /// The current position in an input buffer. position m_position {}; }; @@ -1890,12 +1921,13 @@ class input_handler FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_INPUT_INPUT_HANDLER_HPP_ */ + // #include // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -2083,12 +2115,13 @@ struct is_node_compatible_type : is_node_compatible_type_impl // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -2125,7 +2158,6 @@ enum class lexical_token_t SEQUENCE_BLOCK_PREFIX, //!< the character for sequence block prefix `- ` SEQUENCE_FLOW_BEGIN, //!< the character for sequence flow begin `[` SEQUENCE_FLOW_END, //!< the character for sequence flow end `]` - MAPPING_BLOCK_PREFIX, //!< the character for mapping block prefix `:` MAPPING_FLOW_BEGIN, //!< the character for mapping begin `{` MAPPING_FLOW_END, //!< the character for mapping end `}` NULL_VALUE, //!< a null value found. use get_null() to get a value. @@ -2142,6 +2174,7 @@ enum class lexical_token_t FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_TYPES_LEXICAL_TOKEN_T_HPP_ */ + // #include @@ -2160,17 +2193,11 @@ namespace detail /// @brief A class which lexically analizes YAML formatted inputs. /// @tparam BasicNodeType A type of the container for YAML values. 
-template < - typename BasicNodeType, typename InputAdapterType, - enable_if_t, is_input_adapter>::value, int> = 0> +template ::value, int> = 0> class lexical_analyzer { private: - using input_handler_type = input_handler; - using char_traits_type = typename input_handler_type::char_traits_type; - using char_type = typename char_traits_type::char_type; - using char_int_type = typename char_traits_type::int_type; - using input_string_type = typename input_handler_type::string_type; + using char_traits_type = typename std::char_traits; enum class block_style_indicator_t { @@ -2192,7 +2219,9 @@ class lexical_analyzer using string_type = typename BasicNodeType::string_type; /// @brief Construct a new lexical_analyzer object. + /// @tparam InputAdapterType The type of the input adapter. /// @param input_adapter An input adapter object. + template ::value, int> = 0> explicit lexical_analyzer(InputAdapterType&& input_adapter) : m_input_handler(std::move(input_adapter)) { @@ -2204,8 +2233,9 @@ class lexical_analyzer { skip_white_spaces_and_newline_codes(); - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); m_last_token_begin_pos = m_input_handler.get_cur_pos_in_line(); + m_last_token_begin_line = m_input_handler.get_lines_read(); if (0x00 <= current && current <= 0x7F && isdigit(current)) { @@ -2214,7 +2244,7 @@ class lexical_analyzer switch (current) { - case end_of_input: // end of input buffer + case s_end_of_input: // end of input buffer return m_last_token_type = lexical_token_t::END_OF_BUFFER; case '?': switch (m_input_handler.get_next()) @@ -2225,74 +2255,56 @@ class lexical_analyzer m_value_buffer = "?"; return m_last_token_type = scan_string(false); } - case ':': // key separater - switch (m_input_handler.get_next()) + case ':': { // key separater + current = m_input_handler.get_next(); + switch (current) { - case ' ': { - size_t prev_pos = m_input_handler.get_lines_read(); - skip_white_spaces_and_comments(); - 
size_t cur_pos = m_input_handler.get_lines_read(); - if (prev_pos == cur_pos) - { - current = m_input_handler.get_current(); - if (current != '\r' && current != '\n') - { - return m_last_token_type = lexical_token_t::KEY_SEPARATOR; - } - } - return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - } - case '\r': { - char_int_type next = m_input_handler.get_next(); - if (next == '\n') + case ' ': + case '\t': + case '\r': + case '\n': + case s_end_of_input: + break; + case ',': + case '[': + case ']': + case '{': + case '}': + if (m_flow_context_depth > 0) { - m_input_handler.get_next(); + // the above characters are not "safe" to be followed in a flow context. + // See https://yaml.org/spec/1.2.2/#733-plain-style for more details. + break; } - return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - } - case '\n': - m_input_handler.get_next(); - return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; + m_value_buffer = ":"; + return scan_string(false); default: - emit_error("Half-width spaces or newline codes are required after a key separater(:)."); + m_value_buffer = ":"; + return scan_string(false); } + + return m_last_token_type = lexical_token_t::KEY_SEPARATOR; + } case ',': // value separater m_input_handler.get_next(); return m_last_token_type = lexical_token_t::VALUE_SEPARATOR; case '&': { // anchor prefix - m_value_buffer.clear(); - while (true) + extract_anchor_name(); + bool is_empty = m_value_buffer.empty(); + if (is_empty) { - char_int_type next = m_input_handler.get_next(); - if (next == end_of_input || next == '\r' || next == '\n') - { - emit_error("An anchor label must be followed by some value."); - } - if (next == ' ') - { - m_input_handler.get_next(); - break; - } - m_value_buffer.push_back(char_traits_type::to_char_type(next)); + emit_error("anchor name must not be empty."); } return m_last_token_type = lexical_token_t::ANCHOR_PREFIX; } case '*': { // alias prefix - m_value_buffer.clear(); - while (true) + 
extract_anchor_name(); + bool is_empty = m_value_buffer.empty(); + if (is_empty) { - char_int_type next = m_input_handler.get_next(); - if (next == ' ' || next == '\r' || next == '\n' || next == end_of_input) - { - if (m_value_buffer.empty()) - { - emit_error("An alias prefix must be followed by some anchor name."); - } - m_input_handler.get_next(); - break; - } - m_value_buffer.push_back(char_traits_type::to_char_type(next)); + emit_error("anchor name must not be empty."); } + return m_last_token_type = lexical_token_t::ALIAS_PREFIX; } case '#': // comment prefix @@ -2301,7 +2313,7 @@ class lexical_analyzer case '%': // directive prefix return m_last_token_type = scan_directive(); case '-': { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == ' ') { // Move a cursor to the beginning of the next token. @@ -2315,8 +2327,8 @@ class lexical_analyzer return m_last_token_type = scan_number(); } - char_int_type ret = m_input_handler.get_range(3, m_value_buffer); - if (ret != end_of_input) + int ret = m_input_handler.get_range(3, m_value_buffer); + if (ret != s_end_of_input) { if (m_value_buffer == "---") { @@ -2324,10 +2336,10 @@ class lexical_analyzer return m_last_token_type = lexical_token_t::END_OF_DIRECTIVES; } - m_input_handler.unget_range(2); + m_input_handler.get_next(); } - return m_last_token_type = scan_string(); + return m_last_token_type = scan_string(ret == s_end_of_input); } case '[': // sequence flow begin m_flow_context_depth++; @@ -2363,8 +2375,8 @@ class lexical_analyzer case '+': return m_last_token_type = scan_number(); case '.': { - char_int_type ret = m_input_handler.get_range(3, m_value_buffer); - if (ret != end_of_input) + int ret = m_input_handler.get_range(3, m_value_buffer); + if (ret != s_end_of_input) { if (m_value_buffer == "...") { @@ -2408,7 +2420,7 @@ class lexical_analyzer /// @return std::size_t The number of lines already processed. 
std::size_t get_lines_processed() const noexcept { - return m_input_handler.get_lines_read(); + return m_last_token_begin_line; } /// @brief Convert from string to null and get the converted value. @@ -2461,7 +2473,7 @@ class lexical_analyzer const string_type& get_string() const noexcept { // TODO: Provide support for different string types between nodes & inputs. - static_assert(std::is_same::value, "Unsupported, different string types."); + static_assert(std::is_same::value, "Unsupported, different string types."); return m_value_buffer; } @@ -2479,7 +2491,7 @@ class lexical_analyzer /// @brief A utility function to convert a hexadecimal character to an integer. /// @param source A hexadecimal character ('0'~'9', 'A'~'F', 'a'~'f') /// @return char A integer converted from @a source. - char convert_hex_char_to_byte(char_int_type source) const + char convert_hex_char_to_byte(int source) const { if ('0' <= source && source <= '9') { @@ -2521,7 +2533,7 @@ class lexical_analyzer switch (m_input_handler.get_next()) { - case end_of_input: + case s_end_of_input: emit_error("invalid eof in a directive."); case 'T': { if (m_input_handler.get_next() != 'A' || m_input_handler.get_next() != 'G') @@ -2601,6 +2613,48 @@ class lexical_analyzer return lexical_token_t::YAML_VER_DIRECTIVE; } + /// @brief Extracts an anchor name from the input and assigns the result to `m_value_buffer`. + void extract_anchor_name() + { + int current = m_input_handler.get_current(); + FK_YAML_ASSERT(current == '&' || current == '*'); + + m_value_buffer.clear(); + + while ((current = m_input_handler.get_next()) != s_end_of_input) + { + switch (current) + { + case s_end_of_input: + // anchor name must not contain white spaces, newline codes and flow indicators. + // See https://yaml.org/spec/1.2.2/#692-node-anchors for more details. 
+ case ' ': + case '\t': + case '\r': + case '\n': + case '{': + case '}': + case '[': + case ']': + case ',': + return; + case ':': { + int peeked = m_input_handler.peek_next(); + if (peeked == ' ') + { + // Stop the extraction at the key separator. + return; + } + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + break; + } + default: + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + break; + } + } + } + /// @brief Scan and determine a number type(integer/float). This method is the entrypoint for all number /// tokens. /// @return lexical_token_t A lexical token type for a determined number type. @@ -2608,7 +2662,7 @@ class lexical_analyzer { m_value_buffer.clear(); - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); FK_YAML_ASSERT(std::isdigit(current) || current == '-' || current == '+'); lexical_token_t ret = lexical_token_t::END_OF_BUFFER; @@ -2660,7 +2714,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for either integer or float numbers. lexical_token_t scan_negative_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); // The value of `next` must be guranteed to be a digit in the get_next_token() function. FK_YAML_ASSERT(std::isdigit(next)); @@ -2672,16 +2726,16 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float). lexical_token_t scan_number_after_zero_at_first() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); switch (next) { case '.': m_value_buffer.push_back(char_traits_type::to_char_type(next)); return scan_decimal_number_after_decimal_point(); case 'o': - // Do not store 'o' since std::strtoull does not support "0o" but "0" as the prefix for octal numbers. + // Do not store 'o' since std::stoXXX does not support "0o" but "0" as the prefix for octal numbers. 
// YAML specifies octal values start with the prefix "0o". - // See "10.3.2 Tag Resolution" section in https://yaml.org/spec/1.2.2/ + // See https://yaml.org/spec/1.2.2/#1032-tag-resolution for more details. return scan_octal_number(); case 'x': m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -2695,7 +2749,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for float numbers. lexical_token_t scan_decimal_number_after_decimal_point() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -2711,7 +2765,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for float numbers. lexical_token_t scan_decimal_number_after_exponent() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == '+' || next == '-') { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -2733,7 +2787,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float) lexical_token_t scan_decimal_number_after_sign() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -2748,7 +2802,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float) lexical_token_t scan_decimal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -2781,7 +2835,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for integers. 
lexical_token_t scan_octal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if ('0' <= next && next <= '7') { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -2794,7 +2848,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for integers. lexical_token_t scan_hexadecimal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isxdigit(next)) { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -2880,12 +2934,12 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for strings. lexical_token_t extract_string_token(bool needs_last_single_quote, bool needs_last_double_quote) { - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); for (;; current = m_input_handler.get_next()) { // Handle the end of input buffer. - if (current == end_of_input) + if (current == s_end_of_input) { if (needs_last_double_quote) { @@ -2905,25 +2959,44 @@ class lexical_analyzer if (!needs_last_double_quote && !needs_last_single_quote) { // Allow a space in an unquoted string only if the space is surrounded by non-space characters. - // See "7.3.3 Plain Style" section in https://yaml.org/spec/1.2.2/ - current = m_input_handler.get_next(); - switch (current) + // See https://yaml.org/spec/1.2.2/#733-plain-style for more details. + int next = m_input_handler.get_next(); + + // These characters are permitted when not inside a flow collection, and not inside an implicit key. + // TODO: Support detection of implicit key context for this check. + if (m_flow_context_depth > 0) + { + switch (next) + { + case '{': + case '}': + case '[': + case ']': + case ',': + return lexical_token_t::STRING_VALUE; + } + } + + // " :" is permitted in a plain style string token, but not when followed by a space. 
+ if (next == ':') + { + int peeked = m_input_handler.peek_next(); + if (peeked == ' ') + { + return lexical_token_t::STRING_VALUE; + } + } + + switch (next) { case ' ': case '\r': case '\n': - case '{': - case '}': - case '[': - case ']': - case ',': - case ':': case '#': case '\\': return lexical_token_t::STRING_VALUE; } m_input_handler.unget(); - current = m_input_handler.get_current(); } m_value_buffer.push_back(char_traits_type::to_char_type(current)); continue; @@ -2970,8 +3043,7 @@ class lexical_analyzer continue; } - char_int_type next = m_input_handler.get_next(); - m_input_handler.unget(); + int next = m_input_handler.peek_next(); // A colon as a key separator must be followed by a space or a newline code. if (next != ' ' && next != '\r' && next != '\n') @@ -3035,7 +3107,7 @@ class lexical_analyzer } // Handle escaped characters. - // See "5.7 Escaped Characters" section in https://yaml.org/spec/1.2.2/ + // See https://yaml.org/spec/1.2.2/#57-escaped-characters for more details. 
if (current == '\\') { if (!needs_last_double_quote) @@ -3068,7 +3140,7 @@ class lexical_analyzer m_value_buffer.push_back('\r'); break; case 'e': - m_value_buffer.push_back(char_type(0x1B)); + m_value_buffer.push_back(char(0x1B)); break; case ' ': m_value_buffer.push_back(' '); @@ -3084,31 +3156,19 @@ class lexical_analyzer break; case 'N': // next line utf8_encoding::from_utf32(0x85u, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); break; case '_': // non-breaking space utf8_encoding::from_utf32(0xA0u, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); break; case 'L': // line separator utf8_encoding::from_utf32(0x2028u, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); break; case 'P': // paragraph separator utf8_encoding::from_utf32(0x2029u, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); break; case 'x': handle_escaped_unicode(1); @@ -3132,6 +3192,8 @@ class lexical_analyzer continue; } + // The other characters are already checked while creating an input handler. + // Handle ASCII characters except control characters. if (current <= 0x7E) { @@ -3142,46 +3204,26 @@ class lexical_analyzer // Handle 2-byte characters encoded in UTF-8. 
(U+0080..U+07FF) if (current <= 0xDF) { - std::array byte_array = {{current, m_input_handler.get_next()}}; - if (!utf8_encoding::validate(byte_array)) - { - throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); - } - - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[0])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[1])); + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); continue; } // Handle 3-byte characters encoded in UTF-8. (U+1000..U+D7FF,U+E000..U+FFFF) if (current <= 0xEF) { - std::array byte_array = { - {current, m_input_handler.get_next(), m_input_handler.get_next()}}; - if (!utf8_encoding::validate(byte_array)) - { - throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); - } - - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[0])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[1])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[2])); + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); continue; } // Handle 4-byte characters encoded in UTF-8. 
(U+10000..U+FFFFF,U+100000..U+10FFFF) - std::array byte_array = { - {current, m_input_handler.get_next(), m_input_handler.get_next(), m_input_handler.get_next()}}; - if (!utf8_encoding::validate(byte_array)) - { - throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); - } - - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[0])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[1])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[2])); - m_value_buffer.push_back(char_traits_type::to_char_type(byte_array[3])); + m_value_buffer.push_back(char_traits_type::to_char_type(current)); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); + m_value_buffer.push_back(char_traits_type::to_char_type(m_input_handler.get_next())); } } @@ -3191,7 +3233,7 @@ class lexical_analyzer m_value_buffer.clear(); // Handle leading all-space lines. 
- char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); for (;; current = m_input_handler.get_next()) { if (current == ' ') @@ -3209,7 +3251,7 @@ class lexical_analyzer continue; } - if (current == end_of_input) + if (current == s_end_of_input) { if (chomp != chomping_indicator_t::KEEP) { @@ -3251,7 +3293,7 @@ class lexical_analyzer } } - for (; current != end_of_input; current = m_input_handler.get_next()) + for (; current != s_end_of_input; current = m_input_handler.get_next()) { if (current == '\r') { @@ -3321,24 +3363,24 @@ class lexical_analyzer } else { - switch (m_input_handler.get_next()) + switch (int next = m_input_handler.peek_next()) { case '\r': { m_input_handler.get_next(); - FK_YAML_ASSERT(m_input_handler.get_current() == '\n'); - m_value_buffer.push_back(char_traits_type::to_char_type('\n')); + next = m_input_handler.get_next(); + FK_YAML_ASSERT(next == '\n'); + m_value_buffer.push_back(char_traits_type::to_char_type(next)); break; } case '\n': - m_value_buffer.push_back(char_traits_type::to_char_type('\n')); + m_input_handler.get_next(); + m_value_buffer.push_back(char_traits_type::to_char_type(next)); break; case ' ': // The next line is more indented, so a newline will be appended in the next loop. - m_input_handler.unget(); break; default: m_value_buffer.push_back(char_traits_type::to_char_type(' ')); - m_input_handler.unget(); break; } } @@ -3417,7 +3459,7 @@ class lexical_analyzer /// @brief Handle unescaped control characters. /// @param c A target character. - void handle_unescaped_control_char(char_int_type c) + void handle_unescaped_control_char(int c) { FK_YAML_ASSERT(0x00 <= c && c <= 0x1F); @@ -3501,26 +3543,26 @@ class lexical_analyzer // Treats the code point as a UTF-32 encoded character. 
utf8_encoding::from_utf32(code_point, m_encode_buffer, m_encoded_size); - for (size_t i = 0; i < m_encoded_size; i++) - { - m_value_buffer.push_back(m_encode_buffer[i]); - } + m_value_buffer.append(m_encode_buffer.data(), m_encoded_size); } void get_block_style_metadata(chomping_indicator_t& chomp_type, std::size_t& indent) { - char_int_type ch = m_input_handler.get_next(); + int ch = m_input_handler.get_next(); chomp_type = chomping_indicator_t::CLIP; - if (ch == '-') + switch (ch) { + case '-': chomp_type = chomping_indicator_t::STRIP; ch = m_input_handler.get_next(); - } - else if (ch == '+') - { + break; + case '+': chomp_type = chomping_indicator_t::KEEP; ch = m_input_handler.get_next(); + break; + default: + break; } if (ch == '0') @@ -3552,7 +3594,7 @@ class lexical_analyzer default: return; } - } while (m_input_handler.get_next() != end_of_input); + } while (m_input_handler.get_next() != s_end_of_input); } /// @brief Skip white spaces and newline codes (CR/LF) from the current position. @@ -3570,7 +3612,7 @@ class lexical_analyzer default: return; } - } while (m_input_handler.get_next() != end_of_input); + } while (m_input_handler.get_next() != s_end_of_input); } /// @brief Skip white spaces and comments from the current position. @@ -3589,7 +3631,7 @@ class lexical_analyzer default: return; } - } while (m_input_handler.get_next() != end_of_input); + } while (m_input_handler.get_next() != s_end_of_input); } /// @brief Skip the rest in the current line. @@ -3611,7 +3653,7 @@ class lexical_analyzer default: break; } - } while (m_input_handler.get_next() != end_of_input); + } while (m_input_handler.get_next() != s_end_of_input); } [[noreturn]] void emit_error(const char* msg) const @@ -3621,15 +3663,20 @@ class lexical_analyzer private: /// The value of EOF for the target characters. 
- static constexpr char_int_type end_of_input = char_traits_type::eof(); + static constexpr int s_end_of_input = char_traits_type::eof(); /// An input buffer adapter to be analyzed. - input_handler_type m_input_handler; + input_handler m_input_handler; /// A temporal buffer to store a string to be parsed to an actual datum. - input_string_type m_value_buffer {}; + std::string m_value_buffer {}; + /// A temporal buffer to store a UTF-8 encoded char sequence. std::array m_encode_buffer {}; + /// The actual size of a UTF-8 encoded char sequence. std::size_t m_encoded_size {0}; + /// The beginning position of the last lexical token. (zero origin) std::size_t m_last_token_begin_pos {0}; + /// The beginning line of the last lexical token. (zero origin) + std::size_t m_last_token_begin_line {0}; /// The current depth of flow context. uint32_t m_flow_context_depth {0}; /// The last found token type. @@ -3659,7 +3706,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -3736,7 +3783,7 @@ class basic_deserializer template ::value, int> = 0> BasicNodeType deserialize(InputAdapterType&& input_adapter) { - lexical_analyzer lexer(std::forward(input_adapter)); + lexical_analyzer lexer(std::forward(input_adapter)); BasicNodeType root = BasicNodeType::mapping(); m_current_node = &root; @@ -3802,12 +3849,62 @@ class basic_deserializer throw parse_error("A key separator found without key.", cur_line, cur_indent); } - bool is_implicit = m_indent_stack.empty() || cur_indent > m_indent_stack.back().first; - if (is_implicit) + // hold the line count of the key separator for later use. 
+ std::size_t old_indent = cur_indent; + std::size_t old_line = cur_line; + + type = lexer.get_next_token(); + if (type == lexical_token_t::COMMENT_PREFIX) { - break; + // just skip the comment and get the next token. + type = lexer.get_next_token(); + } + + cur_indent = lexer.get_last_token_begin_pos(); + cur_line = lexer.get_lines_processed(); + + bool is_implicit_same_line = + (cur_line == old_line) && (m_indent_stack.empty() || old_indent > m_indent_stack.back().first); + if (is_implicit_same_line) + { + // a key separator for an implicit key with its value on the same line. + continue; + } + + if (cur_line > old_line) + { + switch (type) + { + case lexical_token_t::SEQUENCE_BLOCK_PREFIX: + // a key separator preceeding block sequence entries + *m_current_node = BasicNodeType::sequence(); + set_yaml_version(*m_current_node); + break; + case lexical_token_t::EXPLICIT_KEY_PREFIX: + // a key separator for a explicit block mapping key. + *m_current_node = BasicNodeType::mapping(); + set_yaml_version(*m_current_node); + break; + // defer checking the existence of a key separator after the scalar until a deserialize_scalar() + // call. + case lexical_token_t::NULL_VALUE: + case lexical_token_t::BOOLEAN_VALUE: + case lexical_token_t::INTEGER_VALUE: + case lexical_token_t::FLOAT_NUMBER_VALUE: + case lexical_token_t::STRING_VALUE: + // defer handling these tokens until the next loop. + case lexical_token_t::MAPPING_FLOW_BEGIN: + case lexical_token_t::SEQUENCE_FLOW_BEGIN: + break; + default: // LCOV_EXCL_LINE + break; // LCOV_EXCL_LINE + } + + continue; } + // handle explicit mapping key separators. 
+ while (!m_indent_stack.back().second) { m_current_node = m_node_stack.back(); @@ -3839,7 +3936,6 @@ class basic_deserializer m_node_stack.push_back(m_node_stack.back()); m_indent_stack.back().second = false; - type = lexer.get_next_token(); if (type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) { *m_current_node = BasicNodeType::sequence(); @@ -3854,18 +3950,16 @@ class basic_deserializer case lexical_token_t::ANCHOR_PREFIX: { m_anchor_name = lexer.get_string(); m_needs_anchor_impl = true; - break; - } - case lexical_token_t::ALIAS_PREFIX: { - const string_type& alias_name = lexer.get_string(); - auto itr = m_anchor_table.find(alias_name); - if (itr == m_anchor_table.end()) - { - throw parse_error( - "The given anchor name must appear prior to the alias node.", cur_line, cur_indent); - } - assign_node_value(BasicNodeType::alias_of(m_anchor_table.at(alias_name))); - break; + + // Skip updating the current indent to avoid stacking a wrong indentation. + // + // &foo bar: baz + // ^ + // the correct indent width for the "bar" node key. 
+ + type = lexer.get_next_token(); + cur_line = lexer.get_lines_processed(); + continue; } case lexical_token_t::COMMENT_PREFIX: break; @@ -3928,26 +4022,6 @@ class basic_deserializer m_current_node = m_node_stack.back(); m_node_stack.pop_back(); break; - case lexical_token_t::MAPPING_BLOCK_PREFIX: - type = lexer.get_next_token(); - if (type == lexical_token_t::COMMENT_PREFIX) - { - type = lexer.get_next_token(); - } - if (type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) - { - *m_current_node = BasicNodeType::sequence(); - set_yaml_version(*m_current_node); - cur_indent = lexer.get_last_token_begin_pos(); - cur_line = lexer.get_lines_processed(); - continue; - } - - *m_current_node = BasicNodeType::mapping(); - set_yaml_version(*m_current_node); - cur_indent = lexer.get_last_token_begin_pos(); - cur_line = lexer.get_lines_processed(); - continue; case lexical_token_t::MAPPING_FLOW_BEGIN: *m_current_node = BasicNodeType::mapping(); set_yaml_version(*m_current_node); @@ -3955,45 +4029,13 @@ class basic_deserializer case lexical_token_t::MAPPING_FLOW_END: m_current_node = m_node_stack.back(); break; - case lexical_token_t::NULL_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_null()), cur_indent, cur_line, type); - if (do_continue) - { - continue; - } - break; - } - case lexical_token_t::BOOLEAN_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_boolean()), cur_indent, cur_line, type); - if (do_continue) - { - continue; - } - break; - } - case lexical_token_t::INTEGER_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_integer()), cur_indent, cur_line, type); - if (do_continue) - { - continue; - } - break; - } - case lexical_token_t::FLOAT_NUMBER_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_float_number()), cur_indent, cur_line, type); - if (do_continue) - { - continue; - } - break; - } + case lexical_token_t::ALIAS_PREFIX: + case 
lexical_token_t::NULL_VALUE: + case lexical_token_t::BOOLEAN_VALUE: + case lexical_token_t::INTEGER_VALUE: + case lexical_token_t::FLOAT_NUMBER_VALUE: case lexical_token_t::STRING_VALUE: { - bool do_continue = - deserialize_scalar(lexer, BasicNodeType(lexer.get_string()), cur_indent, cur_line, type); + bool do_continue = deserialize_scalar(lexer, cur_indent, cur_line, type); if (do_continue) { continue; @@ -4007,8 +4049,10 @@ class basic_deserializer break; } + lexical_token_t prev_type = type; type = lexer.get_next_token(); - cur_indent = lexer.get_last_token_begin_pos(); + // + cur_indent = (prev_type == lexical_token_t::ANCHOR_PREFIX) ? cur_indent : lexer.get_last_token_begin_pos(); cur_line = lexer.get_lines_processed(); } while (type != lexical_token_t::END_OF_BUFFER); @@ -4024,7 +4068,7 @@ class basic_deserializer private: /// @brief Add new key string to the current YAML node. /// @param key a key string to be added to the current YAML node. - void add_new_key(const BasicNodeType& key, const std::size_t indent, const std::size_t line) + void add_new_key(BasicNodeType&& key, const std::size_t indent, const std::size_t line) { if (!m_indent_stack.empty() && indent < m_indent_stack.back().first) { @@ -4083,27 +4127,11 @@ class basic_deserializer if (m_current_node->is_sequence()) { m_current_node->template get_value_ref().emplace_back(std::move(node_value)); - set_yaml_version(m_current_node->template get_value_ref().back()); - if (m_needs_anchor_impl) - { - m_current_node->template get_value_ref().back().add_anchor_name(m_anchor_name); - m_anchor_table[m_anchor_name] = m_current_node->template get_value_ref().back(); - m_needs_anchor_impl = false; - m_anchor_name.clear(); - } return; } // a scalar node *m_current_node = std::move(node_value); - set_yaml_version(*m_current_node); - if (m_needs_anchor_impl) - { - m_current_node->add_anchor_name(m_anchor_name); - m_anchor_table[m_anchor_name] = *m_current_node; - m_needs_anchor_impl = false; - 
m_anchor_name.clear(); - } if (!m_indent_stack.back().second) { m_current_node = m_node_stack.back(); @@ -4111,23 +4139,77 @@ class basic_deserializer } } + template + BasicNodeType create_scalar_node(LexerType& lexer, lexical_token_t type, std::size_t indent, std::size_t line) + { + FK_YAML_ASSERT( + type == lexical_token_t::NULL_VALUE || type == lexical_token_t::BOOLEAN_VALUE || + type == lexical_token_t::INTEGER_VALUE || type == lexical_token_t::FLOAT_NUMBER_VALUE || + type == lexical_token_t::STRING_VALUE || type == lexical_token_t::ALIAS_PREFIX); + + BasicNodeType node {}; + switch (type) + { + case lexical_token_t::NULL_VALUE: + node = BasicNodeType(lexer.get_null()); + break; + case lexical_token_t::BOOLEAN_VALUE: + node = BasicNodeType(lexer.get_boolean()); + break; + case lexical_token_t::INTEGER_VALUE: + node = BasicNodeType(lexer.get_integer()); + break; + case lexical_token_t::FLOAT_NUMBER_VALUE: + node = BasicNodeType(lexer.get_float_number()); + break; + case lexical_token_t::STRING_VALUE: + node = BasicNodeType(lexer.get_string()); + break; + case lexical_token_t::ALIAS_PREFIX: { + const string_type& alias_name = lexer.get_string(); + auto itr = m_anchor_table.find(alias_name); + if (itr == m_anchor_table.end()) + { + throw parse_error("The given anchor name must appear prior to the alias node.", line, indent); + } + node = BasicNodeType::alias_of(m_anchor_table[alias_name]); + break; + } + default: // LCOV_EXCL_LINE + break; // LCOV_EXCL_LINE + } + + set_yaml_version(node); + + if (m_needs_anchor_impl) + { + node.add_anchor_name(m_anchor_name); + m_anchor_table[m_anchor_name] = node; + m_needs_anchor_impl = false; + m_anchor_name.clear(); + } + + return node; + } + /// @brief Deserialize a detected scalar node. /// @param node A detected scalar node by a lexer. /// @param indent The current indentation width. Can be updated in this function. /// @param line The number of processed lines. Can be updated in this function. 
/// @return true if next token has already been got, false otherwise. template - bool deserialize_scalar( - LexerType& lexer, BasicNodeType&& node, std::size_t& indent, std::size_t& line, lexical_token_t& type) + bool deserialize_scalar(LexerType& lexer, std::size_t& indent, std::size_t& line, lexical_token_t& type) { + BasicNodeType node = create_scalar_node(lexer, type, indent, line); + if (m_current_node->is_mapping()) { - add_new_key(node, indent, line); + add_new_key(std::move(node), indent, line); return false; } type = lexer.get_next_token(); - if (type == lexical_token_t::KEY_SEPARATOR || type == lexical_token_t::MAPPING_BLOCK_PREFIX) + if (type == lexical_token_t::KEY_SEPARATOR) { if (m_current_node->is_scalar()) { @@ -4144,8 +4226,9 @@ class basic_deserializer return true; } *m_current_node = BasicNodeType::mapping(); + set_yaml_version(*m_current_node); } - add_new_key(node, indent, line); + add_new_key(std::move(node), indent, line); } else { @@ -4201,7 +4284,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -4212,6 +4295,7 @@ FK_YAML_NAMESPACE_END #ifndef FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP_ #define FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP_ +#include #include #include #include @@ -4220,10 +4304,12 @@ FK_YAML_NAMESPACE_END // #include +// #include + // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ 
-4235,13 +4321,14 @@ FK_YAML_NAMESPACE_END #define FK_YAML_DETAIL_ENCODINGS_ENCODE_DETECTOR_HPP_ #include +#include // #include -// #include +// #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -4249,8 +4336,8 @@ FK_YAML_NAMESPACE_END /// /// @file -#ifndef FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ -#define FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ +#ifndef FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ +#define FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ // #include @@ -4264,25 +4351,21 @@ namespace detail /// @brief Definition of Unicode encoding types /// @note Since fkYAML doesn't treat UTF-16/UTF-32 encoded characters per byte, endians do not matter. -enum class encode_t -{ - UTF_8_N, //!< UTF-8 without BOM - UTF_8_BOM, //!< UTF-8 with BOM - UTF_16BE_N, //!< UTF-16BE without BOM - UTF_16BE_BOM, //!< UTF-16BE with BOM - UTF_16LE_N, //!< UTF-16LE without BOM - UTF_16LE_BOM, //!< UTF-16LE with BOM - UTF_32BE_N, //!< UTF-32BE without BOM - UTF_32BE_BOM, //!< UTF-32BE with BOM - UTF_32LE_N, //!< UTF-32LE without BOM - UTF_32LE_BOM, //!< UTF-32LE with BOM +enum class utf_encode_t +{ + UTF_8, //!< UTF-8 + UTF_16BE, //!< UTF-16 Big Endian + UTF_16LE, //!< UTF-16 Little Endian + UTF_32BE, //!< UTF-32 Big Endian + UTF_32LE, //!< UTF-32 Little Endian }; } // namespace detail FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ */ +#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ */ + // #include @@ -4295,63 +4378,68 @@ namespace detail /// @brief Detect an encoding type for UTF-8 expected inputs. /// @note This function doesn't support the case where the first character is null. -/// @param b0 The 1st byte of an input character sequence. 
-/// @param b1 The 2nd byte of an input character sequence. -/// @param b2 The 3rd byte of an input character sequence. -/// @param b3 The 4th byte of an input character sequence. +/// @param[in] bytes 4 bytes of an input character sequence. +/// @param[out] has_bom Whether or not the input contains a BOM. /// @return A detected encoding type. -inline encode_t detect_encoding_type(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3) noexcept +inline utf_encode_t detect_encoding_type(const std::array& bytes, bool& has_bom) noexcept { + has_bom = false; + // Check if a BOM exists. - if (b0 == uint8_t(0xEFu) && b1 == uint8_t(0xBBu) && b2 == uint8_t(0xBFu)) + if (bytes[0] == uint8_t(0xEFu) && bytes[1] == uint8_t(0xBBu) && bytes[2] == uint8_t(0xBFu)) { - return encode_t::UTF_8_BOM; + has_bom = true; + return utf_encode_t::UTF_8; } - if (b0 == 0 && b1 == 0 && b2 == uint8_t(0xFEu) && b3 == uint8_t(0xFFu)) + if (bytes[0] == 0 && bytes[1] == 0 && bytes[2] == uint8_t(0xFEu) && bytes[3] == uint8_t(0xFFu)) { - return encode_t::UTF_32BE_BOM; + has_bom = true; + return utf_encode_t::UTF_32BE; } - if (b0 == uint8_t(0xFFu) && b1 == uint8_t(0xFEu) && b2 == 0 && b3 == 0) + if (bytes[0] == uint8_t(0xFFu) && bytes[1] == uint8_t(0xFEu) && bytes[2] == 0 && bytes[3] == 0) { - return encode_t::UTF_32LE_BOM; + has_bom = true; + return utf_encode_t::UTF_32LE; } - if (b0 == uint8_t(0xFEu) && b1 == uint8_t(0xFFu)) + if (bytes[0] == uint8_t(0xFEu) && bytes[1] == uint8_t(0xFFu)) { - return encode_t::UTF_16BE_BOM; + has_bom = true; + return utf_encode_t::UTF_16BE; } - if (b0 == uint8_t(0xFFu) && b1 == uint8_t(0xFEu)) + if (bytes[0] == uint8_t(0xFFu) && bytes[1] == uint8_t(0xFEu)) { - return encode_t::UTF_16LE_BOM; + has_bom = true; + return utf_encode_t::UTF_16LE; } // Test the first character assuming it's an ASCII character. 
- if (b0 == 0 && b1 == 0 && b2 == 0 && 0 < b3 && b3 < uint8_t(0x80u)) + if (bytes[0] == 0 && bytes[1] == 0 && bytes[2] == 0 && 0 < bytes[3] && bytes[3] < uint8_t(0x80u)) { - return encode_t::UTF_32BE_N; + return utf_encode_t::UTF_32BE; } - if (0 < b0 && b0 < uint8_t(0x80u) && b1 == 0 && b2 == 0 && b3 == 0) + if (0 < bytes[0] && bytes[0] < uint8_t(0x80u) && bytes[1] == 0 && bytes[2] == 0 && bytes[3] == 0) { - return encode_t::UTF_32LE_N; + return utf_encode_t::UTF_32LE; } - if (b0 == 0 && 0 < b1 && b1 < uint8_t(0x80u)) + if (bytes[0] == 0 && 0 < bytes[1] && bytes[1] < uint8_t(0x80u)) { - return encode_t::UTF_16BE_N; + return utf_encode_t::UTF_16BE; } - if (0 < b0 && b0 < uint8_t(0x80u) && b1 == 0) + if (0 < bytes[0] && bytes[0] < uint8_t(0x80u) && bytes[1] == 0) { - return encode_t::UTF_16LE_N; + return utf_encode_t::UTF_16LE; } - return encode_t::UTF_8_N; + return utf_encode_t::UTF_8; } /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. @@ -4361,9 +4449,9 @@ inline encode_t detect_encoding_type(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t /// @param end The end of input iterators. /// @return A detected encoding type. template ())))> -inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) +inline utf_encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {{0xFFu, 0xFFu, 0xFFu, 0xFFu}}; switch (ElemSize) { case sizeof(char): { // this case covers char8_t as well when compiled with C++20 features. 
@@ -4372,30 +4460,34 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) bytes[i] = uint8_t(begin[i]); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (has_bom) { - case encode_t::UTF_8_BOM: - std::advance(begin, 3); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - std::advance(begin, 2); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - std::advance(begin, 4); - break; - default: - // Do nothing if a BOM doesn't exist. - break; + // skip reading the BOM. + switch (encode_type) + { + case utf_encode_t::UTF_8: + std::advance(begin, 3); + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + std::advance(begin, 2); + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + std::advance(begin, 4); + break; + } } + return encode_type; } case sizeof(char16_t): { if (begin == end) { - return encode_t::UTF_16BE_N; + return utf_encode_t::UTF_16BE; } for (int i = 0; i < 2 && begin + i != end; i++) { @@ -4403,45 +4495,47 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) bytes[i * 2 + 1] = uint8_t(begin[i] & 0xFFu); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (encode_type != utf_encode_t::UTF_16BE && encode_type != utf_encode_t::UTF_16LE) { - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - std::advance(begin, 1); - break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16LE_N: - // Do nothing if a BOM doesn't exist. - break; - default: throw exception("char16_t characters must be encoded in the UTF-16 format."); } + + if (has_bom) + { + // skip reading the BOM. 
+ std::advance(begin, 1); + } + return encode_type; } case sizeof(char32_t): { if (begin == end) { - return encode_t::UTF_32BE_N; + return utf_encode_t::UTF_32BE; } + bytes[0] = uint8_t((*begin & 0xFF000000u) >> 24); bytes[1] = uint8_t((*begin & 0x00FF0000u) >> 16); bytes[2] = uint8_t((*begin & 0x0000FF00u) >> 8); bytes[3] = uint8_t(*begin & 0x000000FFu); - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (encode_type != utf_encode_t::UTF_32BE && encode_type != utf_encode_t::UTF_32LE) { - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - std::advance(begin, 1); - break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32LE_N: - // Do nothing if a BOM doesn't exist. - break; - default: throw exception("char32_t characters must be encoded in the UTF-32 format."); } + + if (has_bom) + { + // skip reading the BOM. + std::advance(begin, 1); + } + return encode_type; } default: @@ -4449,9 +4543,9 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) } } -inline encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept +inline utf_encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {{0xFFu, 0xFFu, 0xFFu, 0xFFu}}; for (std::size_t i = 0; i < 4; i++) { char byte = 0; @@ -4463,32 +4557,36 @@ inline encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept bytes[i] = uint8_t(byte & 0xFF); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) - { - case encode_t::UTF_8_BOM: - fseek(file, 3, SEEK_SET); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - fseek(file, 2, SEEK_SET); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - fseek(file, 4, SEEK_SET); - break; - default: - // Move 
back to the beginning of the file contents if a BOM doesn't exist. - fseek(file, 0, SEEK_SET); - break; + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + // move back to the beginning if a BOM doesn't exist. + long offset = 0; + if (has_bom) + { + switch (encode_type) + { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; + } } + fseek(file, offset, SEEK_SET); return encode_type; } -inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept +inline utf_encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {{0xFFu, 0xFFu, 0xFFu, 0xFFu}}; for (std::size_t i = 0; i < 4; i++) { char ch = 0; @@ -4503,25 +4601,29 @@ inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept bytes[i] = uint8_t(ch & 0xFF); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) - { - case encode_t::UTF_8_BOM: - is.seekg(3, std::ios_base::beg); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - is.seekg(2, std::ios_base::beg); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - is.seekg(4, std::ios_base::beg); - break; - default: - // Move back to the beginning of the file contents if a BOM doesn't exist. - is.seekg(0, std::ios_base::beg); - break; + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + // move back to the beginning if a BOM doesn't exist. 
+ std::streamoff offset = 0; + if (has_bom) + { + switch (encode_type) + { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; + } } + is.seekg(offset, std::ios_base::beg); return encode_type; } @@ -4531,7 +4633,8 @@ inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_ENCODINGS_ENCODE_DETECTOR_HPP_ */ -// #include + +// #include // #include @@ -4561,9 +4664,6 @@ class iterator_input_adapter< IterType, enable_if_t::value_type>, char>::value>> { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new iterator_input_adapter object. iterator_input_adapter() = default; @@ -4571,7 +4671,7 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) @@ -4587,150 +4687,158 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. 
- typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - typename std::char_traits::int_type ret = 0; + buffer.clear(); + switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); + case utf_encode_t::UTF_8: + fill_buffer_utf8(buffer); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: - ret = get_character_for_utf16(); + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + fill_buffer_utf16(buffer); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: - ret = get_character_for_utf32(); + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + fill_buffer_utf32(buffer); break; } - return ret; } private: - /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf8() noexcept + /// @brief The concrete implementation of fill_buffer() for UTF-8 encoded inputs. + /// @param buffer A buffer to be filled with the input. + void fill_buffer_utf8(std::string& buffer) { - if (m_current != m_end) - { - auto ret = std::char_traits::to_int_type(*m_current); - ++m_current; - return ret; - } - return std::char_traits::eof(); - } + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); - /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf16() - { - if (m_utf8_buf_index == m_utf8_buf_size) + IterType current = m_current; + while (current != m_end) { - if (m_current == m_end) + char first = *current++; + + // The first byte starts with 0b0XXX'XXXX -> 1-byte character + if ((first & 0xC0) == 0x80) + { + // The first byte must not start with 0b10XX'XXXX + std::array bytes {{first}}; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + // The first byte starts with 0b110X'XXXX -> 2-byte character + else if ((first & 0xE0) == 0xC0) { - if (m_encoded_buf_size == 0) + std::array bytes {{uint8_t(first), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) { - return std::char_traits::eof(); + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } } - - while (m_current != m_end && m_encoded_buf_size < 2) + // The first byte starts with 0b1110'XXXX -> 3-byte character + else if ((first & 0xF0) == 0xE0) { - switch (m_encode_type) + std::array bytes {{uint8_t(first), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(*m_current) << 8); - ++m_current; - m_encoded_buffer[m_encoded_buf_size] |= char16_t(*m_current); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { - m_encoded_buffer[m_encoded_buf_size] = char16_t(*m_current); - ++m_current; - m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(*m_current) << 8); - break; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE + } + // The first byte starts with 0x1111'0XXX -> 4-byte character + else if ((first & 0xF8) == 0xF0) + { + std::array bytes { + {uint8_t(first), uint8_t(*current++), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } - ++m_current; - ++m_encoded_buf_size; + } + } + + buffer.assign(m_current, m_end); + } + + /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. + /// @param buffer A buffer to be filled with the input. + void fill_buffer_utf16(std::string& buffer) + { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + + int shift_bits[2] {0, 0}; + if (m_encode_type == utf_encode_t::UTF_16BE) + { + shift_bits[0] = 8; + } + else // m_encode_type == utf_encode_t::UTF_16LE + { + shift_bits[1] = 8; + } + + std::array encoded_buffer {{0, 0}}; + std::size_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + while (m_current != m_end || encoded_buf_size != 0) + { + while (m_current != m_end && encoded_buf_size < 2) + { + encoded_buffer[encoded_buf_size] = char16_t(uint8_t(*m_current++) << shift_bits[0]); + encoded_buffer[encoded_buf_size++] |= char16_t(uint8_t(*m_current++) << shift_bits[1]); } std::size_t consumed_size = 0; - utf8_encoding::from_utf16(m_encoded_buffer, m_utf8_buffer, consumed_size, m_utf8_buf_size); + utf8_encoding::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); if (consumed_size == 1) { - m_encoded_buffer[0] = m_encoded_buffer[1]; - m_encoded_buffer[1] = 0; + encoded_buffer[0] = encoded_buffer[1]; + encoded_buffer[1] = 0; } - m_encoded_buf_size -= consumed_size; + encoded_buf_size -= consumed_size; - m_utf8_buf_index = 0; + buffer.append(utf8_buffer.data(), utf8_buf_size); } - - auto ret = 
std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; } /// @brief The concrete implementation of get_character() for UTF-32 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf32() + void fill_buffer_utf32(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32BE) { - if (m_current == m_end) - { - return std::char_traits::eof(); - } + shift_bits[0] = 24; + shift_bits[1] = 16; + shift_bits[2] = 8; + } + else // m_encode_type == utf_encode_t::UTF_32LE + { + shift_bits[1] = 8; + shift_bits[2] = 16; + shift_bits[3] = 24; + } - char32_t utf32 = 0; - switch (m_encode_type) - { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - utf32 = char32_t(*m_current << 24); - ++m_current; - utf32 |= char32_t(*m_current << 16); - ++m_current; - utf32 |= char32_t(*m_current << 8); - ++m_current; - utf32 |= char32_t(*m_current); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { - utf32 = char32_t(*m_current); - ++m_current; - utf32 |= char32_t(*m_current << 8); - ++m_current; - utf32 |= char32_t(*m_current << 16); - ++m_current; - utf32 |= char32_t(*m_current << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE - } + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; - utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); - ++m_current; - m_utf8_buf_index = 0; - } + while (m_current != m_end) + { + char32_t utf32 = char32_t(*m_current++ << shift_bits[0]); + utf32 |= char32_t(*m_current++ << shift_bits[1]); + utf32 |= char32_t(*m_current++ << shift_bits[2]); + utf32 |= char32_t(*m_current++ << shift_bits[3]); - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + utf8_encoding::from_utf32(utf32, utf8_buffer, utf8_buf_size); + + buffer.append(utf8_buffer.data(), utf8_buf_size); + } } private: @@ -4739,17 +4847,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; - /// The buffer for decoding characters read from the input. - std::array m_encoded_buffer {{0, 0}}; - /// The number of elements in `m_encoded_buffer`. - std::size_t m_encoded_buf_size {0}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. - std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; #ifdef FK_YAML_HAS_CHAR8_T @@ -4762,9 +4860,6 @@ class iterator_input_adapter< enable_if_t::value_type>, char8_t>::value>> { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new iterator_input_adapter object. iterator_input_adapter() = default; @@ -4772,11 +4867,14 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. 
- iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + // char8_t characters must be encoded in the UTF-8 format. + // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); } // allow only move construct/assignment like other input adapters. @@ -4788,35 +4886,57 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. - typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - typename std::char_traits::int_type ret = 0; - switch (m_encode_type) + IterType current = m_current; + while (current != m_end) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); - break; - default: // LCOV_EXCL_LINE - // char8_t characters must be encoded in the UTF-8 format. - // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. - break; // LCOV_EXCL_LINE - } - return ret; - } + char first = *current++; -private: - /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf8() noexcept - { - if (m_current != m_end) + // The first byte starts with 0b0XXX'XXXX -> 1-byte character + if ((first & 0xC0) == 0x80) + { + // The first byte must not start with 0b10XX'XXXX + std::array bytes {{first}}; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + // The first byte starts with 0b110X'XXXX -> 2-byte character + else if ((first & 0xE0) == 0xC0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0b1110'XXXX -> 3-byte character + else if ((first & 0xF0) == 0xE0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0x1111'0XXX -> 4-byte character + else if ((first & 0xF8) == 0xF0) + { + std::array bytes { + {uint8_t(first), uint8_t(*current++), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + } + + while (m_current != m_end) { - auto ret = std::char_traits::to_int_type(*m_current); - ++m_current; - return ret; + buffer.push_back(char(*m_current++)); } - return std::char_traits::eof(); } private: @@ -4825,7 +4945,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. 
- encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; #endif // defined(FK_YAML_HAS_CHAR8_T) @@ -4838,9 +4958,6 @@ class iterator_input_adapter< enable_if_t::value_type>, char16_t>::value>> { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new iterator_input_adapter object. iterator_input_adapter() = default; @@ -4848,11 +4965,12 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); } // allow only move construct/assignment like other input adapters. @@ -4864,57 +4982,36 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. - typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) - { - if (m_current == m_end) - { - if (m_encoded_buf_size == 0) - { - return std::char_traits::eof(); - } - } + int shift_bits = (m_encode_type == utf_encode_t::UTF_16BE) ? 
0 : 8; + + std::array encoded_buffer {{0, 0}}; + std::size_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; - while (m_current != m_end && m_encoded_buf_size < 2) + while (m_current != m_end || encoded_buf_size != 0) + { + while (m_current != m_end && encoded_buf_size < 2) { - switch (m_encode_type) - { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - m_encoded_buffer[m_encoded_buf_size] = *m_current; - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { - char16_t tmp = *m_current; - m_encoded_buffer[m_encoded_buf_size] = char16_t((tmp & 0x00FFu) << 8); - m_encoded_buffer[m_encoded_buf_size] |= char16_t((tmp & 0xFF00u) >> 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE - } - ++m_current; - ++m_encoded_buf_size; + char16_t tmp = *m_current++; + encoded_buffer[encoded_buf_size] = char16_t((tmp & 0x00FFu) << shift_bits); + encoded_buffer[encoded_buf_size++] |= char16_t((tmp & 0xFF00u) >> shift_bits); } std::size_t consumed_size = 0; - utf8_encoding::from_utf16(m_encoded_buffer, m_utf8_buffer, consumed_size, m_utf8_buf_size); + utf8_encoding::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); if (consumed_size == 1) { - m_encoded_buffer[0] = m_encoded_buffer[1]; - m_encoded_buffer[1] = 0; + encoded_buffer[0] = encoded_buffer[1]; + encoded_buffer[1] = 0; } - m_encoded_buf_size -= consumed_size; + encoded_buf_size -= consumed_size; - m_utf8_buf_index = 0; + buffer.append(utf8_buffer.data(), utf8_buf_size); } - - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; } private: @@ -4923,17 +5020,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_16BE_N}; - /// The buffer for decoding characters read from the input. 
- std::array m_encoded_buffer {{0, 0}}; - /// The number of elements in `m_encoded_buffer`. - std::size_t m_encoded_buf_size {0}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. - std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_16BE}; }; /// @brief An input adapter for iterators of type char32_t. @@ -4944,9 +5031,6 @@ class iterator_input_adapter< enable_if_t::value_type>, char32_t>::value>> { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new iterator_input_adapter object. iterator_input_adapter() = default; @@ -4954,11 +5038,12 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); } // allow only move construct/assignment like other input adapters. @@ -4970,44 +5055,32 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. 
- typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32LE) { - if (m_current == m_end) - { - return std::char_traits::eof(); - } + shift_bits[0] = 24; + shift_bits[1] = 8; + shift_bits[2] = 8; + shift_bits[3] = 24; + } - char32_t utf32 = 0; - switch (m_encode_type) - { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - utf32 = *m_current; - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { - char32_t tmp = *m_current; - utf32 |= char32_t((tmp & 0xFF000000u) >> 24); - utf32 |= char32_t((tmp & 0x00FF0000u) >> 8); - utf32 |= char32_t((tmp & 0x0000FF00u) << 8); - utf32 |= char32_t((tmp & 0x000000FFu) << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE - } + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; - utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); - ++m_current; - m_utf8_buf_index = 0; - } + while (m_current != m_end) + { + char32_t tmp = *m_current++; + char32_t utf32 = char32_t((tmp & 0xFF000000u) >> shift_bits[0]); + utf32 |= char32_t((tmp & 0x00FF0000u) >> shift_bits[1]); + utf32 |= char32_t((tmp & 0x0000FF00u) << shift_bits[2]); + utf32 |= char32_t((tmp & 0x000000FFu) << shift_bits[3]); - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + utf8_encoding::from_utf32(utf32, utf8_buffer, utf8_buf_size); + + buffer.append(utf8_buffer.data(), utf8_buf_size); + } } private: @@ -5016,22 +5089,13 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_32BE_N}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. 
- std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_32BE}; }; /// @brief An input adapter for C-style file handles. class file_input_adapter { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new file_input_adapter object. file_input_adapter() = default; @@ -5041,7 +5105,7 @@ class file_input_adapter /// It's user's responsibility to call those functions. /// @param file A file handle for this adapter. (A non-null pointer is assumed.) /// @param encode_type The encoding type for this input adapter. - explicit file_input_adapter(std::FILE* file, encode_t encode_type) noexcept + explicit file_input_adapter(std::FILE* file, utf_encode_t encode_type) noexcept : m_file(file), m_encode_type(encode_type) { @@ -5056,173 +5120,189 @@ class file_input_adapter /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. 
- typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); + case utf_encode_t::UTF_8: + fill_buffer_utf8(buffer); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: - ret = get_character_for_utf16(); + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + fill_buffer_utf16(buffer); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: - ret = get_character_for_utf32(); + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + fill_buffer_utf32(buffer); break; } - return ret; } private: /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf8() noexcept + void fill_buffer_utf8(std::string& buffer) { - char ch = 0; - size_t size = std::fread(&ch, sizeof(char), 1, m_file); - if (size == 1) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); + + char tmp_buf[256] {}; + std::size_t read_size = 0; + while ((read_size = std::fread(&tmp_buf[0], sizeof(char), sizeof(tmp_buf) / sizeof(tmp_buf[0]), m_file)) > 0) { - return std::char_traits::to_int_type(ch); + buffer.append(tmp_buf, read_size); } - return std::char_traits::eof(); - } - /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf16() - { - if (m_utf8_buf_index == m_utf8_buf_size) + auto current = buffer.begin(); + auto end = buffer.end(); + while (current != end) { - char chars[2] = {0, 0}; - while (m_encoded_buf_size < 2 && std::fread(&chars[0], sizeof(char), 2, m_file) == 2) + char first = *current++; + + // The first byte starts with 0b0XXX'XXXX -> 1-byte character + if ((first & 0xC0) == 0x80) + { + // The first byte must not start with 0b10XX'XXXX + std::array bytes {{first}}; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + // The first byte starts with 0b110X'XXXX -> 2-byte character + else if ((first & 0xE0) == 0xC0) { - switch (m_encode_type) + std::array bytes {{uint8_t(first), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0]) << 8); - m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1])); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0])); - m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1]) << 8); - break; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0b1110'XXXX -> 3-byte character + else if ((first & 0xF0) == 0xE0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE + } + // The first byte starts with 0x1111'0XXX -> 4-byte character + else if ((first & 0xF8) == 0xF0) + { + std::array bytes { + {uint8_t(first), uint8_t(*current++), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); } - - ++m_encoded_buf_size; } + } + } + + /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. + /// @return A UTF-8 encoded byte at the current position, or EOF. + void fill_buffer_utf16(std::string& buffer) + { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + + int shift_bits[2] {0, 0}; + if (m_encode_type == utf_encode_t::UTF_16BE) + { + shift_bits[0] = 8; + } + else // m_encode_type == utf_encode_t::UTF_16LE + { + shift_bits[1] = 8; + } - if (m_encoded_buf_size == 0) + char chars[2] = {0, 0}; + std::array encoded_buffer {{0, 0}}; + std::size_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + while (std::feof(m_file) == 0) + { + while (encoded_buf_size < 2 && std::fread(&chars[0], sizeof(char), 2, m_file) == 2) { - return std::char_traits::eof(); + encoded_buffer[encoded_buf_size] = char16_t(uint8_t(chars[0]) << shift_bits[0]); + encoded_buffer[encoded_buf_size++] |= char16_t(uint8_t(chars[1]) << shift_bits[1]); } std::size_t consumed_size = 0; - utf8_encoding::from_utf16(m_encoded_buffer, m_utf8_buffer, consumed_size, m_utf8_buf_size); + utf8_encoding::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); if (consumed_size == 1) { - m_encoded_buffer[0] = m_encoded_buffer[1]; - m_encoded_buffer[1] = 0; + encoded_buffer[0] = encoded_buffer[1]; + encoded_buffer[1] = 0; } - m_encoded_buf_size -= consumed_size; + encoded_buf_size -= consumed_size; - m_utf8_buf_index = 0; + buffer.append(utf8_buffer.data(), utf8_buf_size); } - - auto ret = 
std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; } /// @brief The concrete implementation of get_character() for UTF-32 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf32() + void fill_buffer_utf32(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32BE) + { + shift_bits[0] = 24; + shift_bits[1] = 16; + shift_bits[2] = 8; + } + else // m_encode_type == utf_encode_t::UTF_32LE + { + shift_bits[1] = 8; + shift_bits[2] = 16; + shift_bits[3] = 24; + } + + char chars[4] = {0, 0, 0, 0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + while (std::feof(m_file) == 0) { - char chars[4] = {0, 0, 0, 0}; std::size_t size = std::fread(&chars[0], sizeof(char), 4, m_file); if (size != 4) { - return std::char_traits::eof(); + return; } - char32_t utf32 = 0; - switch (m_encode_type) - { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - utf32 = char32_t(uint8_t(chars[0]) << 24); - utf32 |= char32_t(uint8_t(chars[1]) << 16); - utf32 |= char32_t(uint8_t(chars[2]) << 8); - utf32 |= char32_t(uint8_t(chars[3])); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { - utf32 = char32_t(uint8_t(chars[0])); - utf32 |= char32_t(uint8_t(chars[1]) << 8); - utf32 |= char32_t(uint8_t(chars[2]) << 16); - utf32 |= char32_t(uint8_t(chars[3]) << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE - } + char32_t utf32 = char32_t(uint8_t(chars[0]) << shift_bits[0]); + utf32 |= char32_t(uint8_t(chars[1]) << shift_bits[1]); + utf32 |= char32_t(uint8_t(chars[2]) << shift_bits[2]); + utf32 |= char32_t(uint8_t(chars[3]) << shift_bits[3]); - utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); - m_utf8_buf_index = 0; - } + utf8_encoding::from_utf32(utf32, utf8_buffer, utf8_buf_size); - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + buffer.append(utf8_buffer.data(), utf8_buf_size); + } } private: /// A pointer to the input file handle. std::FILE* m_file {nullptr}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; - /// The buffer for decoding characters read from the input. - std::array m_encoded_buffer {{0, 0}}; - /// The number of elements in `m_encoded_buffer`. - std::size_t m_encoded_buf_size {0}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. - std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; /// @brief An input adapter for streams class stream_input_adapter { public: - /// A type for characters used in this input adapter. - using char_type = char; - /// @brief Construct a new stream_input_adapter object. stream_input_adapter() = default; /// @brief Construct a new stream_input_adapter object. /// @param is A reference to the target input stream. - explicit stream_input_adapter(std::istream& is, encode_t encode_type) noexcept + explicit stream_input_adapter(std::istream& is, utf_encode_t encode_type) noexcept : m_istream(&is), m_encode_type(encode_type) { @@ -5237,165 +5317,186 @@ class stream_input_adapter /// @brief Get a character at the current position and move forward. 
/// @return std::char_traits::int_type A character or EOF. - typename std::char_traits::int_type get_character() + void fill_buffer(std::string& buffer) { - typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); + case utf_encode_t::UTF_8: + fill_buffer_utf8(buffer); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: - ret = get_character_for_utf16(); + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + fill_buffer_utf16(buffer); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: - ret = get_character_for_utf32(); + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + fill_buffer_utf32(buffer); break; } - return ret; } private: /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf8() noexcept + void fill_buffer_utf8(std::string& buffer) { - return m_istream->get(); + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); + + char tmp_buf[256] {}; + do + { + m_istream->read(&tmp_buf[0], 256); + std::size_t read_size = m_istream->gcount(); + buffer.append(tmp_buf, read_size); + } while (!m_istream->eof()); + + auto current = buffer.begin(); + auto end = buffer.end(); + while (current != end) + { + char first = *current++; + + // The first byte starts with 0b0XXX'XXXX -> 1-byte character + if ((first & 0xC0) == 0x80) + { + // The first byte must not start with 0b10XX'XXXX + std::array bytes {{first}}; + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + // The first byte starts with 0b110X'XXXX -> 2-byte character + else if ((first & 0xE0) == 0xC0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0b1110'XXXX -> 3-byte character + else if ((first & 0xF0) == 0xE0) + { + std::array bytes {{uint8_t(first), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + // The first byte starts with 0x1111'0XXX -> 4-byte character + else if ((first & 0xF8) == 0xF0) + { + std::array bytes { + {uint8_t(first), uint8_t(*current++), uint8_t(*current++), uint8_t(*current++)}}; + bool is_valid = utf8_encoding::validate(bytes); + if (!is_valid) + { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", bytes); + } + } + } } /// @brief The concrete implementation of get_character() for UTF-16 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf16() + void fill_buffer_utf16(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + + int shift_bits[2] {0, 0}; + if (m_encode_type == utf_encode_t::UTF_16BE) + { + shift_bits[0] = 8; + } + else // m_encode_type == utf_encode_t::UTF_16LE + { + shift_bits[1] = 8; + } + + char chars[2] = {0, 0}; + std::array encoded_buffer {{0, 0}}; + std::size_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + do { - while (m_encoded_buf_size < 2) + while (encoded_buf_size < 2) { - char chars[2] = {0, 0}; m_istream->read(&chars[0], 2); std::streamsize size = m_istream->gcount(); if (size != 2) { - if (m_encoded_buf_size == 0) - { - return std::char_traits::eof(); - } - break; - } - - switch (m_encode_type) - { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0]) << 8); - m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1])); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { - m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0])); - m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1]) << 8); break; } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE - } - ++m_encoded_buf_size; + encoded_buffer[encoded_buf_size] = char16_t(uint8_t(chars[0]) << shift_bits[0]); + encoded_buffer[encoded_buf_size++] |= char16_t(uint8_t(chars[1]) << shift_bits[1]); }; std::size_t consumed_size = 0; - utf8_encoding::from_utf16(m_encoded_buffer, m_utf8_buffer, consumed_size, m_utf8_buf_size); + utf8_encoding::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); if (consumed_size == 1) { - m_encoded_buffer[0] = m_encoded_buffer[1]; - m_encoded_buffer[1] = 0; + encoded_buffer[0] = encoded_buffer[1]; + encoded_buffer[1] = 0; } - m_encoded_buf_size -= consumed_size; - - m_utf8_buf_index = 0; - } + encoded_buf_size -= consumed_size; - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + buffer.append(utf8_buffer.data(), utf8_buf_size); + } while (!m_istream->eof()); } /// @brief The concrete implementation of get_character() for UTF-32 encoded inputs. /// @return A UTF-8 encoded byte at the current position, or EOF. 
- typename std::char_traits::int_type get_character_for_utf32() + void fill_buffer_utf32(std::string& buffer) { - if (m_utf8_buf_index == m_utf8_buf_size) + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32BE) + { + shift_bits[0] = 24; + shift_bits[1] = 16; + shift_bits[2] = 8; + } + else // m_encode_type == utf_encode_t::UTF_32LE + { + shift_bits[1] = 8; + shift_bits[2] = 16; + shift_bits[3] = 24; + } + + char chars[4] = {0, 0, 0, 0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + std::size_t utf8_buf_size {0}; + + do { - char ch = 0; - m_istream->read(&ch, 1); + m_istream->read(&chars[0], 4); std::streamsize size = m_istream->gcount(); - if (size != 1) + if (size != 4) { - return std::char_traits::eof(); + return; } - char32_t utf32 = 0; - switch (m_encode_type) - { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - utf32 = char32_t(ch << 24); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 16); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 8); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { - utf32 = char32_t(ch); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 8); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 16); - m_istream->read(&ch, 1); - utf32 |= char32_t(ch << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. 
- break; // LCOV_EXCL_LINE - } + char32_t utf32 = char32_t(uint8_t(chars[0]) << shift_bits[0]); + utf32 |= char32_t(uint8_t(chars[1]) << shift_bits[1]); + utf32 |= char32_t(uint8_t(chars[2]) << shift_bits[2]); + utf32 |= char32_t(uint8_t(chars[3]) << shift_bits[3]); - utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); - m_utf8_buf_index = 0; - } + utf8_encoding::from_utf32(utf32, utf8_buffer, utf8_buf_size); - auto ret = std::char_traits::to_int_type(m_utf8_buffer[m_utf8_buf_index]); - ++m_utf8_buf_index; - return ret; + buffer.append(utf8_buffer.data(), utf8_buf_size); + } while (!m_istream->eof()); } private: /// A pointer to the input stream object. std::istream* m_istream {nullptr}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; - /// The buffer for decoding characters read from the input. - std::array m_encoded_buffer {{0, 0}}; - /// The number of elements in `m_encoded_buffer`. - std::size_t m_encoded_buf_size {0}; - /// The buffer for UTF-8 encoded characters. - std::array m_utf8_buffer {{0, 0, 0, 0}}; - /// The next index in `m_utf8_buffer` to read. - std::size_t m_utf8_buf_index {0}; - /// The number of bytes in `m_utf8_buffer`. - std::size_t m_utf8_buf_size {0}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; ///////////////////////////////// @@ -5410,7 +5511,7 @@ class stream_input_adapter template ())))> inline iterator_input_adapter input_adapter(ItrType begin, ItrType end) { - encode_t encode_type = detect_encoding_and_skip_bom(begin, end); + utf_encode_t encode_type = detect_encoding_and_skip_bom(begin, end); return iterator_input_adapter(begin, end, encode_type); } @@ -5425,7 +5526,7 @@ inline auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array } /// @brief A namespace to implement container_input_adapter_factory for internal use. 
-namespace container_input_adapter_factory_impl +namespace input_adapter_factory { using std::begin; @@ -5450,25 +5551,25 @@ struct container_input_adapter_factory< decltype(input_adapter(begin(std::declval()), end(std::declval()))); /// @brief A factory method of input adapter objects for the target container objects. - /// @param container - /// @return adapter_type + /// @param container A container-like input object. + /// @return adapter_type An iterator_input_adapter object. static adapter_type create(const ContainerType& container) { return input_adapter(begin(container), end(container)); } }; -} // namespace container_input_adapter_factory_impl +} // namespace input_adapter_factory /// @brief A factory method for iterator_input_adapter objects with containers. /// @tparam ContainerType A container type. /// @param container A container object. -/// @return container_input_adapter_factory_impl::container_input_adapter_factory::adapter_type +/// @return input_adapter_factory::container_input_adapter_factory::adapter_type template -inline typename container_input_adapter_factory_impl::container_input_adapter_factory::adapter_type -input_adapter(ContainerType&& container) +inline typename input_adapter_factory::container_input_adapter_factory::adapter_type input_adapter( + ContainerType&& container) { - return container_input_adapter_factory_impl::container_input_adapter_factory::create(container); + return input_adapter_factory::container_input_adapter_factory::create(container); } /// @brief A factory method for file_input_adapter objects with C-style file handles. 
@@ -5480,16 +5581,16 @@ inline file_input_adapter input_adapter(std::FILE* file) { throw fkyaml::exception("Invalid FILE object pointer."); } - encode_t encode_type = detect_encoding_and_skip_bom(file); + utf_encode_t encode_type = detect_encoding_and_skip_bom(file); return file_input_adapter(file, encode_type); } -/// @brief -/// @param stream -/// @return stream_input_adapter +/// @brief A factory method for stream_input_adapter objects with std::istream objects. +/// @param stream An input stream. +/// @return stream_input_adapter A stream_input_adapter object. inline stream_input_adapter input_adapter(std::istream& stream) noexcept { - encode_t encode_type = detect_encoding_and_skip_bom(stream); + utf_encode_t encode_type = detect_encoding_and_skip_bom(stream); return stream_input_adapter(stream, encode_type); } @@ -5498,10 +5599,11 @@ inline stream_input_adapter input_adapter(std::istream& stream) noexcept FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP_ */ + // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -5963,7 +6065,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -6005,10 +6107,11 @@ struct node_property FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_NODE_PROPERTY_HPP_ */ + // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ 
header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -6111,10 +6214,11 @@ class node_ref_storage FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_NODE_REF_STORAGE_HPP_ */ + // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -6134,7 +6238,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -6566,7 +6670,7 @@ class basic_serializer } auto adapter = input_adapter(s); - lexical_analyzer lexer(std::move(adapter)); + lexical_analyzer lexer(std::move(adapter)); lexical_token_t token_type = lexer.get_next_token(); if (token_type != lexical_token_t::STRING_VALUE) @@ -6652,7 +6756,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -6670,7 +6774,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML 
library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -6971,10 +7075,11 @@ FK_YAML_INLINE_VAR constexpr const auto& from_node = detail::static_const /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -7350,6 +7455,7 @@ FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_CONVERSIONS_TO_NODE_HPP_ */ + FK_YAML_NAMESPACE_BEGIN /// @brief An ADL friendly converter between basic_node objects and native data objects. @@ -7395,7 +7501,7 @@ FK_YAML_NAMESPACE_END // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +/// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 /// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML /// /// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -8574,6 +8680,178 @@ class basic_node } } + /// @brief Get a basic_node object with a key of a compatible type. + /// @tparam KeyType A key type compatible with basic_node + /// @param key A key to the target basic_node object in a sequence/mapping node. + /// @return Reference to the basic_node object associated with the given key. 
+ /// @sa https://fktn-k.github.io/fkYAML/api/basic_node/at/ + template < + typename KeyType, detail::enable_if_t< + detail::conjunction< + detail::negation>, + detail::is_node_compatible_type>::value, + int> = 0> + basic_node& at(KeyType&& key) + { + if (is_scalar()) + { + throw fkyaml::type_error("at() is unavailable for a scalar node.", m_node_type); + } + + basic_node node_key = std::forward(key); + + if (is_sequence()) + { + if (!node_key.is_integer()) + { + throw fkyaml::type_error("An argument of at() for sequence nodes must be an integer.", m_node_type); + } + + FK_YAML_ASSERT(m_node_value.p_sequence != nullptr); + int index = node_key.template get_value(); + int size = static_cast(m_node_value.p_sequence->size()); + if (index >= size) + { + throw fkyaml::out_of_range(index); + } + return m_node_value.p_sequence->at(index); + } + + FK_YAML_ASSERT(m_node_value.p_mapping != nullptr); + bool is_found = m_node_value.p_mapping->find(node_key) != m_node_value.p_mapping->end(); + if (!is_found) + { + throw fkyaml::out_of_range(serialize(node_key).c_str()); + } + return m_node_value.p_mapping->at(node_key); + } + + /// @brief Get a basic_node object with a key of a compatible type. + /// @tparam KeyType A key type compatible with basic_node + /// @param key A key to the target basic_node object in a sequence/mapping node. + /// @return Constant reference to the basic_node object associated with the given key. 
+ /// @sa https://fktn-k.github.io/fkYAML/api/basic_node/at/ + template < + typename KeyType, detail::enable_if_t< + detail::conjunction< + detail::negation>, + detail::is_node_compatible_type>::value, + int> = 0> + const basic_node& at(KeyType&& key) const + { + if (is_scalar()) + { + throw fkyaml::type_error("at() is unavailable for a scalar node.", m_node_type); + } + + basic_node node_key = std::forward(key); + + if (is_sequence()) + { + if (!node_key.is_integer()) + { + throw fkyaml::type_error("An argument of at() for sequence nodes must be an integer.", m_node_type); + } + + FK_YAML_ASSERT(m_node_value.p_sequence != nullptr); + int index = node_key.template get_value(); + int size = static_cast(m_node_value.p_sequence->size()); + if (index >= size) + { + throw fkyaml::out_of_range(index); + } + return m_node_value.p_sequence->at(index); + } + + FK_YAML_ASSERT(m_node_value.p_mapping != nullptr); + bool is_found = m_node_value.p_mapping->find(node_key) != m_node_value.p_mapping->end(); + if (!is_found) + { + throw fkyaml::out_of_range(serialize(node_key).c_str()); + } + return m_node_value.p_mapping->at(node_key); + } + + /// @brief Get a basic_node object with a basic_node key object. + /// @tparam KeyType A key type which is a kind of the basic_node template class. + /// @param key A key to the target basic_node object in a sequence/mapping node. + /// @return Reference to the basic_node object associated with the given key. 
+ /// @sa https://fktn-k.github.io/fkYAML/api/basic_node/at/ + template < + typename KeyType, detail::enable_if_t>::value, int> = 0> + basic_node& at(KeyType&& key) + { + if (is_scalar()) + { + throw fkyaml::type_error("at() is unavailable for a scalar node.", m_node_type); + } + + if (is_sequence()) + { + if (!key.is_integer()) + { + throw fkyaml::type_error("An argument of at() for sequence nodes must be an integer.", m_node_type); + } + + FK_YAML_ASSERT(m_node_value.p_sequence != nullptr); + int index = std::forward(key).template get_value(); + int size = static_cast(m_node_value.p_sequence->size()); + if (index >= size) + { + throw fkyaml::out_of_range(index); + } + return m_node_value.p_sequence->at(index); + } + + FK_YAML_ASSERT(m_node_value.p_mapping != nullptr); + bool is_found = m_node_value.p_mapping->find(key) != m_node_value.p_mapping->end(); + if (!is_found) + { + throw fkyaml::out_of_range(serialize(key).c_str()); + } + return m_node_value.p_mapping->at(key); + } + + /// @brief Get a basic_node object with a basic_node key object. + /// @tparam KeyType A key type which is a kind of the basic_node template class. + /// @param key A key to the target basic_node object in a sequence/mapping node. + /// @return Constant reference to the basic_node object associated with the given key. 
+ /// @sa https://fktn-k.github.io/fkYAML/api/basic_node/at/ + template < + typename KeyType, detail::enable_if_t>::value, int> = 0> + const basic_node& at(KeyType&& key) const + { + if (is_scalar()) + { + throw fkyaml::type_error("at() is unavailable for a scalar node.", m_node_type); + } + + if (is_sequence()) + { + if (!key.is_integer()) + { + throw fkyaml::type_error("An argument of at() for sequence nodes must be an integer.", m_node_type); + } + + FK_YAML_ASSERT(m_node_value.p_sequence != nullptr); + int index = std::forward(key).template get_value(); + int size = static_cast(m_node_value.p_sequence->size()); + if (index >= size) + { + throw fkyaml::out_of_range(index); + } + return m_node_value.p_sequence->at(index); + } + + FK_YAML_ASSERT(m_node_value.p_mapping != nullptr); + bool is_found = m_node_value.p_mapping->find(key) != m_node_value.p_mapping->end(); + if (!is_found) + { + throw fkyaml::out_of_range(serialize(key).c_str()); + } + return m_node_value.p_mapping->at(key); + } + /// @brief Get the YAML version specification for this basic_node object. /// @return The version of the YAML format applied to the basic_node object. 
/// @sa https://fktn-k.github.io/fkYAML/api/basic_node/get_yaml_version/ diff --git a/test/cmake_add_subdirectory_test/CMakeLists.txt b/test/cmake_add_subdirectory_test/CMakeLists.txt index 4ee324b9..ebe252a3 100644 --- a/test/cmake_add_subdirectory_test/CMakeLists.txt +++ b/test/cmake_add_subdirectory_test/CMakeLists.txt @@ -6,7 +6,7 @@ add_test( -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -S${CMAKE_CURRENT_SOURCE_DIR}/project -B${CMAKE_CURRENT_BINARY_DIR}/build - -DfkYAML_source_dir=${PROJECT_SOURCE_DIR} + -DfkYAML_source_dir=${PROJECT_SOURCE_DIR} ) add_test( diff --git a/test/cmake_add_subdirectory_test/project/main.cpp b/test/cmake_add_subdirectory_test/project/main.cpp index f8c43d8c..16a8637d 100644 --- a/test/cmake_add_subdirectory_test/project/main.cpp +++ b/test/cmake_add_subdirectory_test/project/main.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/test/cmake_fetch_content_test/project/CMakeLists.txt b/test/cmake_fetch_content_test/project/CMakeLists.txt index 9c682217..02edbcdd 100644 --- a/test/cmake_fetch_content_test/project/CMakeLists.txt +++ b/test/cmake_fetch_content_test/project/CMakeLists.txt @@ -6,7 +6,7 @@ include(FetchContent) FetchContent_Declare( fkYAML GIT_REPOSITORY https://github.com/fktn-k/fkYAML.git - GIT_TAG v0.3.2) + GIT_TAG v0.3.3) FetchContent_MakeAvailable(fkYAML) add_executable( diff --git a/test/cmake_fetch_content_test/project/main.cpp b/test/cmake_fetch_content_test/project/main.cpp index f8c43d8c..16a8637d 100644 --- a/test/cmake_fetch_content_test/project/main.cpp +++ b/test/cmake_fetch_content_test/project/main.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ 
|/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/test/cmake_find_package_test/project/main.cpp b/test/cmake_find_package_test/project/main.cpp index f8c43d8c..16a8637d 100644 --- a/test/cmake_find_package_test/project/main.cpp +++ b/test/cmake_find_package_test/project/main.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/test/cmake_target_include_directories_test/project/main.cpp b/test/cmake_target_include_directories_test/project/main.cpp index f8c43d8c..16a8637d 100644 --- a/test/cmake_target_include_directories_test/project/main.cpp +++ b/test/cmake_target_include_directories_test/project/main.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/test/unit_test/CMakeLists.txt b/test/unit_test/CMakeLists.txt index cd04128f..6bf9c513 100644 --- a/test/unit_test/CMakeLists.txt +++ b/test/unit_test/CMakeLists.txt @@ -109,21 +109,20 @@ target_compile_options( INTERFACE # MSVC $<$: - /W4 /EHsc /utf-8 /permissive- + /W4 /WX /EHsc /utf-8 /permissive- + /wd4709 # comma operator within array index expression $<$:/Z7> $<$:/Od> 
> # GNU $<$: - -Wall -Wextra -pedantic -Wpedantic --all-warnings --extra-warnings - -Wc++0x-compat - -Wno-self-move # necessary to build iterator class test. + -Wall -Wextra -Werror -pedantic -Wpedantic --all-warnings --extra-warnings + -Wno-self-move # necessary to build the detail::iterator class test > # Clang $<$: - -Wall -pedantic + -Wall -Wextra -Werror -pedantic -Wno-c++98-compat -Wno-c++98-compat-pedantic - -Wno-self-assign-overloaded -Wno-self-move # necessary to build iterator class test. > ) @@ -182,7 +181,7 @@ if(FK_YAML_CODE_COVERAGE) unit_test_config INTERFACE --coverage - ) + ) endif() endif() @@ -207,6 +206,7 @@ add_executable( test_node_ref_storage_class.cpp test_ordered_map_class.cpp test_serializer_class.cpp + test_string_formatter.cpp test_utf8_encoding_class.cpp main.cpp ) diff --git a/test/unit_test/main.cpp b/test/unit_test/main.cpp index e7e8689c..731e12a1 100644 --- a/test/unit_test/main.cpp +++ b/test/unit_test/main.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/test/unit_test/test_custom_from_node.cpp b/test/unit_test/test_custom_from_node.cpp index 3eb8fd53..6a83ff7f 100644 --- a/test/unit_test/test_custom_from_node.cpp +++ b/test/unit_test/test_custom_from_node.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -159,4 +159,4 @@ 
TEST_CASE("FromNodeTest_UserDefinedTypeMapErrorTest", "[FromNodeTest]") fkyaml::node int_node = 123; REQUIRE_THROWS_AS(int_node.get_value(), fkyaml::exception); -} \ No newline at end of file +} diff --git a/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_1byte_char.txt b/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_1byte_char.txt new file mode 100644 index 00000000..e270962d --- /dev/null +++ b/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_1byte_char.txt @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_2byte_char.txt b/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_2byte_char.txt new file mode 100644 index 00000000..d18f885c --- /dev/null +++ b/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_2byte_char.txt @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_3byte_char.txt b/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_3byte_char.txt new file mode 100644 index 00000000..91c95044 --- /dev/null +++ b/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_3byte_char.txt @@ -0,0 +1 @@ +j \ No newline at end of file diff --git a/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_4byte_char.txt b/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_4byte_char.txt new file mode 100644 index 00000000..4248f2fd --- /dev/null +++ b/test/unit_test/test_data/input_adapter_test_data_utf8n_invalid_4byte_char.txt @@ -0,0 +1 @@ +pp \ No newline at end of file diff --git a/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_1byte_char.txt b/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_1byte_char.txt new file mode 100644 index 00000000..ac389421 --- /dev/null +++ b/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_1byte_char.txt @@ -0,0 +1 @@ +Z0a \ No newline at end of file diff 
--git a/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_2byte_char.txt b/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_2byte_char.txt new file mode 100644 index 00000000..99eee759 --- /dev/null +++ b/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_2byte_char.txt @@ -0,0 +1 @@ +€߿ \ No newline at end of file diff --git a/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_3byte_char.txt b/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_3byte_char.txt new file mode 100644 index 00000000..9ff4e287 --- /dev/null +++ b/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_3byte_char.txt @@ -0,0 +1 @@ +쿿 \ No newline at end of file diff --git a/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_4byte_char.txt b/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_4byte_char.txt new file mode 100644 index 00000000..3e7cee86 --- /dev/null +++ b/test/unit_test/test_data/input_adapter_test_data_utf8n_valid_4byte_char.txt @@ -0,0 +1 @@ +𐀀򿀀 \ No newline at end of file diff --git a/test/unit_test/test_deserializer_class.cpp b/test/unit_test/test_deserializer_class.cpp index 738a721f..b5a569a2 100644 --- a/test/unit_test/test_deserializer_class.cpp +++ b/test/unit_test/test_deserializer_class.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -984,6 +984,47 @@ TEST_CASE("DeserializerClassTest_DeserializeBlockMappingTest", "[DeserializerCla REQUIRE(root["Baz[123]"].get_value() == 3.14); } + SECTION("Flow indicators inside unquoted plain scalar values") + { + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("Foo: Bar, abc{abc"))); + 
REQUIRE(root.is_mapping()); + REQUIRE(root.size() == 1); + REQUIRE(root.contains("Foo")); + REQUIRE(root["Foo"].is_string()); + REQUIRE(root["Foo"].get_value_ref() == "Bar, abc{abc"); + + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("Foo: Bar, abc}abc"))); + REQUIRE(root.is_mapping()); + REQUIRE(root.size() == 1); + REQUIRE(root.contains("Foo")); + REQUIRE(root["Foo"].is_string()); + REQUIRE(root["Foo"].get_value_ref() == "Bar, abc}abc"); + + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("Foo: Bar, abc[abc"))); + REQUIRE(root.is_mapping()); + REQUIRE(root.size() == 1); + REQUIRE(root.contains("Foo")); + REQUIRE(root["Foo"].is_string()); + REQUIRE(root["Foo"].get_value_ref() == "Bar, abc[abc"); + + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("Foo: Bar, abc]abc"))); + REQUIRE(root.is_mapping()); + REQUIRE(root.size() == 1); + REQUIRE(root.contains("Foo")); + REQUIRE(root["Foo"].is_string()); + REQUIRE(root["Foo"].get_value_ref() == "Bar, abc]abc"); + + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("Foo: Bar, {[123] :3.14}"))); + REQUIRE(root.is_mapping()); + REQUIRE(root.size() == 1); + REQUIRE(root.contains("Foo")); + REQUIRE(root["Foo"].is_string()); + REQUIRE(root["Foo"].get_value_ref() == "Bar, {[123] :3.14}"); + REQUIRE_THROWS_AS( + root = deserializer.deserialize(fkyaml::detail::input_adapter("Foo: Bar, {[123] : 3.14}")), + fkyaml::parse_error); + } + SECTION("a comment right after a block mapping key.") { REQUIRE_NOTHROW( @@ -1000,6 +1041,61 @@ TEST_CASE("DeserializerClassTest_DeserializeBlockMappingTest", "[DeserializerCla REQUIRE(root["baz"]["qux"].is_integer()); REQUIRE(root["baz"]["qux"].get_value() == 123); } + + SECTION("mapping entries split across newlines") + { + REQUIRE_NOTHROW( + root = deserializer.deserialize(fkyaml::detail::input_adapter("foo:\n" + " bar\n" + "baz:\n" + " 123\n" + "null:\n" + " {false: 3.14}\n" + 
"qux:\n" + " [r, g, b]"))); + + REQUIRE(root.is_mapping()); + REQUIRE(root.size() == 4); + REQUIRE(root.contains("foo")); + REQUIRE(root.contains("baz")); + REQUIRE(root.contains(nullptr)); + REQUIRE(root.contains("qux")); + + REQUIRE(root["foo"].is_string()); + REQUIRE(root["foo"].get_value_ref() == "bar"); + + REQUIRE(root["baz"].is_integer()); + REQUIRE(root["baz"].get_value() == 123); + + REQUIRE(root[nullptr].is_mapping()); + REQUIRE(root[nullptr].contains(false)); + REQUIRE(root[nullptr][false].is_float_number()); + REQUIRE(root[nullptr][false].get_value() == 3.14); + + REQUIRE(root["qux"].is_sequence()); + REQUIRE(root["qux"].size() == 3); + + REQUIRE(root["qux"][0].is_string()); + REQUIRE(root["qux"][0].get_value_ref() == "r"); + REQUIRE(root["qux"][1].is_string()); + REQUIRE(root["qux"][1].get_value_ref() == "g"); + REQUIRE(root["qux"][2].is_string()); + REQUIRE(root["qux"][2].get_value_ref() == "b"); + } + + SECTION("parse alias mapping key") + { + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("&anchor foo:\n *anchor: 123"))); + + REQUIRE(root.is_mapping()); + REQUIRE(root.size() == 1); + REQUIRE(root.contains("foo")); + REQUIRE(root["foo"].is_mapping()); + REQUIRE(root["foo"].size() == 1); + REQUIRE(root["foo"].contains("foo")); + REQUIRE(root["foo"]["foo"].is_integer()); + REQUIRE(root["foo"]["foo"].get_value() == 123); + } } TEST_CASE("DeserializerClassTest_DeserializeFlowSequenceTest", "[DeserializerClassTest]") diff --git a/test/unit_test/test_encode_detector.cpp b/test/unit_test/test_encode_detector.cpp index da28dc54..c929c26d 100644 --- a/test/unit_test/test_encode_detector.cpp +++ b/test/unit_test/test_encode_detector.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| 
https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -28,54 +28,67 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingTypeTest", "[EncodeDetectorTest]") { - using pair_t = std::pair, fkyaml::detail::encode_t>; - auto params = GENERATE( - pair_t {{0xEFu, 0xBBu, 0xBFu, 0x80u}, fkyaml::detail::encode_t::UTF_8_BOM}, - pair_t {{0xEFu, 0, 0xBFu, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xEFu, 0xBBu, 0, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0xBBu, 0xBFu, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0xFEu, 0xFFu}, fkyaml::detail::encode_t::UTF_32BE_BOM}, - pair_t {{0x80u, 0, 0xFEu, 0xFFu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0x80u, 0xFEu, 0xFFu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0x80u, 0xFFu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0xFEu, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0xFEu, 0, 0}, fkyaml::detail::encode_t::UTF_32LE_BOM}, - pair_t {{0x80u, 0xFEu, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0x80u, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0xFEu, 0x80u, 0}, fkyaml::detail::encode_t::UTF_16LE_BOM}, - pair_t {{0xFFu, 0xFEu, 0, 0x80u}, fkyaml::detail::encode_t::UTF_16LE_BOM}, - pair_t {{0xFEu, 0xFFu, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_16BE_BOM}, - pair_t {{0x80u, 0xFFu, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFEu, 0x80u, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0xFEu, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_16LE_BOM}, - pair_t {{0x80u, 0xFEu, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0x80u, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0, 1}, fkyaml::detail::encode_t::UTF_32BE_N}, - pair_t {{0, 0, 0, 0x40u}, fkyaml::detail::encode_t::UTF_32BE_N}, - pair_t {{0, 0, 0, 0x7Fu}, fkyaml::detail::encode_t::UTF_32BE_N}, - pair_t {{0x80u, 0, 0, 0x7Fu}, 
fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0x80u, 0, 0x7Fu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0x80u, 0x7Fu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{1, 0, 0, 0}, fkyaml::detail::encode_t::UTF_32LE_N}, - pair_t {{0x40u, 0, 0, 0}, fkyaml::detail::encode_t::UTF_32LE_N}, - pair_t {{0x7Fu, 0, 0, 0}, fkyaml::detail::encode_t::UTF_32LE_N}, - pair_t {{0, 0, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0x80u, 0, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0x7Fu, 0x80u, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0x7Fu, 0, 0x80u, 0}, fkyaml::detail::encode_t::UTF_16LE_N}, - pair_t {{0x7Fu, 0, 0, 0x80u}, fkyaml::detail::encode_t::UTF_16LE_N}, - pair_t {{0, 1, 1, 1}, fkyaml::detail::encode_t::UTF_16BE_N}, - pair_t {{0, 0x40u, 1, 1}, fkyaml::detail::encode_t::UTF_16BE_N}, - pair_t {{0, 0x7Fu, 1, 1}, fkyaml::detail::encode_t::UTF_16BE_N}, - pair_t {{1, 0, 1, 1}, fkyaml::detail::encode_t::UTF_16LE_N}, - pair_t {{0x40u, 0, 1, 1}, fkyaml::detail::encode_t::UTF_16LE_N}, - pair_t {{0x7Fu, 0, 1, 1}, fkyaml::detail::encode_t::UTF_16LE_N}); - - REQUIRE( - fkyaml::detail::detect_encoding_type(params.first[0], params.first[1], params.first[2], params.first[3]) == - params.second); + struct test_data_t + { + test_data_t(std::array input_, fkyaml::detail::utf_encode_t encode_type_, bool has_bom_) + : input(input_), + encode_type(encode_type_), + has_bom(has_bom_) + { + } + + std::array input {}; + fkyaml::detail::utf_encode_t encode_type {fkyaml::detail::utf_encode_t::UTF_8}; + bool has_bom {false}; + }; + + auto d = GENERATE( + test_data_t {{{0xEFu, 0xBBu, 0xBFu, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, true}, + test_data_t {{{0xEFu, 0, 0xBFu, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0xEFu, 0xBBu, 0, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t 
{{{0, 0xBBu, 0xBFu, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0, 0, 0xFEu, 0xFFu}}, fkyaml::detail::utf_encode_t::UTF_32BE, true}, + test_data_t {{{0x80u, 0, 0xFEu, 0xFFu}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0, 0x80u, 0xFEu, 0xFFu}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0, 0, 0x80u, 0xFFu}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0, 0, 0xFEu, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0xFFu, 0xFEu, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_32LE, true}, + test_data_t {{{0x80u, 0xFEu, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0xFFu, 0x80u, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0xFFu, 0xFEu, 0x80u, 0}}, fkyaml::detail::utf_encode_t::UTF_16LE, true}, + test_data_t {{{0xFFu, 0xFEu, 0, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_16LE, true}, + test_data_t {{{0xFEu, 0xFFu, 0x80u, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_16BE, true}, + test_data_t {{{0x80u, 0xFFu, 0x80u, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0xFEu, 0x80u, 0x80u, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0xFFu, 0xFEu, 0x80u, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_16LE, true}, + test_data_t {{{0x80u, 0xFEu, 0x80u, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0xFFu, 0x80u, 0x80u, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0, 0, 0, 1}}, fkyaml::detail::utf_encode_t::UTF_32BE, false}, + test_data_t {{{0, 0, 0, 0x40u}}, fkyaml::detail::utf_encode_t::UTF_32BE, false}, + test_data_t {{{0, 0, 0, 0x7Fu}}, fkyaml::detail::utf_encode_t::UTF_32BE, false}, + test_data_t {{{0x80u, 0, 0, 0x7Fu}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0, 0x80u, 0, 0x7Fu}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0, 0, 0x80u, 0x7Fu}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + 
test_data_t {{{0, 0, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0, 0, 0, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{1, 0, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_32LE, false}, + test_data_t {{{0x40u, 0, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_32LE, false}, + test_data_t {{{0x7Fu, 0, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_32LE, false}, + test_data_t {{{0, 0, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0x80u, 0, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0x7Fu, 0x80u, 0, 0}}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{{0x7Fu, 0, 0x80u, 0}}, fkyaml::detail::utf_encode_t::UTF_16LE, false}, + test_data_t {{{0x7Fu, 0, 0, 0x80u}}, fkyaml::detail::utf_encode_t::UTF_16LE, false}, + test_data_t {{{0, 1, 1, 1}}, fkyaml::detail::utf_encode_t::UTF_16BE, false}, + test_data_t {{{0, 0x40u, 1, 1}}, fkyaml::detail::utf_encode_t::UTF_16BE, false}, + test_data_t {{{0, 0x7Fu, 1, 1}}, fkyaml::detail::utf_encode_t::UTF_16BE, false}, + test_data_t {{{1, 0, 1, 1}}, fkyaml::detail::utf_encode_t::UTF_16LE, false}, + test_data_t {{{0x40u, 0, 1, 1}}, fkyaml::detail::utf_encode_t::UTF_16LE, false}, + test_data_t {{{0x7Fu, 0, 1, 1}}, fkyaml::detail::utf_encode_t::UTF_16LE, false}); + + bool has_bom = false; + REQUIRE(fkyaml::detail::detect_encoding_type(d.input, has_bom) == d.encode_type); + REQUIRE(has_bom == d.has_bom); } TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTest]") @@ -89,8 +102,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0x60u), char(0x61u), char(0x62u), char(0x63u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, 
end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(begin == std::begin(input)); } @@ -99,8 +112,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0xEFu), char(0xBBu), char(0xBFu), char(0x60u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(begin == std::begin(input) + 3); } @@ -109,8 +122,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {0, char(0x60u), 0, char(0x61u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == std::begin(input)); } @@ -119,8 +132,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0xFEu), char(0xFFu), 0, char(0x60u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == std::begin(input) + 2); } @@ -129,8 +142,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0x60u), 0, char(0x61u), 0}; auto begin = std::begin(input); auto end = 
std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(begin == std::begin(input)); } @@ -139,8 +152,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0xFFu), char(0xFEu), char(0x60u), 0}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(begin == std::begin(input) + 2); } @@ -149,8 +162,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {0, 0, 0, char(0x60u), 0, 0, 0, char(0x61u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input)); } @@ -159,8 +172,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {0, 0, char(0xFEu), char(0xFFu), 0, 0, 0, char(0x60u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == 
fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input) + 4); } @@ -169,8 +182,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0x60u), 0, 0, 0, char(0x61u), 0, 0, 0}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(begin == std::begin(input)); } @@ -179,8 +192,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0xFFu), char(0xFEu), 0, 0, char(0x60u), 0, 0, 0}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(begin == std::begin(input) + 4); } @@ -193,8 +206,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input {char16_t(0x0060u), char16_t(0x0061u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == std::begin(input)); } @@ -203,8 +216,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input {char16_t(0xFEFFu), char16_t(0x0060u)}; auto begin = std::begin(input); auto end = 
std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == std::begin(input) + 1); } @@ -213,8 +226,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input {char16_t(0x6000u), char16_t(0x6100u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(begin == std::begin(input)); } @@ -223,8 +236,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input {char16_t(0xFFFEu), char16_t(0x6000u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(begin == std::begin(input) + 1); } @@ -233,8 +246,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input = u""; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == 
std::begin(input)); } @@ -255,8 +268,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input {char32_t(0x00000060u), char32_t(0x00000061u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input)); } @@ -265,8 +278,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input {char32_t(0x0000FEFFu), char32_t(0x00000060u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input) + 1); } @@ -275,8 +288,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input {char32_t(0x60000000u), char32_t(0x61000000u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(begin == std::begin(input)); } @@ -285,8 +298,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input {char32_t(0xFFFE0000u), char32_t(0x60000000u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = 
fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(begin == std::begin(input) + 1); } @@ -295,8 +308,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input = U""; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input)); } @@ -319,8 +332,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -333,8 +346,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(std::ftell(p_file) == 3); std::fclose(p_file); @@ -347,8 +360,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = 
fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -361,8 +374,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(std::ftell(p_file) == 2); std::fclose(p_file); @@ -375,8 +388,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -389,8 +402,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(std::ftell(p_file) == 2); std::fclose(p_file); @@ -403,8 +416,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = 
fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -417,8 +430,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(std::ftell(p_file) == 4); std::fclose(p_file); @@ -431,8 +444,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -445,8 +458,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(std::ftell(p_file) == 4); std::fclose(p_file); @@ -459,8 +472,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = 
fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -473,88 +486,88 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes SECTION("std::istream with UTF-8 encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-8(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8bom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(ifs.tellg() == 3); } SECTION("std::istream with UTF-16BE encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf16ben.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-16BE(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf16bebom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == 
fkyaml::detail::encode_t::UTF_16BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(ifs.tellg() == 2); } SECTION("std::istream with UTF-16LE encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf16len.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-16LE(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf16lebom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(ifs.tellg() == 2); } SECTION("std::istream with UTF-32BE encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf32ben.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-32BE(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf32bebom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); 
REQUIRE(ifs.tellg() == 4); } SECTION("std::istream with UTF-32LE encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf32len.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-32LE(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf32lebom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(ifs.tellg() == 4); } SECTION("std::istream with an empty input file") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/single_char_byte_input.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(ifs.tellg() == 0); } -} \ No newline at end of file +} diff --git a/test/unit_test/test_exception_class.cpp b/test/unit_test/test_exception_class.cpp index b11a6e9e..1d4ab8bd 100644 --- a/test/unit_test/test_exception_class.cpp +++ b/test/unit_test/test_exception_class.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff 
--git a/test/unit_test/test_from_string.cpp b/test/unit_test/test_from_string.cpp index 536caf5e..c0d3b759 100644 --- a/test/unit_test/test_from_string.cpp +++ b/test/unit_test/test_from_string.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/test/unit_test/test_input_adapter.cpp b/test/unit_test/test_input_adapter.cpp index dd28e127..0e17940f 100644 --- a/test/unit_test/test_input_adapter.cpp +++ b/test/unit_test/test_input_adapter.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -100,7 +100,7 @@ TEST_CASE("InputAdapterTest_StreamInputAdapterProviderTest", "[InputAdapterTest] REQUIRE(std::is_same::value); } -TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") +TEST_CASE("InputAdapterTest_FillBufferTest", "[InputAdapterTest]") { /////////////// // UTF-8 // @@ -112,21 +112,22 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - - REQUIRE(input_adapter.get_character() == 't'); - REQUIRE(input_adapter.get_character() == 'e'); - REQUIRE(input_adapter.get_character() == 's'); - REQUIRE(input_adapter.get_character() == 't'); - REQUIRE(input_adapter.get_character() == ' '); - REQUIRE(input_adapter.get_character() == 's'); - 
REQUIRE(input_adapter.get_character() == 'o'); - REQUIRE(input_adapter.get_character() == 'u'); - REQUIRE(input_adapter.get_character() == 'r'); - REQUIRE(input_adapter.get_character() == 'c'); - REQUIRE(input_adapter.get_character() == 'e'); - REQUIRE(input_adapter.get_character() == '.'); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 12); + REQUIRE(buffer[0] == 't'); + REQUIRE(buffer[1] == 'e'); + REQUIRE(buffer[2] == 's'); + REQUIRE(buffer[3] == 't'); + REQUIRE(buffer[4] == ' '); + REQUIRE(buffer[5] == 's'); + REQUIRE(buffer[6] == 'o'); + REQUIRE(buffer[7] == 'u'); + REQUIRE(buffer[8] == 'r'); + REQUIRE(buffer[9] == 'c'); + REQUIRE(buffer[10] == 'e'); + REQUIRE(buffer[11] == '.'); } SECTION("iterator_input_adapter for UTF-8 with std::string") @@ -136,21 +137,22 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - - REQUIRE(input_adapter.get_character() == 't'); - REQUIRE(input_adapter.get_character() == 'e'); - REQUIRE(input_adapter.get_character() == 's'); - REQUIRE(input_adapter.get_character() == 't'); - REQUIRE(input_adapter.get_character() == ' '); - REQUIRE(input_adapter.get_character() == 's'); - REQUIRE(input_adapter.get_character() == 'o'); - REQUIRE(input_adapter.get_character() == 'u'); - REQUIRE(input_adapter.get_character() == 'r'); - REQUIRE(input_adapter.get_character() == 'c'); - REQUIRE(input_adapter.get_character() == 'e'); - REQUIRE(input_adapter.get_character() == '.'); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 12); + REQUIRE(buffer[0] == 't'); + REQUIRE(buffer[1] == 'e'); + REQUIRE(buffer[2] == 's'); + REQUIRE(buffer[3] == 't'); + REQUIRE(buffer[4] == ' '); + 
REQUIRE(buffer[5] == 's'); + REQUIRE(buffer[6] == 'o'); + REQUIRE(buffer[7] == 'u'); + REQUIRE(buffer[8] == 'r'); + REQUIRE(buffer[9] == 'c'); + REQUIRE(buffer[10] == 'e'); + REQUIRE(buffer[11] == '.'); } SECTION("file_input_adapter for UTF-8") @@ -162,18 +164,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); std::fclose(p_file); } @@ -184,18 +186,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - 
REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } //////////////////// @@ -209,21 +211,22 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - - REQUIRE(input_adapter.get_character() == 't'); - REQUIRE(input_adapter.get_character() == 'e'); - REQUIRE(input_adapter.get_character() == 's'); - REQUIRE(input_adapter.get_character() == 't'); - REQUIRE(input_adapter.get_character() == ' '); - REQUIRE(input_adapter.get_character() == 's'); - REQUIRE(input_adapter.get_character() == 'o'); - REQUIRE(input_adapter.get_character() == 'u'); - REQUIRE(input_adapter.get_character() == 'r'); - REQUIRE(input_adapter.get_character() == 'c'); - REQUIRE(input_adapter.get_character() == 'e'); - REQUIRE(input_adapter.get_character() == '.'); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 12); + REQUIRE(buffer[0] == 't'); + REQUIRE(buffer[1] == 'e'); + REQUIRE(buffer[2] == 's'); + REQUIRE(buffer[3] == 't'); + REQUIRE(buffer[4] == ' '); + REQUIRE(buffer[5] == 's'); + REQUIRE(buffer[6] == 'o'); + REQUIRE(buffer[7] == 'u'); + REQUIRE(buffer[8] == 'r'); + REQUIRE(buffer[9] == 'c'); + 
REQUIRE(buffer[10] == 'e'); + REQUIRE(buffer[11] == '.'); } SECTION("iterator_input_adapter for UTF-8(BOM) with std::string") @@ -235,21 +238,22 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - - REQUIRE(input_adapter.get_character() == 't'); - REQUIRE(input_adapter.get_character() == 'e'); - REQUIRE(input_adapter.get_character() == 's'); - REQUIRE(input_adapter.get_character() == 't'); - REQUIRE(input_adapter.get_character() == ' '); - REQUIRE(input_adapter.get_character() == 's'); - REQUIRE(input_adapter.get_character() == 'o'); - REQUIRE(input_adapter.get_character() == 'u'); - REQUIRE(input_adapter.get_character() == 'r'); - REQUIRE(input_adapter.get_character() == 'c'); - REQUIRE(input_adapter.get_character() == 'e'); - REQUIRE(input_adapter.get_character() == '.'); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 12); + REQUIRE(buffer[0] == 't'); + REQUIRE(buffer[1] == 'e'); + REQUIRE(buffer[2] == 's'); + REQUIRE(buffer[3] == 't'); + REQUIRE(buffer[4] == ' '); + REQUIRE(buffer[5] == 's'); + REQUIRE(buffer[6] == 'o'); + REQUIRE(buffer[7] == 'u'); + REQUIRE(buffer[8] == 'r'); + REQUIRE(buffer[9] == 'c'); + REQUIRE(buffer[10] == 'e'); + REQUIRE(buffer[11] == '.'); } SECTION("file_input_adapter for UTF-8(BOM)") @@ -261,18 +265,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == 
int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); std::fclose(p_file); } @@ -283,18 +287,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } ////////////////// @@ -307,19 +311,19 @@ 
TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16BE with std::string") @@ -329,20 +333,20 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - 
REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == int_type(0x5Au)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 10); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); + REQUIRE(buffer[9] == char(0x5Au)); } SECTION("iterator_input_adapter for UTF-16BE with a char16_t array") @@ -351,19 +355,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == 
char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16BE with std::u16string") @@ -374,20 +378,20 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::u16string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == int_type(0x5Au)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 10); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); + REQUIRE(buffer[9] == char(0x5Au)); } SECTION("file_input_adapter for UTF-16BE") @@ -399,19 +403,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + 
input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); std::fclose(p_file); } @@ -422,19 +426,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 
9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } /////////////////////// @@ -448,19 +452,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16BE(BOM) with std::string") @@ -471,19 +475,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename 
char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16BE(BOM) with a char16_t array") @@ -492,19 +496,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == 
char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16BE(BOM) with std::u16string") @@ -515,19 +519,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::u16string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("file_input_adapter for UTF-16BE(BOM)") @@ -539,19 +543,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = 
fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); std::fclose(p_file); } @@ -562,19 +566,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - 
REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } ////////////////// @@ -587,19 +590,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16LE with std::string") @@ -609,19 +612,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", 
"[InputAdapterTest]") using itr_type = typename std::string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16LE with a char16_t array") @@ -630,19 +633,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == 
int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16LE with std::u16string") @@ -653,19 +656,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::u16string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == 
char(0x52u)); } SECTION("file_input_adapter for UTF-16LE") @@ -677,19 +680,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); std::fclose(p_file); } @@ -700,19 +703,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - 
REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } /////////////////////// @@ -726,19 +729,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + 
REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16LE(BOM) with std::string") @@ -749,19 +752,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16LE(BOM) with a char16_t array") @@ -770,19 +773,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - 
REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("iterator_input_adapter for UTF-16LE(BOM) with std::u16string") @@ -793,19 +796,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::u16string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == 
char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } SECTION("file_input_adapter for UTF-16LE(BOM)") @@ -817,19 +820,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); std::fclose(p_file); } @@ -840,19 +843,19 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == 
int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == int_type(0x52u)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 9); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + REQUIRE(buffer[8] == char(0x52u)); } ////////////////// @@ -865,18 +868,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + 
REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32BE with std::string") @@ -886,18 +889,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32BE with a char32_t array") @@ -906,18 +909,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - 
REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32BE with std::u32string") @@ -928,18 +931,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::u32string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + 
REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("file_input_adapter for UTF-32BE") @@ -951,18 +954,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); std::fclose(p_file); } @@ -973,18 +976,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == 
int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } /////////////////////// @@ -997,18 +1000,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32BE(BOM) with std::string") @@ -1018,18 +1021,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::string::iterator; 
REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32BE(BOM) with a char32_t array") @@ -1038,18 +1041,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + 
input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32BE(BOM) with std::u32string") @@ -1060,18 +1063,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::u32string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("file_input_adapter for UTF-32BE(BOM)") @@ -1083,18 +1086,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + 
input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); std::fclose(p_file); } @@ -1105,18 +1108,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + 
REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } ////////////////// @@ -1129,18 +1132,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32LE with std::string") @@ -1150,18 +1153,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == 
int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32LE with a char32_t array") @@ -1170,18 +1173,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32LE with 
std::u32string") @@ -1192,18 +1195,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::u32string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("file_input_adapter for UTF-32LE") @@ -1215,18 +1218,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - 
REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); std::fclose(p_file); } @@ -1237,18 +1240,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } /////////////////////// @@ -1261,18 +1264,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - 
using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32LE(BOM) with std::string") @@ -1282,18 +1285,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); 
+ REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32LE(BOM) with a char32_t array") @@ -1302,18 +1305,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(input); REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("iterator_input_adapter for UTF-32LE(BOM) with std::u32string") @@ -1324,18 +1327,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") using itr_type = typename std::u32string::iterator; REQUIRE(std::is_same>::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; - - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == 
int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); } SECTION("file_input_adapter for UTF-32LE(BOM)") @@ -1347,18 +1350,18 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(p_file); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + input_adapter.fill_buffer(buffer); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + 
REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); std::fclose(p_file); } @@ -1369,17 +1372,286 @@ TEST_CASE("InputAdapterTest_GetCharacterTest", "[InputAdapterTest]") auto input_adapter = fkyaml::detail::input_adapter(ifs); REQUIRE(std::is_same::value); - using char_traits_type = std::char_traits; - using int_type = typename char_traits_type::int_type; + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == 'a'); + REQUIRE(buffer[1] == char(0xE3u)); + REQUIRE(buffer[2] == char(0x81u)); + REQUIRE(buffer[3] == char(0x82u)); + REQUIRE(buffer[4] == char(0xF0u)); + REQUIRE(buffer[5] == char(0xA0u)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x8Bu)); + } +} + +TEST_CASE("InputAdapterTest_FillBufferUTF8CharsValidationTest", "[InputAdapterTest]") +{ + ///////////////////////////////// + // UTF-8 1-Byte Characters // + ///////////////////////////////// + + SECTION("file_input_adapter with valid 1-byte UTF-8 encodings") + { + DISABLE_C4996 + FILE* p_file = std::fopen(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_valid_1byte_char.txt", "r"); + ENABLE_C4996 + + auto input_adapter = fkyaml::detail::input_adapter(p_file); + REQUIRE(std::is_same::value); + + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 3); + REQUIRE(buffer[0] == char(0x5Au)); + REQUIRE(buffer[1] == char(0x30u)); + REQUIRE(buffer[2] == char(0x61u)); + + std::fclose(p_file); + } + + SECTION("file_input_adapter with invalid 1-byte UTF-8 encodings") + { + DISABLE_C4996 + FILE* p_file = std::fopen(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_invalid_1byte_char.txt", "r"); + ENABLE_C4996 + + auto input_adapter = fkyaml::detail::input_adapter(p_file); + REQUIRE(std::is_same::value); + + std::string buffer {}; + REQUIRE_THROWS_AS(input_adapter.fill_buffer(buffer), fkyaml::invalid_encoding); + } + + 
SECTION("stream_input_adapter with valid 1-byte UTF-8 encodings") + { + std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_valid_1byte_char.txt"); + auto input_adapter = fkyaml::detail::input_adapter(ifs); + REQUIRE(std::is_same::value); + + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 3); + REQUIRE(buffer[0] == char(0x5Au)); + REQUIRE(buffer[1] == char(0x30u)); + REQUIRE(buffer[2] == char(0x61u)); + } + + SECTION("stream_input_adapter with invalid 1-byte UTF-8 encodings") + { + std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_invalid_1byte_char.txt"); + auto input_adapter = fkyaml::detail::input_adapter(ifs); + REQUIRE(std::is_same::value); + + std::string buffer {}; + REQUIRE_THROWS_AS(input_adapter.fill_buffer(buffer), fkyaml::invalid_encoding); + } + + ///////////////////////////////// + // UTF-8 2-Byte Characters // + ///////////////////////////////// + + SECTION("file_input_adapter with valid 2-byte UTF-8 encodings") + { + DISABLE_C4996 + FILE* p_file = std::fopen(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_valid_2byte_char.txt", "r"); + ENABLE_C4996 + + auto input_adapter = fkyaml::detail::input_adapter(p_file); + REQUIRE(std::is_same::value); + + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 4); + REQUIRE(buffer[0] == char(0xC2u)); + REQUIRE(buffer[1] == char(0x80u)); + REQUIRE(buffer[2] == char(0xDFu)); + REQUIRE(buffer[3] == char(0xBFu)); + + std::fclose(p_file); + } + + SECTION("file_input_adapter with invalid 2-byte UTF-8 encodings") + { + DISABLE_C4996 + FILE* p_file = std::fopen(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_invalid_2byte_char.txt", "r"); + ENABLE_C4996 + + auto input_adapter = fkyaml::detail::input_adapter(p_file); + REQUIRE(std::is_same::value); + + std::string buffer {}; + REQUIRE_THROWS_AS(input_adapter.fill_buffer(buffer), fkyaml::invalid_encoding); + } + + 
SECTION("stream_input_adapter with valid 2-byte UTF-8 encodings") + { + std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_valid_2byte_char.txt"); + auto input_adapter = fkyaml::detail::input_adapter(ifs); + REQUIRE(std::is_same::value); + + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 4); + REQUIRE(buffer[0] == char(0xC2u)); + REQUIRE(buffer[1] == char(0x80u)); + REQUIRE(buffer[2] == char(0xDFu)); + REQUIRE(buffer[3] == char(0xBFu)); + } + + SECTION("stream_input_adapter with invalid 2-byte UTF-8 encodings") + { + std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_invalid_2byte_char.txt"); + auto input_adapter = fkyaml::detail::input_adapter(ifs); + REQUIRE(std::is_same::value); - REQUIRE(input_adapter.get_character() == 'a'); - REQUIRE(input_adapter.get_character() == int_type(0xE3u)); - REQUIRE(input_adapter.get_character() == int_type(0x81u)); - REQUIRE(input_adapter.get_character() == int_type(0x82u)); - REQUIRE(input_adapter.get_character() == int_type(0xF0u)); - REQUIRE(input_adapter.get_character() == int_type(0xA0u)); - REQUIRE(input_adapter.get_character() == int_type(0x80u)); - REQUIRE(input_adapter.get_character() == int_type(0x8Bu)); - REQUIRE(input_adapter.get_character() == char_traits_type::eof()); + std::string buffer {}; + REQUIRE_THROWS_AS(input_adapter.fill_buffer(buffer), fkyaml::invalid_encoding); } -} \ No newline at end of file + + ///////////////////////////////// + // UTF-8 3-Byte Characters // + ///////////////////////////////// + + SECTION("file_input_adapter with valid 3-byte UTF-8 encodings") + { + DISABLE_C4996 + FILE* p_file = std::fopen(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_valid_3byte_char.txt", "r"); + ENABLE_C4996 + + auto input_adapter = fkyaml::detail::input_adapter(p_file); + REQUIRE(std::is_same::value); + + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 6); + REQUIRE(buffer[0] == 
char(0xE0u)); + REQUIRE(buffer[1] == char(0x80u)); + REQUIRE(buffer[2] == char(0x80u)); + REQUIRE(buffer[3] == char(0xECu)); + REQUIRE(buffer[4] == char(0xBFu)); + REQUIRE(buffer[5] == char(0xBFu)); + + std::fclose(p_file); + } + + SECTION("file_input_adapter with invalid 3-byte UTF-8 encodings") + { + DISABLE_C4996 + FILE* p_file = std::fopen(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_invalid_3byte_char.txt", "r"); + ENABLE_C4996 + + auto input_adapter = fkyaml::detail::input_adapter(p_file); + REQUIRE(std::is_same::value); + + std::string buffer {}; + REQUIRE_THROWS_AS(input_adapter.fill_buffer(buffer), fkyaml::invalid_encoding); + } + + SECTION("stream_input_adapter with valid 3-byte UTF-8 encodings") + { + std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_valid_3byte_char.txt"); + auto input_adapter = fkyaml::detail::input_adapter(ifs); + REQUIRE(std::is_same::value); + + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 6); + REQUIRE(buffer[0] == char(0xE0u)); + REQUIRE(buffer[1] == char(0x80u)); + REQUIRE(buffer[2] == char(0x80u)); + REQUIRE(buffer[3] == char(0xECu)); + REQUIRE(buffer[4] == char(0xBFu)); + REQUIRE(buffer[5] == char(0xBFu)); + } + + SECTION("stream_input_adapter with invalid 3-byte UTF-8 encodings") + { + std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_invalid_3byte_char.txt"); + auto input_adapter = fkyaml::detail::input_adapter(ifs); + REQUIRE(std::is_same::value); + + std::string buffer {}; + REQUIRE_THROWS_AS(input_adapter.fill_buffer(buffer), fkyaml::invalid_encoding); + } + + ///////////////////////////////// + // UTF-8 4-Byte Characters // + ///////////////////////////////// + + SECTION("file_input_adapter with valid 4-byte UTF-8 encodings") + { + DISABLE_C4996 + FILE* p_file = std::fopen(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_valid_4byte_char.txt", "r"); + ENABLE_C4996 + + auto input_adapter = 
fkyaml::detail::input_adapter(p_file); + REQUIRE(std::is_same::value); + + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == char(0xF0u)); + REQUIRE(buffer[1] == char(0x90u)); + REQUIRE(buffer[2] == char(0x80u)); + REQUIRE(buffer[3] == char(0x80u)); + REQUIRE(buffer[4] == char(0xF2u)); + REQUIRE(buffer[5] == char(0xBFu)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x80u)); + + std::fclose(p_file); + } + + SECTION("file_input_adapter with invalid 4-byte UTF-8 encodings") + { + DISABLE_C4996 + FILE* p_file = std::fopen(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_invalid_4byte_char.txt", "r"); + ENABLE_C4996 + + auto input_adapter = fkyaml::detail::input_adapter(p_file); + REQUIRE(std::is_same::value); + + std::string buffer {}; + REQUIRE_THROWS_AS(input_adapter.fill_buffer(buffer), fkyaml::invalid_encoding); + } + + SECTION("stream_input_adapter with valid 4-byte UTF-8 encodings") + { + std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_valid_4byte_char.txt"); + auto input_adapter = fkyaml::detail::input_adapter(ifs); + REQUIRE(std::is_same::value); + + std::string buffer {}; + input_adapter.fill_buffer(buffer); + + REQUIRE(buffer.size() == 8); + REQUIRE(buffer[0] == char(0xF0u)); + REQUIRE(buffer[1] == char(0x90u)); + REQUIRE(buffer[2] == char(0x80u)); + REQUIRE(buffer[3] == char(0x80u)); + REQUIRE(buffer[4] == char(0xF2u)); + REQUIRE(buffer[5] == char(0xBFu)); + REQUIRE(buffer[6] == char(0x80u)); + REQUIRE(buffer[7] == char(0x80u)); + } + + SECTION("stream_input_adapter with invalid 4-byte UTF-8 encodings") + { + std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n_invalid_4byte_char.txt"); + auto input_adapter = fkyaml::detail::input_adapter(ifs); + REQUIRE(std::is_same::value); + + std::string buffer {}; + REQUIRE_THROWS_AS(input_adapter.fill_buffer(buffer), fkyaml::invalid_encoding); + } +} diff --git 
a/test/unit_test/test_input_handler.cpp b/test/unit_test/test_input_handler.cpp index ea6ecdd4..3b427da1 100644 --- a/test/unit_test/test_input_handler.cpp +++ b/test/unit_test/test_input_handler.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -17,12 +17,10 @@ #include #endif -using pchar_input_handler = fkyaml::detail::input_handler>; - TEST_CASE("InputHandlerTest_InitialStateTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 0); @@ -32,7 +30,7 @@ TEST_CASE("InputHandlerTest_InitialStateTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_GetCurrentTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 0); @@ -53,13 +51,13 @@ TEST_CASE("InputHandlerTest_GetCurrentTest", "[InputHandlerTest]") REQUIRE(handler.get_cur_pos_in_line() == 3); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); - REQUIRE(handler.get_current() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); + REQUIRE(handler.get_current() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == 
pchar_input_handler::char_traits_type::eof()); - REQUIRE(handler.get_current() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); + REQUIRE(handler.get_current() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); } @@ -67,7 +65,7 @@ TEST_CASE("InputHandlerTest_GetCurrentTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_GetNextTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_next() == 'e'); REQUIRE(handler.get_cur_pos_in_line() == 1); @@ -81,11 +79,11 @@ TEST_CASE("InputHandlerTest_GetNextTest", "[InputHandlerTest]") REQUIRE(handler.get_cur_pos_in_line() == 3); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); } @@ -93,8 +91,8 @@ TEST_CASE("InputHandlerTest_GetNextTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_GetRangeTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler::string_type str; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + std::string str; + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_range(4, str) == 0); REQUIRE(str == "test"); @@ -102,25 +100,53 @@ TEST_CASE("InputHandlerTest_GetRangeTest", "[InputHandlerTest]") REQUIRE(handler.get_cur_pos_in_line() == 3); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_range(2, 
str) == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_range(2, str) == std::char_traits::eof()); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 3); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_range(0, str) == pchar_input_handler::char_traits_type::eof()); - REQUIRE(handler.get_current() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_range(0, str) == 0); + REQUIRE(handler.get_current() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); } +TEST_CASE("InputHandlerTest_PeekNextTest", "[InputHandlerTest]") +{ + char input[] = "test"; + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); + + REQUIRE(handler.peek_next() == 'e'); + REQUIRE(handler.get_cur_pos_in_line() == 0); + REQUIRE(handler.get_lines_read() == 0); + + REQUIRE(handler.get_next() == 'e'); + REQUIRE(handler.peek_next() == 's'); + REQUIRE_FALSE(handler.peek_next() == 't'); + REQUIRE(handler.get_cur_pos_in_line() == 1); + REQUIRE(handler.get_lines_read() == 0); + + REQUIRE(handler.get_next() == 's'); + REQUIRE(handler.get_next() == 't'); + REQUIRE_FALSE(handler.peek_next() == 't'); + + REQUIRE(handler.get_next() == std::char_traits::eof()); + REQUIRE_FALSE(handler.peek_next() == 't'); + REQUIRE(handler.get_cur_pos_in_line() == 4); + REQUIRE(handler.get_lines_read() == 0); + + REQUIRE(handler.peek_next() == std::char_traits::eof()); + REQUIRE(handler.get_current() == std::char_traits::eof()); +} + TEST_CASE("InputHandlerTest_UngetTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler 
handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 0); @@ -140,7 +166,7 @@ TEST_CASE("InputHandlerTest_UngetTest", "[InputHandlerTest]") REQUIRE(handler.get_next() == 'e'); REQUIRE(handler.get_next() == 's'); REQUIRE(handler.get_next() == 't'); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); handler.unget(); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 3); @@ -150,7 +176,7 @@ TEST_CASE("InputHandlerTest_UngetTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_UngetRangeTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_current() == 't'); handler.unget_range(4); @@ -172,50 +198,20 @@ TEST_CASE("InputHandlerTest_UngetRangeTest", "[InputHandlerTest]") REQUIRE(handler.get_next() == 's'); REQUIRE(handler.get_next() == 't'); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); handler.unget_range(2); REQUIRE(handler.get_current() == 's'); REQUIRE(handler.get_cur_pos_in_line() == 2); REQUIRE(handler.get_lines_read() == 0); } -TEST_CASE("InputHandlerTest_TestNextCharTest", "[InputHandlerTest]") -{ - char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); - - REQUIRE(handler.test_next_char('e') == true); - REQUIRE(handler.get_cur_pos_in_line() == 0); - REQUIRE(handler.get_lines_read() == 0); - - REQUIRE(handler.get_next() == 'e'); - REQUIRE(handler.test_next_char('s') == true); - REQUIRE(handler.test_next_char('t') == false); - REQUIRE(handler.get_cur_pos_in_line() == 1); - REQUIRE(handler.get_lines_read() == 0); - - REQUIRE(handler.get_next() == 's'); - REQUIRE(handler.get_next() == 
't'); - REQUIRE(handler.test_next_char('t') == false); - - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); - REQUIRE(handler.test_next_char('t') == false); - REQUIRE(handler.get_cur_pos_in_line() == 4); - REQUIRE(handler.get_lines_read() == 0); - - pchar_input_handler::char_type char_eof = - pchar_input_handler::char_traits_type::to_char_type(pchar_input_handler::char_traits_type::eof()); - REQUIRE(handler.test_next_char(char_eof) == false); - REQUIRE(handler.get_current() == pchar_input_handler::char_traits_type::eof()); -} - TEST_CASE("InputHandlerTest_TestMultipleLinesTest", "[InputHandlerTest]") { SECTION("first character is not a newline code.") { char input[] = "test\nfoo"; - pchar_input_handler::string_type str; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + std::string str; + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_range(4, str) == 0); REQUIRE(handler.get_cur_pos_in_line() == 3); @@ -237,8 +233,8 @@ TEST_CASE("InputHandlerTest_TestMultipleLinesTest", "[InputHandlerTest]") SECTION("first character is a newline code.") { char input[] = "\ntest\nfoo"; - pchar_input_handler::string_type str; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + std::string str; + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_next() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 0); diff --git a/test/unit_test/test_iterator_class.cpp b/test/unit_test/test_iterator_class.cpp index 22a2b4a6..e8587672 100644 --- a/test/unit_test/test_iterator_class.cpp +++ b/test/unit_test/test_iterator_class.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| 
https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -82,14 +82,14 @@ TEST_CASE("IteratorClassTest_AssignmentOperatorTest", "[IteratorClassTest]") SECTION("Test lvalue iterator.") { - iterator = iterator; + iterator = *&iterator; REQUIRE(iterator.type() == fkyaml::detail::iterator_t::SEQUENCE); REQUIRE(iterator->is_null()); } SECTION("Test rvalue iterator.") { - iterator = std::move(iterator); + iterator = std::move(*&iterator); REQUIRE(iterator.type() == fkyaml::detail::iterator_t::SEQUENCE); REQUIRE(iterator->is_null()); } diff --git a/test/unit_test/test_lexical_analyzer_class.cpp b/test/unit_test/test_lexical_analyzer_class.cpp index 2b3ee736..741bceda 100644 --- a/test/unit_test/test_lexical_analyzer_class.cpp +++ b/test/unit_test/test_lexical_analyzer_class.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -14,10 +14,7 @@ #endif #include -using pchar_lexer_t = - fkyaml::detail::lexical_analyzer>; -using str_lexer_t = - fkyaml::detail::lexical_analyzer>; +using lexer_t = fkyaml::detail::lexical_analyzer; TEST_CASE("LexicalAnalyzerClassTest_ScanYamlVersionDirectiveTest", "[LexicalAnalyzerClassTest]") { @@ -31,7 +28,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanYamlVersionDirectiveTest", "[LexicalAnal value_pair_t(std::string("%YAML 1.2\n"), std::string("1.2")), value_pair_t(std::string("%YAML 1.2 "), std::string("1.2"))); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::YAML_VER_DIRECTIVE); @@ -49,7 +46,7 @@ 
TEST_CASE("LexicalAnalyzerClassTest_ScanYamlVersionDirectiveTest", "[LexicalAnal std::string("%YAMR 1.2 \r\n"), std::string("%YANL 1.2 \n")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::INVALID_DIRECTIVE); @@ -69,7 +66,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanYamlVersionDirectiveTest", "[LexicalAnal std::string("%YAML1.2 "), std::string("%YAML AbC")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } } @@ -80,7 +77,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanTagDirectiveTest", "[LexicalAnalyzerClas SECTION("Test nothrow expected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%TAG ")); + lexer_t lexer(fkyaml::detail::input_adapter("%TAG ")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::TAG_DIRECTIVE); @@ -92,7 +89,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanTagDirectiveTest", "[LexicalAnalyzerClas { auto buffer = GENERATE(std::string("%TUB"), std::string("%TAC")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::INVALID_DIRECTIVE); REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -101,7 +98,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanTagDirectiveTest", "[LexicalAnalyzerClas SECTION("Test nothrow expected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%TAGE")); + lexer_t lexer(fkyaml::detail::input_adapter("%TAGE")); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } } @@ -112,7 +109,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanReservedDirectiveTest", "[LexicalAnalyze 
GENERATE(std::string("%TEST"), std::string("%1984\n"), std::string("%TEST4LIB\r"), std::string("%%ERROR\r\n")); fkyaml::detail::lexical_token_t token; - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::INVALID_DIRECTIVE); @@ -122,13 +119,13 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanReservedDirectiveTest", "[LexicalAnalyze TEST_CASE("LexicalAnalyzerClassTest_ScanEmptyDirectiveTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%")); + lexer_t lexer(fkyaml::detail::input_adapter("%")); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } TEST_CASE("LexicalAnalyzerClassTest_ScanEndOfDirectivesTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%YAML 1.2\n---\nfoo: bar")); + lexer_t lexer(fkyaml::detail::input_adapter("%YAML 1.2\n---\nfoo: bar")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -150,7 +147,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanEndOfDirectivesTest", "[LexicalAnalyzerC TEST_CASE("LexicalAnalyzerClassTest_ScanEndOfDocumentsTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%YAML 1.2\n---\n...")); + lexer_t lexer(fkyaml::detail::input_adapter("%YAML 1.2\n---\n...")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -170,76 +167,99 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanColonTest", "[LexicalAnalyzerClassTest]" SECTION("Test colon with half-width space.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": ")); + lexer_t lexer(fkyaml::detail::input_adapter(": ")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with CR newline code.") { - pchar_lexer_t 
lexer(fkyaml::detail::input_adapter(":\r")); + lexer_t lexer(fkyaml::detail::input_adapter(":\r")); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with CRLF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(":\r\n")); + lexer_t lexer(fkyaml::detail::input_adapter(":\r\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with LF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(":\n")); + lexer_t lexer(fkyaml::detail::input_adapter(":\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); - } - - SECTION("Test colon with non-newline-code character.") - { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(":test")); - REQUIRE_THROWS_AS(token = lexer.get_next_token(), fkyaml::parse_error); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with a comment and a CRLF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": # comment\r\n")); + lexer_t lexer(fkyaml::detail::input_adapter(": # comment\r\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with a comment and a LF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": # comment\n")); + lexer_t lexer(fkyaml::detail::input_adapter(": # comment\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); + REQUIRE(token == 
fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with a comment and no newline code") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": # comment")); + lexer_t lexer(fkyaml::detail::input_adapter(": # comment")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with many spaces and a CRLF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": \r\n")); + lexer_t lexer(fkyaml::detail::input_adapter(": \r\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with many spaces and a LF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": \n")); + lexer_t lexer(fkyaml::detail::input_adapter(": \n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with many spaces and no newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": ")); + lexer_t lexer(fkyaml::detail::input_adapter(": ")); + REQUIRE_NOTHROW(token = lexer.get_next_token()); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); + } + + SECTION("Test colon with an always-safe character.") + { + lexer_t lexer(fkyaml::detail::input_adapter(":test")); + REQUIRE_NOTHROW(token = lexer.get_next_token()); + REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); + REQUIRE(lexer.get_string() == ":test"); + } + + SECTION("Test colon with a flow indicator in a non-flow context.") + { + auto input = + GENERATE(std::string(":,"), std::string(":{"), std::string(":}"), std::string(":["), std::string(":]")); + lexer_t lexer(fkyaml::detail::input_adapter(input)); + REQUIRE_NOTHROW(token = 
lexer.get_next_token()); + REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); + REQUIRE(lexer.get_string() == input); + } + + SECTION("Test colon with a flow indicator in a flow context.") + { + auto input = GENERATE( + std::string("{:,"), std::string("{:{"), std::string("{:}"), std::string("{:["), std::string("{:]")); + lexer_t lexer(fkyaml::detail::input_adapter(input)); + REQUIRE_NOTHROW(token = lexer.get_next_token()); + REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_FLOW_BEGIN); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } @@ -252,7 +272,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanNullTokenTest", "[LexicalAnalyzerClassTe SECTION("Test nothrow expected tokens.") { auto buffer = GENERATE(std::string("null"), std::string("Null"), std::string("NULL"), std::string("~")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::NULL_VALUE); @@ -262,7 +282,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanNullTokenTest", "[LexicalAnalyzerClassTe SECTION("Test nothrow unexpected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_null(), fkyaml::parse_error); } @@ -275,7 +295,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBooleanTrueTokenTest", "[LexicalAnalyzer SECTION("Test nothrow expected tokens.") { auto buffer = GENERATE(std::string("true"), std::string("True"), std::string("TRUE")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::BOOLEAN_VALUE); @@ -285,7 +305,7 @@ 
TEST_CASE("LexicalAnalyzerClassTest_ScanBooleanTrueTokenTest", "[LexicalAnalyzer SECTION("Test nothrow unexpected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_boolean(), fkyaml::parse_error); } @@ -298,7 +318,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBooleanFalseTokenTest", "[LexicalAnalyze SECTION("Test nothrow expected tokens.") { auto buffer = GENERATE(std::string("false"), std::string("False"), std::string("FALSE")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::BOOLEAN_VALUE); @@ -308,7 +328,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBooleanFalseTokenTest", "[LexicalAnalyze SECTION("Test nothrow unexpected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_boolean(), fkyaml::parse_error); } @@ -329,7 +349,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanIntegerTokenTest", "[LexicalAnalyzerClas value_pair_t(std::string("643"), 643), value_pair_t(std::string("+123"), 123)); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::INTEGER_VALUE); @@ -339,7 +359,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanIntegerTokenTest", "[LexicalAnalyzerClas SECTION("Test nothrow unexpected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_integer(), fkyaml::parse_error); } @@ -354,7 +374,7 
@@ TEST_CASE("LexicalAnalyzerClassTest_ScanOctalNumberTokenTest", "[LexicalAnalyzer value_pair_t(std::string("0o77772"), 077772), value_pair_t(std::string("0o672}"), 0672)); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -371,7 +391,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanHexadecimalNumberTokenTest", "[LexicalAn value_pair_t(std::string("0xa7F3"), 0xa7F3), value_pair_t(std::string("0xFf29Bc"), 0xFf29Bc)); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -395,7 +415,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFloatNumberTokenTest", "[LexicalAnalyzer value_pair_t(std::string("3.95E3"), 3.95e3), value_pair_t(std::string("1.863e+3"), 1.863e+3)); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::FLOAT_NUMBER_VALUE); @@ -406,13 +426,13 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFloatNumberTokenTest", "[LexicalAnalyzer SECTION("Test nothrow unexpected float tokens.") { auto input = GENERATE(std::string("0."), std::string("1.23e"), std::string("1.2e-z")); - str_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } SECTION("Test non-float tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_float_number(), fkyaml::parse_error); } @@ -427,7 +447,7 @@ 
TEST_CASE("LexicalAnalyzerClassTest_ScanInfinityTokenTest", "[LexicalAnalyzerCla std::string("-.inf"), std::string("-.Inf"), std::string("-.INF")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); SECTION("Test nothrow expected buffers.") { @@ -445,7 +465,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanInfinityTokenTest", "[LexicalAnalyzerCla TEST_CASE("LexicalAnalyzerClassTest_ScanNaNTokenTest", "[LexicalAnalyzerClassTest]") { auto buffer = GENERATE(std::string(".nan"), std::string(".NaN"), std::string(".NAN")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); SECTION("Test nothrow expected buffers.") { @@ -541,7 +561,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanStringTokenTest", "[LexicalAnalyzerClass value_pair_t(std::string("\"foo\\Pbar\""), fkyaml::node::string_type("foo\u2029bar")), value_pair_t(std::string("\"\\x30\\x2B\\x6d\""), fkyaml::node::string_type("0+m"))); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -599,7 +619,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanMultiByteCharStringTokenTest", "[Lexical char_traits_t::to_char_type(0xBF), char_traits_t::to_char_type(0xBF)}); - str_lexer_t lexer(fkyaml::detail::input_adapter(mb_char)); + lexer_t lexer(fkyaml::detail::input_adapter(mb_char)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -698,7 +718,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanEscapedUnicodeStringTokenTest", "[Lexica char_traits_t::to_char_type(0xBF), char_traits_t::to_char_type(0xBF)})); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = 
lexer.get_next_token()); @@ -725,14 +745,14 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanInvalidStringTokenTest", "[LexicalAnalyz std::string("\'\\t\'"), std::string("\"\\Q\"")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } SECTION("invalid_encoding expected") { std::string buffer = "\"\\U00110000\""; - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::invalid_encoding); } } @@ -865,8 +885,9 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanInvalidMultiByteCharStringTokenTest", "[ char_traits_t::to_char_type(0x80), char_traits_t::to_char_type(0x80)}); - str_lexer_t lexer(fkyaml::detail::input_adapter(mb_char)); - REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::invalid_encoding); + auto input_adapter = fkyaml::detail::input_adapter(mb_char); + REQUIRE_THROWS_AS(lexer_t(std::move(input_adapter)), fkyaml::invalid_encoding); + // REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::invalid_encoding); } TEST_CASE("LexicalAnalyzerClassTest_ScanUnescapedControlCharacter", "[LexicalAnalyzerClassTest]") @@ -903,7 +924,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanUnescapedControlCharacter", "[LexicalAna std::string buffer("test"); buffer.push_back(unescaped_char); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -915,7 +936,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC { const char input[] = "|-\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -926,7 +947,7 @@ 
TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC { const char input[] = "|\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -937,7 +958,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC { const char input[] = "|+\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -949,7 +970,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC const char input[] = "|0\n" "foo"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -958,7 +979,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC const char input[] = "|2\n" " foo"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -967,7 +988,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC const char input[] = "|2\n" " foo\n" " bar\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -979,7 +1000,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC const char input[] = "|\r\n" " foo\r\n" " bar\r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t 
lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -995,7 +1016,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1010,7 +1031,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1026,7 +1047,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1041,7 +1062,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1055,7 +1076,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC " bar\n" "\n" " baz"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1071,7 +1092,7 @@ 
TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1086,7 +1107,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1102,7 +1123,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl { const char input[] = ">-\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1113,7 +1134,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl { const char input[] = ">\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1124,7 +1145,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl { const char input[] = ">+\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1136,7 +1157,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl const char input[] = "|0\n" "foo"; - pchar_lexer_t 
lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -1145,7 +1166,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl const char input[] = ">2\n" " foo"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -1154,7 +1175,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl const char input[] = ">2\n" " foo\n" " bar\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1166,7 +1187,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl const char input[] = ">2\n" " foo\n" " bar\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1181,7 +1202,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl "\r\n" " bar\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1195,7 +1216,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl " bar\n" " \n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1209,7 +1230,7 @@ 
TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl " bar\n" " \n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1223,7 +1244,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl " bar\n" " \n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1235,47 +1256,45 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanAnchorTokenTest", "[LexicalAnalyzerClass { fkyaml::detail::lexical_token_t token; - SECTION("Test nothorw expected tokens with an anchor.") + SECTION("valid anchor name") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: &anchor foo")); + auto input = GENERATE( + std::string("&:anchor"), + std::string("&:anchor "), + std::string("&:anchor\t"), + std::string("&:anchor\r"), + std::string("&:anchor\n"), + std::string("&:anchor{"), + std::string("&:anchor}"), + std::string("&:anchor["), + std::string("&:anchor]"), + std::string("&:anchor,"), + std::string("&:anchor: ")); - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); - REQUIRE_NOTHROW(lexer.get_string()); - REQUIRE(lexer.get_string().compare("test") == 0); - - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::ANCHOR_PREFIX); - REQUIRE_NOTHROW(lexer.get_string()); - REQUIRE(lexer.get_string().compare("anchor") == 0); - - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == 
fkyaml::detail::lexical_token_t::STRING_VALUE); - REQUIRE_NOTHROW(lexer.get_string()); - REQUIRE(lexer.get_string().compare("foo") == 0); - - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::END_OF_BUFFER); + REQUIRE_NOTHROW(lexer.get_string() == ":anchor"); } - SECTION("Test nothrow unexpected tokens with an anchor.") + SECTION("invalid anchor name") { - auto buffer = - GENERATE(std::string("test: &anchor"), std::string("test: &anchor\r\n"), std::string("test: &anchor\n")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); - - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); - REQUIRE_NOTHROW(lexer.get_string()); - REQUIRE(lexer.get_string().compare("test") == 0); + auto input = GENERATE( + std::string("&"), + std::string("& "), + std::string("&\t"), + std::string("&\r"), + std::string("&\n"), + std::string("&{"), + std::string("&}"), + std::string("&["), + std::string("&]"), + std::string("&,"), + std::string("&: ")); - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); - - REQUIRE_THROWS_AS(token = lexer.get_next_token(), fkyaml::parse_error); + lexer_t lexer(fkyaml::detail::input_adapter(input)); + REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } } @@ -1283,42 +1302,45 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanAliasTokenTest", "[LexicalAnalyzerClassT { fkyaml::detail::lexical_token_t token; - SECTION("Test nothrow expected tokens with an alias.") + SECTION("valid anchor name") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: *anchor")); - - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); - REQUIRE_NOTHROW(lexer.get_string()); - REQUIRE(lexer.get_string().compare("test") == 0); + auto input = GENERATE( + std::string("*:anchor"), + std::string("*:anchor "), + 
std::string("*:anchor\t"), + std::string("*:anchor\r"), + std::string("*:anchor\n"), + std::string("*:anchor{"), + std::string("*:anchor}"), + std::string("*:anchor["), + std::string("*:anchor]"), + std::string("*:anchor,"), + std::string("*:anchor: ")); - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::ALIAS_PREFIX); - REQUIRE_NOTHROW(lexer.get_string()); - REQUIRE(lexer.get_string().compare("anchor") == 0); - - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::END_OF_BUFFER); + REQUIRE_NOTHROW(lexer.get_string() == ":anchor"); } - SECTION("Test nothrow unexpected tokens with an anchor.") + SECTION("invalid anchor name") { - auto buffer = GENERATE( - std::string("test: *"), std::string("test: *\r\n"), std::string("test: *\n"), std::string("test: * ")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + auto input = GENERATE( + std::string("*"), + std::string("* "), + std::string("*\t"), + std::string("*\r"), + std::string("*\n"), + std::string("*{"), + std::string("*}"), + std::string("*["), + std::string("*]"), + std::string("*,"), + std::string("*: ")); - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); - REQUIRE_NOTHROW(lexer.get_string()); - REQUIRE(lexer.get_string().compare("test") == 0); - - REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); - - REQUIRE_THROWS_AS(token = lexer.get_next_token(), fkyaml::parse_error); + lexer_t lexer(fkyaml::detail::input_adapter(input)); + REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } } @@ -1326,7 +1348,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanCommentTokenTest", "[LexicalAnalyzerClas { auto 
buffer = GENERATE( std::string("# comment\r"), std::string("# comment\r\n"), std::string("# comment\n"), std::string("# comment")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1339,13 +1361,13 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanCommentTokenTest", "[LexicalAnalyzerClas TEST_CASE("LexicalAnalyzerClassTest_ScanReservedIndicatorTokenTest", "[LexicalAnalyzerClassTest]") { auto buffer = GENERATE(std::string("@invalid"), std::string("`invalid")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } TEST_CASE("LexicalAnalyzerClassTest_ScanKeyBooleanValuePairTokenTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: true")); + lexer_t lexer(fkyaml::detail::input_adapter("test: true")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1367,7 +1389,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanKeyBooleanValuePairTokenTest", "[Lexical TEST_CASE("LexicalAnalyzerClassTest_ScanKeyIntegerValuePairTokenTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: -5784")); + lexer_t lexer(fkyaml::detail::input_adapter("test: -5784")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1389,7 +1411,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanKeyIntegerValuePairTokenTest", "[Lexical TEST_CASE("LexicalAnalyzerClassTest_ScanKeyFloatNumberValuePairTokenTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: -5.58e-3")); + lexer_t lexer(fkyaml::detail::input_adapter("test: -5.58e-3")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1411,7 +1433,7 
@@ TEST_CASE("LexicalAnalyzerClassTest_ScanKeyFloatNumberValuePairTokenTest", "[Lex TEST_CASE("LexicalAnalyzerClassTest_ScanKeyStringValuePairTokenTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: \"some value\"")); + lexer_t lexer(fkyaml::detail::input_adapter("test: \"some value\"")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1437,7 +1459,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFlowSequenceTokenTest", "[LexicalAnalyze SECTION("Input source No.1.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: [ foo, bar ]")); + lexer_t lexer(fkyaml::detail::input_adapter("test: [ foo, bar ]")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1472,8 +1494,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFlowSequenceTokenTest", "[LexicalAnalyze SECTION("Input source No.2.") { - pchar_lexer_t lexer( - fkyaml::detail::input_adapter("test: [ { foo: one, bar: false }, { foo: two, bar: true } ]")); + lexer_t lexer(fkyaml::detail::input_adapter("test: [ { foo: one, bar: false }, { foo: two, bar: true } ]")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1573,7 +1594,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFlowMappingTokenTest", "[LexicalAnalyzer SECTION("Input source No.1.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: { bool: true, foo: bar, pi: 3.14 }")); + lexer_t lexer(fkyaml::detail::input_adapter("test: { bool: true, foo: bar, pi: 3.14 }")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1640,7 +1661,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFlowMappingTokenTest", "[LexicalAnalyzer SECTION("Input source No.2.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: {foo: bar}")); + lexer_t lexer(fkyaml::detail::input_adapter("test: 
{foo: bar}")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1682,7 +1703,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockSequenceTokenTest", "[LexicalAnalyz { auto buffer = GENERATE(std::string("test:\n - foo\n - bar"), std::string("test:\r\n - foo\r\n - bar")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1690,7 +1711,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockSequenceTokenTest", "[LexicalAnalyz REQUIRE(lexer.get_string().compare("test") == 0); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::SEQUENCE_BLOCK_PREFIX); @@ -1718,7 +1739,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockSequenceTokenTest", "[LexicalAnalyz std::string("test:\r\n - foo: one\r\n bar: false\r\n - foo: two\r\n bar: true"), std::string("test:\n - foo: one\n bar: false\n - foo: two\n bar: true")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1726,7 +1747,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockSequenceTokenTest", "[LexicalAnalyz REQUIRE(lexer.get_string().compare("test") == 0); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::SEQUENCE_BLOCK_PREFIX); @@ 
-1797,7 +1818,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockMappingTokenTest", "[LexicalAnalyze SECTION("Input source No.1.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test:\n bool: true\n foo: \'bar\'\n pi: 3.14")); + lexer_t lexer(fkyaml::detail::input_adapter("test:\n bool: true\n foo: \'bar\'\n pi: 3.14")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1805,7 +1826,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockMappingTokenTest", "[LexicalAnalyze REQUIRE(lexer.get_string().compare("test") == 0); REQUIRE_NOTHROW(token = lexer.get_next_token()); - REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_BLOCK_PREFIX); + REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1852,7 +1873,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockMappingTokenTest", "[LexicalAnalyze SECTION("input soure No.2.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: |\n a literal scalar.\nfoo: \'bar\'\npi: 3.14")); + lexer_t lexer(fkyaml::detail::input_adapter("test: |\n a literal scalar.\nfoo: \'bar\'\npi: 3.14")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1899,7 +1920,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockMappingTokenTest", "[LexicalAnalyze SECTION("input soure No.3.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: >\n a literal scalar.\nfoo: \'bar\'\npi: 3.14")); + lexer_t lexer(fkyaml::detail::input_adapter("test: >\n a literal scalar.\nfoo: \'bar\'\npi: 3.14")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); diff --git a/test/unit_test/test_node_class.cpp b/test/unit_test/test_node_class.cpp index 49c7f9c9..100ceabc 100644 --- a/test/unit_test/test_node_class.cpp +++ 
b/test/unit_test/test_node_class.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -860,18 +860,25 @@ TEST_CASE("NodeClassTest_SubscriptOperatorTest", "[NodeClassTest]") REQUIRE_THROWS_AS(const_node[""], fkyaml::type_error); } - fkyaml::node node_key = - GENERATE(fkyaml::node::mapping(), fkyaml::node(), fkyaml::node(false), fkyaml::node(0.0), fkyaml::node("")); - SECTION("Test non-const node with a non-integer node.") { - REQUIRE_THROWS_AS(node[node_key], fkyaml::type_error); + REQUIRE_THROWS_AS(node[fkyaml::node::sequence()], fkyaml::type_error); + REQUIRE_THROWS_AS(node[fkyaml::node::mapping()], fkyaml::type_error); + REQUIRE_THROWS_AS(node[fkyaml::node()], fkyaml::type_error); + REQUIRE_THROWS_AS(node[fkyaml::node(false)], fkyaml::type_error); + REQUIRE_THROWS_AS(node[fkyaml::node(0.0)], fkyaml::type_error); + REQUIRE_THROWS_AS(node[fkyaml::node("")], fkyaml::type_error); } SECTION("Test const node with a non-integer node.") { const fkyaml::node const_node = node; - REQUIRE_THROWS_AS(const_node[node_key], fkyaml::type_error); + REQUIRE_THROWS_AS(const_node[fkyaml::node::sequence()], fkyaml::type_error); + REQUIRE_THROWS_AS(const_node[fkyaml::node::mapping()], fkyaml::type_error); + REQUIRE_THROWS_AS(const_node[fkyaml::node()], fkyaml::type_error); + REQUIRE_THROWS_AS(const_node[fkyaml::node(false)], fkyaml::type_error); + REQUIRE_THROWS_AS(const_node[fkyaml::node(0.0)], fkyaml::type_error); + REQUIRE_THROWS_AS(const_node[fkyaml::node("")], fkyaml::type_error); } } @@ -2003,6 +2010,246 @@ TEST_CASE("NodeClassTest_SizeGetterTest", "[NodeClassTest]") } } +// +// test cases for container element accessor with bounds checks +// + 
+TEST_CASE("NodeClassTest_AtTest", "[NodeClassTest]") +{ + SECTION("Test nothrow expected at() calls for mapping nodes.") + { + fkyaml::node::mapping_type map {{"test", fkyaml::node()}}; + + SECTION("Test the non-const string at() calls.") + { + fkyaml::node node = fkyaml::node::mapping(map); + + SECTION("Test the non-const lvalue string subscript operator.") + { + std::string key = "test"; + REQUIRE_NOTHROW(node.at(key)); + REQUIRE(node[key].is_null()); + } + + SECTION("Test the non-const rvalue string subscript operator.") + { + REQUIRE_NOTHROW(node.at("test")); + REQUIRE(node.at("test").is_null()); + } + } + + SECTION("Test the const string at() calls.") + { + const fkyaml::node node = fkyaml::node::mapping(map); + std::string key = "test"; + + SECTION("Test the const lvalue string subscript operator.") + { + REQUIRE_NOTHROW(node.at(key)); + } + + SECTION("Test the const rvalue string subscript operator.") + { + REQUIRE_NOTHROW(node.at("test")); + } + } + + SECTION("Test the non-const string node at() calls.") + { + fkyaml::node node = fkyaml::node::mapping(map); + fkyaml::node node_key = "test"; + + SECTION("Test the non-const lvalue string subscript operator.") + { + REQUIRE_NOTHROW(node.at(node_key)); + } + + SECTION("Test the non-const rvalue string subscript operator.") + { + REQUIRE_NOTHROW(node.at(std::move(node_key))); + } + } + + SECTION("Test the const string node at() calls.") + { + const fkyaml::node node = fkyaml::node::mapping(map); + fkyaml::node node_key = "test"; + + SECTION("Test the non-const lvalue string subscript operator.") + { + REQUIRE_NOTHROW(node.at(node_key)); + } + + SECTION("Test the non-const rvalue string subscript operator.") + { + REQUIRE_NOTHROW(node.at(std::move(node_key))); + } + } + } + + SECTION("Test throwing expected at() calls for mapping nodes.") + { + fkyaml::node node = {{"foo", 123}}; + + SECTION("Test at() calls with compatible type objects") + { + REQUIRE_THROWS_AS(node.at(fkyaml::node::sequence_type()), 
fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(fkyaml::node::mapping_type()), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(nullptr), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(true), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(123), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(3.14), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at("bar"), fkyaml::out_of_range); + } + + SECTION("Test const at() calls with compatible type objects") + { + const fkyaml::node const_node = node; + REQUIRE_THROWS_AS(const_node.at(fkyaml::node::sequence_type()), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node::mapping_type()), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(nullptr), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(true), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(123), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(3.14), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at("bar"), fkyaml::out_of_range); + } + + SECTION("Test at() calls with basic_node objects") + { + REQUIRE_THROWS_AS(node.at(fkyaml::node::sequence()), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(fkyaml::node::mapping()), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(fkyaml::node()), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(fkyaml::node(true)), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(fkyaml::node(123)), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(fkyaml::node(3.14)), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(fkyaml::node("bar")), fkyaml::out_of_range); + } + + SECTION("Test const at() calls with basic_node objects") + { + const fkyaml::node const_node = node; + REQUIRE_THROWS_AS(const_node.at(fkyaml::node::sequence()), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node::mapping()), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node()), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node(true)), 
fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node(123)), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node(3.14)), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node("bar")), fkyaml::out_of_range); + } + } + + SECTION("Test nothrow expected at() calls for sequence nodes.") + { + fkyaml::node node = fkyaml::node::sequence(); + node.get_value_ref().emplace_back(); + + SECTION("Test non-const integer at() calls") + { + REQUIRE_NOTHROW(node.at(0)); + } + + SECTION("Test const integer at() calls") + { + const fkyaml::node const_node = node; + REQUIRE_NOTHROW(const_node.at(0)); + } + + SECTION("Test non-const integer at() calls") + { + REQUIRE_NOTHROW(node.at(fkyaml::node(0))); + } + + SECTION("Test const integer at() calls") + { + const fkyaml::node const_node = node; + REQUIRE_NOTHROW(const_node.at(fkyaml::node(0))); + } + } + + SECTION("Test throwing expected at() call for sequence nodes.") + { + fkyaml::node node = fkyaml::node::sequence(); + node.get_value_ref().emplace_back(); + + SECTION("Test non-const node with a non-integer value.") + { + REQUIRE_THROWS_AS(node.at(fkyaml::node::sequence_type()), fkyaml::type_error); + REQUIRE_THROWS_AS(node.at(fkyaml::node::mapping_type()), fkyaml::type_error); + REQUIRE_THROWS_AS(node.at(nullptr), fkyaml::type_error); + REQUIRE_THROWS_AS(node.at(false), fkyaml::type_error); + REQUIRE_THROWS_AS(node.at(0.0), fkyaml::type_error); + REQUIRE_THROWS_AS(node.at(""), fkyaml::type_error); + } + + SECTION("Test const node with a non-integer value.") + { + const fkyaml::node const_node = node; + REQUIRE_THROWS_AS(const_node.at(fkyaml::node::sequence_type()), fkyaml::type_error); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node::mapping_type()), fkyaml::type_error); + REQUIRE_THROWS_AS(const_node.at(nullptr), fkyaml::type_error); + REQUIRE_THROWS_AS(const_node.at(false), fkyaml::type_error); + REQUIRE_THROWS_AS(const_node.at(0.0), fkyaml::type_error); + 
REQUIRE_THROWS_AS(const_node.at(""), fkyaml::type_error); + } + + SECTION("Test non-const node with a non-integer node.") + { + REQUIRE_THROWS_AS(node.at(fkyaml::node::mapping()), fkyaml::type_error); + REQUIRE_THROWS_AS(node.at(fkyaml::node::sequence()), fkyaml::type_error); + REQUIRE_THROWS_AS(node.at(fkyaml::node()), fkyaml::type_error); + REQUIRE_THROWS_AS(node.at(fkyaml::node(false)), fkyaml::type_error); + REQUIRE_THROWS_AS(node.at(fkyaml::node("")), fkyaml::type_error); + } + + SECTION("Test const node with a non-integer node.") + { + const fkyaml::node const_node = node; + REQUIRE_THROWS_AS(const_node.at(fkyaml::node::mapping()), fkyaml::type_error); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node::sequence()), fkyaml::type_error); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node()), fkyaml::type_error); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node(false)), fkyaml::type_error); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node("")), fkyaml::type_error); + } + + SECTION("Test at() calls with an out-of-range integer value.") + { + REQUIRE_THROWS_AS(node.at(1), fkyaml::out_of_range); + REQUIRE_THROWS_AS(node.at(fkyaml::node(1)), fkyaml::out_of_range); + const fkyaml::node const_node = node; + REQUIRE_THROWS_AS(const_node.at(1), fkyaml::out_of_range); + REQUIRE_THROWS_AS(const_node.at(fkyaml::node(1)), fkyaml::out_of_range); + } + } + + SECTION("Test throwing expected at() call for scalar nodes.") + { + auto node = GENERATE(fkyaml::node(), fkyaml::node(false), fkyaml::node(0), fkyaml::node(0.0), fkyaml::node("")); + fkyaml::node node_key = 0; + + SECTION("Test non-const node with an integer.") + { + REQUIRE_THROWS_AS(node.at(0), fkyaml::type_error); + } + + SECTION("Test const node with an integer.") + { + const fkyaml::node const_node = node; + REQUIRE_THROWS_AS(const_node.at(0), fkyaml::type_error); + } + + SECTION("Test non-const node with an integer node.") + { + REQUIRE_THROWS_AS(node.at(node_key), fkyaml::type_error); + } + + SECTION("Test const node with an 
integer node.") + { + const fkyaml::node const_node = node; + REQUIRE_THROWS_AS(const_node.at(node_key), fkyaml::type_error); + } + } +} + // // test cases for YAML version property getter/setter // diff --git a/test/unit_test/test_node_ref_storage_class.cpp b/test/unit_test/test_node_ref_storage_class.cpp index ba8742d9..d621c309 100644 --- a/test/unit_test/test_node_ref_storage_class.cpp +++ b/test/unit_test/test_node_ref_storage_class.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -64,4 +64,4 @@ TEST_CASE("NodeRefStorageTest_ReleaseTest", "[NodeRefStorageTest]") fkyaml::node released_node2 = storage2.release(); REQUIRE(released_node2.is_sequence()); REQUIRE(released_node2.size() == 2); -} \ No newline at end of file +} diff --git a/test/unit_test/test_ordered_map_class.cpp b/test/unit_test/test_ordered_map_class.cpp index 23b33a1d..3aafc593 100644 --- a/test/unit_test/test_ordered_map_class.cpp +++ b/test/unit_test/test_ordered_map_class.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/test/unit_test/test_serializer_class.cpp b/test/unit_test/test_serializer_class.cpp index 973123e1..e32c36dc 100644 --- a/test/unit_test/test_serializer_class.cpp +++ b/test/unit_test/test_serializer_class.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A 
C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani diff --git a/test/unit_test/test_string_formatter.cpp b/test/unit_test/test_string_formatter.cpp new file mode 100644 index 00000000..bd3b4b07 --- /dev/null +++ b/test/unit_test/test_string_formatter.cpp @@ -0,0 +1,23 @@ +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani +// SPDX-License-Identifier: MIT + +#include + +#ifdef FK_YAML_TEST_USE_SINGLE_HEADER + #include +#else + #include +#endif + +TEST_CASE("StringFormatterTest_ValidMessageFormat", "[StringFormatterTest]") +{ + const char* p_label = "foo_label"; + int result = 0; + std::string formatted = fkyaml::detail::format("%s: ret=%d", p_label, result); + REQUIRE(formatted == "foo_label: ret=0"); +} diff --git a/test/unit_test/test_utf8_encoding_class.cpp b/test/unit_test/test_utf8_encoding_class.cpp index ebfb0ffd..e6769cd0 100644 --- a/test/unit_test/test_utf8_encoding_class.cpp +++ b/test/unit_test/test_utf8_encoding_class.cpp @@ -1,6 +1,6 @@ // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) -// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 +// | __| _ < \_ _/| ___ | _ | |___ version 0.3.3 // |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML // // SPDX-FileCopyrightText: 2023-2024 Kensuke Fukutani @@ -302,4 +302,4 @@ TEST_CASE("UTF8EncodingClassTest_FromUTF32Test", "[UTF8EncodingClassTest]") REQUIRE_THROWS_AS( fkyaml::detail::utf8_encoding::from_utf32(utf32, utf8_bytes, 
encoded_size), fkyaml::invalid_encoding); } -} \ No newline at end of file +} diff --git a/tool/amalgamation/CHANGES.md b/tool/amalgamation/CHANGES.md index 5955ec06..4201734c 100644 --- a/tool/amalgamation/CHANGES.md +++ b/tool/amalgamation/CHANGES.md @@ -4,4 +4,4 @@ The following changes have been made to the code with respect to