From e5a882a4f128fb8d207d625fb5be4e2af3262739 Mon Sep 17 00:00:00 2001 From: Allan CORNET Date: Sun, 24 Nov 2024 21:05:19 +0100 Subject: [PATCH 1/4] feat(spreadsheet): Add detectImportOptions and readtable functionality - Implement detectImportOptions for CSV import configuration - Add readtable function for importing tabular data - Enhance readcell with new import options - Add comprehensive test coverage for new functionality - Update ArrayOf to support table data type --- .../display_format/src/cpp/FormatHelpers.cpp | 10 + .../tests/test_display_struct.m | 28 + .../builtin/c/nlsSpreadsheet_builtin.vcxproj | 4 + .../c/nlsSpreadsheet_builtin.vcxproj.filters | 12 + modules/spreadsheet/builtin/cpp/Gateway.cpp | 6 + .../cpp/detectImportOptionsBuiltin.cpp | 53 ++ .../builtin/cpp/readcellBuiltin.cpp | 37 +- .../builtin/cpp/readtableBuiltin.cpp | 56 ++ .../include/detectImportOptionsBuiltin.hpp | 20 + .../builtin/include/readtableBuiltin.hpp | 20 + .../@DelimitedTextImportOptions/properties.m | 16 + modules/spreadsheet/functions/readtableold.m | 46 ++ .../spreadsheet/src/c/nlsSpreadsheet.vcxproj | 4 + .../src/c/nlsSpreadsheet.vcxproj.filters | 52 +- .../src/cpp/DetectImportOptions.cpp | 526 ++++++++++++++++++ modules/spreadsheet/src/cpp/ReadCell.cpp | 272 +++++++-- modules/spreadsheet/src/cpp/ReadTable.cpp | 326 +++++++++++ modules/spreadsheet/src/cpp/rapidcsv.h | 12 +- .../src/include/DetectImportOptions.hpp | 40 ++ modules/spreadsheet/src/include/ReadCell.hpp | 4 +- modules/spreadsheet/src/include/ReadTable.hpp | 21 + .../spreadsheet/tests/dlmread_comments.csv | 1 + modules/spreadsheet/tests/readcell_1.csv | 6 + modules/spreadsheet/tests/readcell_2.csv | 10 + modules/spreadsheet/tests/readcell_3.csv | 5 + modules/spreadsheet/tests/readcell_4.csv | 3 + .../tests/test_detectImportOptions.m | 21 + modules/spreadsheet/tests/test_readcell.m | 69 +++ modules/spreadsheet/tests/test_readtable.m | 56 ++ .../spreadsheet/tests/test_readtable_1.csv | 6 + 
.../spreadsheet/tests/test_readtable_2.csv | 6 + .../spreadsheet/tests/test_readtable_3.csv | 6 + modules/table/functions/@table/properties.m | 7 +- modules/table/functions/@table/subsref.m | 7 +- modules/types/src/c/nlsTypes.vcxproj | 1 + modules/types/src/c/nlsTypes.vcxproj.filters | 3 + .../types/src/cpp/ArrayOf_CharacterType.cpp | 4 +- modules/types/src/cpp/ArrayOf_DoubleType.cpp | 30 + modules/types/src/cpp/ArrayOf_StringType.cpp | 16 + modules/types/src/cpp/ArrayOf_TableType.cpp | 65 +++ modules/types/src/include/ArrayOf.hpp | 57 ++ 41 files changed, 1864 insertions(+), 80 deletions(-) create mode 100644 modules/display_format/tests/test_display_struct.m create mode 100644 modules/spreadsheet/builtin/cpp/detectImportOptionsBuiltin.cpp create mode 100644 modules/spreadsheet/builtin/cpp/readtableBuiltin.cpp create mode 100644 modules/spreadsheet/builtin/include/detectImportOptionsBuiltin.hpp create mode 100644 modules/spreadsheet/builtin/include/readtableBuiltin.hpp create mode 100644 modules/spreadsheet/functions/@DelimitedTextImportOptions/properties.m create mode 100644 modules/spreadsheet/functions/readtableold.m create mode 100644 modules/spreadsheet/src/cpp/DetectImportOptions.cpp create mode 100644 modules/spreadsheet/src/cpp/ReadTable.cpp create mode 100644 modules/spreadsheet/src/include/DetectImportOptions.hpp create mode 100644 modules/spreadsheet/src/include/ReadTable.hpp create mode 100644 modules/spreadsheet/tests/readcell_1.csv create mode 100644 modules/spreadsheet/tests/readcell_2.csv create mode 100644 modules/spreadsheet/tests/readcell_3.csv create mode 100644 modules/spreadsheet/tests/readcell_4.csv create mode 100644 modules/spreadsheet/tests/test_detectImportOptions.m create mode 100644 modules/spreadsheet/tests/test_readcell.m create mode 100644 modules/spreadsheet/tests/test_readtable.m create mode 100644 modules/spreadsheet/tests/test_readtable_1.csv create mode 100644 modules/spreadsheet/tests/test_readtable_2.csv create mode 100644 
modules/spreadsheet/tests/test_readtable_3.csv create mode 100644 modules/types/src/cpp/ArrayOf_TableType.cpp diff --git a/modules/display_format/src/cpp/FormatHelpers.cpp b/modules/display_format/src/cpp/FormatHelpers.cpp index 8495437df0..7c7384457e 100644 --- a/modules/display_format/src/cpp/FormatHelpers.cpp +++ b/modules/display_format/src/cpp/FormatHelpers.cpp @@ -93,6 +93,16 @@ template static std::wstring formatIntegerReal(T val, const FormatDisplayInformation& formatInfo) { + if (std::isnan(val)) { + return L"NaN"; + } + if (std::isinf(val)) { + if (val > 0) { + return L"Inf"; + } else { + return L"-Inf"; + } + } if (formatInfo.decimalsReal != 0) { return fmt::sprintf( formatInfo.formatReal, formatInfo.widthReal, formatInfo.decimalsReal, val); diff --git a/modules/display_format/tests/test_display_struct.m b/modules/display_format/tests/test_display_struct.m new file mode 100644 index 0000000000..c3d6f2f124 --- /dev/null +++ b/modules/display_format/tests/test_display_struct.m @@ -0,0 +1,28 @@ +%============================================================================= +% Copyright (c) 2016-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. 
+%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +% <--ENGLISH IMPOSED--> +%============================================================================= +st = []; +st.A = [1 Inf]; +st.B = [2 -Inf]; +st.C = [3 NaN]; +R = evalc('st'); +REF = ' +st = + + struct with fields: + + A: [1 Inf] + B: [2 -Inf] + C: [3 NaN] + +'; +assert_isequal(R, REF); +%============================================================================= diff --git a/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj b/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj index 1315bdbaef..f324f4789e 100644 --- a/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj +++ b/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj @@ -212,18 +212,22 @@ + + + + diff --git a/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj.filters b/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj.filters index 781635d16c..93c863e34c 100644 --- a/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj.filters +++ b/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj.filters @@ -47,6 +47,12 @@ Source Files + + Source Files + + + Source Files + @@ -64,5 +70,11 @@ Header Files + + Header Files + + + Header Files + \ No newline at end of file diff --git a/modules/spreadsheet/builtin/cpp/Gateway.cpp b/modules/spreadsheet/builtin/cpp/Gateway.cpp index 9a46a99836..837820ddf4 100644 --- a/modules/spreadsheet/builtin/cpp/Gateway.cpp +++ b/modules/spreadsheet/builtin/cpp/Gateway.cpp @@ -9,9 +9,11 @@ //============================================================================= #include "NelsonGateway.hpp" #include "readcellBuiltin.hpp" +#include "readtableBuiltin.hpp" #include "dlmreadBuiltin.hpp" #include "dlmwriteBuiltin.hpp" #include "writetableBuiltin.hpp" 
+#include "detectImportOptionsBuiltin.hpp" //============================================================================= using namespace Nelson; //============================================================================= @@ -19,10 +21,14 @@ const std::wstring gatewayName = L"spreadsheet"; //============================================================================= static const nlsGateway gateway[] = { { "readcell", (ptrBuiltin)Nelson::SpreadsheetGateway::readcellBuiltin, 1, 1 }, + { "readtable", (ptrBuiltin)Nelson::SpreadsheetGateway::readtableBuiltin, 1, 1 }, { "dlmread", (ptrBuiltin)Nelson::SpreadsheetGateway::dlmreadBuiltin, 1, 4 }, { "dlmwrite", (ptrBuiltin)Nelson::SpreadsheetGateway::dlmwriteBuiltin, 0, -3, CPP_BUILTIN_WITH_EVALUATOR }, { "writetable", (ptrBuiltin)Nelson::SpreadsheetGateway::writetableBuiltin, 0, 4 }, + { "detectImportOptions", (ptrBuiltin)Nelson::SpreadsheetGateway::detectImportOptionsBuiltin, 1, + -1 }, + }; //============================================================================= NLSGATEWAYFUNC(gateway) diff --git a/modules/spreadsheet/builtin/cpp/detectImportOptionsBuiltin.cpp b/modules/spreadsheet/builtin/cpp/detectImportOptionsBuiltin.cpp new file mode 100644 index 0000000000..5ddc99f35a --- /dev/null +++ b/modules/spreadsheet/builtin/cpp/detectImportOptionsBuiltin.cpp @@ -0,0 +1,53 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#include "detectImportOptionsBuiltin.hpp" +#include "Error.hpp" +#include "InputOutputArgumentsCheckers.hpp" +#include "DetectImportOptions.hpp" +//============================================================================= +using namespace Nelson; +//============================================================================= +ArrayOfVector +Nelson::SpreadsheetGateway::detectImportOptionsBuiltin(int nLhs, const ArrayOfVector& argIn) +{ + ArrayOfVector retval; + nargoutcheck(nLhs, 0, 1); + nargincheck(argIn, 1, 4); + + std::wstring filename = argIn[0].getContentAsWideString(); + + detectImportOptions options; + initializeDetectImportOptions(options); + + std::string errorMessage; + analyzeFileFormatImportOptions(filename, 4096, options, errorMessage); + if (!errorMessage.empty()) { + Error(errorMessage); + } + + stringVector fieldnames = { "Delimiter", "LineEnding", "CommentStyle", "EmptyLineRule", + "VariableNamesLine", "VariableNames", "RowNamesColumn", "DataLines" }; + ArrayOfVector fieldvalues; + + fieldvalues << ArrayOf::toCellArrayOfCharacterRowVectors(options.Delimiter); + fieldvalues << ArrayOf::toCellArrayOfCharacterRowVectors(options.LineEnding); + fieldvalues << ArrayOf::toCellArrayOfCharacterRowVectors(options.CommentStyle); + fieldvalues << ArrayOf::characterArrayConstructor(options.EmptyLineRule); + fieldvalues << ArrayOf::doubleConstructor(options.VariableNamesLine); + fieldvalues << ArrayOf::toCellArrayOfCharacterRowVectors(options.VariableNames); + fieldvalues << ArrayOf::doubleConstructor(options.RowNamesColumn); + fieldvalues << ArrayOf::doubleVectorConstructor(options.DataLines); + + ArrayOf importOptions + = ArrayOf::classConstructor("DelimitedTextImportOptions", fieldnames, fieldvalues); + 
retval << importOptions; + return retval; +} +//============================================================================= diff --git a/modules/spreadsheet/builtin/cpp/readcellBuiltin.cpp b/modules/spreadsheet/builtin/cpp/readcellBuiltin.cpp index 8d8c4de8d1..4cd59f2274 100644 --- a/modules/spreadsheet/builtin/cpp/readcellBuiltin.cpp +++ b/modules/spreadsheet/builtin/cpp/readcellBuiltin.cpp @@ -11,6 +11,7 @@ #include "Error.hpp" #include "InputOutputArgumentsCheckers.hpp" #include "ReadCell.hpp" +#include "DetectImportOptions.hpp" //============================================================================= using namespace Nelson; //============================================================================= @@ -19,11 +20,41 @@ Nelson::SpreadsheetGateway::readcellBuiltin(int nLhs, const ArrayOfVector& argIn { ArrayOfVector retval; nargoutcheck(nLhs, 0, 1); - nargincheck(argIn, 1, 1); + nargincheck(argIn, 1, 10); std::wstring filename = argIn[0].getContentAsWideString(); - std::wstring errorMessage; - retval << ReadCell(filename, errorMessage); + std::string errorMessage; + detectImportOptions options; + initializeDetectImportOptions(options); + + if (argIn.size() > 1 && argIn[1].isClassType() + && argIn[1].getClassType() == "DelimitedTextImportOptions") { + + options.Delimiter = argIn[1].getField("Delimiter").getContentAsCStringRowVector(); + options.LineEnding = argIn[1].getField("LineEnding").getContentAsCStringRowVector(); + options.CommentStyle = argIn[1].getField("CommentStyle").getContentAsCStringRowVector(); + options.EmptyLineRule = argIn[1].getField("EmptyLineRule").getContentAsCString(); + options.VariableNamesLine + = argIn[1].getField("VariableNamesLine").getContentAsDoubleScalar(); + options.VariableNames = argIn[1].getField("VariableNames").getContentAsCStringRowVector(); + options.RowNamesColumn = argIn[1].getField("RowNamesColumn").getContentAsDoubleScalar(); + options.DataLines = argIn[1].getField("DataLines").getContentAsDoubleVector(); + + 
} else { + analyzeFileFormatImportOptions(filename, 4096, options, errorMessage); + options.CommentStyle.clear(); + options.DataLines.clear(); + options.DataLines.push_back(1); + options.DataLines.push_back(std::numeric_limits::infinity()); + if (!errorMessage.empty()) { + Error(errorMessage); + } + } + + retval << ReadCell(filename, options, errorMessage); + if (!errorMessage.empty()) { + Error(errorMessage); + } return retval; } //============================================================================= diff --git a/modules/spreadsheet/builtin/cpp/readtableBuiltin.cpp b/modules/spreadsheet/builtin/cpp/readtableBuiltin.cpp new file mode 100644 index 0000000000..e63589f3d3 --- /dev/null +++ b/modules/spreadsheet/builtin/cpp/readtableBuiltin.cpp @@ -0,0 +1,56 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#include "readtableBuiltin.hpp" +#include "Error.hpp" +#include "InputOutputArgumentsCheckers.hpp" +#include "ReadTable.hpp" +#include "DetectImportOptions.hpp" +//============================================================================= +using namespace Nelson; +//============================================================================= +ArrayOfVector +Nelson::SpreadsheetGateway::readtableBuiltin(int nLhs, const ArrayOfVector& argIn) +{ + ArrayOfVector retval; + nargoutcheck(nLhs, 0, 1); + nargincheck(argIn, 1); + std::wstring filename = argIn[0].getContentAsWideString(); + std::string errorMessage; + detectImportOptions options; + + initializeDetectImportOptions(options); + + if (argIn.size() > 1 && argIn[1].isClassType() + && argIn[1].getClassType() == "DelimitedTextImportOptions") { + + options.Delimiter = argIn[1].getField("Delimiter").getContentAsCStringRowVector(); + options.LineEnding = argIn[1].getField("LineEnding").getContentAsCStringRowVector(); + options.CommentStyle = argIn[1].getField("CommentStyle").getContentAsCStringRowVector(); + options.EmptyLineRule = argIn[1].getField("EmptyLineRule").getContentAsCString(); + options.VariableNamesLine + = argIn[1].getField("VariableNamesLine").getContentAsDoubleScalar(); + options.VariableNames = argIn[1].getField("VariableNames").getContentAsCStringRowVector(); + options.RowNamesColumn = argIn[1].getField("RowNamesColumn").getContentAsDoubleScalar(); + options.DataLines = argIn[1].getField("DataLines").getContentAsDoubleVector(); + + } else { + analyzeFileFormatImportOptions(filename, 4096, options, errorMessage); + if (!errorMessage.empty()) { + Error(errorMessage); + } + } + + retval << ReadTable(filename, options, errorMessage); + if 
(!errorMessage.empty()) { + Error(errorMessage); + } + return retval; +} +//============================================================================= diff --git a/modules/spreadsheet/builtin/include/detectImportOptionsBuiltin.hpp b/modules/spreadsheet/builtin/include/detectImportOptionsBuiltin.hpp new file mode 100644 index 0000000000..a6044617fd --- /dev/null +++ b/modules/spreadsheet/builtin/include/detectImportOptionsBuiltin.hpp @@ -0,0 +1,20 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. +//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include "ArrayOf.hpp" +//============================================================================= +namespace Nelson::SpreadsheetGateway { +//============================================================================= +ArrayOfVector +detectImportOptionsBuiltin(int nLhs, const ArrayOfVector& argIn); +//============================================================================= +} // namespace Nelson +//============================================================================= diff --git a/modules/spreadsheet/builtin/include/readtableBuiltin.hpp b/modules/spreadsheet/builtin/include/readtableBuiltin.hpp new file mode 100644 index 0000000000..026d285e7b --- /dev/null +++ b/modules/spreadsheet/builtin/include/readtableBuiltin.hpp @@ -0,0 +1,20 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) 
+//============================================================================= +// This file is part of the Nelson. +//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include "ArrayOf.hpp" +//============================================================================= +namespace Nelson::SpreadsheetGateway { +//============================================================================= +ArrayOfVector +readtableBuiltin(int nLhs, const ArrayOfVector& argIn); +//============================================================================= +} // namespace Nelson +//============================================================================= diff --git a/modules/spreadsheet/functions/@DelimitedTextImportOptions/properties.m b/modules/spreadsheet/functions/@DelimitedTextImportOptions/properties.m new file mode 100644 index 0000000000..25fe968c5c --- /dev/null +++ b/modules/spreadsheet/functions/@DelimitedTextImportOptions/properties.m @@ -0,0 +1,16 @@ +%============================================================================= +% Copyright (c) 2023-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. 
+%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +function varargout = properties(T) + st = struct(T); + props = fieldnames(st); + varargout{1} = props; +end +%============================================================================= + \ No newline at end of file diff --git a/modules/spreadsheet/functions/readtableold.m b/modules/spreadsheet/functions/readtableold.m new file mode 100644 index 0000000000..201bfcf1df --- /dev/null +++ b/modules/spreadsheet/functions/readtableold.m @@ -0,0 +1,46 @@ +%============================================================================= +% Copyright (c) 2016-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. +%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +function varargout = readtable(varargin) + narginchk(1, 10); + nargoutchk(0, 1); + filename = convertStringsToChars(varargin{1}); + if (nargin > 1) && isa(varargin{2}, 'DelimitedTextImportOptions') + options = varargin{2}; + else + options = detectImportOptions(filename); + end + ce = readcell(filename, options); + variableNames = options.VariableNames; + VariableNamesLine = options.VariableNamesLine; + RowNamesColumn = options.RowNamesColumn; + if ~isempty(variableNames) && (VariableNamesLine > 0) + ce(VariableNamesLine, :) = []; + end + if RowNamesColumn > 0 + variableNames(VariableNamesLine) = []; + rowNames = ce(:, RowNamesColumn); + ce(:, RowNamesColumn) = []; + else + rowNames = {}; + end + args = {}; + if ~isempty(variableNames) + args = [args, 'VariableNames', 
{variableNames}]; + end + if ~isempty(rowNames) + args = [args, 'RowNames', {rowNames'}]; + end + if isrow(ce) + varargout{1} = table(ce{:}, args{:}); + else + varargout{1} = table(ce, args{:}); + end +end +%============================================================================= diff --git a/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj b/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj index 260f031313..75a63c8662 100644 --- a/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj +++ b/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj @@ -210,8 +210,10 @@ + + @@ -226,8 +228,10 @@ + + diff --git a/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj.filters b/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj.filters index 2f146a4428..61bc5d33b9 100644 --- a/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj.filters +++ b/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj.filters @@ -26,72 +26,84 @@ - + Source Files - + Source Files - + Source Files - + Source Files - + Source Files - + Source Files - + Source Files - + Source Files - + Source Files Source Files + + Source Files + + + Source Files + - + Header Files - + Header Files - + Header Files - + Header Files - + Header Files - + + Header Files + + Header Files Header Files - + Header Files - + Header Files - + Header Files - + Header Files - + + Header Files + + Header Files diff --git a/modules/spreadsheet/src/cpp/DetectImportOptions.cpp b/modules/spreadsheet/src/cpp/DetectImportOptions.cpp new file mode 100644 index 0000000000..fca143cf3b --- /dev/null +++ b/modules/spreadsheet/src/cpp/DetectImportOptions.cpp @@ -0,0 +1,526 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#include +#include +#include +#include +#include +#include "DetectImportOptions.hpp" +#include "i18n.hpp" +#include "characters_encoding.hpp" +//============================================================================= +namespace Nelson { +//============================================================================= +// Constants +namespace { + const stringVector DELIMITERS = { ",", "\t", ";", "|" }; + const stringVector LINE_ENDINGS = { "\r\n", "\n", "\r" }; + const stringVector COMMENT_STYLES = { "%", "#", "//", "--" }; + const size_t MIN_HEADER_SAMPLE = 5; +} +//============================================================================= +// Helper Functions +//============================================================================= +static std::string +escapeSpecialCharacters(const std::string& input) +{ + static const std::map escapeMap + = { { '\n', "\\n" }, { '\r', "\\r" }, { '\t', "\\t" }, { '\\', "\\\\" } }; + + std::string escaped; + escaped.reserve(input.length()); + + for (char c : input) { + auto it = escapeMap.find(c); + if (it != escapeMap.end()) { + escaped += it->second; + } else { + escaped += c; + } + } + return escaped; +} +//============================================================================= +static std::string +unescapeSpecialCharacters(const std::string& input) +{ + static const std::map unescapeMap + = { { "\\n", '\n' }, { "\\r", '\r' }, { "\\t", '\t' }, { "\\\\", '\\' } }; + + std::string unescaped; + unescaped.reserve(input.length()); + size_t i = 0; + + while (i < input.length()) { + if (input[i] == '\\' && i + 1 < input.length()) { + // Check the next two characters + std::string potentialEscape = input.substr(i, 2); + auto it = unescapeMap.find(potentialEscape); + 
if (it != unescapeMap.end()) { + unescaped += it->second; // Add unescaped character + i += 2; // Move past the escape sequence + continue; + } + } + // Add the current character if not an escape sequence + unescaped += input[i]; + ++i; + } + + return unescaped; +} +//============================================================================= +static std::string +readFileContent(const std::wstring& filename, size_t sampleSize, std::string& errorMessage) +{ +#ifdef _MSC_VER + std::wifstream file(filename, std::ios::binary); +#else + std::ifstream file(wstring_to_utf8(filename), std::ios::binary); +#endif + + if (!file.is_open()) { + errorMessage = _("Unable to open file."); + return ""; + } + +#ifdef _MSC_VER + std::wstring wcontent(sampleSize, L'\0'); + file.read(&wcontent[0], sampleSize); + size_t actualSize = file.gcount(); + wcontent.resize(actualSize); + return wstring_to_utf8(wcontent); +#else + std::string content(sampleSize, '\0'); + file.read(&content[0], sampleSize); + size_t actualSize = file.gcount(); + content.resize(actualSize); + return content; +#endif +} +//============================================================================= +static std::vector +splitIntoLines(const std::string& content, const std::string& lineEnding) +{ + std::vector lines; + size_t start = 0; + size_t end = content.find(lineEnding); + + while (end != std::string::npos) { + // Add the substring between start and end as a line + lines.push_back(content.substr(start, end - start)); + // Move the start position past the current line ending + start = end + lineEnding.length(); + // Find the next occurrence of the line ending + end = content.find(lineEnding, start); + } + + // Add the last line (including an empty one if `lineEnding` is at the end) + lines.push_back(content.substr(start)); + + return lines; +} +//============================================================================= +static std::vector +splitLine(const std::string& line, const std::string& delimiter) 
+{ + std::vector tokens; + size_t start = 0; + size_t end = line.find(delimiter); + + while (end != std::string::npos) { + std::string token = line.substr(start, end - start); + tokens.push_back(token); + start = end + delimiter.length(); + end = line.find(delimiter, start); + } + + tokens.push_back(line.substr(start)); + return tokens; +} +//============================================================================= +// Analysis Functions +//============================================================================= +struct DelimiterStats +{ + std::string delimiter; + double averageCount; + size_t consistentLines; + size_t totalCount; +}; +//============================================================================= +static DelimiterStats +analyzeDelimiterInLine(const std::string& line, const std::string& delimiter) +{ + DelimiterStats stats { delimiter, 0.0, 0, 0 }; + + if (line.empty()) { + return stats; + } + + size_t count = 0; + size_t pos = 0; + while ((pos = line.find(delimiter, pos)) != std::string::npos) { + ++count; + pos += delimiter.length(); + } + + stats.totalCount = count; + return stats; +} +//============================================================================= +static void +detectDelimiter(const std::vector& lines, detectImportOptions& options) +{ + std::map> delimiterCounts; + + // Count delimiters in each non-empty line + size_t validLines = 0; + for (const auto& line : lines) { + if (line.empty()) { + continue; + } + + validLines++; + for (const auto& delimiter : DELIMITERS) { + size_t count = 0; + size_t pos = 0; + while ((pos = line.find(delimiter, pos)) != std::string::npos) { + ++count; + pos += delimiter.length(); + } + delimiterCounts[delimiter].push_back(count); + } + } + + // Calculate statistics for each delimiter + std::vector delimiterStats; + for (const auto& [delimiter, counts] : delimiterCounts) { + if (counts.empty()) { + continue; + } + + DelimiterStats stats { delimiter, 0.0, 0, 0 }; + + // Calculate total and 
mean + size_t total = 0; + for (size_t count : counts) { + total += count; + } + double mean = static_cast(total) / counts.size(); + + // Count lines with consistent delimiter count (within ±1 of mean) + size_t consistentLines = 0; + for (size_t count : counts) { + if (std::abs(count - mean) <= 1.0) { + consistentLines++; + } + } + + stats.averageCount = mean; + stats.consistentLines = consistentLines; + stats.totalCount = total; + + delimiterStats.push_back(stats); + } + + // Select the best delimiter based on consistency and frequency + auto bestDelimiter = std::max_element(delimiterStats.begin(), delimiterStats.end(), + [validLines](const DelimiterStats& a, const DelimiterStats& b) { + // First prioritize consistency across lines + double aConsistency = static_cast(a.consistentLines) / validLines; + double bConsistency = static_cast(b.consistentLines) / validLines; + + if (std::abs(aConsistency - bConsistency) > 0.1) { // 10% threshold + return aConsistency < bConsistency; + } + + // If consistency is similar, look at average count + if (std::abs(a.averageCount - b.averageCount) > 0.5) { // 0.5 threshold + return a.averageCount < b.averageCount; + } + + // If all else is similar, prefer simpler delimiters + return a.delimiter.length() > b.delimiter.length(); + }); + + if (bestDelimiter != delimiterStats.end() && bestDelimiter->totalCount > 0) { + options.Delimiter = { bestDelimiter->delimiter }; + stringVector defaultVariableNames; + size_t nbElements = bestDelimiter->averageCount + 1; + defaultVariableNames.resize(nbElements); + for (size_t k = 0; k < nbElements; ++k) { + defaultVariableNames[k] = "Var" + std::to_string((int)(k + 1)); + } + options.VariableNames = defaultVariableNames; + } else { + // Default to comma if no clear delimiter is found + options.Delimiter = { "," }; + } +} +//============================================================================= +static void +detectLineEndings(const std::string& content, detectImportOptions& options) +{ + 
size_t maxLineEndingsCount = 0; + + for (const auto& lineEnding : LINE_ENDINGS) { + size_t count = 0; + size_t pos = 0; + + while ((pos = content.find(lineEnding, pos)) != std::string::npos) { + ++count; + pos += lineEnding.length(); + } + + if (count > maxLineEndingsCount) { + maxLineEndingsCount = count; + options.LineEnding = { escapeSpecialCharacters(lineEnding) }; + } + } +} +//============================================================================= +static void +detectCommentStyle( + const std::vector& lines, detectImportOptions& options, std::string& errorMessage) +{ + std::map commentCounts; + + for (const auto& comment : COMMENT_STYLES) { + std::string escapedComment + = std::regex_replace(comment, std::regex("[\\[\\](){}.*+?^$\\\\|]"), "\\$&"); + std::string pattern = "^[ \\t]*" + escapedComment; + + try { + std::regex commentRegex(pattern); + size_t count = std::count_if( + lines.begin(), lines.end(), [&commentRegex](const std::string& line) { + return std::regex_search(line, commentRegex); + }); + + if (count > 0) { + commentCounts[comment] = count; + } + } catch (const std::regex_error&) { + errorMessage = "Regex error for pattern: " + pattern; + continue; + } + } + + std::vector> sortedComments( + commentCounts.begin(), commentCounts.end()); + std::sort(sortedComments.begin(), sortedComments.end(), + [](const auto& a, const auto& b) { return a.second > b.second; }); + + options.CommentStyle.clear(); + for (const auto& [style, count] : sortedComments) { + options.CommentStyle.push_back(style); + } +} +//============================================================================= +static void +detectEmptyLineRule(const std::vector& lines, detectImportOptions& options) +{ + bool hasEmptyLines = false; + bool hasConsecutiveEmptyLines = false; + size_t emptyLineCount = 0; + + for (const auto& line : lines) { + std::string trimmedLine = line; + trimmedLine.erase(0, trimmedLine.find_first_not_of(" \t\r\n")); + 
trimmedLine.erase(trimmedLine.find_last_not_of(" \t\r\n") + 1); + + if (trimmedLine.empty()) { + emptyLineCount++; + hasEmptyLines = true; + if (emptyLineCount > 1) { + hasConsecutiveEmptyLines = true; + break; + } + } else { + emptyLineCount = 0; + } + } + + options.EmptyLineRule + = (!hasEmptyLines || !hasConsecutiveEmptyLines) ? "skip" : "AllowConsecutiveEmpty"; +} +//============================================================================= +static bool +isNumeric(const std::string& str) +{ + if (str.empty()) + return false; + + std::istringstream iss(str); + double value; + iss >> std::noskipws >> value; + + return iss.eof() && !iss.fail(); +} +//============================================================================= +static bool +isPotentialHeader(const std::string& str) +{ + if (str.empty()) + return false; + + // Check if it's not purely numeric + if (isNumeric(str)) + return false; + + // Check for presence of letters or special characters + bool hasLetters = false; + for (char c : str) { + if (std::isalpha(c)) { + hasLetters = true; + break; + } + } + + return hasLetters; +} +//============================================================================= +static void +detectColumnsAndRowNames(std::vector& lines, const std::string& delimiter, + const std::string& lineEnding, detectImportOptions& options) +{ + std::vector> parsedLines; + size_t maxColumns = 0; + size_t headerLineIndex = -1; + + // Parse first few lines + size_t sampleSize = std::min(lines.size(), MIN_HEADER_SAMPLE); + for (size_t i = 0; i < sampleSize; i++) { + if (lines[i].size() >= lineEnding.size() + && lines[i].substr(lines[i].size() - lineEnding.size()) == lineEnding) { + lines[i] = lines[i].substr(0, lines[i].size() - lineEnding.size()); + } + if (lines[i].empty()) { + continue; + } + + auto tokens = splitLine(lines[i], delimiter); + maxColumns = std::max(maxColumns, tokens.size()); + parsedLines.push_back(tokens); + + if (headerLineIndex == -1 && std::any_of(tokens.begin(), 
tokens.end(), isPotentialHeader)) { + headerLineIndex = i; + } + } + + if (parsedLines.empty() || maxColumns == 0) + return; + + std::vector columnNames; + // Detect column headers + bool hasColumnHeaders = false; + if (!parsedLines.empty()) { + size_t headerCandidates = 0; + + for (size_t col = 0; col < parsedLines[0].size(); col++) { + if (isPotentialHeader(parsedLines[0][col])) { + headerCandidates++; + columnNames.push_back(parsedLines[0][col]); + } + } + hasColumnHeaders = (headerCandidates > parsedLines[0].size() / 2); + if (!hasColumnHeaders) { + columnNames.clear(); + options.VariableNamesLine = 0; + } else { + options.VariableNames = columnNames; + options.VariableNamesLine = headerLineIndex + 1; + } + } +} +//============================================================================= +static void +detectDataLines(std::vector& lines, detectImportOptions& options) +{ + size_t dataLineStart = options.VariableNamesLine; + + // Loop through lines to find the first valid data line + while (dataLineStart < lines.size()) { + std::string trimmedLine = lines[dataLineStart]; + trimmedLine.erase(0, trimmedLine.find_first_not_of(" \t\r\n")); // Trim leading spaces + trimmedLine.erase(trimmedLine.find_last_not_of(" \t\r\n") + 1); // Trim trailing spaces + + // Skip empty lines if EmptyLineRule is "skip" + if (options.EmptyLineRule == "skip" && trimmedLine.empty()) { + dataLineStart++; + continue; + } + + // Skip lines starting with any comment style + bool isCommentLine = false; + for (const auto& comment : options.CommentStyle) { + if (trimmedLine.find(comment) == 0) { + isCommentLine = true; + break; + } + } + if (isCommentLine) { + dataLineStart++; + continue; + } + break; + } + options.DataLines[0] = dataLineStart + 1; +} +//============================================================================= +// Public Interface +//============================================================================= +void +initializeDetectImportOptions(detectImportOptions& 
options) +{ + // Initialize default values if needed + options.Delimiter.clear(); + options.LineEnding.clear(); + options.CommentStyle.clear(); + options.EmptyLineRule = "skip"; + options.TextType = "char"; + options.VariableNamesLine = 0; + options.VariableNames.clear(); + options.RowNamesColumn = 0; + options.DataLines.push_back(1); + options.DataLines.push_back(std::numeric_limits::infinity()); +} +//============================================================================= +void +analyzeFileFormatImportOptions(std::wstring filename, size_t sampleSize, + detectImportOptions& options, std::string& errorMessage) +{ + + // Read file content + std::string content = readFileContent(filename, sampleSize, errorMessage); + if (content.empty() && !errorMessage.empty()) { + return; + } + + // Perform various detections + detectLineEndings(content, options); + std::string lineEnding + = options.LineEnding.empty() ? "\n" : unescapeSpecialCharacters(options.LineEnding[0]); + + std::vector lines = splitIntoLines(content, lineEnding); + + detectDelimiter(lines, options); + detectCommentStyle(lines, options, errorMessage); + detectEmptyLineRule(lines, options); + + if (!options.Delimiter.empty()) { + detectColumnsAndRowNames(lines, options.Delimiter[0], lineEnding, options); + } + detectDataLines(lines, options); +} +//============================================================================= +} // namespace Nelson +//============================================================================= diff --git a/modules/spreadsheet/src/cpp/ReadCell.cpp b/modules/spreadsheet/src/cpp/ReadCell.cpp index 49e549c46d..293172e60e 100644 --- a/modules/spreadsheet/src/cpp/ReadCell.cpp +++ b/modules/spreadsheet/src/cpp/ReadCell.cpp @@ -8,93 +8,265 @@ // LICENCE_BLOCK_END //============================================================================= #include +#include +#include +#include #include "ReadCell.hpp" #include "characters_encoding.hpp" +#include "nlsBuildConfig.h" 
//============================================================================= namespace Nelson { //============================================================================= -static void -ConvertToArrayOf(const std::string& pStr, ArrayOf& pVal) +struct ComplexPatterns +{ + // Regex for special values (Inf, NaN) + static inline const std::string special_re = R"((?:[Nn][Aa][Nn]|[Ii][Nn][Ff]))"; + + // Full regex patterns combining numbers and special values + static inline const std::regex full_complex { R"(([+-]?(?:\d*\.?\d+|)" + special_re + + R"())([+-](?:\d*\.?\d+|)" + special_re + R"())[ij])", + std::regex::optimize }; + static inline const std::regex real_only { + R"(([+-]?(?:\d*\.?\d+|)" + special_re + R"())(?![ij]))", std::regex::optimize + }; + static inline const std::regex imag_only { R"(([+-]?(?:\d*\.?\d+|)" + special_re + R"())[ij])", + std::regex::optimize }; +}; +//============================================================================= +static bool +ConvertToDouble(const std::string& pStr, double& pVal) { - pVal = ArrayOf::characterArrayConstructor(pStr); + fast_float::parse_options options { fast_float::chars_format::fortran }; + + const char* first = pStr.data(); + const char* last = pStr.data() + pStr.size(); + if (!pStr.empty() && pStr.front() == '+') { + first += 1; + } + + auto answer = fast_float::from_chars_advanced(first, last, pVal, options); + + if (answer.ec != std::errc() || answer.ptr != last) { + return false; + } + return true; } //============================================================================= -static char -detectSeparator(const std::string& filename) +static bool +ConvertToDoubleComplex(const std::string& str, std::complex& pVal) { - std::ifstream file(filename); - if (!file.is_open()) { - std::cerr << "Error opening file!" 
<< std::endl; - return '\0'; + char lastChar = '\0'; + if (!str.empty()) { + lastChar = str.back(); } + if ((lastChar != '\0') && lastChar == 'I' || lastChar == 'J' || lastChar == 'i' + || lastChar == 'j') { + std::smatch matches; + if (std::regex_match(str, matches, ComplexPatterns::full_complex)) { + bool isNegativeReal = false; + bool isNegativeImag = false; + std::string realStr = matches[1].str(); + std::string imagStr = matches[2].str(); + if (imagStr.front() == L'+' || imagStr.front() == L'-') { + if (imagStr.front() == L'-') { + isNegativeImag = true; + } + imagStr.erase(0, 1); + } - std::string line; - if (std::getline(file, line)) { - // Count occurrences of potential separators - std::unordered_map separatorCount; - for (char sep : { ',', ';', '\t' }) { - separatorCount[sep] = 0; - } + double realPart, imagPart; - // Increment counts for each potential separator - for (char ch : line) { - if (separatorCount.find(ch) != separatorCount.end()) { - separatorCount[ch]++; + bool res = ConvertToDouble(realStr, realPart); + if (!res) { + return res; + } + res = ConvertToDouble(imagStr, imagPart); + if (!res) { + return res; + } + if (isNegativeReal) { + realPart = -realPart; + } + if (isNegativeImag) { + imagPart = -imagPart; + } + pVal = { realPart, imagPart }; + return true; + } else if (std::regex_match(str, matches, ComplexPatterns::imag_only)) { + bool isNegativeImag = false; + std::string imagStr = matches[1].str(); + if (imagStr.front() == L'+' || imagStr.front() == L'-') { + if (imagStr.front() == L'-') { + isNegativeImag = true; + } + imagStr.erase(0, 1); } - } - // Find the separator with the most occurrences - char likelySeparator = '\0'; - int maxCount = 0; - for (const auto& entry : separatorCount) { - if (entry.second > maxCount) { - likelySeparator = entry.first; - maxCount = entry.second; + double imagPart; + bool res = ConvertToDouble(imagStr, imagPart); + if (!res) { + return false; + } + if (isNegativeImag) { + imagPart = -imagPart; } + pVal = 
{ 0., imagPart }; + return true; + } else { + return false; + } + } else { + double valueReal; + bool res = ConvertToDouble(str, valueReal); + if (res) { + pVal = { valueReal, 0 }; + return true; + } + } + return false; +} +//============================================================================= +static void +ConvertToArrayOfCharacter(const std::string& pStr, ArrayOf& pVal) +{ + std::complex value; + if (ConvertToDoubleComplex(pStr, value)) { + if (value.imag() == 0) { + pVal = ArrayOf::doubleConstructor(value.real()); + } else { + pVal = ArrayOf::dcomplexConstructor(value.real(), value.imag()); } + } else { + if (pStr == "") { + Dimensions dims(1, 1); + pVal = ArrayOf::stringArrayConstructorAllMissing(dims); + } else { + pVal = ArrayOf::characterArrayConstructor(pStr); + } + } +} +//============================================================================= +static void +ConvertToArrayOfString(const std::string& pStr, ArrayOf& pVal) +{ + std::complex value; + if (ConvertToDoubleComplex(pStr, value)) { + if (value.imag() == 0) { + pVal = ArrayOf::doubleConstructor(value.real()); + } else { + pVal = ArrayOf::dcomplexConstructor(value.real(), value.imag()); + } + } else { + pVal = ArrayOf::stringArrayConstructor(pStr); + } +} +//============================================================================= +static std::stringstream +readLinesFromFile(const std::wstring& filename, const detectImportOptions& options) +{ + std::ifstream file; +#ifdef _MSC_VER + file.open(filename); +#else + file.open(wstring_to_utf8(filename)); +#endif + + std::string line; + int currentLine = 1; + std::stringstream normalizedStream; - return likelySeparator; + while (currentLine < (int)options.DataLines[0] && std::getline(file, line)) { + currentLine++; } - return '\0'; // Return null character if no valid separator found + auto normalizeLineEnding = [](const std::string& inputLine) -> std::string { + std::string normalized = inputLine; + 
normalized.erase(std::remove(normalized.begin(), normalized.end(), '\r'), normalized.end()); + return normalized; + }; + + if (std::isinf(options.DataLines[1])) { + while (std::getline(file, line)) { + normalizedStream << normalizeLineEnding(line) << '\n'; + currentLine++; + } + } else { + while (currentLine <= (int)options.DataLines[1] && std::getline(file, line)) { + normalizedStream << normalizeLineEnding(line) << '\n'; + currentLine++; + } + } + return normalizedStream; } //============================================================================= ArrayOf -ReadCell(const std::wstring& filename, std::wstring& errorMessage) +ReadCell( + const std::wstring& filename, const detectImportOptions& options, std::string& errorMessage) { - try { - char separator = detectSeparator(wstring_to_utf8(filename)); + char separator = options.Delimiter[0][0]; + bool pHasCR = false; + rapidcsv::SeparatorParams separatorParams + = rapidcsv::SeparatorParams(separator, true, pHasCR, false, false); + + rapidcsv::ConverterParams converterParams; + converterParams.mHasDefaultConverter = false; + converterParams.mNumericLocale = false; - rapidcsv::Document doc(wstring_to_utf8(filename), rapidcsv::LabelParams(), - rapidcsv::SeparatorParams(separator)); + rapidcsv::LineReaderParams lineReaderParams; + lineReaderParams.mSkipCommentLines = !options.CommentStyle.empty(); + if (options.CommentStyle.empty()) { + lineReaderParams.mCommentPrefix = '\0'; + lineReaderParams.mSkipCommentLines = false; + } else { + lineReaderParams.mCommentPrefix = options.CommentStyle[0][0]; + lineReaderParams.mSkipCommentLines = true; + } + lineReaderParams.mSkipEmptyLines = options.EmptyLineRule == "skip"; + + rapidcsv::LabelParams labelParams(-1, -1); + try { + std::stringstream stream = readLinesFromFile(filename, options); + rapidcsv::Document doc( + stream, labelParams, separatorParams, converterParams, lineReaderParams); stringVector columnNames = doc.GetColumnNames(); + stringVector rowNames = 
doc.GetRowNames(); size_t nbRows = doc.GetRowCount(); size_t nbColumns = doc.GetColumnCount(); - size_t nbElements = nbRows * nbColumns; + size_t nbElements = nbRows + * (options.VariableNames.size() > nbColumns ? options.VariableNames.size() : nbColumns); ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbElements); - Dimensions dims(nbRows, nbColumns); + Dimensions dims(nbRows, + options.VariableNames.size() > nbColumns ? options.VariableNames.size() : nbColumns); ArrayOf result = ArrayOf(NLS_CELL_ARRAY, dims, elements); - /* - for (size_t i = 0; i < nbColumns; ++i) { - for (size_t j = 0; j < nbRows; ++j) { - size_t index = i * nbRows + j; // Corrected index calculation - elements[index] = doc.GetCell(i, j, ConvertToArrayOf); + ompIndexType nbAvailableElements = (ompIndexType)(nbColumns * nbRows); + + if (options.TextType == "char") { +#if WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType index = 0; index < nbAvailableElements; ++index) { + size_t i = index / nbRows; + size_t j = index % nbRows; + elements[index] = doc.GetCell(i, j, ConvertToArrayOfCharacter); + } + } else { +#if WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType index = 0; index < nbAvailableElements; ++index) { + size_t i = index / nbRows; + size_t j = index % nbRows; + elements[index] = doc.GetCell(i, j, ConvertToArrayOfString); } } - */ - for (size_t index = 0; index < nbColumns * nbRows; ++index) { - size_t i = index / nbRows; // Calculate the column index - size_t j = index % nbRows; // Calculate the row index - elements[index] = doc.GetCell(i, j, ConvertToArrayOf); - } return result; } catch (const std::exception& e) { - errorMessage = utf8_to_wstring(e.what()); - return ArrayOf(); // Return an empty ArrayOf on error + errorMessage = e.what(); } + return {}; } //============================================================================= } // namespace Nelson diff --git a/modules/spreadsheet/src/cpp/ReadTable.cpp 
b/modules/spreadsheet/src/cpp/ReadTable.cpp new file mode 100644 index 0000000000..57c63e69e3 --- /dev/null +++ b/modules/spreadsheet/src/cpp/ReadTable.cpp @@ -0,0 +1,326 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of Nelson. +//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#include +#include +#include +#include +#include "ReadTable.hpp" +#include "characters_encoding.hpp" +#include "nlsBuildConfig.h" +#if WITH_OPENMP +#include +#endif +//============================================================================= +namespace Nelson { +//============================================================================= +struct DoubleDoubleComplexString +{ + double asDouble; + std::complex asDoubleComplex; + std::string asString; + NelsonType nelsonType; +}; +//============================================================================= +struct ComplexPatterns +{ + // Regex for special values (Inf, NaN) + static inline const std::string special_re = R"((?:[Nn][Aa][Nn]|[Ii][Nn][Ff]))"; + + // Full regex patterns combining numbers and special values + static inline const std::regex full_complex { R"(([+-]?(?:\d*\.?\d+|)" + special_re + + R"())([+-](?:\d*\.?\d+|)" + special_re + R"())[ij])", + std::regex::optimize }; + static inline const std::regex real_only { + R"(([+-]?(?:\d*\.?\d+|)" + special_re + R"())(?![ij]))", std::regex::optimize + }; + static inline const std::regex imag_only { R"(([+-]?(?:\d*\.?\d+|)" + special_re + R"())[ij])", + std::regex::optimize }; +}; +//============================================================================= +static bool 
+ConvertToDouble(const std::string& pStr, double& pVal) +{ + fast_float::parse_options options { fast_float::chars_format::fortran }; + + const char* first = pStr.data(); + const char* last = pStr.data() + pStr.size(); + if (!pStr.empty() && pStr.front() == '+') { + first += 1; + } + + auto answer = fast_float::from_chars_advanced(first, last, pVal, options); + + if (answer.ec != std::errc() || answer.ptr != last) { + return false; + } + return true; +} +//============================================================================= +static bool +ConvertToDoubleComplex(const std::string& str, std::complex& pVal) +{ + char lastChar = '\0'; + if (!str.empty()) { + lastChar = str.back(); + } + if ((lastChar != '\0') && lastChar == 'I' || lastChar == 'J' || lastChar == 'i' + || lastChar == 'j') { + std::smatch matches; + if (std::regex_match(str, matches, ComplexPatterns::full_complex)) { + bool isNegativeReal = false; + bool isNegativeImag = false; + std::string realStr = matches[1].str(); + std::string imagStr = matches[2].str(); + if (imagStr.front() == L'+' || imagStr.front() == L'-') { + if (imagStr.front() == L'-') { + isNegativeImag = true; + } + imagStr.erase(0, 1); + } + + double realPart, imagPart; + + bool res = ConvertToDouble(realStr, realPart); + if (!res) { + return res; + } + res = ConvertToDouble(imagStr, imagPart); + if (!res) { + return res; + } + if (isNegativeReal) { + realPart = -realPart; + } + if (isNegativeImag) { + imagPart = -imagPart; + } + pVal = { realPart, imagPart }; + return true; + } else if (std::regex_match(str, matches, ComplexPatterns::imag_only)) { + bool isNegativeImag = false; + std::string imagStr = matches[1].str(); + if (imagStr.front() == L'+' || imagStr.front() == L'-') { + if (imagStr.front() == L'-') { + isNegativeImag = true; + } + imagStr.erase(0, 1); + } + + double imagPart; + bool res = ConvertToDouble(imagStr, imagPart); + if (!res) { + return false; + } + if (isNegativeImag) { + imagPart = -imagPart; + } + pVal = { 
0., imagPart }; + return true; + } else { + return false; + } + } + return false; +} +//============================================================================= +static void +ConvertToArrayOfCharacter(const std::string& pStr, struct DoubleDoubleComplexString& structValue) +{ + double value; + structValue.asString = pStr; + if (ConvertToDouble(pStr, value)) { + structValue.asDouble = value; + structValue.nelsonType = NLS_DOUBLE; + structValue.asDoubleComplex = std::complex(value, 0); + return; + } + std::complex cvalue; + if (ConvertToDoubleComplex(pStr, cvalue)) { + structValue.asDouble = cvalue.real(); + structValue.nelsonType = NLS_DCOMPLEX; + structValue.asDoubleComplex = cvalue; + return; + } + structValue.asDouble = std::nan("NaN"); + structValue.asDoubleComplex = std::complex(std::nan("NaN"), std::nan("NaN")); + structValue.nelsonType = NLS_CHAR; +} +//============================================================================= +static std::stringstream +readLinesFromFile(const std::wstring& filename, const detectImportOptions& options) +{ + std::ifstream file; +#ifdef _MSC_VER + file.open(filename); +#else + file.open(wstring_to_utf8(filename)); +#endif + + std::string line; + int currentLine = 1; + std::stringstream normalizedStream; + + while (currentLine < (int)options.DataLines[0] && std::getline(file, line)) { + currentLine++; + } + + auto normalizeLineEnding = [](const std::string& inputLine) -> std::string { + std::string normalized = inputLine; + normalized.erase(std::remove(normalized.begin(), normalized.end(), '\r'), normalized.end()); + return normalized; + }; + + if (std::isinf(options.DataLines[1])) { + while (std::getline(file, line)) { + normalizedStream << normalizeLineEnding(line) << '\n'; + currentLine++; + } + } else { + while (currentLine <= (int)options.DataLines[1] && std::getline(file, line)) { + normalizedStream << normalizeLineEnding(line) << '\n'; + currentLine++; + } + } + return normalizedStream; +} 
+//============================================================================= +ArrayOf +ReadTable( + const std::wstring& filename, const detectImportOptions& options, std::string& errorMessage) +{ + char separator = options.Delimiter[0][0]; + bool pHasCR = false; + rapidcsv::SeparatorParams separatorParams + = rapidcsv::SeparatorParams(separator, true, pHasCR, false, false); + + rapidcsv::ConverterParams converterParams; + converterParams.mHasDefaultConverter = false; + converterParams.mNumericLocale = false; + + rapidcsv::LineReaderParams lineReaderParams; + lineReaderParams.mSkipCommentLines = !options.CommentStyle.empty(); + if (options.CommentStyle.empty()) { + lineReaderParams.mCommentPrefix = '\0'; + lineReaderParams.mSkipCommentLines = false; + } else { + lineReaderParams.mCommentPrefix = options.CommentStyle[0][0]; + lineReaderParams.mSkipCommentLines = true; + } + lineReaderParams.mSkipEmptyLines = options.EmptyLineRule == "skip"; + + rapidcsv::LabelParams labelParams(-1, -1); + try { + std::stringstream stream = readLinesFromFile(filename, options); + rapidcsv::Document doc( + stream, labelParams, separatorParams, converterParams, lineReaderParams); + stringVector columnNames = options.VariableNames; + stringVector rowNames = doc.GetRowNames(); + size_t nbRows = doc.GetRowCount(); + size_t nbColumns = doc.GetColumnCount(); + + ArrayOfVector columnValues; + columnValues.resize(nbColumns); + for (ompIndexType c = 0; c < (ompIndexType)columnValues.size(); c++) { + std::vector structValues; + structValues.resize(nbRows); + +#if WITH_OPENMP + int nbThreads = omp_get_max_threads(); +#else + int nbThreads = 1; + +#endif + std::unordered_map countMap; + std::vector> localCountMaps(nbThreads); + +#if WITH_OPENMP +#pragma omp parallel +#endif + { +#if WITH_OPENMP + int threadId = omp_get_thread_num(); +#else + int threadId = 1; +#endif + std::unordered_map& localMap = localCountMaps[threadId]; +#if WITH_OPENMP +#pragma omp for +#endif + for (ompIndexType r = 0; 
r < (ompIndexType)nbRows; r++) { + structValues[r] + = doc.GetCell(c, r, ConvertToArrayOfCharacter); + localMap[structValues[r].nelsonType]++; + } + } + // Merge results from all threads + for (const auto& localMap : localCountMaps) { + for (const auto& entry : localMap) { + countMap[entry.first] += entry.second; + } + } + + int maxCount = 0; + NelsonType mostFrequentType = NLS_CELL_ARRAY; + for (const auto& pair : countMap) { + if (pair.second > maxCount) { + maxCount = pair.second; + mostFrequentType = pair.first; + } + } + + if (mostFrequentType == NLS_DOUBLE && countMap[NLS_DCOMPLEX] > 0) { + mostFrequentType = NLS_DCOMPLEX; + } + + Dimensions dims(nbRows, 1); + switch (mostFrequentType) { + case NLS_DOUBLE: { + double* ptr = (double*)ArrayOf::allocateArrayOf(NLS_DOUBLE, nbRows); +#if WITH_OPENMP +#pragma omp for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + ptr[r] = structValues[r].asDouble; + } + columnValues[c] = ArrayOf(NLS_DOUBLE, dims, ptr); + } break; + case NLS_DCOMPLEX: { + std::complex* ptr + = (std::complex*)ArrayOf::allocateArrayOf(NLS_DCOMPLEX, nbRows); +#if WITH_OPENMP +#pragma omp for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + ptr[r] = structValues[r].asDoubleComplex; + } + columnValues[c] = ArrayOf(NLS_DCOMPLEX, dims, ptr); + } break; + case NLS_CELL_ARRAY: + default: { + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbRows); +#if WITH_OPENMP +#pragma omp for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + elements[r] = ArrayOf::characterArrayConstructor(structValues[r].asString); + } + columnValues[c] = ArrayOf(NLS_CELL_ARRAY, dims, elements); + } break; + } + } + return ArrayOf::tableConstructor(columnValues, columnNames, rowNames); + } catch (const std::exception& e) { + errorMessage = e.what(); + } + return {}; +} +//============================================================================= +} // namespace Nelson + 
//============================================================================= diff --git a/modules/spreadsheet/src/cpp/rapidcsv.h b/modules/spreadsheet/src/cpp/rapidcsv.h index 670576f1c0..484f5d52aa 100644 --- a/modules/spreadsheet/src/cpp/rapidcsv.h +++ b/modules/spreadsheet/src/cpp/rapidcsv.h @@ -1062,7 +1062,11 @@ class Document const size_t dataRowIdx = GetDataRowIndex(pRowIdx); T val; - pToVal(mData.at(dataRowIdx).at(dataColumnIdx), val); + if (dataRowIdx < mData.size() && dataColumnIdx < mData.at(dataRowIdx).size()) { + pToVal(mData.at(dataRowIdx).at(dataColumnIdx), val); + } else { + pToVal("", val); + } return val; } @@ -1544,8 +1548,12 @@ class Document if (!hasIJLastChar) { hasIJLastChar = LastCharIsIorJ(value); } - row.push_back(value); + if (value.empty()) { + row.push_back(""); + } else { + row.push_back(value); + } if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() && (row.at(0)[0] == mLineReaderParams.mCommentPrefix)) { // skip comment line diff --git a/modules/spreadsheet/src/include/DetectImportOptions.hpp b/modules/spreadsheet/src/include/DetectImportOptions.hpp new file mode 100644 index 0000000000..a1062d0ac5 --- /dev/null +++ b/modules/spreadsheet/src/include/DetectImportOptions.hpp @@ -0,0 +1,40 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include +#include "nlsSpreadsheet_exports.h" +#include "Types.hpp" +//============================================================================= +namespace Nelson { +//============================================================================= +class NLSSPREADSHEET_IMPEXP detectImportOptions +{ +public: + std::vector Delimiter; + std::vector LineEnding; + std::vector CommentStyle; + std::string EmptyLineRule; + std::string TextType; + int VariableNamesLine; + int RowNamesColumn; + std::vector VariableNames; + std::vector DataLines; +}; +//============================================================================= +NLSSPREADSHEET_IMPEXP void +initializeDetectImportOptions(detectImportOptions& options); +//============================================================================= +NLSSPREADSHEET_IMPEXP void +analyzeFileFormatImportOptions(std::wstring filename, size_t sampleSize, + detectImportOptions& options, std::string& errorMessage); +//============================================================================= +} +//============================================================================= diff --git a/modules/spreadsheet/src/include/ReadCell.hpp b/modules/spreadsheet/src/include/ReadCell.hpp index da2c28942b..f035a025a0 100644 --- a/modules/spreadsheet/src/include/ReadCell.hpp +++ b/modules/spreadsheet/src/include/ReadCell.hpp @@ -11,9 +11,11 @@ //============================================================================= #include "ArrayOf.hpp" #include "nlsSpreadsheet_exports.h" +#include "DetectImportOptions.hpp" //============================================================================= 
namespace Nelson { NLSSPREADSHEET_IMPEXP ArrayOf -ReadCell(const std::wstring& filename, std::wstring& errorMessage); +ReadCell( + const std::wstring& filename, const detectImportOptions& options, std::string& errorMessage); }; //============================================================================= diff --git a/modules/spreadsheet/src/include/ReadTable.hpp b/modules/spreadsheet/src/include/ReadTable.hpp new file mode 100644 index 0000000000..37b534c348 --- /dev/null +++ b/modules/spreadsheet/src/include/ReadTable.hpp @@ -0,0 +1,21 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. +//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include "ArrayOf.hpp" +#include "nlsSpreadsheet_exports.h" +#include "DetectImportOptions.hpp" +//============================================================================= +namespace Nelson { +NLSSPREADSHEET_IMPEXP ArrayOf +ReadTable( + const std::wstring& filename, const detectImportOptions& options, std::string& errorMessage); +}; +//============================================================================= diff --git a/modules/spreadsheet/tests/dlmread_comments.csv b/modules/spreadsheet/tests/dlmread_comments.csv index 2ff1402280..edf7f3c2fa 100644 --- a/modules/spreadsheet/tests/dlmread_comments.csv +++ b/modules/spreadsheet/tests/dlmread_comments.csv @@ -1,3 +1,4 @@ + # tata 1,0,0,0,0 # titi diff --git a/modules/spreadsheet/tests/readcell_1.csv b/modules/spreadsheet/tests/readcell_1.csv new file mode 100644 index 
0000000000..c958cd1b08 --- /dev/null +++ b/modules/spreadsheet/tests/readcell_1.csv @@ -0,0 +1,6 @@ +Row,Age,Height,Weight,BloodPressure_1,BloodPressure_2 +Smith,38,71,176,124,93 +Johnson,43,69,163,109,77 +Williams,38,64,131,125,83 +Jones,40,67,133,117,75 +Brown,49,64,119,122,80 diff --git a/modules/spreadsheet/tests/readcell_2.csv b/modules/spreadsheet/tests/readcell_2.csv new file mode 100644 index 0000000000..dc67cc293b --- /dev/null +++ b/modules/spreadsheet/tests/readcell_2.csv @@ -0,0 +1,10 @@ + +Row,Age,Height,Weight,BloodPressure_1,BloodPressure_2 + +# test + +Smith,38,71,176,124,93 +Johnson,43,69,163,109,77 +Williams,38,64,131,125,83 +Jones,40,67,133,117,75 +Brown,49,64,119,122,80 diff --git a/modules/spreadsheet/tests/readcell_3.csv b/modules/spreadsheet/tests/readcell_3.csv new file mode 100644 index 0000000000..50266cda10 --- /dev/null +++ b/modules/spreadsheet/tests/readcell_3.csv @@ -0,0 +1,5 @@ +Smith,38,71,176,124,93 +Johnson,43,69,163,109,77 +Williams,38,64,131,125,83 +Jones,40,67,133,117,75 +Brown,49,64,119,122,80 \ No newline at end of file diff --git a/modules/spreadsheet/tests/readcell_4.csv b/modules/spreadsheet/tests/readcell_4.csv new file mode 100644 index 0000000000..492f5380e7 --- /dev/null +++ b/modules/spreadsheet/tests/readcell_4.csv @@ -0,0 +1,3 @@ +1,2,3 +-Inf,world,NaN +13-Oct-2024 11:25:56,3, diff --git a/modules/spreadsheet/tests/test_detectImportOptions.m b/modules/spreadsheet/tests/test_detectImportOptions.m new file mode 100644 index 0000000000..9e25a3cba5 --- /dev/null +++ b/modules/spreadsheet/tests/test_detectImportOptions.m @@ -0,0 +1,21 @@ +%============================================================================= +% Copyright (c) 2016-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. 
+%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readcell_1.csv']; +options = detectImportOptions(csv_filename); +assert_isequal(options.DataLines, [2 Inf]); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readcell_2.csv']; +options = detectImportOptions(csv_filename); +assert_isequal(options.DataLines, [6 Inf]); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readcell_3.csv']; +options = detectImportOptions(csv_filename); +assert_isequal(options.DataLines, [1 Inf]); +%============================================================================= diff --git a/modules/spreadsheet/tests/test_readcell.m b/modules/spreadsheet/tests/test_readcell.m new file mode 100644 index 0000000000..31d37ae7b1 --- /dev/null +++ b/modules/spreadsheet/tests/test_readcell.m @@ -0,0 +1,69 @@ +%============================================================================= +% Copyright (c) 2016-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. 
+%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readcell_1.csv']; +R = readcell(csv_filename); +REF = {'Row', 'Age', 'Height', 'Weight', 'BloodPressure_1', 'BloodPressure_2'; +'Smith', [ 38], [ 71], [ 176], [ 124], [ 93]; +'Johnson', [ 43], [ 69], [ 163], [ 109], [ 77]; +'Williams', [ 38], [ 64], [ 131], [ 125], [ 83]; +'Jones', [ 40], [ 67], [ 133], [ 117], [ 75] +'Brown', [ 49], [ 64], [ 119], [ 122], [ 80]}; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readcell_2.csv']; +R = readcell(csv_filename); +REF = {'Row' , 'Age' , 'Height' , 'Weight' , 'BloodPressure_1', 'BloodPressure_2'; +'# test' , string(NaN), string(NaN), string(NaN), string(NaN), string(NaN); +'Smith' , [ 38], [ 71], [ 176], [ 124], [ 93]; +'Johnson' , [ 43], [ 69], [ 163], [ 109], [ 77]; +'Williams', [ 38], [ 64], [ 131], [ 125], [ 83]; +'Jones' , [ 40], [ 67], [ 133], [ 117], [ 75]; +'Brown' , [ 49], [ 64], [ 119], [ 122], [ 80]}; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readcell_3.csv']; +R = readcell(csv_filename); +REF = {'Smith' , [ 38], [ 71], [ 176], [ 124], [ 93]; +'Johnson' , [ 43], [ 69], [ 163], [ 109], [ 77]; +'Williams', [ 38], [ 64], [ 131], [ 125], [ 83]; +'Jones' , [ 40], [ 67], [ 133], [ 117], [ 75]; +'Brown' , [ 49], [ 64], [ 119], [ 122], [ 80]}; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readcell_1.csv']; +options = detectImportOptions(csv_filename); +R = readcell(csv_filename, options); +REF 
= {'Smith' , [ 38], [ 71], [ 176], [ 124], [ 93]; +'Johnson' , [ 43], [ 69], [ 163], [ 109], [ 77]; +'Williams', [ 38], [ 64], [ 131], [ 125], [ 83]; +'Jones' , [ 40], [ 67], [ 133], [ 117], [ 75]; +'Brown' , [ 49], [ 64], [ 119], [ 122], [ 80]}; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readcell_2.csv']; +options = detectImportOptions(csv_filename); +R = readcell(csv_filename, options); +REF = {'Smith' , [ 38], [ 71], [ 176], [ 124], [ 93]; +'Johnson' , [ 43], [ 69], [ 163], [ 109], [ 77]; +'Williams', [ 38], [ 64], [ 131], [ 125], [ 83]; +'Jones' , [ 40], [ 67], [ 133], [ 117], [ 75]; +'Brown' , [ 49], [ 64], [ 119], [ 122], [ 80]}; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readcell_3.csv']; +options = detectImportOptions(csv_filename); +R = readcell(csv_filename, options); +REF = {'Smith' , [ 38], [ 71], [ 176], [ 124], [ 93]; +'Johnson' , [ 43], [ 69], [ 163], [ 109], [ 77]; +'Williams', [ 38], [ 64], [ 131], [ 125], [ 83]; +'Jones' , [ 40], [ 67], [ 133], [ 117], [ 75]; +'Brown' , [ 49], [ 64], [ 119], [ 122], [ 80]}; +assert_isequal(R, REF); +%============================================================================= diff --git a/modules/spreadsheet/tests/test_readtable.m b/modules/spreadsheet/tests/test_readtable.m new file mode 100644 index 0000000000..0046e2e0a0 --- /dev/null +++ b/modules/spreadsheet/tests/test_readtable.m @@ -0,0 +1,56 @@ +%============================================================================= +% Copyright (c) 2016-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. 
+%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +LastName = {'Sanchez';'Johnson';'Li';'Diaz';'Brown'}; +Age = [38;43;38;40;49]; +Smoker = [1;0;1;0;1]; +Height = [71;69;64;67;64]; +Weight = [176;163;131;133;119]; +BloodPressure_1 = [124; 109; 125; 117; 122]; +BloodPressure_2 = [93;77;83;75;80]; +REF = table(LastName, Age, Smoker, Height, Weight, BloodPressure_1, BloodPressure_2); +csv_filename = [modulepath('spreadsheet'), '/tests/test_readtable_1.csv']; +R = readtable(csv_filename); +assert_isequal(R, REF) +%============================================================================= +LastName = {'Sanchez';'Johnson';'Li';'Diaz';'Brown'}; +Age = [38;43;38;40;49]; +Smoker = {'1';'''0''';'''1''';'''0''';'1'}; +Height = [71;69;64;67;64]; +Weight = [176;163;131;133;119]; +BloodPressure_1 = [124; 109; 125; 117; 122]; +BloodPressure_2 = [93;77;83;75;80]; +REF = table(LastName, Age, Smoker, Height, Weight, BloodPressure_1, BloodPressure_2); +csv_filename = [modulepath('spreadsheet'), '/tests/test_readtable_2.csv']; +R = readtable(csv_filename); +assert_isequal(R, REF) +%============================================================================= +LastName = {'Sanchez';'Johnson';'Li';'Diaz';'Brown'}; +Age = [38;43;38;40;49]; +Smoker = [1;NaN;NaN;0;1]; +Height = [71;69;64;67;64]; +Weight = [176;163;131;133;119]; +BloodPressure_1 = [124; 109; 125; 117; 122]; +BloodPressure_2 = [93;77;83;75;80]; +REF = table(LastName, Age, Smoker, Height, Weight, BloodPressure_1, BloodPressure_2); +csv_filename = [modulepath('spreadsheet'), '/tests/test_readtable_3.csv']; +R = readtable(csv_filename); +assert_isequal(R, REF) +%============================================================================= + + + + + + + + + + + diff --git a/modules/spreadsheet/tests/test_readtable_1.csv 
b/modules/spreadsheet/tests/test_readtable_1.csv new file mode 100644 index 0000000000..cc52dafe64 --- /dev/null +++ b/modules/spreadsheet/tests/test_readtable_1.csv @@ -0,0 +1,6 @@ +LastName,Age,Smoker,Height,Weight,BloodPressure_1,BloodPressure_2 +Sanchez,38,1,71,176,124,93 +Johnson,43,0,69,163,109,77 +Li,38,1,64,131,125,83 +Diaz,40,0,67,133,117,75 +Brown,49,1,64,119,122,80 diff --git a/modules/spreadsheet/tests/test_readtable_2.csv b/modules/spreadsheet/tests/test_readtable_2.csv new file mode 100644 index 0000000000..991f921481 --- /dev/null +++ b/modules/spreadsheet/tests/test_readtable_2.csv @@ -0,0 +1,6 @@ +LastName,Age,Smoker,Height,Weight,BloodPressure_1,BloodPressure_2 +Sanchez,38,1,71,176,124,93 +Johnson,43,'0',69,163,109,77 +Li,38,'1',64,131,125,83 +Diaz,40,'0',67,133,117,75 +Brown,49,1,64,119,122,80 diff --git a/modules/spreadsheet/tests/test_readtable_3.csv b/modules/spreadsheet/tests/test_readtable_3.csv new file mode 100644 index 0000000000..9f717a2fe0 --- /dev/null +++ b/modules/spreadsheet/tests/test_readtable_3.csv @@ -0,0 +1,6 @@ +LastName,Age,Smoker,Height,Weight,BloodPressure_1,BloodPressure_2 +Sanchez,38,1,71,176,124,93 +Johnson,43,'0',69,163,109,77 +Li,38,'1',64,131,125,83 +Diaz,40,0,67,133,117,75 +Brown,49,1,64,119,122,80 diff --git a/modules/table/functions/@table/properties.m b/modules/table/functions/@table/properties.m index 216cfe951d..1b6130440a 100644 --- a/modules/table/functions/@table/properties.m +++ b/modules/table/functions/@table/properties.m @@ -10,7 +10,12 @@ function varargout = properties(T) st = struct(T); props = fieldnames(st.data); - props = [props; {'Properties'; 'Row'; 'Variable'}]; + if any(contains(props, 'Row')) + rowPropertyName = 'Row_1'; + else + rowPropertyName = 'Row'; + end + props = [props; {'Properties'; rowPropertyName; 'Variable'}]; if (nargout == 0) currentFormat = format(); if strcmp(currentFormat.LineSpacing, 'loose') diff --git a/modules/table/functions/@table/subsref.m 
b/modules/table/functions/@table/subsref.m index 66d2763a51..71d8045c5e 100644 --- a/modules/table/functions/@table/subsref.m +++ b/modules/table/functions/@table/subsref.m @@ -28,7 +28,12 @@ %============================================================================= function R = dotSubsref(T, sref) st = struct(T); - if ischar(sref(1).subs) && strcmp(sref(1).subs, 'Row') + if any(contains(st.Properties.VariableNames, 'Row')) + rowPropertyName = 'Row_1'; + else + rowPropertyName = 'Row'; + end + if ischar(sref(1).subs) && strcmp(sref(1).subs, rowPropertyName) R = st.Properties.RowNames; if isrow(R) R = R'; diff --git a/modules/types/src/c/nlsTypes.vcxproj b/modules/types/src/c/nlsTypes.vcxproj index 58f4c60e27..91aeb6c4d0 100644 --- a/modules/types/src/c/nlsTypes.vcxproj +++ b/modules/types/src/c/nlsTypes.vcxproj @@ -254,6 +254,7 @@ + diff --git a/modules/types/src/c/nlsTypes.vcxproj.filters b/modules/types/src/c/nlsTypes.vcxproj.filters index 11ad9d3e74..d215218323 100644 --- a/modules/types/src/c/nlsTypes.vcxproj.filters +++ b/modules/types/src/c/nlsTypes.vcxproj.filters @@ -194,6 +194,9 @@ Source Files + + Source Files + diff --git a/modules/types/src/cpp/ArrayOf_CharacterType.cpp b/modules/types/src/cpp/ArrayOf_CharacterType.cpp index 38c448cfac..b3f89bcdd6 100644 --- a/modules/types/src/cpp/ArrayOf_CharacterType.cpp +++ b/modules/types/src/cpp/ArrayOf_CharacterType.cpp @@ -379,7 +379,7 @@ wstringVector ArrayOf::getContentAsWideStringRowVector() const { wstringVector res; - if (!isCell() || !isStringArray()) { + if (!(isCell() || isStringArray())) { Error(_W("A cell or string array expected.")); } if (isRowVector()) { @@ -426,7 +426,7 @@ wstringVector ArrayOf::getContentAsWideStringColumnVector() const { wstringVector res; - if (!isCell() && !isStringArray()) { + if (!(isCell() || isStringArray())) { Error(_W("A cell or string array expected.")); } if (!isEmpty()) { diff --git a/modules/types/src/cpp/ArrayOf_DoubleType.cpp 
b/modules/types/src/cpp/ArrayOf_DoubleType.cpp index a2a983a911..29f160da02 100644 --- a/modules/types/src/cpp/ArrayOf_DoubleType.cpp +++ b/modules/types/src/cpp/ArrayOf_DoubleType.cpp @@ -63,6 +63,16 @@ ArrayOf::doubleConstructor(double aval) return ArrayOf(NLS_DOUBLE, Dimensions(1, 1), data); } //============================================================================= +ArrayOf +ArrayOf::doubleVectorConstructor(std::vector values) +{ + double* data + = static_cast(allocateArrayOf(NLS_DOUBLE, values.size(), stringVector(), true)); + std::copy(values.begin(), values.end(), data); + return ArrayOf(NLS_DOUBLE, Dimensions(1, values.size()), data); +} +//============================================================================= + ArrayOf ArrayOf::doubleVectorConstructor(indexType len) { @@ -86,6 +96,26 @@ ArrayOf::dcomplexConstructor(double aval, double bval) return ArrayOf(NLS_DCOMPLEX, Dimensions(1, 1), data); } //============================================================================= +std::vector +ArrayOf::getContentAsDoubleVector() const +{ + if (isComplex() || isReferenceType() || isCharacterArray() || isSparse()) { + Error(_W("Expected a real value.")); + } + size_t elementCount = getElementCount(); + std::vector values(elementCount); + if (getDataClass() != NLS_DOUBLE) { + ArrayOf P(*this); + P.promoteType(NLS_DOUBLE); + const double* data = static_cast(P.getDataPointer()); + std::copy(data, data + elementCount, values.begin()); + } else { + const double* data = static_cast(dp->getData()); + std::copy(data, data + elementCount, values.begin()); + } + return values; +} +//============================================================================= double ArrayOf::getContentAsDoubleScalar(bool arrayAsScalar, bool checkIsIntegerValue) const { diff --git a/modules/types/src/cpp/ArrayOf_StringType.cpp b/modules/types/src/cpp/ArrayOf_StringType.cpp index 30c8d9cd4e..2ab7e5b2d8 100644 --- a/modules/types/src/cpp/ArrayOf_StringType.cpp +++ 
b/modules/types/src/cpp/ArrayOf_StringType.cpp @@ -74,6 +74,22 @@ ArrayOf::stringArrayConstructor(const std::wstring& value) } //============================================================================= ArrayOf +ArrayOf::stringArrayConstructorAllMissing(Dimensions& dims) +{ + ArrayOf* elements = nullptr; + size_t nbElements = dims.getElementCount(); + try { + elements = new ArrayOf[nbElements]; + } catch (const std::bad_alloc&) { + Error(ERROR_MEMORY_ALLOCATION); + } + for (size_t k = 0; k < nbElements; k++) { + elements[k] = ArrayOf::doubleConstructor(std::nan("NaN")); + } + return ArrayOf(NLS_STRING_ARRAY, dims, elements); +} +//============================================================================= +ArrayOf ArrayOf::stringArrayConstructor(const stringVector& values, Dimensions& dims) { ArrayOf* elements = nullptr; diff --git a/modules/types/src/cpp/ArrayOf_TableType.cpp b/modules/types/src/cpp/ArrayOf_TableType.cpp new file mode 100644 index 0000000000..55d35a7373 --- /dev/null +++ b/modules/types/src/cpp/ArrayOf_TableType.cpp @@ -0,0 +1,65 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#include "nlsBuildConfig.h" +#include "ArrayOf.hpp" +#include "Data.hpp" +#include "Error.hpp" +#include "i18n.hpp" +//============================================================================= +namespace Nelson { +//============================================================================= +#define NLS_TABLE_TYPE "table" +#define NLS_TABLE_VERSION 1 +//============================================================================= +bool +ArrayOf::isTable() const +{ + if (dp == nullptr) { + return false; + } + return (dp->dataClass == NLS_CLASS_ARRAY && dp->classTypeName == "table"); // both required +} +//============================================================================= +ArrayOf +ArrayOf::tableConstructor(const ArrayOfVector& columnValues, const stringVector& variableNames, + const stringVector& rowNames) +{ + Dimensions dims(1, 1); + stringVector fieldnames = { "data", "Version", "Properties" }; + + ArrayOf* table = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CLASS_ARRAY, 1, fieldnames); + ArrayOf tableArrayOf = ArrayOf(NLS_CLASS_ARRAY, dims, table, false, fieldnames); + tableArrayOf.setClassType("table"); + + ArrayOf data; + if (columnValues.empty()) { + data = ArrayOf::emptyStructWithoutFields(); + } else { + data = ArrayOf::structScalarConstructor(variableNames, columnValues); + } + tableArrayOf.setField(fieldnames[0], data); + + ArrayOf version = ArrayOf::doubleConstructor(NLS_TABLE_VERSION); + tableArrayOf.setField(fieldnames[1], version); + + ArrayOf properties; + stringVector propertyNames = { "VariableNames", "RowNames" }; + ArrayOfVector propertyValues; + propertyValues.push_back(ArrayOf::toCellArrayOfCharacterRowVectors(variableNames)); +
propertyValues.push_back(ArrayOf::toCellArrayOfCharacterRowVectors(rowNames)); + properties = ArrayOf::structScalarConstructor(propertyNames, propertyValues); + + tableArrayOf.setField(fieldnames[2], properties); + + return tableArrayOf; +} +//============================================================================= +} // namespace Nelson +//============================================================================= diff --git a/modules/types/src/include/ArrayOf.hpp b/modules/types/src/include/ArrayOf.hpp index 0510991ec1..adf0801460 100644 --- a/modules/types/src/include/ArrayOf.hpp +++ b/modules/types/src/include/ArrayOf.hpp @@ -715,6 +715,13 @@ class NLSTYPES_IMPEXP ArrayOf static ArrayOf doubleVectorConstructor(indexType len); + /** + * Double vector constructor - Construct an NLS_DOUBLE object + * that is a (row) vector holding a copy of the given values. + */ + static ArrayOf + doubleVectorConstructor(std::vector values); + /** * Single vector constructor - Construct an NLS_SINGLE object * that is a (row) vector with the given length. @@ -1051,6 +1058,15 @@ class NLSTYPES_IMPEXP ArrayOf [[nodiscard]] double getContentAsDoubleScalar(bool arrayAsScalar = false, bool checkIsIntegerValue = false) const; + /** + * Get our contents as a double vector. + * Throws an exception if the contents cannot meaningfully + * be converted to double precision values. + */ + + [[nodiscard]] std::vector + getContentAsDoubleVector() const; + /** * Get our contents as an unsigned integer scalar 64. * Throws an exception if we are not a scalar integer type. @@ -1361,6 +1377,14 @@ class NLSTYPES_IMPEXP ArrayOf static ArrayOf stringArrayConstructor(const std::wstring& value); + /** + * @brief Constructs a string array with all elements initialized to Missing. + * @param dims The dimensions of the string array. + * @return An ArrayOf object representing the string array.
+ */ + static ArrayOf + stringArrayConstructorAllMissing(Dimensions& dims); + /** * @brief Constructs a string array from a vector of strings and dimensions. * @param values The vector of strings to be converted to a string array. @@ -1402,6 +1426,39 @@ class NLSTYPES_IMPEXP ArrayOf */ [[nodiscard]] go_handle getContentAsGraphicsObjectScalar() const; + + //========================================================================= + // Table class object + //========================================================================= + /* + * Checks whether this object is a table class object. + */ + [[nodiscard]] bool + isTable() const; + + /** + * @brief Constructs a table class object. + * + * This function creates and returns a table represented as an `ArrayOf` object, + * based on the provided column values, variable (column) names, and row names. + * + * @param columnValues A collection of vectors, where each vector represents the + * values in a single column of the table. All columns should + * have the same number of rows for consistency. + * @param variableNames (optional: empty) A vector of strings representing the names of the + * columns in the table. The size of this vector should match the number of columns in + * `columnValues`. + * @param rowNames (optional: empty) A vector of strings representing the names of the rows in + * the table. The size of this vector should match the number of rows in each column of + * `columnValues`. + * + * @return An `ArrayOf` object representing the constructed table, with the + * specified column values, variable names, and row names.
+ */ + + static ArrayOf + tableConstructor(const ArrayOfVector& columnValues, const stringVector& variableNames, + const stringVector& rowNames); }; //========================================================================= bool From 3aa8c700788bbf5652007ac886583df102af45ed Mon Sep 17 00:00:00 2001 From: Allan CORNET Date: Sat, 7 Dec 2024 10:11:11 +0100 Subject: [PATCH 2/4] Fix #1292 - Enhance String Handling and Table Display - Implement optimized join() function for string concatenation - Add strjust() implementation with performance improvements - Refactor table display functionality --- CHANGELOG.md | 6 + ...unningSrv.xml => actxGetRunningServer.xml} | 6 +- modules/core/src/cpp/Banner.cpp | 10 +- .../help/en_US/xml/cellfun.xml | 4 +- .../en_US/xml/{num2cellxml => num2cell.xml} | 12 +- .../functions/formattedDisplayText.m | 18 +- ...{loadcompiler.xml => loadcompilerconf.xml} | 0 modules/interpreter/help/en_US/xml/switch.xml | 2 +- .../memory_manager/help/en_US/xml/global.xml | 2 +- .../cpp/detectImportOptionsBuiltin.cpp | 2 +- .../builtin/cpp/readcellBuiltin.cpp | 7 +- .../builtin/cpp/readtableBuiltin.cpp | 7 +- modules/spreadsheet/functions/readtableold.m | 46 -- .../help/en_US/xml/detectImportOptions.xml | 86 ++ .../spreadsheet/help/en_US/xml/readcell.xml | 115 +++ .../spreadsheet/help/en_US/xml/readtable.xml | 111 +++ .../src/cpp/DetectImportOptions.cpp | 8 +- modules/spreadsheet/src/cpp/ReadTable.cpp | 4 +- .../src/include/DetectImportOptions.hpp | 7 + modules/spreadsheet/tests/test_readtable.m | 23 +- .../spreadsheet/tests/test_readtable_4.csv | 6 + .../builtin/c/nlsString_builtin.vcxproj | 4 + .../c/nlsString_builtin.vcxproj.filters | 12 + modules/string/builtin/cpp/Gateway.cpp | 4 + modules/string/builtin/cpp/joinBuiltin.cpp | 76 ++ modules/string/builtin/cpp/strjustBuiltin.cpp | 53 ++ .../string/builtin/include/joinBuiltin.hpp | 20 + .../string/builtin/include/strjustBuiltin.hpp | 20 + modules/string/functions/@cell/strjust.m | 35 - 
modules/string/functions/@string/strjust.m | 32 - modules/string/functions/strjust.m | 83 -- modules/string/help/en_US/xml/join.xml | 105 +++ modules/string/help/en_US/xml/strcat.xml | 4 + modules/string/module.iss | 2 - modules/string/src/c/nlsString.vcxproj | 4 + .../string/src/c/nlsString.vcxproj.filters | 12 + modules/string/src/cpp/StringJoin.cpp | 756 ++++++++++++++++++ modules/string/src/cpp/StringJustify.cpp | 98 +++ modules/string/src/include/StringJoin.hpp | 21 + modules/string/src/include/StringJustify.hpp | 28 + modules/string/tests/bench_join.m | 11 + modules/string/tests/test_join.m | 230 ++++++ modules/table/functions/@table/disp.m | 129 ++- modules/table/functions/@table/subsref.m | 58 +- modules/types/src/cpp/ArrayOf_StringType.cpp | 4 +- modules/types/src/include/ArrayOf.hpp | 4 +- tools/missing_help/help_ignore.txt | 354 ++++---- 47 files changed, 2169 insertions(+), 472 deletions(-) rename modules/com_engine/help/en_US/xml/{actxGetRunningSrv.xml => actxGetRunningServer.xml} (88%) rename modules/data_structures/help/en_US/xml/{num2cellxml => num2cell.xml} (72%) rename modules/dynamic_link/help/en_US/xml/{loadcompiler.xml => loadcompilerconf.xml} (100%) delete mode 100644 modules/spreadsheet/functions/readtableold.m create mode 100644 modules/spreadsheet/help/en_US/xml/detectImportOptions.xml create mode 100644 modules/spreadsheet/help/en_US/xml/readcell.xml create mode 100644 modules/spreadsheet/help/en_US/xml/readtable.xml create mode 100644 modules/spreadsheet/tests/test_readtable_4.csv create mode 100644 modules/string/builtin/cpp/joinBuiltin.cpp create mode 100644 modules/string/builtin/cpp/strjustBuiltin.cpp create mode 100644 modules/string/builtin/include/joinBuiltin.hpp create mode 100644 modules/string/builtin/include/strjustBuiltin.hpp delete mode 100644 modules/string/functions/@cell/strjust.m delete mode 100644 modules/string/functions/@string/strjust.m delete mode 100644 modules/string/functions/strjust.m create mode 100644 
modules/string/help/en_US/xml/join.xml create mode 100644 modules/string/src/cpp/StringJoin.cpp create mode 100644 modules/string/src/cpp/StringJustify.cpp create mode 100644 modules/string/src/include/StringJoin.hpp create mode 100644 modules/string/src/include/StringJustify.hpp create mode 100644 modules/string/tests/bench_join.m create mode 100644 modules/string/tests/test_join.m diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fde50fe3f..e2c48f9f56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `detectImportOptions`: Generate import options from the file's content. +- `readcell`: Read cell array from file. +- `readtable`: Read table from file. - `writetable`: Write table to file. - `writecell`: write cell array to file. - `writematrix`: write matrix to file. @@ -17,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `dlmread`: Read ASCII-delimited file of numeric data into matrix. - `realmin`: Smallest normalized floating-point number. - [#1288](http://github.com/nelson-lang/nelson/issues/1288) `mustBeMatrix`, `mustBeRow`, `mustBeColumn` validator functions. +- `join`: Combine strings. +- [#1292](http://github.com/nelson-lang/nelson/issues/1292) Large Table Display. ### Changed @@ -25,6 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `title`: `Visible` property is inherited from the parent if not explicitly defined. - i18n: migration PO files to JSON. - `dlmwrite`: rework the function to be more fast and robust. +- `strjust`: rework the function to be more fast and robust. 
### Fixed diff --git a/modules/com_engine/help/en_US/xml/actxGetRunningSrv.xml b/modules/com_engine/help/en_US/xml/actxGetRunningServer.xml similarity index 88% rename from modules/com_engine/help/en_US/xml/actxGetRunningSrv.xml rename to modules/com_engine/help/en_US/xml/actxGetRunningServer.xml index 1d378acf65..77cfc3a885 100644 --- a/modules/com_engine/help/en_US/xml/actxGetRunningSrv.xml +++ b/modules/com_engine/help/en_US/xml/actxGetRunningServer.xml @@ -3,12 +3,12 @@ SAME AS NELSON SOFTWARE en_US - actxGetRunningSrv + actxGetRunningServer Handle to running instance of Automation server. - h = actxGetRunningSrv(progid) + h = actxGetRunningServer(progid) @@ -30,7 +30,7 @@

h = actxGetRunningSrv(progid) gets a reference to a running instance of the OLE/COM Automation server.

+ >h = actxGetRunningServer(progid) gets a reference to a running instance of the OLE/COM Automation server.

progid is the programmatic identifier of the Automation server object and h is the handle to the default interface of the server object.

diff --git a/modules/core/src/cpp/Banner.cpp b/modules/core/src/cpp/Banner.cpp index 2d4484a441..9c6621dcbc 100644 --- a/modules/core/src/cpp/Banner.cpp +++ b/modules/core/src/cpp/Banner.cpp @@ -15,11 +15,11 @@ void Banner() { NelsonPrint(L"\n"); - NelsonPrint(L" __ _ __\n"); - NelsonPrint(L" /\\ \\ \\___| |/ _\\ ___ _ __\n"); - NelsonPrint(L" / \\/ / _ | |\\ \\ / _ \\| '_ \\\n"); - NelsonPrint(L"/ /\\ | __| |_\\ | (_) | | | |\n"); - NelsonPrint(L"\\_\\ \\/ \\___|_|\\__/\\___/|_| |_|\n"); + NelsonPrint(L" _ __ __\n"); + NelsonPrint(L" / | / /__ / /________ ____\n"); + NelsonPrint(L" / |/ / _ \\/ / ___/ __ \\/ __ \\\n"); + NelsonPrint(L" / /| / __/ (__ ) /_/ / / / /\n"); + NelsonPrint(L"/_/ |_/\\___/_/____/\\____/_/ /_/\n"); } //============================================================================= } // namespace Nelson diff --git a/modules/data_structures/help/en_US/xml/cellfun.xml b/modules/data_structures/help/en_US/xml/cellfun.xml index ce52ca68bb..28263d7ae0 100644 --- a/modules/data_structures/help/en_US/xml/cellfun.xml +++ b/modules/data_structures/help/en_US/xml/cellfun.xml @@ -81,7 +81,7 @@ f = str2func('size'); y; -endfunction +end function result = errorfun(S, varargin) disp(nargin()) @@ -91,7 +91,7 @@ function result = errorfun(S, varargin) disp(varargin{1}) disp(varargin{2}) result = false; -endfunction]]> +end]]> diff --git a/modules/data_structures/help/en_US/xml/num2cellxml b/modules/data_structures/help/en_US/xml/num2cell.xml similarity index 72% rename from modules/data_structures/help/en_US/xml/num2cellxml rename to modules/data_structures/help/en_US/xml/num2cell.xml index 06ecacb074..82c7028c2f 100644 --- a/modules/data_structures/help/en_US/xml/num2cellxml +++ b/modules/data_structures/help/en_US/xml/num2cell.xml @@ -4,7 +4,8 @@ en_US num2cell - Convert array to cell array with consistently sized cells. + Convert array to cell array with consistently sized cells. 
C = num2cell(A) @@ -19,7 +20,8 @@ dim - positive integer value or positive vector of integers. + positive integer value or positive vector of integers.
@@ -33,8 +35,10 @@ -

num2cell function converts a numeric array into a cell array, where each element of the numeric array is placed in its own cell in the resulting cell array.

-

If A is a character array, num2cell will convert each row of the array into a separate cell in the resulting cell array.

+

The num2cell function converts a numeric array into a cell array, where each element of the numeric array is placed in its own cell in the resulting cell array.

+

If A is a character array, num2cell will convert each row of the array into a separate cell in the resulting cell array.

diff --git a/modules/display_format/functions/formattedDisplayText.m b/modules/display_format/functions/formattedDisplayText.m index e2a5bbd5c9..1002bee559 100644 --- a/modules/display_format/functions/formattedDisplayText.m +++ b/modules/display_format/functions/formattedDisplayText.m @@ -8,7 +8,7 @@ % LICENCE_BLOCK_END %============================================================================= function result = formattedDisplayText(varargin) - nbArgsValid = (nargin >= 1) && (mod(nargin, 2) == 1); + nbArgsValid = mod(nargin, 2) == 1; if ~nbArgsValid error(_('Wrong number of input arguments.')); end @@ -25,23 +25,21 @@ validField = false; name = lower(args{i}); value = args{i + 1}; - if strcmp(name, 'numericformat') + switch name + case 'numericformat' validateNumericFormat(value, i + 1); newFormat.NumericFormat = value; validField = true; - end - if strcmp(name, 'linespacing') + + case 'linespacing' validateLineSpacing(value, i + 1); newFormat.LineSpacing = value; validField = true; - end - if strcmp(name, 'suppressmarkup') - % not managed -> ignored - validField = true; - end - if strcmp(name, 'usetruefalseforlogical') + + case {'suppressmarkup', 'usetruefalseforlogical'} % not managed -> ignored validField = true; + end if ~validField msg = sprintf(_('Invalid name-value argument: %s.'), args{i}); diff --git a/modules/dynamic_link/help/en_US/xml/loadcompiler.xml b/modules/dynamic_link/help/en_US/xml/loadcompilerconf.xml similarity index 100% rename from modules/dynamic_link/help/en_US/xml/loadcompiler.xml rename to modules/dynamic_link/help/en_US/xml/loadcompilerconf.xml diff --git a/modules/interpreter/help/en_US/xml/switch.xml b/modules/interpreter/help/en_US/xml/switch.xml index 30c1d44bca..e94b925429 100644 --- a/modules/interpreter/help/en_US/xml/switch.xml +++ b/modules/interpreter/help/en_US/xml/switch.xml @@ -36,7 +36,7 @@ otherwise c = 'not sure'; end -endfunction +end ]]> diff --git a/modules/memory_manager/help/en_US/xml/global.xml 
b/modules/memory_manager/help/en_US/xml/global.xml index e5a9698794..b5799e4a3e 100644 --- a/modules/memory_manager/help/en_US/xml/global.xml +++ b/modules/memory_manager/help/en_US/xml/global.xml @@ -36,7 +36,7 @@ > 1) && isa(varargin{2}, 'DelimitedTextImportOptions') - options = varargin{2}; - else - options = detectImportOptions(filename); - end - ce = readcell(filename, options); - variableNames = options.VariableNames; - VariableNamesLine = options.VariableNamesLine; - RowNamesColumn = options.RowNamesColumn; - if ~isempty(variableNames) && (VariableNamesLine > 0) - ce(VariableNamesLine, :) = []; - end - if RowNamesColumn > 0 - variableNames(VariableNamesLine) = []; - rowNames = ce(:, RowNamesColumn); - ce(:, RowNamesColumn) = []; - else - rowNames = {}; - end - args = {}; - if ~isempty(variableNames) - args = [args, 'VariableNames', {variableNames}]; - end - if ~isempty(rowNames) - args = [args, 'RowNames', {rowNames'}]; - end - if isrow(ce) - varargout{1} = table(ce{:}, args{:}); - else - varargout{1} = table(ce, args{:}); - end -end -%============================================================================= diff --git a/modules/spreadsheet/help/en_US/xml/detectImportOptions.xml b/modules/spreadsheet/help/en_US/xml/detectImportOptions.xml new file mode 100644 index 0000000000..2d03e87a02 --- /dev/null +++ b/modules/spreadsheet/help/en_US/xml/detectImportOptions.xml @@ -0,0 +1,86 @@ + + + SAME AS NELSON SOFTWARE + + en_US + detectImportOptions + Create import options based on file content. + + + options = detectImportOptions(filename) + + + + + filename + a string: filename source. + + + + + + options + DelimitedTextImportOptions object. + + + + + +

options = detectImportOptions(filename) identifies a table in a file and returns an import options object.

+

You can customize this object and use it with readtable, readcell or readmatrix to control how Nelson imports data as a table, cell array, or matrix.

+

The type of the returned options object depends on the file's extension.

+
+ + + + + + + nelson + + +Names = {'John'; 'Alice'; 'Bob'; 'Diana'}; +Age = [28; 34; 22; 30]; +Height = [175; 160; 180; 165]; +Weight = [70; 55; 80; 60]; +T = table(Names, Age, Height, Weight); +writetable(T, [tempdir,'readcell_1.csv']) +options = detectImportOptions([tempdir,'readcell_1.csv']) +C1 = readcell([tempdir,'readcell_1.csv'], options) +options.DataLines = [1 Inf] +C2 = readcell([tempdir,'readcell_1.csv'], options) + + + + + + + + + readcell + + + readtable + + + readmatrix + + + + + + 1.10.0 + initial version + + + + + Allan CORNET + +
diff --git a/modules/spreadsheet/help/en_US/xml/readcell.xml b/modules/spreadsheet/help/en_US/xml/readcell.xml new file mode 100644 index 0000000000..bd432c5858 --- /dev/null +++ b/modules/spreadsheet/help/en_US/xml/readcell.xml @@ -0,0 +1,115 @@ + + + SAME AS NELSON SOFTWARE + + en_US + readcell + Create cell array from file. + + + C = readcell(filename) + C = readcell(filename, opts) + + + + + filename + a string: filename source. + + + + opts + DelimitedTextImportOptions object + + + + + + + C + a cell. + + + + + +

C = readcell(filename) creates a cell array by importing column-oriented data from a text or spreadsheet file.

+

C = readcell(filename, opts) creates a cell array using the settings defined in the opts import options object. The import options object allows you to customize how readcell interprets the file, offering greater control, improved performance, and the ability to reuse the configuration compared to the default syntax.

+
+ + + + + + + + nelson + + +Names = {'John'; 'Alice'; 'Bob'; 'Diana'}; +Age = [28; 34; 22; 30]; +Height = [175; 160; 180; 165]; +Weight = [70; 55; 80; 60]; +T = table(Names, Age, Height, Weight); +writetable(T, [tempdir,'readcell_1.csv']) +C = readcell([tempdir,'readcell_1.csv']) + + + + + + nelson + + +Names = {'John'; 'Alice'; 'Bob'; 'Diana'}; +Age = [28; 34; 22; 30]; +Height = [175; 160; 180; 165]; +Weight = [70; 55; 80; 60]; +T = table(Names, Age, Height, Weight); +writetable(T, [tempdir,'readcell_1.csv']) +options = detectImportOptions([tempdir,'readcell_1.csv']); +C1 = readcell([tempdir,'readcell_1.csv'], options) +options.DataLines = [1 Inf] +C2 = readcell([tempdir,'readcell_1.csv'], options) + + + + + + + + + writecell + + + detectImportOptions + + + writetable + + + readtable + + + fileread + + + + + + 1.10.0 + initial version + + + + + Allan CORNET + +
diff --git a/modules/spreadsheet/help/en_US/xml/readtable.xml b/modules/spreadsheet/help/en_US/xml/readtable.xml new file mode 100644 index 0000000000..0bfc58b0e6 --- /dev/null +++ b/modules/spreadsheet/help/en_US/xml/readtable.xml @@ -0,0 +1,111 @@ + + + SAME AS NELSON SOFTWARE + + en_US + readtable + Create table from file. + + + T = readtable(filename) + T = readtable(filename, opts) + + + + + filename + a string: filename source. + + + + opts + DelimitedTextImportOptions object + + + + + + + T + a table. + + + + + +

T = readtable(filename) creates a table by importing column-oriented data from a text or spreadsheet file.

+

T = readtable(filename, opts) creates a table using the settings defined in the opts import options object. The import options object allows you to customize how readtable interprets the file, offering greater control, improved performance, and the ability to reuse the configuration compared to the default syntax.

+
+ + + + + + + + nelson + + +Names = {'John'; 'Alice'; 'Bob'; 'Diana'}; +Age = [28; 34; 22; 30]; +Height = [175; 160; 180; 165]; +Weight = [70; 55; 80; 60]; +T1 = table(Names, Age, Height, Weight); +writetable(T1, [tempdir,'readtable_1.csv']) +T2 = readtable([tempdir,'readtable_1.csv']) + + + + + + nelson + + +Names = {'John'; 'Alice'; 'Bob'; 'Diana'}; +Age = [28; 34; 22; 30]; +Height = [175; 160; 180; 165]; +Weight = [70; 55; 80; 60]; +T = table(Names, Age, Height, Weight); +writetable(T, [tempdir,'readtable_1.csv']) +options = detectImportOptions([tempdir,'readtable_1.csv']); +T1 = readtable([tempdir,'readtable_1.csv'], options) +options.DataLines = [1 Inf] +T2 = readtable([tempdir,'readtable_1.csv'], options) + + + + + + + + writetable + + + detectImportOptions + + + readcell + + + fileread + + + + + + 1.10.0 + initial version + + + + + Allan CORNET + +
diff --git a/modules/spreadsheet/src/cpp/DetectImportOptions.cpp b/modules/spreadsheet/src/cpp/DetectImportOptions.cpp index fca143cf3b..28295ad657 100644 --- a/modules/spreadsheet/src/cpp/DetectImportOptions.cpp +++ b/modules/spreadsheet/src/cpp/DetectImportOptions.cpp @@ -156,7 +156,7 @@ struct DelimiterStats size_t totalCount; }; //============================================================================= -static DelimiterStats +DelimiterStats analyzeDelimiterInLine(const std::string& line, const std::string& delimiter) { DelimiterStats stats { delimiter, 0.0, 0, 0 }; @@ -254,7 +254,7 @@ detectDelimiter(const std::vector& lines, detectImportOptions& opti if (bestDelimiter != delimiterStats.end() && bestDelimiter->totalCount > 0) { options.Delimiter = { bestDelimiter->delimiter }; stringVector defaultVariableNames; - size_t nbElements = bestDelimiter->averageCount + 1; + size_t nbElements = (size_t)bestDelimiter->averageCount + 1; defaultVariableNames.resize(nbElements); for (size_t k = 0; k < nbElements; ++k) { defaultVariableNames[k] = "Var" + std::to_string((int)(k + 1)); @@ -437,7 +437,7 @@ detectColumnsAndRowNames(std::vector& lines, const std::string& del options.VariableNamesLine = 0; } else { options.VariableNames = columnNames; - options.VariableNamesLine = headerLineIndex + 1; + options.VariableNamesLine = (int)(headerLineIndex + 1); } } } @@ -473,7 +473,7 @@ detectDataLines(std::vector& lines, detectImportOptions& options) } break; } - options.DataLines[0] = dataLineStart + 1; + options.DataLines[0] = (double)(dataLineStart + 1); } //============================================================================= // Public Interface diff --git a/modules/spreadsheet/src/cpp/ReadTable.cpp b/modules/spreadsheet/src/cpp/ReadTable.cpp index 57c63e69e3..6debdfc887 100644 --- a/modules/spreadsheet/src/cpp/ReadTable.cpp +++ b/modules/spreadsheet/src/cpp/ReadTable.cpp @@ -292,8 +292,8 @@ ReadTable( columnValues[c] = ArrayOf(NLS_DOUBLE, dims, ptr); } break; case 
NLS_DCOMPLEX: { - std::complex* ptr - = (std::complex*)ArrayOf::allocateArrayOf(NLS_DCOMPLEX, nbRows); + std::complex* ptr = reinterpret_cast*>( + ArrayOf::allocateArrayOf(NLS_DCOMPLEX, nbRows)); #if WITH_OPENMP #pragma omp for #endif diff --git a/modules/spreadsheet/src/include/DetectImportOptions.hpp b/modules/spreadsheet/src/include/DetectImportOptions.hpp index a1062d0ac5..5f6ec9f5ed 100644 --- a/modules/spreadsheet/src/include/DetectImportOptions.hpp +++ b/modules/spreadsheet/src/include/DetectImportOptions.hpp @@ -15,6 +15,10 @@ //============================================================================= namespace Nelson { //============================================================================= +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4251) +#endif class NLSSPREADSHEET_IMPEXP detectImportOptions { public: @@ -28,6 +32,9 @@ class NLSSPREADSHEET_IMPEXP detectImportOptions std::vector VariableNames; std::vector DataLines; }; +#ifdef _MSC_VER +#pragma warning(pop) +#endif //============================================================================= NLSSPREADSHEET_IMPEXP void initializeDetectImportOptions(detectImportOptions& options); diff --git a/modules/spreadsheet/tests/test_readtable.m b/modules/spreadsheet/tests/test_readtable.m index 0046e2e0a0..4014b5e0c9 100644 --- a/modules/spreadsheet/tests/test_readtable.m +++ b/modules/spreadsheet/tests/test_readtable.m @@ -43,14 +43,15 @@ R = readtable(csv_filename); assert_isequal(R, REF) %============================================================================= - - - - - - - - - - - +LastName = {'Sanchez';'Johnson';'Li';'Diaz';'Brown'}; +Age = [38;43;38;40;49]; +Smoker = [1;NaN;NaN;0;1]; +Height = [71;69;64;67;64]; +Weight = [176;163;131;133;119]; +BloodPressure_1 = [124; 109; 125; 117; 122]; +BloodPressure_2 = [93;77;83;75;80]; +REF = table(LastName, Age, Smoker, Height, Weight, BloodPressure_1, BloodPressure_2, 'VariableNames', {'Last 
Name','Age','Smoker','Height','Weight','BloodPressure 1','BloodPressure 2'}); +csv_filename = [modulepath('spreadsheet'), '/tests/test_readtable_4.csv']; +R = readtable(csv_filename); +assert_isequal(R, REF) +%============================================================================= diff --git a/modules/spreadsheet/tests/test_readtable_4.csv b/modules/spreadsheet/tests/test_readtable_4.csv new file mode 100644 index 0000000000..09a19fee3b --- /dev/null +++ b/modules/spreadsheet/tests/test_readtable_4.csv @@ -0,0 +1,6 @@ +Last Name,Age,Smoker,Height,Weight,BloodPressure 1,BloodPressure 2 +Sanchez,38,1,71,176,124,93 +Johnson,43,'0',69,163,109,77 +Li,38,'1',64,131,125,83 +Diaz,40,0,67,133,117,75 +Brown,49,1,64,119,122,80 diff --git a/modules/string/builtin/c/nlsString_builtin.vcxproj b/modules/string/builtin/c/nlsString_builtin.vcxproj index 3e1e5fab76..577a07cbe5 100644 --- a/modules/string/builtin/c/nlsString_builtin.vcxproj +++ b/modules/string/builtin/c/nlsString_builtin.vcxproj @@ -192,6 +192,7 @@ + @@ -204,6 +205,7 @@ + @@ -250,6 +252,7 @@ + @@ -262,6 +265,7 @@ + diff --git a/modules/string/builtin/c/nlsString_builtin.vcxproj.filters b/modules/string/builtin/c/nlsString_builtin.vcxproj.filters index 87ec5454c6..8fd3a708ab 100644 --- a/modules/string/builtin/c/nlsString_builtin.vcxproj.filters +++ b/modules/string/builtin/c/nlsString_builtin.vcxproj.filters @@ -108,6 +108,12 @@ Source Files + + Source Files + + + Source Files +
@@ -203,6 +209,12 @@ Header Files + + Header Files + + + Header Files + diff --git a/modules/string/builtin/cpp/Gateway.cpp b/modules/string/builtin/cpp/Gateway.cpp index 9e7ed789ab..5b888a668a 100644 --- a/modules/string/builtin/cpp/Gateway.cpp +++ b/modules/string/builtin/cpp/Gateway.cpp @@ -41,6 +41,8 @@ #include "strcatBuiltin.hpp" #include "appendBuiltin.hpp" #include "isletterBuiltin.hpp" +#include "joinBuiltin.hpp" +#include "strjustBuiltin.hpp" //============================================================================= using namespace Nelson; //============================================================================= @@ -83,6 +85,8 @@ static const nlsGateway gateway[] = { { "strcat", (ptrBuiltin)Nelson::StringGateway::strcatBuiltin, 1, -1 }, { "append", (ptrBuiltin)Nelson::StringGateway::appendBuiltin, 1, -1 }, { "isletter", (ptrBuiltin)Nelson::StringGateway::isletterBuiltin, 1, 1 }, + { "join", (ptrBuiltin)Nelson::StringGateway::joinBuiltin, 1, -2 }, + { "strjust", (ptrBuiltin)Nelson::StringGateway::strjustBuiltin, 1, 2 }, }; //============================================================================= NLSGATEWAYFUNC(gateway) diff --git a/modules/string/builtin/cpp/joinBuiltin.cpp b/modules/string/builtin/cpp/joinBuiltin.cpp new file mode 100644 index 0000000000..29ca38a608 --- /dev/null +++ b/modules/string/builtin/cpp/joinBuiltin.cpp @@ -0,0 +1,76 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#include "joinBuiltin.hpp" +#include "StringJoin.hpp" +#include "Error.hpp" +#include "i18n.hpp" +#include "InputOutputArgumentsCheckers.hpp" +#include "OverloadRequired.hpp" +//============================================================================= +using namespace Nelson; +//============================================================================= +ArrayOfVector +Nelson::StringGateway::joinBuiltin(int nLhs, const ArrayOfVector& argIn) +{ + ArrayOfVector retval; + nargoutcheck(nLhs, 0, 1); + nargincheck(argIn, 1, 3); + ArrayOf delimiters; + ArrayOf A = argIn[0]; + std::vector vec = A.getDimensions().getAsVector(); + indexType dim = 0; + if (!A.isEmpty()) { + for (int k = static_cast(vec.size()) - 1; k >= 0; --k) { + if (vec[k] != 0 && vec[k] != 1) { + dim = k; + break; + } + } + } + dim += 1; + switch (argIn.size()) { + case 1: { + delimiters = ArrayOf::stringArrayConstructor(L" "); + } break; + case 2: { + if (argIn[1].isRowVectorCharacterArray()) { + delimiters = ArrayOf::stringArrayConstructor(argIn[1].getContentAsWideString()); + } else if (argIn[1].isStringArray()) { + delimiters = argIn[1]; + } else if (argIn[1].isCellArrayOfCharacterVectors()) { + delimiters = ArrayOf::stringArrayConstructor( + argIn[1].getContentAsWideStringVector(false), argIn[1].getDimensions()); + } else { + dim = argIn[1].getContentAsScalarIndex(false, true); + delimiters = ArrayOf::stringArrayConstructor(" "); + } + } break; + case 3: { + if (argIn[1].isRowVectorCharacterArray()) { + delimiters = ArrayOf::stringArrayConstructor(argIn[1].getContentAsWideString()); + } else if (argIn[1].isStringArray()) { + delimiters = argIn[1]; + } else if (argIn[1].isCellArrayOfCharacterVectors()) { + delimiters = 
ArrayOf::stringArrayConstructor( + argIn[1].getContentAsWideStringVector(false), argIn[1].getDimensions()); + } else { + Error(_W( + "Wrong type for argument #2: string, characters or cell of characters expected.")); + } + dim = argIn[2].getContentAsScalarIndex(false, true); + } break; + default: { + Error(ERROR_WRONG_NUMBERS_INPUT_ARGS); + } break; + } + retval << StringJoin(A, delimiters, dim); + return retval; +} +//============================================================================= diff --git a/modules/string/builtin/cpp/strjustBuiltin.cpp b/modules/string/builtin/cpp/strjustBuiltin.cpp new file mode 100644 index 0000000000..b7c4bbf831 --- /dev/null +++ b/modules/string/builtin/cpp/strjustBuiltin.cpp @@ -0,0 +1,53 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. +//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#include "strjustBuiltin.hpp" +#include "StringJustify.hpp" +#include "Error.hpp" +#include "i18n.hpp" +#include "InputOutputArgumentsCheckers.hpp" +#include "OverloadRequired.hpp" +//============================================================================= +using namespace Nelson; +//============================================================================= +ArrayOfVector +Nelson::StringGateway::strjustBuiltin(int nLhs, const ArrayOfVector& argIn) +{ + ArrayOfVector retval; + nargincheck(argIn, 1, 2); + nargoutcheck(nLhs, 0, 1); + ArrayOf A = argIn[0]; + if (A.isEmpty()) { + retval << A; + return retval; + } + if ((A.isNumeric() || A.isLogical()) && !A.isSparse()) { + A.promoteType(NLS_CHAR); + } + if (A.isCharacterArray() || 
A.isStringArray() || A.isCellArrayOfCharacterVectors()) { + STRINGJUSTIFY side = STRINGJUSTIFY::NLS_JUSTIFY_RIGHT; + if (argIn.size() == 2) { + std::wstring style = argIn[1].getContentAsWideString(); + if (style == L"left") { + side = STRINGJUSTIFY::NLS_JUSTIFY_LEFT; + } else if (style == L"center") { + side = STRINGJUSTIFY::NLS_JUSTIFY_CENTER; + } else if (style == L"right") { + side = STRINGJUSTIFY::NLS_JUSTIFY_RIGHT; + } else { + Error(_W("Wrong value for #2 argument: 'left', 'right', 'center' expected.")); + } + } + retval << StringJustify(A, side); + } else { + OverloadRequired("strjust"); + } + return retval; +} +//============================================================================= diff --git a/modules/string/builtin/include/joinBuiltin.hpp b/modules/string/builtin/include/joinBuiltin.hpp new file mode 100644 index 0000000000..cd2ff9b35a --- /dev/null +++ b/modules/string/builtin/include/joinBuiltin.hpp @@ -0,0 +1,20 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include "ArrayOf.hpp" +//============================================================================= +namespace Nelson::StringGateway { +//============================================================================= +ArrayOfVector +joinBuiltin(int nLhs, const ArrayOfVector& argIn); +//============================================================================= +} // namespace Nelson +//============================================================================= diff --git a/modules/string/builtin/include/strjustBuiltin.hpp b/modules/string/builtin/include/strjustBuiltin.hpp new file mode 100644 index 0000000000..e7f7d7fbfa --- /dev/null +++ b/modules/string/builtin/include/strjustBuiltin.hpp @@ -0,0 +1,20 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include "ArrayOf.hpp" +//============================================================================= +namespace Nelson::StringGateway { +//============================================================================= +ArrayOfVector +strjustBuiltin(int nLhs, const ArrayOfVector& argIn); +//============================================================================= +} // namespace Nelson +//============================================================================= diff --git a/modules/string/functions/@cell/strjust.m b/modules/string/functions/@cell/strjust.m deleted file mode 100644 index 6462cc7cfd..0000000000 --- a/modules/string/functions/@cell/strjust.m +++ /dev/null @@ -1,35 +0,0 @@ -%============================================================================= -% Copyright (c) 2016-present Allan CORNET (Nelson) -%============================================================================= -% This file is part of the Nelson. 
-%============================================================================= -% LICENCE_BLOCK_BEGIN -% SPDX-License-Identifier: LGPL-3.0-or-later -% LICENCE_BLOCK_END -%============================================================================= -function varargout = strjust(varargin) - narginchk(1, 2); - nargoutchk(0, 1); - - if nargin == 1 - justify = 'right'; - else - justify = lower(varargin{2}); - end - mustBeMember(justify, ["left", "right", "center"], 2); - - str = varargin{1}; - if ~iscellstr(str) - error(_('String, cell of chars or characters vector expected.')); - end - result = str; - for k = 1:numel(str) - if ischar(str{k}) - result{k} = strjust(str{k}, justify); - else - error(_('String, cell of chars or characters vector expected.')); - end - end - varargout{1} = result; -end -%============================================================================= diff --git a/modules/string/functions/@string/strjust.m b/modules/string/functions/@string/strjust.m deleted file mode 100644 index fb0903010c..0000000000 --- a/modules/string/functions/@string/strjust.m +++ /dev/null @@ -1,32 +0,0 @@ -%============================================================================= -% Copyright (c) 2016-present Allan CORNET (Nelson) -%============================================================================= -% This file is part of the Nelson. 
-%============================================================================= -% LICENCE_BLOCK_BEGIN -% SPDX-License-Identifier: LGPL-3.0-or-later -% LICENCE_BLOCK_END -%============================================================================= -function varargout = strjust(varargin) - narginchk(1, 2); - nargoutchk(0, 1); - - if nargin == 1 - justify = 'right'; - else - justify = lower(varargin{2}); - end - mustBeMember(justify, ["left", "right", "center"], 2); - - str = varargin{1}; - result = str; - for k = 1:numel(str) - if ischar(str{k}) - result{k} = strjust(str{k}, justify); - else - error(_('String, cell of chars or characters vector expected.')); - end - end - varargout{1} = result; -end -%============================================================================= diff --git a/modules/string/functions/strjust.m b/modules/string/functions/strjust.m deleted file mode 100644 index 1374d4253b..0000000000 --- a/modules/string/functions/strjust.m +++ /dev/null @@ -1,83 +0,0 @@ -%============================================================================= -% Copyright (c) 2016-present Allan CORNET (Nelson) -%============================================================================= -% This file is part of the Nelson. 
-%============================================================================= -% LICENCE_BLOCK_BEGIN -% SPDX-License-Identifier: LGPL-3.0-or-later -% LICENCE_BLOCK_END -%============================================================================= -function varargout = strjust(varargin) - narginchk(1, 2); - nargoutchk(0, 1); - - if nargin == 1 - justify = 'right'; - else - justify = lower(varargin{2}); - end - mustBeMember(justify, ["left", "right", "center"], 2); - - str = varargin{1}; - % Handle empty input - if isempty(str) - varargout{1} = str; - return; - end - - if isnumeric(str) - str = char(str); - end - - % Get the size of the input string - [m, n] = size(str); - - % Initialize justifiedText as a space-filled matrix of the same size - justifiedText = repmat(' ', m, n); - - % Determine the column to add spaces - if (strcmp(justify, 'left') && ~any(str(:, 1) == ' ')) || (strcmp(justify, 'right') && ~any(str(:, n) == ' ')) - varargout{1} = char(str); - return; - end - % Find row and column indices of non-space characters - isChar = (str ~= 0 & str ~= ' '); - [nr, nc] = find(isChar); - - % Determine how to justify the text - if strcmp(justify, 'left') - shift = shiftLeft(isChar); - elseif strcmp(justify, 'right') - shift = shiftRight(isChar); - else - shift = shiftCenter(isChar); - end - - % Calculate input and output positions for character copying - posIn = nr + (nc - 1) * m; - posOut = nr + (nc + shift(nr)' - 1) * m; - % Copy characters to the justifiedText matrix - justifiedText(posOut) = str(posIn); - - varargout{1} = justifiedText; -end -%============================================================================= -function shift = shiftLeft(isChar) - % For left justification, find the leftmost non-space character in each row - [dummy, shift] = max(isChar, [], 2); - shift = 1 - shift; -end -%============================================================================= -function shift = shiftRight(isChar) - % For right justification, find the rightmost 
non-space character in each row - [dummy, shift] = max(fliplr(isChar), [], 2); - shift = shift - 1; -end -%============================================================================= -function shift = shiftCenter(isChar) - % For center justification, find the middle point between leftmost and rightmost non-space characters - [dummy, shiftBefore] = max(isChar, [], 2); - [dummy, shiftAfter] = max(fliplr(isChar), [], 2); - shift = floor((shiftAfter - shiftBefore) / 2); -end -%============================================================================= diff --git a/modules/string/help/en_US/xml/join.xml b/modules/string/help/en_US/xml/join.xml new file mode 100644 index 0000000000..9876f1b146 --- /dev/null +++ b/modules/string/help/en_US/xml/join.xml @@ -0,0 +1,105 @@ + + + SAME AS NELSON SOFTWARE + + en_US + join + Combine strings. + + + res = join(str) + res = join(str, delimiter) + res = join(str, dim) + res = join(str, delimiter, dim) + + + + + str + a string, string array or cell of strings. + + + delimiter + a string, string array or cell of strings:Characters used to separate and join strings. + + + dim + positive integer: Dimension along which to join strings. + + + + + + + res + a string, string array or cell of strings. + + + + +

res = join(str) combines the elements of str into a single piece of text, placing a space character (the default delimiter) between consecutive elements.

+

The input, str, can be either a string array or a cell array of character vectors. The output, res, has the same data type as str.

+ +

If str is a 1-by-N or N-by-1 string array or cell array, res will be a string scalar or a cell array containing a single character vector.

+

If str is an M-by-N string array or cell array, res will be an M-by-1 string array or cell array.

+

For arrays of any size, join concatenates elements along the last dimension with a size greater than 1.

+ +

res = join(str, delimiter) joins the elements of str using the specified delimiter instead of the default space character.

+

If delimiter is an array of multiple delimiters, and str has N elements along the joining dimension, delimiter must have N–1 elements along the same dimension. All other dimensions of delimiter must either have size 1 or match the size of the corresponding dimensions of str.

+

res = join(str, dim) combines the elements of str along the specified dimension dim.

+ +

res = join(str, delimiter, dim) joins the elements of str along the specified dimension dim, using delimiter to separate them.

+
+ + + + + + + nelson + + + + + + + + + append + + + strcat + + + + + + + 1.10.0 + initial version + + + + + Allan CORNET + +
diff --git a/modules/string/help/en_US/xml/strcat.xml b/modules/string/help/en_US/xml/strcat.xml index 0327c22c58..a8967fd8d7 100644 --- a/modules/string/help/en_US/xml/strcat.xml +++ b/modules/string/help/en_US/xml/strcat.xml @@ -60,6 +60,10 @@ C = strcat(A, B)]]> append + + join + + diff --git a/modules/string/module.iss b/modules/string/module.iss index 6ffcbdca77..0274ddae7f 100644 --- a/modules/string/module.iss +++ b/modules/string/module.iss @@ -17,8 +17,6 @@ Source: {#RootPath}modules\{#MODULE_NAME}\etc\startup.m; DestDir: {app}\modules\ Source: {#RootPath}modules\{#MODULE_NAME}\etc\finish.m; DestDir: {app}\modules\{#MODULE_NAME}\etc\; ;============================================================================== Source: {#RootPath}modules\{#MODULE_NAME}\functions\*.m; DestDir: {app}\modules\{#MODULE_NAME}\functions\; -Source: {#RootPath}modules\{#MODULE_NAME}\functions\@cell\*.m; DestDir: {app}\modules\{#MODULE_NAME}\functions\@cell; -Source: {#RootPath}modules\{#MODULE_NAME}\functions\@string\*.m; DestDir: {app}\modules\{#MODULE_NAME}\functions\@string; ;============================================================================== Source: {#RootPath}modules\{#MODULE_NAME}\help\*.qch; DestDir: {app}\modules\{#MODULE_NAME}\help\; Flags: recursesubdirs;Components: {#COMPONENT_HELP_FILES} and {#COMPONENT_HELP_BROWSER}; ;============================================================================== diff --git a/modules/string/src/c/nlsString.vcxproj b/modules/string/src/c/nlsString.vcxproj index b9003ab902..3293e8d38a 100644 --- a/modules/string/src/c/nlsString.vcxproj +++ b/modules/string/src/c/nlsString.vcxproj @@ -189,6 +189,8 @@ + + @@ -218,6 +220,8 @@ + + diff --git a/modules/string/src/c/nlsString.vcxproj.filters b/modules/string/src/c/nlsString.vcxproj.filters index d434eaea8f..99e19e8570 100644 --- a/modules/string/src/c/nlsString.vcxproj.filters +++ b/modules/string/src/c/nlsString.vcxproj.filters @@ -90,6 +90,12 @@ Source Files + + Source Files + + 
+ Source Files +
@@ -167,6 +173,12 @@ Header Files + + Header Files + + + Header Files + diff --git a/modules/string/src/cpp/StringJoin.cpp b/modules/string/src/cpp/StringJoin.cpp new file mode 100644 index 0000000000..bdc6dda9d9 --- /dev/null +++ b/modules/string/src/cpp/StringJoin.cpp @@ -0,0 +1,756 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. +//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#include "StringJoin.hpp" +#include "Error.hpp" +#include "i18n.hpp" +#include "nlsBuildConfig.h" +//============================================================================= +namespace Nelson { +//============================================================================= +static ArrayOf +StringJoinCharacters(const ArrayOf& A, const ArrayOf& delimiters, size_t dimension); +//============================================================================= +static ArrayOf +StringJoinCellCharacters(const ArrayOf& A, const ArrayOf& delimiters, size_t dimension); +static ArrayOf +StringJoinCellCharactersScalarDelimiter( + const ArrayOf& A, const ArrayOf& delimiter, size_t dimension); +static ArrayOf +StringJoinCellCharactersRowVectorDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension); +static ArrayOf +StringJoinCellCharactersColumnVectorDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension); +static ArrayOf +StringJoinCellCharactersMatrixDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension); +//============================================================================= +static ArrayOf +StringJoinStringArray(const 
ArrayOf& A, const ArrayOf& delimiters, size_t dimension); +static ArrayOf +StringJoinStringArrayScalarDelimiter(const ArrayOf& A, const ArrayOf& delimiter, size_t dimension); +static ArrayOf +StringJoinStringArrayRowVectorDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension); +static ArrayOf +StringJoinStringArrayColumnVectorDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension); +static ArrayOf +StringJoinStringArrayMatrixDelimiter(const ArrayOf& A, const ArrayOf& delimiters, size_t dimension); +//============================================================================= +static bool +validateDelimiterDimensions(const ArrayOf& input, const ArrayOf& delimiters, size_t dimension); +//============================================================================= +ArrayOf +StringJoin(const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + if ((dimension > 2 || dimension < 1) + || !validateDelimiterDimensions(A, delimiters, dimension)) { + Error(_W("Invalid delimiter dimensions.")); + } + if (!delimiters.isStringArray()) { + Error(_W("Invalid delimiter type.")); + } + switch (A.getDataClass()) { + case NLS_CHAR: { + return StringJoinCharacters(A, delimiters, dimension); + } break; + case NLS_CELL_ARRAY: { + return StringJoinCellCharacters(A, delimiters, dimension); + } break; + case NLS_STRING_ARRAY: { + return StringJoinStringArray(A, delimiters, dimension); + } break; + default: { + Error(_W("Type not supported.")); + } break; + } + return {}; +} +//============================================================================= +bool +validateDelimiterDimensions(const ArrayOf& input, const ArrayOf& delimiters, size_t dimension) +{ + Dimensions inputDims = input.getDimensions(); + Dimensions delimDims = delimiters.getDimensions(); + + // Case 1: Scalar delimiter + if (delimiters.isScalar()) { + return true; + } + + // Case 2: Vector delimiter + if (delimiters.isRowVector()) { + if (dimension == 1) { + return 
(delimDims.getColumns() == inputDims.getRows() - 1); + } + if (dimension == 2) { + return (delimDims.getColumns() == inputDims.getColumns() - 1); + } + } + if (delimiters.isColumnVector()) { + if (dimension == 1) { + return (delimDims.getRows() == inputDims.getRows() - 1); + } + if (dimension == 2) { + return delimDims.getColumns() == inputDims.getColumns() - 1; + } + } + + // Case 3: Matrix delimiter + if (!delimiters.isVector()) { + if (dimension == 1) { + return (delimDims.getRows() == inputDims.getRows() - 1 + && delimDims.getColumns() == inputDims.getColumns()); + } + if (dimension == 2) { + return (delimDims.getRows() == inputDims.getRows() + && delimDims.getColumns() == inputDims.getColumns() - 1); + } + } + + return false; +} +//============================================================================= +ArrayOf +StringJoinCharacters(const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + if (A.isEmpty()) { + return ArrayOf::characterArrayConstructor(""); + } + std::wstring strdelimiter; + if ((delimiters.isCellArrayOfCharacterVectors() && delimiters.isScalar()) + || delimiters.isScalarStringArray() || delimiters.isRowVectorCharacterArray()) { + strdelimiter = delimiters.getContentAsWideString(); + } else { + Error(_W("Invalid delimiter dimensions.")); + } + return A; +} +//============================================================================= +ArrayOf +StringJoinCellCharacters(const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + if (A.isEmpty()) { + Dimensions dims(0, 1); + return ArrayOf::emptyCell(dims); + } + if (delimiters.isScalar()) { + return StringJoinCellCharactersScalarDelimiter(A, delimiters, dimension); + } + if (delimiters.isVector()) { + if (delimiters.isRowVector()) { + return StringJoinCellCharactersRowVectorDelimiter(A, delimiters, dimension); + } + return StringJoinCellCharactersColumnVectorDelimiter(A, delimiters, dimension); + } + return StringJoinCellCharactersMatrixDelimiter(A, delimiters, 
dimension); +} +//============================================================================= +ArrayOf +StringJoinStringArrayScalarDelimiter(const ArrayOf& A, const ArrayOf& delimiter, size_t dimension) +{ + ArrayOf* strs = (ArrayOf*)A.getDataPointer(); + ArrayOf* delim = (ArrayOf*)delimiter.getDataPointer(); + size_t nbRows = A.getRows(); + size_t nbCols = A.getColumns(); + + if (dimension == 1) { + // Join by rows - output will have nbCols columns + Dimensions dims(1, nbCols); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_STRING_ARRAY, nbCols); + ArrayOf strArray = ArrayOf(NLS_STRING_ARRAY, dims, elements); + + // Process each column +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType c = 0; c < (ompIndexType)nbCols; c++) { + std::wstring joined; + bool isMissing = false; + // Join rows within this column + for (size_t r = 0; r < nbRows && !isMissing; r++) { + if (r > 0) { + if (delim[0].isRowVectorCharacterArray()) { + joined += delim[0].getContentAsWideString(); + } else { + isMissing = true; + } + } + isMissing = isMissing || !strs[r + c * nbRows].isRowVectorCharacterArray(); + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } + } + elements[c] = isMissing ? 
ArrayOf::doubleConstructor(std::nan("NaN")) + : ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } else { + // Join by columns - output will have nbRows rows + Dimensions dims(nbRows, 1); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_STRING_ARRAY, nbRows); + ArrayOf strArray = ArrayOf(NLS_STRING_ARRAY, dims, elements); + + // Process each row +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + std::wstring joined; + bool isMissing = false; + // Join columns within this row + for (size_t c = 0; c < nbCols && !isMissing; c++) { + if (c > 0) { + if (delim[0].isRowVectorCharacterArray()) { + joined += delim[0].getContentAsWideString(); + } else { + isMissing = true; + } + } + isMissing = isMissing || !strs[r + c * nbRows].isRowVectorCharacterArray(); + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } + } + elements[r] = isMissing ? ArrayOf::doubleConstructor(std::nan("NaN")) + : ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } +} +//============================================================================= +ArrayOf +StringJoinStringArrayRowVectorDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + ArrayOf* strs = (ArrayOf*)A.getDataPointer(); + ArrayOf* delim = (ArrayOf*)delimiters.getDataPointer(); + size_t nbRows = A.getRows(); + size_t nbCols = A.getColumns(); + + if (dimension == 1) { + // Join by rows - output will have nbCols columns + Dimensions dims(1, nbCols); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_STRING_ARRAY, nbCols); + ArrayOf strArray = ArrayOf(NLS_STRING_ARRAY, dims, elements); + + // Process each column +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType c = 0; c < (ompIndexType)nbCols; c++) { + std::wstring joined; + bool isMissing = false; + // Join rows within this column + for (size_t r = 0; r < nbRows && !isMissing; r++) { + 
if (r > 0) { + if (delim[r - 1].isRowVectorCharacterArray()) { + joined += delim[r - 1].getContentAsWideString(); + } else { + isMissing = true; + } + } + isMissing = isMissing || !strs[r + c * nbRows].isRowVectorCharacterArray(); + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } + } + elements[c] = isMissing ? ArrayOf::doubleConstructor(std::nan("NaN")) + : ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } else /* dimension == 2 */ { + // Join by columns - output will have nbRows rows + Dimensions dims(nbRows, 1); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_STRING_ARRAY, nbRows); + ArrayOf strArray = ArrayOf(NLS_STRING_ARRAY, dims, elements); + + // Process each row +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + std::wstring joined; + bool isMissing = false; + // Join columns within this row + for (size_t c = 0; c < nbCols && !isMissing; c++) { + if (c > 0) { + if (delim[c - 1].isRowVectorCharacterArray()) { + joined += delim[c - 1].getContentAsWideString(); + } else { + isMissing = true; + } + } + isMissing = isMissing || !strs[r + c * nbRows].isRowVectorCharacterArray(); + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } + } + elements[r] = isMissing ? 
ArrayOf::doubleConstructor(std::nan("NaN")) + : ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } + return {}; +} +//============================================================================= +ArrayOf +StringJoinStringArrayColumnVectorDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + ArrayOf* strs = (ArrayOf*)A.getDataPointer(); + ArrayOf* delim = (ArrayOf*)delimiters.getDataPointer(); + size_t nbRows = A.getRows(); + size_t nbCols = A.getColumns(); + + if (dimension == 1) { + // Join by rows - output will have nbCols columns + Dimensions dims(1, nbCols); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_STRING_ARRAY, nbCols); + ArrayOf strArray = ArrayOf(NLS_STRING_ARRAY, dims, elements); +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType c = 0; c < (ompIndexType)nbCols; c++) { + std::wstring joined; + bool isMissing = false; + for (size_t r = 0; r < nbRows && !isMissing; r++) { + if (r > 0) { + if (delim[r - 1].isRowVectorCharacterArray()) { + joined += delim[r - 1].getContentAsWideString(); + } else { + isMissing = true; + } + } + isMissing = isMissing || !strs[r + c * nbRows].isRowVectorCharacterArray(); + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } + } + elements[c] = isMissing ? 
ArrayOf::doubleConstructor(std::nan("NaN")) + : ArrayOf::characterArrayConstructor(joined); + } + return strArray; + + } else /* dimension == 2 */ { + // Join by columns - output will have nbRows rows + Dimensions dims(nbRows, 1); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_STRING_ARRAY, nbRows); + ArrayOf strArray = ArrayOf(NLS_STRING_ARRAY, dims, elements); +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + std::wstring joined; + bool isMissing = false; + for (size_t c = 0; c < nbCols && !isMissing; c++) { + if (c > 0) { + if (delim[c - 1].isRowVectorCharacterArray()) { + joined += delim[c - 1].getContentAsWideString(); + } else { + isMissing = true; + } + } + isMissing = isMissing || !strs[r + c * nbRows].isRowVectorCharacterArray(); + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } + } + elements[r] = isMissing ? ArrayOf::doubleConstructor(std::nan("NaN")) + : ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } +} +//============================================================================= +ArrayOf +StringJoinStringArrayMatrixDelimiter(const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + ArrayOf* strs = (ArrayOf*)A.getDataPointer(); + ArrayOf* delims = (ArrayOf*)delimiters.getDataPointer(); + size_t nbRows = A.getRows(); + size_t nbCols = A.getColumns(); + + if (dimension == 1) { + // Join by rows - output will have nbCols columns + Dimensions dims(1, nbCols); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_STRING_ARRAY, nbCols); + ArrayOf strArray = ArrayOf(NLS_STRING_ARRAY, dims, elements); + +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType c = 0; c < (ompIndexType)nbCols; c++) { + std::wstring joined; + bool isMissing = false; + for (size_t r = 0; r < nbRows && !isMissing; r++) { + if (r > 0) { + if (delims[(r - 1) + c * (nbRows - 1)].isRowVectorCharacterArray()) { + 
joined += delims[(r - 1) + c * (nbRows - 1)].getContentAsWideString(); + } else { + isMissing = true; + } + } + isMissing = isMissing || !strs[r + c * nbRows].isRowVectorCharacterArray(); + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } + } + elements[c] = isMissing ? ArrayOf::doubleConstructor(std::nan("NaN")) + : ArrayOf::characterArrayConstructor(joined); + } + return strArray; + + } else /* dimension == 2 */ { + // Join by columns - output will have nbRows rows + Dimensions dims(nbRows, 1); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_STRING_ARRAY, nbRows); + ArrayOf strArray = ArrayOf(NLS_STRING_ARRAY, dims, elements); +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + std::wstring joined; + bool isMissing = false; + for (size_t c = 0; c < nbCols && !isMissing; c++) { + if (c > 0) { + if (delims[r + (c - 1) * nbRows].isRowVectorCharacterArray()) { + joined += delims[r + (c - 1) * nbRows].getContentAsWideString(); + + } else { + isMissing = true; + } + } + isMissing = isMissing || !strs[r + c * nbRows].isRowVectorCharacterArray(); + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } + } + elements[r] = isMissing ? 
ArrayOf::doubleConstructor(std::nan("NaN")) + : ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } +} +//============================================================================= +ArrayOf +StringJoinStringArray(const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + if (A.isEmpty()) { + Dimensions dims(0, 1); + return ArrayOf::stringArrayConstructor(wstringVector(), dims); + } + if (delimiters.isScalar()) { + return StringJoinStringArrayScalarDelimiter(A, delimiters, dimension); + } + if (delimiters.isVector()) { + if (delimiters.isRowVector()) { + return StringJoinStringArrayRowVectorDelimiter(A, delimiters, dimension); + } + return StringJoinStringArrayColumnVectorDelimiter(A, delimiters, dimension); + } + return StringJoinStringArrayMatrixDelimiter(A, delimiters, dimension); +} +//============================================================================= +ArrayOf +StringJoinCellCharactersScalarDelimiter( + const ArrayOf& A, const ArrayOf& delimiter, size_t dimension) +{ + ArrayOf* strs = (ArrayOf*)A.getDataPointer(); + ArrayOf* delim = (ArrayOf*)delimiter.getDataPointer(); + size_t nbRows = A.getRows(); + size_t nbCols = A.getColumns(); + + if (dimension == 1) { + // Join by rows - output will have nbCols columns + Dimensions dims(1, nbCols); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbCols); + ArrayOf strArray = ArrayOf(NLS_CELL_ARRAY, dims, elements); + + // Process each column +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType c = 0; c < (ompIndexType)nbCols; c++) { + std::wstring joined; + bool isMissing = false; + // Join rows within this column + for (size_t r = 0; r < nbRows && !isMissing; r++) { + if (r > 0) { + if (delim[0].isRowVectorCharacterArray()) { + joined += delim[0].getContentAsWideString(); + } else { + isMissing = true; + joined = L""; + } + } + if (isMissing) { + joined = L""; + } else { + joined += strs[r + c * nbRows].getContentAsWideString(); + } 
+ } + elements[c] = ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } else { + // Join by columns - output will have nbRows rows + Dimensions dims(nbRows, 1); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbRows); + ArrayOf strArray = ArrayOf(NLS_CELL_ARRAY, dims, elements); + + // Process each row +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + std::wstring joined; + bool isMissing = false; + // Join columns within this row + for (size_t c = 0; c < nbCols && !isMissing; c++) { + if (c > 0) { + if (delim[0].isRowVectorCharacterArray()) { + joined += delim[0].getContentAsWideString(); + } else { + isMissing = true; + } + } + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } else { + joined = L""; + } + } + elements[r] = ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } +} +//============================================================================= +ArrayOf +StringJoinCellCharactersRowVectorDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + ArrayOf* strs = (ArrayOf*)A.getDataPointer(); + ArrayOf* delim = (ArrayOf*)delimiters.getDataPointer(); + size_t nbRows = A.getRows(); + size_t nbCols = A.getColumns(); + + if (dimension == 1) { + // Join by rows - output will have nbCols columns + Dimensions dims(1, nbCols); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbCols); + ArrayOf strArray = ArrayOf(NLS_CELL_ARRAY, dims, elements); + + // Process each column +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType c = 0; c < (ompIndexType)nbCols; c++) { + std::wstring joined; + bool isMissing = false; + // Join rows within this column + for (size_t r = 0; r < nbRows && !isMissing; r++) { + if (r > 0) { + if (delim[r - 1].isRowVectorCharacterArray()) { + joined += delim[r - 1].getContentAsWideString(); + } else { + joined = L""; + 
isMissing = true; + } + } + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } else { + joined = L""; + } + } + elements[c] = ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } else /* dimension == 2 */ { + // Join by columns - output will have nbRows rows + Dimensions dims(nbRows, 1); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbRows); + ArrayOf strArray = ArrayOf(NLS_CELL_ARRAY, dims, elements); + + // Process each row +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + std::wstring joined; + bool isMissing = false; + // Join columns within this row + for (size_t c = 0; c < nbCols && !isMissing; c++) { + if (c > 0) { + if (delim[c - 1].isRowVectorCharacterArray()) { + joined += delim[c - 1].getContentAsWideString(); + } else { + isMissing = true; // joined is cleared by the else branch just below + } + } + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } else { + joined = L""; + } + } + elements[r] = ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } + return {}; +} +//============================================================================= +ArrayOf +StringJoinCellCharactersColumnVectorDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + ArrayOf* strs = (ArrayOf*)A.getDataPointer(); + ArrayOf* delim = (ArrayOf*)delimiters.getDataPointer(); + size_t nbRows = A.getRows(); + size_t nbCols = A.getColumns(); + + if (dimension == 1) { + // Join by rows - output will have nbCols columns + Dimensions dims(1, nbCols); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbCols); + ArrayOf strArray = ArrayOf(NLS_CELL_ARRAY, dims, elements); + +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType c = 0; c < (ompIndexType)nbCols; c++) { + std::wstring joined; + bool isMissing = false; + for (size_t r = 0; r < nbRows && !isMissing; r++) { + if (r > 0) { + if 
(delim[r - 1].isRowVectorCharacterArray()) { + joined += delim[r - 1].getContentAsWideString(); + } else { + isMissing = true; + joined = L""; + } + } + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } else { + joined = L""; + } + } + elements[c] = ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } else /* dimension == 2 */ { + // Join by columns - output will have nbRows rows + Dimensions dims(nbRows, 1); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbRows); + ArrayOf strArray = ArrayOf(NLS_CELL_ARRAY, dims, elements); +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + std::wstring joined; + bool isMissing = false; + for (size_t c = 0; c < nbCols && !isMissing; c++) { + if (c > 0) { + if (delim[c - 1].isRowVectorCharacterArray()) { + joined += delim[c - 1].getContentAsWideString(); + } else { + isMissing = true; + joined = L""; + } + } + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } else { + joined = L""; + } + } + elements[r] = ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } +} +//============================================================================= +ArrayOf +StringJoinCellCharactersMatrixDelimiter( + const ArrayOf& A, const ArrayOf& delimiters, size_t dimension) +{ + ArrayOf* strs = (ArrayOf*)A.getDataPointer(); + ArrayOf* delims = (ArrayOf*)delimiters.getDataPointer(); + size_t nbRows = A.getRows(); + size_t nbCols = A.getColumns(); + + if (dimension == 1) { + // Join by rows - output will have nbCols columns + Dimensions dims(1, nbCols); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbCols); + ArrayOf strArray = ArrayOf(NLS_CELL_ARRAY, dims, elements); +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType c = 0; c < (ompIndexType)nbCols; c++) { + std::wstring joined; + bool isMissing = false; + for (size_t 
r = 0; r < nbRows && !isMissing; r++) { + if (r > 0) { + if (delims[(r - 1) + c * (nbRows - 1)].isRowVectorCharacterArray()) { + joined += delims[(r - 1) + c * (nbRows - 1)].getContentAsWideString(); + } else { + isMissing = true; + joined = L""; + } + } + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } else { + joined = L""; + } + } + elements[c] = ArrayOf::characterArrayConstructor(joined); + } + return strArray; + + } else /* dimension == 2 */ { + Dimensions dims(nbRows, 1); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf(NLS_CELL_ARRAY, nbRows); + ArrayOf strArray = ArrayOf(NLS_CELL_ARRAY, dims, elements); +#ifdef WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType r = 0; r < (ompIndexType)nbRows; r++) { + std::wstring joined; + bool isMissing = false; + for (size_t c = 0; c < nbCols && !isMissing; c++) { + if (c > 0) { + if (delims[r + (c - 1) * nbRows].isRowVectorCharacterArray()) { + joined += delims[r + (c - 1) * nbRows].getContentAsWideString(); + } else { + isMissing = true; + joined = L""; + } + } + if (!isMissing) { + joined += strs[r + c * nbRows].getContentAsWideString(); + } else { + joined = L""; + } + } + elements[r] = ArrayOf::characterArrayConstructor(joined); + } + return strArray; + } +} +//============================================================================= +} // namespace Nelson +//============================================================================= diff --git a/modules/string/src/cpp/StringJustify.cpp b/modules/string/src/cpp/StringJustify.cpp new file mode 100644 index 0000000000..4bc9210a3d --- /dev/null +++ b/modules/string/src/cpp/StringJustify.cpp @@ -0,0 +1,98 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#ifdef _MSC_VER +#define _SCL_SECURE_NO_WARNINGS +#endif +#define FMT_HEADER_ONLY +#include +#include +#include +#include "StringJustify.hpp" +#include "Error.hpp" +#include "i18n.hpp" +#include "nlsBuildConfig.h" +//============================================================================= +namespace Nelson { +//============================================================================= +static inline std::wstring_view +stringTrimView(const std::wstring& str) +{ + auto start = str.find_first_not_of(L' '); + if (start == std::wstring::npos) + return std::wstring_view(); + auto end = str.find_last_not_of(L' '); + return std::wstring_view(str.data() + start, end - start + 1); +} +//============================================================================= +static inline std::wstring +stringJustifyLeft(const std::wstring& str) +{ + return fmt::format(L"{:<{}}", stringTrimView(str), str.length()); +} +//============================================================================= +static inline std::wstring +stringJustifyCenter(const std::wstring& str) +{ + return fmt::format(L"{:^{}}", stringTrimView(str), str.length()); +} +//============================================================================= +static inline std::wstring +stringJustifyRight(const std::wstring& str) +{ + return fmt::format(L"{:>{}}", stringTrimView(str), str.length()); +} +//============================================================================= +ArrayOf +StringJustify(const ArrayOf& stringArrayOf, STRINGJUSTIFY style) +{ + switch (stringArrayOf.getDataClass()) { + case NLS_CHAR: { + std::wstring str = stringArrayOf.getContentAsWideString(); + switch (style) { + case STRINGJUSTIFY::NLS_JUSTIFY_CENTER: { + return 
ArrayOf::characterArrayConstructor(stringJustifyCenter(str)); + } break; + case STRINGJUSTIFY::NLS_JUSTIFY_RIGHT: { + return ArrayOf::characterArrayConstructor(stringJustifyRight(str)); + } break; + default: + case STRINGJUSTIFY::NLS_JUSTIFY_LEFT: { + return ArrayOf::characterArrayConstructor(stringJustifyLeft(str)); + } break; + } + } break; + case NLS_CELL_ARRAY: + case NLS_STRING_ARRAY: { + ArrayOf* ptr = (ArrayOf*)stringArrayOf.getDataPointer(); + ArrayOf* elements = (ArrayOf*)ArrayOf::allocateArrayOf( + stringArrayOf.getDataClass(), stringArrayOf.getElementCount()); + ArrayOf cell + = ArrayOf(stringArrayOf.getDataClass(), stringArrayOf.getDimensions(), elements); +#if WITH_OPENMP +#pragma omp parallel for +#endif + for (ompIndexType k = 0; k < (ompIndexType)stringArrayOf.getElementCount(); ++k) { + if (ptr[k].isRowVectorCharacterArray()) { + elements[k] = StringJustify(ptr[k], style); + } else { + elements[k] = ArrayOf::doubleConstructor(std::nan("NaN")); + } + } + return cell; + } break; + default: { + Error(_W("Type not managed.")); + } break; + } + return {}; +} +//============================================================================= +}; +//============================================================================= diff --git a/modules/string/src/include/StringJoin.hpp b/modules/string/src/include/StringJoin.hpp new file mode 100644 index 0000000000..1cf7603f31 --- /dev/null +++ b/modules/string/src/include/StringJoin.hpp @@ -0,0 +1,21 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include "ArrayOf.hpp" +#include "nlsString_exports.h" +//============================================================================= +namespace Nelson { +//============================================================================= +NLSSTRING_IMPEXP ArrayOf +StringJoin(const ArrayOf& A, const ArrayOf& delimiter, size_t dimension); +//============================================================================= +} +//============================================================================= diff --git a/modules/string/src/include/StringJustify.hpp b/modules/string/src/include/StringJustify.hpp new file mode 100644 index 0000000000..fd79bc2bad --- /dev/null +++ b/modules/string/src/include/StringJustify.hpp @@ -0,0 +1,28 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include "ArrayOf.hpp" +#include "nlsString_exports.h" +//============================================================================= +namespace Nelson { +//============================================================================= +enum STRINGJUSTIFY +{ + NLS_JUSTIFY_LEFT = 0, + NLS_JUSTIFY_CENTER = 1, + NLS_JUSTIFY_RIGHT = 2, +}; +//============================================================================= +NLSSTRING_IMPEXP ArrayOf +StringJustify(const ArrayOf& stringArrayOf, STRINGJUSTIFY style); +//============================================================================= +} +//============================================================================= diff --git a/modules/string/tests/bench_join.m b/modules/string/tests/bench_join.m new file mode 100644 index 0000000000..ef0c3ab34e --- /dev/null +++ b/modules/string/tests/bench_join.m @@ -0,0 +1,11 @@ +%============================================================================= +% Copyright (c) 2016-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. 
+%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +A = string(zeros(3000, 3000)); +tic(); R = join(A, ","); toc() \ No newline at end of file diff --git a/modules/string/tests/test_join.m b/modules/string/tests/test_join.m new file mode 100644 index 0000000000..d5835b7d08 --- /dev/null +++ b/modules/string/tests/test_join.m @@ -0,0 +1,230 @@ +%============================================================================= +% Copyright (c) 2016-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. +%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +assert_isequal(nargin('join'), -2); +assert_isequal(nargout('join'), 1); +%============================================================================= +R = join('addd', " "); +REF = 'addd'; +assert_isequal(R, REF); +%============================================================================= +R = join('', "ab"); +REF = ''; +assert_isequal(R, REF); +%============================================================================= +str = ["a", "b", "c"]; +R = join(str); +REF = "a b c"; +assert_isequal(R, REF); +%============================================================================= +str = ["a", "b", "c"]; +R = join(str, "-"); +REF = "a-b-c"; +assert_isequal(R, REF); +%============================================================================= +C = {'a', 'b', 'c'}; +R = join(C); +REF = {'a b c'}; +assert_isequal(R, REF); +%============================================================================= +C = {'a', 'b', 'c'}; +R = join(C, 
'+'); +REF = {'a+b+c'}; +assert_isequal(R, REF); +%============================================================================= +M = ["a" "b"; "c" "d"]; +R = join(M, "-"); +REF = ["a-b"; "c-d"]; +assert_isequal(R, REF); +%============================================================================= +M = ["a" "b"; "c" "d"]; +R = join(M, "-", 1); +REF = ["a-c" "b-d"]; +assert_isequal(R, REF); +%============================================================================= +M = ["a" "b"; "c" "d"]; +R = join(M, "-", 2); +REF = ["a-b"; "c-d"]; +assert_isequal(R, REF); +%============================================================================= +R = join(string([]), "-"); +REF = string(zeros(0, 1)); +assert_isequal(R, REF); +%============================================================================= +R = join({}, "-"); +REF = cell(0, 1); +assert_isequal(R, REF); +%============================================================================= +str = ["a", "b", "c"]; +R = join(str, [":", ";"]); +REF = "a:b;c"; +assert_isequal(R, REF); +%============================================================================= +str = ["x","y","z"; "a","b","c"]; +delimiters = [" + "," = "; " - "," = "]; +R = join(str, delimiters); +REF = ["x + y = z"; "a - b = c"]; +assert_isequal(R, REF); +%============================================================================= +M = ["x","y","z"; + "a","b","c"; + "d","e","f"; + "g","h","i"]; +L = ["A","B"; +"C","D"; +"E", "F"; +"G", "H"]; +R = join(M, L); +REF = ["xAyBz"; +"aCbDc"; +"dEeFf"; +"gGhHi"]; +assert_isequal(R, REF); +%============================================================================= +M = ["x","y","z"; + "a","b","c"; + "d","e","f"; + "g","h","i"]; +L = ["A","B"]; +R = join(M, L); +REF = ["xAyBz"; + "aAbBc"; + "dAeBf"; + "gAhBi"]; +assert_isequal(R, REF); +%============================================================================= +M = ["x","y"; + "a","b"; + "d","e"; + "g","h"]; +L = ["A";"B";"C";"D"]; +R = join(M, L); 
+REF = ["xAy"; +"aAb"; +"dAe"; +"gAh"]; +assert_isequal(R, REF); +%============================================================================= +M = [string(NaN), "b", "c"]; +R = join(M); +REF = string(NaN); +assert_isequal(R, REF); +%============================================================================= +M = [string(NaN), "b"; "c", string(NaN)]; +R = join(M); +REF = [string(NaN); string(NaN)]; +assert_isequal(R, REF); +%============================================================================= +m = ["a",string(NaN),"b"; "c", "e","d"]; +R = join(m, "-"); +REF = [string(NaN); "c-e-d"]; +assert_isequal(R, REF); +%============================================================================= +m = [string(NaN),string(NaN),string(NaN)]; +R = join(m, 1); +REF = [string(NaN), string(NaN), string(NaN)]; +assert_isequal(R, REF); +%============================================================================= +m = [string(NaN),string(NaN),string(NaN)]; +R= join(m, 2); +REF = string(NaN); +assert_isequal(R, REF); +%============================================================================= +str = ["x","y","z"; "a","b","c"]; +R = join(str, string(NaN), 1); +REF = [string(NaN),string(NaN),string(NaN)]; +assert_isequal(R, REF); +%============================================================================= +str = ["x","y","z"; "a","b","c"]; +R = join(str, string(NaN), 2); +REF = [string(NaN);string(NaN)]; +assert_isequal(R, REF); +%============================================================================= +M = ["x","y","z"; + "a","b","c"; + "d","e","f"; + "g","h","i"]; +%============================================================================= +R = join(M, "A"); +REF = ["xAyAz"; +"aAbAc"; +"dAeAf"; +"gAhAi"]; +assert_isequal(R, REF); +%============================================================================= +cmd = "R = join(M, [""A"";""B""])"; +msg = _('Invalid delimiter dimensions.'); +assert_checkerror(cmd, msg); 
+%=============================================================================
+R = join(M, ["A"; "B";"C"], 1);
+REF = [ "xAaBdCg", "yAbBeCh", "zAcBfCi"];
+assert_isequal(R, REF);
+%=============================================================================
+cmd = "R = join(M, [""A""; ""B"";""C""], 2)"; % must be stored in cmd: assert_checkerror evaluates cmd
+msg = _('Invalid delimiter dimensions.');
+assert_checkerror(cmd, msg);
+%=============================================================================
+R = join(M, ["A", "B","C"], 1);
+REF = [ "xAaBdCg", "yAbBeCh", "zAcBfCi"];
+assert_isequal(R, REF);
+%=============================================================================
+cmd = "R = join(M, [""A"", ""B"",""C""], 2)";
+msg = _('Invalid delimiter dimensions.');
+assert_checkerror(cmd, msg);
+%=============================================================================
+cmd = "R = join(M, [""A"",""B"";""C"",""D""], 1)";
+msg = _('Invalid delimiter dimensions.');
+assert_checkerror(cmd, msg);
+%=============================================================================
+cmd = "R = join(M, [""A"",""B"";""C"",""D""], 2)";
+msg = _('Invalid delimiter dimensions.');
+assert_checkerror(cmd, msg);
+%=============================================================================
+M = ["x", "y", "z", "c";
+    "a", "b", "c", "c";
+    "d", "e", "f", "c";
+    "g", "h", "i", "c"];
+L = ["5", "6", "7", "8";
+    "9", "10", "11", "12";
+    "13", "14", "15", "16"];
+R = join(M, L, 1);
+REF = ["x5a9d13g", "y6b10e14h", "z7c11f15i", "c8c12c16c"];
+assert_isequal(R, REF);
+%=============================================================================
+M = {'x', 'y', 'z', 'c';
+    'a', 'b', 'c', 'c';
+    'd', 'e', 'f', 'c';
+    'g', 'h', 'i', 'c'};
+L = {'5', '6', '7', '8';
+    '9', '10', '11', '12';
+    '13', '14', '15', '16'};
+R = join(M, L, 1);
+REF = {'x5a9d13g', 'y6b10e14h', 'z7c11f15i', 'c8c12c16c'};
+assert_isequal(R, REF);
+%=============================================================================
+str = {'x','y','z'; 'a','b','c'};
+R = join(str, 
string(NaN), 2); +REF = {'';''}; +assert_isequal(R, REF); +%============================================================================= +M = {'x','y','z'; + 'a','b','c'; + 'd','e','f'; + 'g','h','i'}; +L = ["A","B"; +"C","D"; +string(NaN), "F"; +"G", "H"]; +R = join(M, L); +REF = {'xAyBz'; +'aCbDc'; +''; +'gGhHi'}; +assert_isequal(R, REF); +%============================================================================= diff --git a/modules/table/functions/@table/disp.m b/modules/table/functions/@table/disp.m index 64d15e3147..37cc10ea5c 100644 --- a/modules/table/functions/@table/disp.m +++ b/modules/table/functions/@table/disp.m @@ -13,17 +13,110 @@ function disp(varargin) if isempty(T) return; end + numRows = height(T); + NB_LINES_TO_DISPLAY = 20; + if (numRows > NB_LINES_TO_DISPLAY) + dispTableCompact(T); + else + dispTableFull(T); + end currentFormat = format(); isLineSpacing = strcmp(currentFormat.LineSpacing, 'loose'); - + if isLineSpacing + disp(' '); + end +end +%============================================================================= +function dispTableCompact(T) + NB_LINES_BEFORE = 5; + NB_LINES_AFTER = 5; + NB_LINES_ELLIPSIS = 3; + varNames = T.Properties.VariableNames; numCols = width(T); numRows = height(T); haveRowsNames = ~isempty(T.Properties.RowNames); - % Display Header + head = T(1:NB_LINES_BEFORE, :); + tail = T(size(T, 1) - NB_LINES_AFTER + 1:size(T, 1), :); + + nbColsToDisplay = numCols + 1; + nbRowsToDisplay = NB_LINES_BEFORE + NB_LINES_AFTER + NB_LINES_ELLIPSIS + 1; + strs = string(cell(nbRowsToDisplay, nbColsToDisplay)); + % Add variable names + for j = 1:numCols + strs{1, j+1} = varNames{j}; + end + + for j = 1:numCols + for i = 1:NB_LINES_BEFORE + t = head{i,j}; + value = t; + is2d = length(size(value)); + if is2d < 3 + isString = isa(value, 'string'); + value = formattedDisplayText(value); + if isString + value = """" + value + """"; + end + value = char(value); + value = strtrim(value(1:end)); + if length(value) > 15 + strs{i + 1, 
j + 1} = sizeAsString(t); + else + strs{i + 1, j + 1} = value; + end + else + strs{i + 1, j + 1} = sizeAsString(value); + end + end + end + + for j = 1:numCols + for i = NB_LINES_BEFORE + 1:NB_LINES_BEFORE + NB_LINES_ELLIPSIS + strs{i + 1, j + 1} = ':'; + end + end + + for j = 1:numCols + for i = 1:NB_LINES_AFTER + t = tail{i,j}; + value = t; + is2d = length(size(value)); + idx = NB_LINES_BEFORE + NB_LINES_ELLIPSIS + i + 1; + if is2d < 3 + isString = isa(value, 'string'); + value = formattedDisplayText(value); + if isString + value = """" + value + """"; + end + value = char(value); + value = strtrim(value(1:end)); + if length(value) > 15 + strs{idx, j + 1} = sizeAsString(t); + else + strs{idx, j + 1} = value; + end + else + strs{idx, j + 1} = sizeAsString(value); + end + end + end + + formatTableDisplay(strs, haveRowsNames); + +end +%============================================================================= +function dispTableFull(T) + + varNames = T.Properties.VariableNames; + numCols = width(T); + numRows = height(T); + + haveRowsNames = ~isempty(T.Properties.RowNames); strs = string(cell(numRows + 1, numCols + 1)); + for j = 1:numCols for i = 1:numRows t = T{i,j}; @@ -60,6 +153,18 @@ function disp(varargin) end end + formatTableDisplay(strs, haveRowsNames) + +end +%============================================================================= +function s = sizeAsString(value) + sz = size(value); + sizeStr = sprintf('%dx', sz(1:end)); + sizeStr = sizeStr(1:end-1); + s = [sizeStr, ' ', class(value)]; +end +%============================================================================= +function formatTableDisplay(strs, haveRowsNames) numCols = size(strs, 2); % Create a new row with empty strings, matching the number of columns in A @@ -69,10 +174,9 @@ function disp(varargin) strs = [strs(1,:); newRow; newRow; strs(2:end,:)]; maxLenPerCol = max(strlength(strs), [], 1); + startIndex = 1; if haveRowsNames startIndex = 2; - else - startIndex = 1; end for j = 
startIndex:numCols @@ -82,6 +186,7 @@ function disp(varargin) % Initialize the adjusted array with the same size B = strings(size(strs)); + % Adjust each string in the array to be centered based on the column's max length of each column for j = 1:size(strs, 2) for i = 1:size(strs, 1) @@ -97,22 +202,8 @@ function disp(varargin) startIndex = 2; end for i = 1:size(B, 1) - line = blanksSeparator; - for j = startIndex:size(B, 2) - line = line + B(i, j) + blanksSeparator; - end + line = blanksSeparator + join(B(i, startIndex:end), blanksSeparator) + blanksSeparator; disp(line); end - - if isLineSpacing - disp(' '); - end -end -%============================================================================= -function s = sizeAsString(value) - sz = size(value); - sizeStr = sprintf('%dx', sz(1:end)); - sizeStr = sizeStr(1:end-1); - s = [sizeStr, ' ', class(value)]; end %============================================================================= diff --git a/modules/table/functions/@table/subsref.m b/modules/table/functions/@table/subsref.m index 71d8045c5e..58b91abb99 100644 --- a/modules/table/functions/@table/subsref.m +++ b/modules/table/functions/@table/subsref.m @@ -66,43 +66,47 @@ end %============================================================================= function R = braceSubsref(T, sref) + + if (length(sref(1).subs) ~= 2) + error(_('Unsupported subscript type. 
Brace indexing requires two subscripts (row and column).')); + end + st = struct(T); - - if (length(sref(1).subs) == 2) - rowIdx = sref(1).subs{1}; - colSub = sref(1).subs{2}; + + rowIdx = sref(1).subs{1}; + colSub = sref(1).subs{2}; - if ischar(rowIdx) || isstring(rowIdx) || iscellstr(rowIdx) - if (isscalar(rowIdx) && rowIdx == ":") - rowIdx = 1:height(T); - else - rowIdx = find(ismember(st.Properties.RowNames, rowIdx)); - if any(rowIdx == 0) - error(_('One or more row names not found.')); - end - end - elseif (isnumeric(rowIdx) || islogical(rowIdx)) - rowIdx = rowIdx(:); + if ischar(rowIdx) || isstring(rowIdx) || iscellstr(rowIdx) + if (isscalar(rowIdx) && rowIdx == ":") + rowIdx = 1:height(T); else - error(_('Invalid row subscript type.')); + rowIdx = find(ismember(st.Properties.RowNames, rowIdx)); + if any(rowIdx == 0) + error(_('One or more row names not found.')); + end end + elseif (isnumeric(rowIdx) || islogical(rowIdx)) + rowIdx = rowIdx(:); + else + error(_('Invalid row subscript type.')); + end - if (ischar(colSub) || isstring(colSub) || iscellstr(colSub)) - if (isscalar(colSub) && colSub == ":") - colSub = 1:width(T); - else - colSub = find(contains(st.Properties.VariableNames, colSub) == true); - end - elseif (isnumeric(colSub) || islogical(colSub)) - colSub = colSub(:); + if (ischar(colSub) || isstring(colSub) || iscellstr(colSub)) + if (isscalar(colSub) && colSub == ":") + colSub = 1:width(T); else - error(_('Invalid column subscript type.')); + colSub = find(contains(st.Properties.VariableNames, colSub) == true); end + elseif (isnumeric(colSub) || islogical(colSub)) + colSub = colSub(:); else - error(_('Unsupported subscript type. 
Brace indexing requires two subscripts (row and column).')); + error(_('Invalid column subscript type.')); end + numRows = length(rowIdx); + numCols = length(colSub); R = []; - for i = 1:length(colSub) + + for i = 1:numCols variable = st.Properties.VariableNames{colSub(i)}; tempdata = st.data.(variable); if isnumeric(tempdata) || islogical(tempdata) || isstring(tempdata) || ischar(tempdata) diff --git a/modules/types/src/cpp/ArrayOf_StringType.cpp b/modules/types/src/cpp/ArrayOf_StringType.cpp index 2ab7e5b2d8..1be61df111 100644 --- a/modules/types/src/cpp/ArrayOf_StringType.cpp +++ b/modules/types/src/cpp/ArrayOf_StringType.cpp @@ -90,7 +90,7 @@ ArrayOf::stringArrayConstructorAllMissing(Dimensions& dims) } //============================================================================= ArrayOf -ArrayOf::stringArrayConstructor(const stringVector& values, Dimensions& dims) +ArrayOf::stringArrayConstructor(const stringVector& values, const Dimensions& dims) { ArrayOf* elements = nullptr; size_t nbElements = dims.getElementCount(); @@ -111,7 +111,7 @@ ArrayOf::stringArrayConstructor(const stringVector& values, Dimensions& dims) } //============================================================================= ArrayOf -ArrayOf::stringArrayConstructor(const wstringVector& values, Dimensions& dims) +ArrayOf::stringArrayConstructor(const wstringVector& values, const Dimensions& dims) { ArrayOf* elements = nullptr; size_t nbElements = dims.getElementCount(); diff --git a/modules/types/src/include/ArrayOf.hpp b/modules/types/src/include/ArrayOf.hpp index adf0801460..b75f61c552 100644 --- a/modules/types/src/include/ArrayOf.hpp +++ b/modules/types/src/include/ArrayOf.hpp @@ -1392,7 +1392,7 @@ class NLSTYPES_IMPEXP ArrayOf * @return An ArrayOf object representing the string array. 
*/ static ArrayOf - stringArrayConstructor(const stringVector& values, Dimensions& dims); + stringArrayConstructor(const stringVector& values, const Dimensions& dims); /** * @brief Constructs a string array from a vector of wstrings and dimensions. @@ -1401,7 +1401,7 @@ class NLSTYPES_IMPEXP ArrayOf * @return An ArrayOf object representing the string array. */ static ArrayOf - stringArrayConstructor(const wstringVector& values, Dimensions& dims); + stringArrayConstructor(const wstringVector& values, const Dimensions& dims); /** * Converts a variable to a string array with the content diff --git a/tools/missing_help/help_ignore.txt b/tools/missing_help/help_ignore.txt index 959036d98e..d3c908b8af 100644 --- a/tools/missing_help/help_ignore.txt +++ b/tools/missing_help/help_ignore.txt @@ -1,175 +1,181 @@ -COM_class -COM_disp -COM_isvalid -MPI_Comm_disp -MPI_Comm_isvalid -QObject_delete -QObject_disp -QObject_fieldnames -QObject_invoke -QObject_ismethod -QObject_isprop -QObject_isvalid -QObject_properties -audioplayer_disp -audioplayer_ismethod -audioplayer_isprop -audioplayer_isvalid -audioplayer_play -audioplayer_playblocking -audioplayer_properties -audioplayer_resume -dllib_delete -dllib_disp -dllib_fieldnames -dllib_get -dllib_ismethod -dllib_isprop -dllib_isvalid -dlsym_disp -dlsym_fieldnames -dlsym_get -dlsym_ismethod -dlsym_isprop -dlsym_isvalid -function_handle_disp -function_handle_extraction -function_handle_fieldnames -function_handle_isequal -function_handle_isequaln -function_handle_isequalto -generic_eq_handle -handle_delete -handle_disp -handle_eq_generic -handle_eq_handle -handle_fieldnames -handle_get -handle_horzcat_handle -handle_invoke -handle_isequal -handle_isequaln -handle_isequalto -handle_ismethod -handle_isprop -handle_isvalid -handle_methods -handle_properties -handle_set -handle_vertcat_handle -libpointer_disp -libpointer_fieldnames -libpointer_get -libpointer_ismethod -libpointer_isprop -libpointer_isvalid -sparsedouble_ctranspose 
-sparsedouble_disp -sparsedouble_horzcat_sparsedouble -sparsedouble_imag -sparsedouble_real -sparsedouble_transpose -sparsedouble_uminus -sparsedouble_vertcat_sparsedouble -sparselogical_ctranspose -sparselogical_disp -sparselogical_horzcat -sparselogical_imag -sparselogical_real -sparselogical_transpose -sparselogical_uminus -sparselogical_vertcat -test_parsetags -cell_vertcat_struct -sparsedouble_abs -sparsedouble_acos -sparsedouble_asin -sparsedouble_atan -sparsedouble_ceil -sparsedouble_conj -sparsedouble_cos -sparsedouble_cosh -sparsedouble_exp -sparsedouble_expm -sparsedouble_fix -sparsedouble_floor -sparsedouble_int2str -sparsedouble_isapprox -sparsedouble_isequal -sparsedouble_isequaln -sparsedouble_isequalto -sparsedouble_isfinite -sparsedouble_isinf -sparsedouble_isnan -sparsedouble_mat2str -sparsedouble_prod -sparsedouble_round -sparsedouble_sin -sparsedouble_sinh -sparsedouble_tan -sparsedouble_tanh -sparselogical_abs -sparselogical_int2str -sparselogical_isequal -sparselogical_isequaln -sparselogical_isequalto -sparselogical_isfinite -sparselogical_isinf -sparselogical_isnan -sparselogical_mat2str -sparselogical_prod -dllib_eq_dllib -dllib_horzcat -dllib_isequal -dllib_isequaln -dllib_isequalto -dllib_ne_dllib -dllib_vertcat -dlsym_eq_dlsym -dlsym_horzcat -dlsym_isequal -dlsym_isequaln -dlsym_isequalto -dlsym_ne_dlsym -dlsym_vertcat -libpointer_eq -libpointer_horzcat -libpointer_isequal -libpointer_isequaln -libpointer_isequalto -libpointer_plus_generic -libpointer_vertcat -sparsedouble_datevec -generic_ne_handle -handle_ne_generic -handle_ne_handle -QObject_eq_QObject -QObject_eq_generic -QObject_horzcat -QObject_isequal -QObject_isequaln -QObject_isequalto -QObject_ne_QObject -QObject_ne_generic -QObject_vertcat -generic_eq_QObject -generic_ne_QObject -COM_eq_COM -COM_eq_generic -COM_horzcat_COM -COM_isequal -COM_isequaln -COM_isequalto -COM_ne_COM -COM_ne_generic -COM_vertcat_COM +@AfterAllFuture/delete +@AfterAllFuture/disp +@AfterAllFuture/display 
+@AfterAllFuture/get +@AfterEachFuture/delete +@AfterEachFuture/disp +@AfterEachFuture/display +@AfterEachFuture/get +@COM/class +@COM/delete +@COM/disp +@COM/display +@COM/fieldnames +@COM/get +@COM/invoke +@COM/ismethod +@COM/isprop +@COM/isvalid +@COM/methods +@COM/set +@FevalFuture/delete +@FevalFuture/disp +@FevalFuture/display +@FevalFuture/get +@FevalQueue/delete +@FevalQueue/disp +@FevalQueue/display +@FevalQueue/get +@MException/fieldnames +@PythonEnvironment/disp +@PythonEnvironment/display +@PythonEnvironment/get +@PythonEnvironment/set +@PythonEnvironment/struct +@QObject/delete +@QObject/disp +@QObject/display +@QObject/fieldnames +@QObject/get +@QObject/invoke +@QObject/ismethod +@QObject/isprop +@QObject/isvalid +@QObject/methods +@QObject/properties +@QObject/set +@audioplayer/delete +@audioplayer/disp +@audioplayer/display +@audioplayer/fieldnames +@audioplayer/get +@audioplayer/ismethod +@audioplayer/isprop +@audioplayer/isvalid +@audioplayer/pause +@audioplayer/play +@audioplayer/playblocking +@audioplayer/properties +@audioplayer/resume +@audioplayer/set +@audioplayer/stop +@backgroundPool/delete +@backgroundPool/disp +@backgroundPool/display +@backgroundPool/fieldnames +@backgroundPool/get +@backgroundPool/struct +@class/subsasgn +@dllib/delete +@dllib/disp +@dllib/display +@dllib/fieldnames +@dllib/get +@dllib/ismethod +@dllib/isprop +@dllib/isvalid +@dlsym/delete +@dlsym/disp +@dlsym/display +@dlsym/fieldnames +@dlsym/get +@dlsym/ismethod +@dlsym/isprop +@dlsym/isvalid +@function_handle/disp +@function_handle/display +@function_handle/fieldnames +@function_handle/isequal +@function_handle/isequaln +@function_handle/isequalto +@function_handle/subsref +@graphics_object/delete +@graphics_object/disp +@graphics_object/display +@graphics_object/eq +@graphics_object/get +@graphics_object/isequal +@graphics_object/isequaln +@graphics_object/isequalto +@graphics_object/isprop +@graphics_object/properties +@graphics_object/set +@handle/delete 
+@handle/disp +@handle/display +@handle/eq +@handle/fieldnames +@handle/get +@handle/horzcat +@handle/invoke +@handle/isequal +@handle/isequaln +@handle/isequalto +@handle/ismethod +@handle/isprop +@handle/isvalid +@handle/methods +@handle/properties +@handle/set +@handle/vertcat +@libpointer/delete +@libpointer/disp +@libpointer/display +@libpointer/fieldnames +@libpointer/get +@libpointer/isNull +@libpointer/ismethod +@libpointer/isprop +@libpointer/isvalid +@libpointer/plus +@libpointer/reshape +@libpointer/setdatatype +@py/class +@py/disp +@py/display +@py/get +@py/invoke +@py/isequal +@py/isequalto +@sparsedouble/horzcat +@sparsedouble/imag +@sparsedouble/real +@sparsedouble/vertcat +@sparselogical/horzcat +@sparselogical/imag +@sparselogical/real +@sparselogical/vertcat +AfterAllFuture_used +AfterEachFuture_used +FevalFuture_used +FevalFuture_used +FevalQueue_used +SLICOTWrapper +backgroundPool_used +webREST +__subsref__ +mfilename +test_runfile +SEEK_CUR +SEEK_END +SEEK_SET +dos +stderr +stdin +stdout +endfunction +history_manager COM_xlsformat -generic_eq_COM -generic_ne_COM -MPI_Comm_horzcat -MPI_Comm_ismethod -MPI_Comm_isprop -MPI_Comm_vertcat -audioplayer_horzcat -audioplayer_vertcat -webREST \ No newline at end of file +nig_header_license +nig_version +test_parsetags +qhelpgenerator +qcollectiongenerator +nmm_build_dependencies +case +catch +else +elseif +otherwise +parfor +return From bbffaf99950bddf8078c98ce3d107feaa664e905 Mon Sep 17 00:00:00 2001 From: Allan CORNET Date: Sun, 8 Dec 2024 16:55:08 +0100 Subject: [PATCH 3/4] readmatrix function --- .../builtin/c/nlsSpreadsheet_builtin.vcxproj | 2 + .../c/nlsSpreadsheet_builtin.vcxproj.filters | 6 + modules/spreadsheet/builtin/cpp/Gateway.cpp | 3 +- .../builtin/cpp/readmatrixBuiltin.cpp | 123 ++++++++ .../builtin/include/readmatrixBuiltin.hpp | 20 ++ .../spreadsheet/src/c/nlsSpreadsheet.vcxproj | 6 + .../src/c/nlsSpreadsheet.vcxproj.filters | 18 ++ .../spreadsheet/src/cpp/CSVTypeConverters.cpp | 243 
++++++++++++++++ .../spreadsheet/src/cpp/CSVTypeConverters.hpp | 61 ++++ .../src/cpp/DetectImportOptions.cpp | 20 +- modules/spreadsheet/src/cpp/ReadCell.cpp | 197 ++----------- .../spreadsheet/src/cpp/ReadLinesFromFile.cpp | 54 ++++ .../spreadsheet/src/cpp/ReadLinesFromFile.hpp | 21 ++ modules/spreadsheet/src/cpp/ReadMatrix.cpp | 270 ++++++++++++++++++ modules/spreadsheet/src/cpp/ReadTable.cpp | 144 +--------- .../spreadsheet/src/include/ReadMatrix.hpp | 21 ++ modules/spreadsheet/tests/readmatrix_1.csv | 5 + modules/spreadsheet/tests/readmatrix_2.csv | 5 + modules/spreadsheet/tests/readmatrix_3.csv | 6 + modules/spreadsheet/tests/readmatrix_4.csv | 6 + modules/spreadsheet/tests/readmatrix_5.csv | 6 + modules/spreadsheet/tests/readmatrix_6.csv | 2 + modules/spreadsheet/tests/readmatrix_7.csv | 2 + modules/spreadsheet/tests/test_readmatrix.m | 64 +++++ 24 files changed, 992 insertions(+), 313 deletions(-) create mode 100644 modules/spreadsheet/builtin/cpp/readmatrixBuiltin.cpp create mode 100644 modules/spreadsheet/builtin/include/readmatrixBuiltin.hpp create mode 100644 modules/spreadsheet/src/cpp/CSVTypeConverters.cpp create mode 100644 modules/spreadsheet/src/cpp/CSVTypeConverters.hpp create mode 100644 modules/spreadsheet/src/cpp/ReadLinesFromFile.cpp create mode 100644 modules/spreadsheet/src/cpp/ReadLinesFromFile.hpp create mode 100644 modules/spreadsheet/src/cpp/ReadMatrix.cpp create mode 100644 modules/spreadsheet/src/include/ReadMatrix.hpp create mode 100644 modules/spreadsheet/tests/readmatrix_1.csv create mode 100644 modules/spreadsheet/tests/readmatrix_2.csv create mode 100644 modules/spreadsheet/tests/readmatrix_3.csv create mode 100644 modules/spreadsheet/tests/readmatrix_4.csv create mode 100644 modules/spreadsheet/tests/readmatrix_5.csv create mode 100644 modules/spreadsheet/tests/readmatrix_6.csv create mode 100644 modules/spreadsheet/tests/readmatrix_7.csv create mode 100644 modules/spreadsheet/tests/test_readmatrix.m diff --git 
a/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj b/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj index f324f4789e..be99a388a9 100644 --- a/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj +++ b/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj @@ -218,6 +218,7 @@ + @@ -227,6 +228,7 @@ + diff --git a/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj.filters b/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj.filters index 93c863e34c..e6effe7ea4 100644 --- a/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj.filters +++ b/modules/spreadsheet/builtin/c/nlsSpreadsheet_builtin.vcxproj.filters @@ -53,6 +53,9 @@ Source Files + + Source Files + @@ -76,5 +79,8 @@ Header Files + + Header Files + \ No newline at end of file diff --git a/modules/spreadsheet/builtin/cpp/Gateway.cpp b/modules/spreadsheet/builtin/cpp/Gateway.cpp index 837820ddf4..e1b18eb376 100644 --- a/modules/spreadsheet/builtin/cpp/Gateway.cpp +++ b/modules/spreadsheet/builtin/cpp/Gateway.cpp @@ -9,6 +9,7 @@ //============================================================================= #include "NelsonGateway.hpp" #include "readcellBuiltin.hpp" +#include "readmatrixBuiltin.hpp" #include "readtableBuiltin.hpp" #include "dlmreadBuiltin.hpp" #include "dlmwriteBuiltin.hpp" @@ -21,6 +22,7 @@ const std::wstring gatewayName = L"spreadsheet"; //============================================================================= static const nlsGateway gateway[] = { { "readcell", (ptrBuiltin)Nelson::SpreadsheetGateway::readcellBuiltin, 1, 1 }, + { "readmatrix", (ptrBuiltin)Nelson::SpreadsheetGateway::readmatrixBuiltin, 1, 1 }, { "readtable", (ptrBuiltin)Nelson::SpreadsheetGateway::readtableBuiltin, 1, 1 }, { "dlmread", (ptrBuiltin)Nelson::SpreadsheetGateway::dlmreadBuiltin, 1, 4 }, { "dlmwrite", (ptrBuiltin)Nelson::SpreadsheetGateway::dlmwriteBuiltin, 0, -3, @@ -28,7 +30,6 @@ static const nlsGateway gateway[] = { { "writetable", 
(ptrBuiltin)Nelson::SpreadsheetGateway::writetableBuiltin, 0, 4 }, { "detectImportOptions", (ptrBuiltin)Nelson::SpreadsheetGateway::detectImportOptionsBuiltin, 1, -1 }, - }; //============================================================================= NLSGATEWAYFUNC(gateway) diff --git a/modules/spreadsheet/builtin/cpp/readmatrixBuiltin.cpp b/modules/spreadsheet/builtin/cpp/readmatrixBuiltin.cpp new file mode 100644 index 0000000000..fa396aec26 --- /dev/null +++ b/modules/spreadsheet/builtin/cpp/readmatrixBuiltin.cpp @@ -0,0 +1,123 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. +//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#include +#include "readmatrixBuiltin.hpp" +#include "Error.hpp" +#include "InputOutputArgumentsCheckers.hpp" +#include "ReadMatrix.hpp" +#include "DetectImportOptions.hpp" +//============================================================================= +using namespace Nelson; +//============================================================================= +static NelsonType +convertToNelsonType(const std::wstring& typeStr); +static void +populateImportOptions(const ArrayOf& importOptionsObj, detectImportOptions& options); +//============================================================================= +ArrayOfVector +Nelson::SpreadsheetGateway::readmatrixBuiltin(int nLhs, const ArrayOfVector& argIn) +{ + ArrayOfVector retval; + nargoutcheck(nLhs, 0, 1); + nargincheck(argIn, 1, 4); + std::wstring filename = argIn[0].getContentAsWideString(); + std::string errorMessage; + detectImportOptions options; + NelsonType OutputType = 
NLS_DOUBLE; + initializeDetectImportOptions(options); + + switch (argIn.size()) { + case 1: { + // readmatrix(filename) + analyzeFileFormatImportOptions(filename, 4096, options, errorMessage); + options.CommentStyle.clear(); + if (!errorMessage.empty()) { + Error(errorMessage); + } + } break; + case 2: { + // readmatrix(filename, options) + if (argIn[1].isClassType() && argIn[1].getClassType() == "DelimitedTextImportOptions") { + populateImportOptions(argIn[1], options); + } else { + Error(_W("Import options object expected.")); + } + } break; + case 3: { + // readmatrix(filename, 'fieldname', fieldvalue) + std::wstring fieldname = argIn[1].getContentAsWideString(); + if (fieldname != L"OutputType") { + Error(_W("OutputType name expected.")); + } + OutputType = convertToNelsonType(argIn[2].getContentAsWideString()); + analyzeFileFormatImportOptions(filename, 4096, options, errorMessage); + options.CommentStyle.clear(); + if (!errorMessage.empty()) { + Error(errorMessage); + } + } break; + case 4: { + // readmatrix(filename, options, 'fieldname', fieldvalue) + if (argIn[1].isClassType() && argIn[1].getClassType() == "DelimitedTextImportOptions") { + populateImportOptions(argIn[1], options); + } else { + Error(_W("Import options object expected.")); + } + std::wstring fieldname = argIn[2].getContentAsWideString(); + if (fieldname != L"OutputType") { + Error(_W("OutputType name expected.")); + } + OutputType = convertToNelsonType(argIn[3].getContentAsWideString()); + } break; + default: { + } break; + } + + retval << ReadMatrix(filename, options, OutputType, errorMessage); + if (!errorMessage.empty()) { + Error(errorMessage); + } + return retval; +} +//============================================================================= +NelsonType +convertToNelsonType(const std::wstring& typeStr) +{ + static const std::map typeMap + = { { L"double", NLS_DOUBLE }, { L"single", NLS_SINGLE }, { L"string", NLS_STRING_ARRAY }, + { L"char", NLS_CELL_ARRAY }, { L"int8", NLS_INT8 }, 
{ L"int16", NLS_INT16 }, + { L"int32", NLS_INT32 }, { L"int64", NLS_INT64 }, { L"uint8", NLS_UINT8 }, + { L"uint16", NLS_UINT16 }, { L"uint32", NLS_UINT32 }, { L"uint64", NLS_UINT64 } }; + + auto it = typeMap.find(typeStr); + if (it != typeMap.end()) { + return it->second; + } + + Error(_W("Unsupported type.")); + return NLS_DOUBLE; +} +//============================================================================= +void +populateImportOptions(const ArrayOf& importOptionsObj, detectImportOptions& options) +{ + options.Delimiter = importOptionsObj.getField("Delimiter").getContentAsCStringRowVector(); + options.LineEnding = importOptionsObj.getField("LineEnding").getContentAsCStringRowVector(); + options.CommentStyle = importOptionsObj.getField("CommentStyle").getContentAsCStringRowVector(); + options.EmptyLineRule = importOptionsObj.getField("EmptyLineRule").getContentAsCString(); + options.VariableNamesLine + = (int)importOptionsObj.getField("VariableNamesLine").getContentAsDoubleScalar(); + options.VariableNames + = importOptionsObj.getField("VariableNames").getContentAsCStringRowVector(); + options.RowNamesColumn + = (int)importOptionsObj.getField("RowNamesColumn").getContentAsDoubleScalar(); + options.DataLines = importOptionsObj.getField("DataLines").getContentAsDoubleVector(); +} +//============================================================================= diff --git a/modules/spreadsheet/builtin/include/readmatrixBuiltin.hpp b/modules/spreadsheet/builtin/include/readmatrixBuiltin.hpp new file mode 100644 index 0000000000..ce78d66679 --- /dev/null +++ b/modules/spreadsheet/builtin/include/readmatrixBuiltin.hpp @@ -0,0 +1,20 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include "ArrayOf.hpp" +//============================================================================= +namespace Nelson::SpreadsheetGateway { +//============================================================================= +ArrayOfVector +readmatrixBuiltin(int nLhs, const ArrayOfVector& argIn); +//============================================================================= +} // namespace Nelson +//============================================================================= diff --git a/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj b/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj index 75a63c8662..6fc04a47f5 100644 --- a/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj +++ b/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj @@ -208,11 +208,14 @@ + + + @@ -221,8 +224,10 @@ + + @@ -231,6 +236,7 @@ + diff --git a/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj.filters b/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj.filters index 61bc5d33b9..ab4357d02d 100644 --- a/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj.filters +++ b/modules/spreadsheet/src/c/nlsSpreadsheet.vcxproj.filters @@ -62,6 +62,15 @@ Source Files + + Source Files + + + Source Files + + + Source Files + @@ -106,5 +115,14 @@ Header Files + + Header Files + + + Header Files + + + Header Files + \ No newline at end of file diff --git a/modules/spreadsheet/src/cpp/CSVTypeConverters.cpp b/modules/spreadsheet/src/cpp/CSVTypeConverters.cpp new file mode 100644 index 0000000000..e4ef415552 --- /dev/null +++ b/modules/spreadsheet/src/cpp/CSVTypeConverters.cpp @@ -0,0 +1,243 @@ 
+//=============================================================================
+// Copyright (c) 2016-present Allan CORNET (Nelson)
+//=============================================================================
+// This file is part of Nelson.
+//=============================================================================
+// LICENCE_BLOCK_BEGIN
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// LICENCE_BLOCK_END
+//=============================================================================
+#include "CSVTypeConverters.hpp"
+#include <fast_float/fast_float.h>
+#include <regex>
+#include <cmath>
+#include <limits>
+//=============================================================================
+namespace Nelson {
+//=============================================================================
+// Parses pStr as a real number; Val becomes NaN when the text is not numeric.
+void
+ConvertStringToDouble(const std::string& pStr, double& Val)
+{
+    if (ConvertToDouble(pStr, Val)) {
+        return;
+    }
+    Val = std::nan("NaN");
+}
+//=============================================================================
+// Same as ConvertStringToDouble, narrowed to single precision.
+void
+ConvertStringToSingle(const std::string& pStr, single& Val)
+{
+    double value = 0.;
+    if (ConvertToDouble(pStr, value)) {
+        Val = (single)value;
+        return;
+    }
+    Val = (single)std::nan("NaN");
+}
+//=============================================================================
+// Tries, in order: complex text ("a+bi", "bi"), then plain real text (imaginary
+// part 0). Anything else yields NaN + 0i.
+void
+ConvertStringToDoubleComplex(const std::string& pStr, std::complex<double>& pVal)
+{
+    if (ConvertToDoubleComplex(pStr, pVal)) {
+        return;
+    }
+    double value = 0.;
+    if (ConvertToDouble(pStr, value)) {
+        pVal.real(value);
+        pVal.imag(0.);
+        return;
+    }
+    pVal.real(std::nan("NaN"));
+    pVal.imag(0.);
+}
+//=============================================================================
+// Same as ConvertStringToDoubleComplex, narrowed to single precision.
+void
+ConvertStringToSingleComplex(const std::string& pStr, std::complex<single>& Val)
+{
+    std::complex<double> value;
+    ConvertStringToDoubleComplex(pStr, value);
+    Val = (std::complex<single>)value;
+}
+//=============================================================================
+// Parses pStr as a double and converts it to the integer type T with saturation:
+//  - text that is not a number, or NaN -> numeric_limits<T>::min()
+//  - values below/above the range of T (including -Inf/+Inf) -> min()/max()
+//  - everything else is truncated toward zero.
+template <typename T>
+void
+ConvertStringToInteger(const std::string& pStr, T& Val)
+{
+    double dval = 0.;
+    if (!ConvertToDouble(pStr, dval) || std::isnan(dval)) {
+        Val = std::numeric_limits<T>::min();
+        return;
+    }
+    if (dval < static_cast<double>(std::numeric_limits<T>::min())) {
+        Val = std::numeric_limits<T>::min();
+        return;
+    }
+    // ">=" and not ">": for 64-bit types max() is not representable as a double and
+    // rounds up to 2^63 / 2^64. A value equal to that bound is already out of range
+    // for T, and converting it with static_cast would be undefined behaviour.
+    if (dval >= static_cast<double>(std::numeric_limits<T>::max())) {
+        Val = std::numeric_limits<T>::max();
+        return;
+    }
+    Val = static_cast<T>(dval);
+}
+//=============================================================================
+void
+ConvertStringToInt8(const std::string& pStr, int8& Val)
+{
+    ConvertStringToInteger<int8>(pStr, Val);
+}
+//=============================================================================
+void
+ConvertStringToInt16(const std::string& pStr, int16& Val)
+{
+    ConvertStringToInteger<int16>(pStr, Val);
+}
+//=============================================================================
+void
+ConvertStringToInt32(const std::string& pStr, int32& Val)
+{
+    ConvertStringToInteger<int32>(pStr, Val);
+}
+//=============================================================================
+void
+ConvertStringToInt64(const std::string& pStr, int64& Val)
+{
+    ConvertStringToInteger<int64>(pStr, Val);
+}
+//=============================================================================
+void
+ConvertStringToUInt8(const std::string& pStr, uint8& Val)
+{
+    ConvertStringToInteger<uint8>(pStr, Val);
+}
+//=============================================================================
+void
+ConvertStringToUInt16(const std::string& pStr, uint16& Val)
+{
+    ConvertStringToInteger<uint16>(pStr, Val);
+}
+//=============================================================================
+void
+ConvertStringToUInt32(const std::string& pStr, uint32& Val)
+{
+    ConvertStringToInteger<uint32>(pStr, Val);
+}
+//=============================================================================
+void
+ConvertStringToUInt64(const std::string& pStr, uint64& Val)
+{
+    ConvertStringToInteger<uint64>(pStr, Val);
+}
+//=============================================================================
+// File-local helpers: kept in an unnamed namespace so they cannot clash with a
+// same-named type defined in another translation unit.
+namespace {
+    //=========================================================================
+    struct ComplexPatterns
+    {
+        // Regex for special values (Inf, NaN)
+        static inline const std::string special_re = R"((?:[Nn][Aa][Nn]|[Ii][Nn][Ff]))";
+
+        // "<real><sign><imag>i": group 1 is the real part, group 2 the signed imaginary part
+        static inline const std::regex full_complex { R"(([+-]?(?:\d*\.?\d+|)" + special_re
+                + R"())([+-](?:\d*\.?\d+|)" + special_re + R"())[ij])",
+            std::regex::optimize };
+        // "<imag>i": group 1 is the (optionally signed) imaginary part
+        static inline const std::regex imag_only {
+            R"(([+-]?(?:\d*\.?\d+|)" + special_re + R"())[ij])", std::regex::optimize
+        };
+    };
+    //=========================================================================
+    // Parses one component captured by ComplexPatterns. An explicit sign is removed
+    // before parsing and re-applied afterwards; value is untouched on failure.
+    bool
+    parseSignedComponent(std::string text, double& value)
+    {
+        bool negative = false;
+        if (!text.empty() && (text.front() == '+' || text.front() == '-')) {
+            negative = (text.front() == '-');
+            text.erase(0, 1);
+        }
+        double magnitude = 0.;
+        if (!ConvertToDouble(text, magnitude)) {
+            return false;
+        }
+        value = negative ? -magnitude : magnitude;
+        return true;
+    }
+    //=========================================================================
+} // namespace
+//=============================================================================
+// Strict real-number parser (fast_float, Fortran format enabled).
+// Returns true only when the WHOLE string is consumed; pVal is meaningful only
+// in that case. One leading '+' is tolerated because fast_float rejects it.
+bool
+ConvertToDouble(const std::string& pStr, double& pVal)
+{
+    fast_float::parse_options options { fast_float::chars_format::fortran };
+
+    const char* first = pStr.data();
+    const char* last = pStr.data() + pStr.size();
+    if (!pStr.empty() && pStr.front() == '+') {
+        first += 1;
+        // fast_float accepts a leading '-': without this check "+-5" would be
+        // read as -5 once the '+' has been skipped.
+        if (first != last && (*first == '-' || *first == '+')) {
+            return false;
+        }
+    }
+
+    auto answer = fast_float::from_chars_advanced(first, last, pVal, options);
+
+    return answer.ec == std::errc() && answer.ptr == last;
+}
+//=============================================================================
+// Parses "a+bi" / "a-bj" / "bi" style text. Returns false (pVal untouched) for
+// anything else, including plain real numbers.
+bool
+ConvertToDoubleComplex(const std::string& str, std::complex<double>& pVal)
+{
+    if (str.empty()) {
+        return false;
+    }
+    // Cheap pre-filter before running the regexes. Only lower-case suffixes can
+    // succeed: the patterns above end in [ij], so "1+2I" was never accepted.
+    const char lastChar = str.back();
+    if (lastChar != 'i' && lastChar != 'j') {
+        return false;
+    }
+
+    std::smatch matches;
+    if (std::regex_match(str, matches, ComplexPatterns::full_complex)) {
+        double realPart = 0.;
+        double imagPart = 0.;
+        // ConvertToDouble copes with the sign of the real part by itself.
+        if (!ConvertToDouble(matches[1].str(), realPart)) {
+            return false;
+        }
+        if (!parseSignedComponent(matches[2].str(), imagPart)) {
+            return false;
+        }
+        pVal = { realPart, imagPart };
+        return true;
+    }
+    if (std::regex_match(str, matches, ComplexPatterns::imag_only)) {
+        double imagPart = 0.;
+        if (!parseSignedComponent(matches[1].str(), imagPart)) {
+            return false;
+        }
+        pVal = { 0., imagPart };
+        return true;
+    }
+    return false;
+}
+//=============================================================================
+}
+//=============================================================================
diff --git a/modules/spreadsheet/src/cpp/CSVTypeConverters.hpp b/modules/spreadsheet/src/cpp/CSVTypeConverters.hpp
new file mode 100644
index 0000000000..67888755bf
--- /dev/null
+++ b/modules/spreadsheet/src/cpp/CSVTypeConverters.hpp
@@ -0,0 +1,61 @@
+//=============================================================================
+// Copyright (c) 2016-present Allan CORNET (Nelson)
+//=============================================================================
+// This file is part of the Nelson.
+//=============================================================================
+// LICENCE_BLOCK_BEGIN
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// LICENCE_BLOCK_END
+//=============================================================================
+#pragma once
+//=============================================================================
+// NOTE(review): the targets of the three #include directives below were lost when
+// this patch was extracted - restore them from the repository.
+#include
+#include
+#include
+//=============================================================================
+namespace Nelson {
+//=============================================================================
+// Strict parser: returns true only if the whole of pStr is a real number (one
+// leading '+' is tolerated). pVal is meaningful only when true is returned.
+bool
+ConvertToDouble(const std::string& pStr, double& pVal);
+//=============================================================================
+// Strict parser for text ending in 'i' or 'j' ("a+bi", "bi"). Returns false for
+// anything else, plain real numbers included.
+bool
+ConvertToDoubleComplex(const std::string& str, std::complex& pVal);
+//=============================================================================
+// The ConvertStringToXXX functions below never fail: they are the per-cell
+// converters and substitute a fallback value when the text is not numeric.
+//=============================================================================
+// Fallback: NaN.
+void
+ConvertStringToDouble(const std::string& pStr, double& Val);
+//=============================================================================
+// Fallback: NaN.
+void
+ConvertStringToSingle(const std::string& pStr, single& Val);
+//=============================================================================
+// Accepts complex or real text. Fallback: NaN + 0i.
+void
+ConvertStringToDoubleComplex(const std::string& pStr, std::complex& Val);
+//=============================================================================
+// Single precision variant of ConvertStringToDoubleComplex.
+void
+ConvertStringToSingleComplex(const std::string& pStr, std::complex & Val);
+//=============================================================================
+// Integer converters: the text is parsed as a double, then saturated to the range
+// of the destination type. Fallback (not numeric, or NaN): smallest value of the type.
+void
+ConvertStringToInt8(const std::string& pStr, int8& Val);
+//=============================================================================
+void
+ConvertStringToInt16(const std::string& pStr, int16& Val);
+//=============================================================================
+void
+ConvertStringToInt32(const std::string& pStr, int32& Val);
+//=============================================================================
+void
+ConvertStringToInt64(const std::string& pStr, int64& Val);
+//============================================================================= +void +ConvertStringToUInt8(const std::string& pStr, uint8& Val); +//============================================================================= +void +ConvertStringToUInt16(const std::string& pStr, uint16& Val); +//============================================================================= +void +ConvertStringToUInt32(const std::string& pStr, uint32& Val); +//============================================================================= +void +ConvertStringToUInt64(const std::string& pStr, uint64& Val); +//============================================================================= +} +//============================================================================= diff --git a/modules/spreadsheet/src/cpp/DetectImportOptions.cpp b/modules/spreadsheet/src/cpp/DetectImportOptions.cpp index 28295ad657..85c74ae3a1 100644 --- a/modules/spreadsheet/src/cpp/DetectImportOptions.cpp +++ b/modules/spreadsheet/src/cpp/DetectImportOptions.cpp @@ -15,12 +15,13 @@ #include "DetectImportOptions.hpp" #include "i18n.hpp" #include "characters_encoding.hpp" +#include //============================================================================= namespace Nelson { //============================================================================= // Constants namespace { - const stringVector DELIMITERS = { ",", "\t", ";", "|" }; + const stringVector DELIMITERS = { ",", "\t", ";", "|", " " }; const stringVector LINE_ENDINGS = { "\r\n", "\n", "\r" }; const stringVector COMMENT_STYLES = { "%", "#", "//", "--" }; const size_t MIN_HEADER_SAMPLE = 5; @@ -358,12 +359,21 @@ isNumeric(const std::string& str) { if (str.empty()) return false; + fast_float::parse_options options { fast_float::chars_format::fortran }; - std::istringstream iss(str); - double value; - iss >> std::noskipws >> value; + double val; + const char* first = str.data(); + const char* last = str.data() + str.size(); + if (!str.empty() && 
str.front() == '+') { + first += 1; + } + + auto answer = fast_float::from_chars_advanced(first, last, val, options); - return iss.eof() && !iss.fail(); + if (answer.ec != std::errc() || answer.ptr != last) { + return false; + } + return true; } //============================================================================= static bool diff --git a/modules/spreadsheet/src/cpp/ReadCell.cpp b/modules/spreadsheet/src/cpp/ReadCell.cpp index 293172e60e..a1cdb58320 100644 --- a/modules/spreadsheet/src/cpp/ReadCell.cpp +++ b/modules/spreadsheet/src/cpp/ReadCell.cpp @@ -14,189 +14,54 @@ #include "ReadCell.hpp" #include "characters_encoding.hpp" #include "nlsBuildConfig.h" +#include "CSVTypeConverters.hpp" +#include "ReadLinesFromFile.hpp" //============================================================================= namespace Nelson { //============================================================================= -struct ComplexPatterns -{ - // Regex for special values (Inf, NaN) - static inline const std::string special_re = R"((?:[Nn][Aa][Nn]|[Ii][Nn][Ff]))"; - - // Full regex patterns combining numbers and special values - static inline const std::regex full_complex { R"(([+-]?(?:\d*\.?\d+|)" + special_re - + R"())([+-](?:\d*\.?\d+|)" + special_re + R"())[ij])", - std::regex::optimize }; - static inline const std::regex real_only { - R"(([+-]?(?:\d*\.?\d+|)" + special_re + R"())(?![ij]))", std::regex::optimize - }; - static inline const std::regex imag_only { R"(([+-]?(?:\d*\.?\d+|)" + special_re + R"())[ij])", - std::regex::optimize }; -}; -//============================================================================= -static bool -ConvertToDouble(const std::string& pStr, double& pVal) -{ - fast_float::parse_options options { fast_float::chars_format::fortran }; - - const char* first = pStr.data(); - const char* last = pStr.data() + pStr.size(); - if (!pStr.empty() && pStr.front() == '+') { - first += 1; - } - - auto answer = fast_float::from_chars_advanced(first, 
last, pVal, options); - - if (answer.ec != std::errc() || answer.ptr != last) { - return false; - } - return true; -} -//============================================================================= -static bool -ConvertToDoubleComplex(const std::string& str, std::complex& pVal) -{ - char lastChar = '\0'; - if (!str.empty()) { - lastChar = str.back(); - } - if ((lastChar != '\0') && lastChar == 'I' || lastChar == 'J' || lastChar == 'i' - || lastChar == 'j') { - std::smatch matches; - if (std::regex_match(str, matches, ComplexPatterns::full_complex)) { - bool isNegativeReal = false; - bool isNegativeImag = false; - std::string realStr = matches[1].str(); - std::string imagStr = matches[2].str(); - if (imagStr.front() == L'+' || imagStr.front() == L'-') { - if (imagStr.front() == L'-') { - isNegativeImag = true; - } - imagStr.erase(0, 1); - } - - double realPart, imagPart; - - bool res = ConvertToDouble(realStr, realPart); - if (!res) { - return res; - } - res = ConvertToDouble(imagStr, imagPart); - if (!res) { - return res; - } - if (isNegativeReal) { - realPart = -realPart; - } - if (isNegativeImag) { - imagPart = -imagPart; - } - pVal = { realPart, imagPart }; - return true; - } else if (std::regex_match(str, matches, ComplexPatterns::imag_only)) { - bool isNegativeImag = false; - std::string imagStr = matches[1].str(); - if (imagStr.front() == L'+' || imagStr.front() == L'-') { - if (imagStr.front() == L'-') { - isNegativeImag = true; - } - imagStr.erase(0, 1); - } - - double imagPart; - bool res = ConvertToDouble(imagStr, imagPart); - if (!res) { - return false; - } - if (isNegativeImag) { - imagPart = -imagPart; - } - pVal = { 0., imagPart }; - return true; - } else { - return false; - } - } else { - double valueReal; - bool res = ConvertToDouble(str, valueReal); - if (res) { - pVal = { valueReal, 0 }; - return true; - } - } - return false; -} -//============================================================================= static void 
ConvertToArrayOfCharacter(const std::string& pStr, ArrayOf& pVal) { - std::complex value; - if (ConvertToDoubleComplex(pStr, value)) { - if (value.imag() == 0) { - pVal = ArrayOf::doubleConstructor(value.real()); - } else { - pVal = ArrayOf::dcomplexConstructor(value.real(), value.imag()); - } - } else { - if (pStr == "") { - Dimensions dims(1, 1); - pVal = ArrayOf::stringArrayConstructorAllMissing(dims); + double value; + if (ConvertToDouble(pStr, value)) { + pVal = ArrayOf::doubleConstructor(value); + return; + } + std::complex cvalue; + if (ConvertToDoubleComplex(pStr, cvalue)) { + if (cvalue.imag() == 0) { + pVal = ArrayOf::doubleConstructor(cvalue.real()); } else { - pVal = ArrayOf::characterArrayConstructor(pStr); + pVal = ArrayOf::dcomplexConstructor(cvalue.real(), cvalue.imag()); } + return; + } + if (pStr == "") { + Dimensions dims(1, 1); + pVal = ArrayOf::stringArrayConstructorAllMissing(dims); + return; } + pVal = ArrayOf::characterArrayConstructor(pStr); } //============================================================================= static void ConvertToArrayOfString(const std::string& pStr, ArrayOf& pVal) { - std::complex value; - if (ConvertToDoubleComplex(pStr, value)) { - if (value.imag() == 0) { - pVal = ArrayOf::doubleConstructor(value.real()); - } else { - pVal = ArrayOf::dcomplexConstructor(value.real(), value.imag()); - } - } else { - pVal = ArrayOf::stringArrayConstructor(pStr); + double value; + if (ConvertToDouble(pStr, value)) { + pVal = ArrayOf::doubleConstructor(value); + return; } -} -//============================================================================= -static std::stringstream -readLinesFromFile(const std::wstring& filename, const detectImportOptions& options) -{ - std::ifstream file; -#ifdef _MSC_VER - file.open(filename); -#else - file.open(wstring_to_utf8(filename)); -#endif - - std::string line; - int currentLine = 1; - std::stringstream normalizedStream; - - while (currentLine < (int)options.DataLines[0] && 
std::getline(file, line)) { - currentLine++; - } - - auto normalizeLineEnding = [](const std::string& inputLine) -> std::string { - std::string normalized = inputLine; - normalized.erase(std::remove(normalized.begin(), normalized.end(), '\r'), normalized.end()); - return normalized; - }; - - if (std::isinf(options.DataLines[1])) { - while (std::getline(file, line)) { - normalizedStream << normalizeLineEnding(line) << '\n'; - currentLine++; - } - } else { - while (currentLine <= (int)options.DataLines[1] && std::getline(file, line)) { - normalizedStream << normalizeLineEnding(line) << '\n'; - currentLine++; + std::complex cvalue; + if (ConvertToDoubleComplex(pStr, cvalue)) { + if (cvalue.imag() == 0) { + pVal = ArrayOf::doubleConstructor(cvalue.real()); + } else { + pVal = ArrayOf::dcomplexConstructor(cvalue.real(), cvalue.imag()); } + return; } - return normalizedStream; + pVal = ArrayOf::stringArrayConstructor(pStr); } //============================================================================= ArrayOf diff --git a/modules/spreadsheet/src/cpp/ReadLinesFromFile.cpp b/modules/spreadsheet/src/cpp/ReadLinesFromFile.cpp new file mode 100644 index 0000000000..7e3aafa901 --- /dev/null +++ b/modules/spreadsheet/src/cpp/ReadLinesFromFile.cpp @@ -0,0 +1,54 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. 
+//=============================================================================
+// LICENCE_BLOCK_BEGIN
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// LICENCE_BLOCK_END
+//=============================================================================
+#include <algorithm>
+#include <cmath>
+#include <fstream>
+#include <limits>
+#include <string>
+#include "ReadLinesFromFile.hpp"
+#include "characters_encoding.hpp"
+//=============================================================================
+namespace Nelson {
+//=============================================================================
+// Converts a requested line number to an integer without undefined behaviour:
+// NaN and values below 0 give 0, huge values are clamped.
+static long long
+toLineNumber(double value)
+{
+    if (!(value >= 0.)) {
+        return 0;
+    }
+    if (value >= static_cast<double>(std::numeric_limits<long long>::max())) {
+        return std::numeric_limits<long long>::max();
+    }
+    return static_cast<long long>(value);
+}
+//=============================================================================
+// Copies the lines [DataLines[0], DataLines[1]] (1-based, inclusive) of the file
+// into a stream, with every '\r' removed and each line terminated by '\n'.
+// A non-finite DataLines[1] means "up to the end of the file".
+// If the file cannot be opened the returned stream is empty.
+std::stringstream
+readLinesFromFile(const std::wstring& filename, const detectImportOptions& options)
+{
+    std::ifstream file;
+#ifdef _MSC_VER
+    file.open(filename);
+#else
+    file.open(wstring_to_utf8(filename));
+#endif
+
+    // DataLines may come from a user-supplied options object: never index past
+    // what is really there. Missing bounds default to "whole file".
+    const double firstRequested = options.DataLines.size() > 0 ? options.DataLines[0] : 1.;
+    const double lastRequested = options.DataLines.size() > 1
+        ? options.DataLines[1]
+        : std::numeric_limits<double>::infinity();
+
+    const long long firstLine = toLineNumber(firstRequested);
+    const bool readToEnd = !std::isfinite(lastRequested);
+    const long long lastLine = readToEnd ? 0 : toLineNumber(lastRequested);
+
+    std::string line;
+    long long currentLine = 1;
+
+    // Skip everything located before the first requested line.
+    while (currentLine < firstLine && std::getline(file, line)) {
+        ++currentLine;
+    }
+
+    std::stringstream normalizedStream;
+    while ((readToEnd || currentLine <= lastLine) && std::getline(file, line)) {
+        // std::getline already consumed '\n'; dropping '\r' in place normalizes
+        // "\r\n" files without copying the line.
+        // NOTE(review): a file using bare '\r' as line ending is seen as a single
+        // line by std::getline - confirm whether such files must be supported.
+        line.erase(std::remove(line.begin(), line.end(), '\r'), line.end());
+        normalizedStream << line << '\n';
+        ++currentLine;
+    }
+    return normalizedStream;
+}
+//=============================================================================
+}
+//=============================================================================
diff --git a/modules/spreadsheet/src/cpp/ReadLinesFromFile.hpp b/modules/spreadsheet/src/cpp/ReadLinesFromFile.hpp
new file mode 100644
index 0000000000..243adf4ca2
--- /dev/null
+++ b/modules/spreadsheet/src/cpp/ReadLinesFromFile.hpp
@@ -0,0 +1,21 @@
+//=============================================================================
+//
Copyright (c) 2016-present Allan CORNET (Nelson)
+//=============================================================================
+// This file is part of the Nelson.
+//=============================================================================
+// LICENCE_BLOCK_BEGIN
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// LICENCE_BLOCK_END
+//=============================================================================
+#pragma once
+//=============================================================================
+// NOTE(review): the target of the #include directive below was lost when this
+// patch was extracted - restore it from the repository.
+#include
+#include "DetectImportOptions.hpp"
+//=============================================================================
+namespace Nelson {
+//=============================================================================
+// Returns the lines of the file selected by options.DataLines ([first, last],
+// 1-based; an infinite "last" means up to the end of the file) as a stream in
+// which every '\r' has been removed and each line ends with '\n'.
+std::stringstream
+readLinesFromFile(const std::wstring& filename, const detectImportOptions& options);
+//=============================================================================
+}
+//=============================================================================
diff --git a/modules/spreadsheet/src/cpp/ReadMatrix.cpp b/modules/spreadsheet/src/cpp/ReadMatrix.cpp
new file mode 100644
index 0000000000..78920037cb
--- /dev/null
+++ b/modules/spreadsheet/src/cpp/ReadMatrix.cpp
@@ -0,0 +1,270 @@
+//=============================================================================
+// Copyright (c) 2016-present Allan CORNET (Nelson)
+//=============================================================================
+// This file is part of Nelson.
+//=============================================================================
+// LICENCE_BLOCK_BEGIN
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// LICENCE_BLOCK_END
+//=============================================================================
+// NOTE(review): the three system include targets of this file were lost when the
+// patch was extracted; <complex>, <sstream> and <rapidcsv.h> are inferred from the
+// names used below - confirm against the repository.
+#include <complex>
+#include <sstream>
+#include <rapidcsv.h>
+#include <stdexcept>
+#include <string>
+#include "ReadMatrix.hpp"
+#include "characters_encoding.hpp"
+#include "nlsBuildConfig.h"
+#if WITH_OPENMP
+#include <omp.h>
+#endif
+#include "CSVTypeConverters.hpp"
+#include "ReadLinesFromFile.hpp"
+//=============================================================================
+namespace Nelson {
+//=============================================================================
+namespace {
+    //=========================================================================
+    // Allocates a matrix of `destinationType` (element type T) with dimensions
+    // `dims` and fills its first `nbAvailableElements` entries, in column-major
+    // order, with readCell(column, row). Entries beyond that are left as allocated.
+    //
+    // An exception must not leave an OpenMP parallel region, so a failure of
+    // readCell is recorded inside the loop and re-thrown once the loop is over.
+    template <typename T, typename CellReader>
+    ArrayOf
+    fillMatrix(NelsonType destinationType, Dimensions dims, size_t nbRows,
+        ompIndexType nbAvailableElements, CellReader readCell)
+    {
+        T* ptr = static_cast<T*>(
+            ArrayOf::allocateArrayOf(destinationType, dims.getElementCount()));
+        // From here the buffer is owned by `matrix`.
+        ArrayOf matrix = ArrayOf(destinationType, dims, ptr);
+
+        bool failed = false;
+        std::string failure;
+#if WITH_OPENMP
+#pragma omp parallel for
+#endif
+        for (ompIndexType index = 0; index < nbAvailableElements; ++index) {
+            try {
+                const size_t column = static_cast<size_t>(index) / nbRows;
+                const size_t row = static_cast<size_t>(index) % nbRows;
+                ptr[index] = readCell(column, row);
+            } catch (const std::exception& e) {
+#if WITH_OPENMP
+#pragma omp critical(ReadMatrixFailure)
+#endif
+                {
+                    if (!failed) {
+                        failed = true;
+                        failure = e.what();
+                    }
+                }
+            }
+        }
+        if (failed) {
+            throw std::runtime_error(failure);
+        }
+        return matrix;
+    }
+    //=========================================================================
+    // Numeric flavour of fillMatrix: each cell goes through `converter`. T is
+    // deduced from the converter signature.
+    template <typename T>
+    ArrayOf
+    convertCells(rapidcsv::Document& doc, NelsonType destinationType, const Dimensions& dims,
+        size_t nbRows, ompIndexType nbAvailableElements,
+        void (*converter)(const std::string&, T&))
+    {
+        return fillMatrix<T>(destinationType, dims, nbRows, nbAvailableElements,
+            [&doc, converter](
+                size_t column, size_t row) { return doc.GetCell<T>(column, row, converter); });
+    }
+    //=========================================================================
+} // namespace
+//=============================================================================
+// Reads the delimited text file `filename` as one homogeneous matrix.
+//  - options: delimiter, comment style, empty line rule and data lines to use;
+//    when options.VariableNames has more entries than the file has columns, the
+//    result is widened to that number of columns.
+//  - OutputType: element type of the result. NLS_DOUBLE / NLS_SINGLE become
+//    complex when doc.IsIJLastChar() reports it; NLS_STRING_ARRAY and
+//    NLS_CELL_ARRAY hold the raw text of each cell.
+//  - errorMessage: set (and an empty ArrayOf returned) on failure.
+ArrayOf
+ReadMatrix(const std::wstring& filename, const detectImportOptions& options, NelsonType OutputType,
+    std::string& errorMessage)
+{
+    if (options.Delimiter.empty()) {
+        // Indexing Delimiter[0] below would be out of bounds.
+        errorMessage = "Delimiter expected.";
+        return {};
+    }
+    const char separator = options.Delimiter[0][0];
+    bool pHasCR = false;
+    rapidcsv::SeparatorParams separatorParams
+        = rapidcsv::SeparatorParams(separator, true, pHasCR, false, false);
+
+    rapidcsv::ConverterParams converterParams;
+    converterParams.mHasDefaultConverter = false;
+    converterParams.mNumericLocale = false;
+
+    rapidcsv::LineReaderParams lineReaderParams;
+    const bool hasCommentStyle = !options.CommentStyle.empty();
+    lineReaderParams.mSkipCommentLines = hasCommentStyle;
+    lineReaderParams.mCommentPrefix = hasCommentStyle ? options.CommentStyle[0][0] : '\0';
+    lineReaderParams.mSkipEmptyLines = options.EmptyLineRule == "skip";
+
+    // (-1, -1): the document has neither a header row nor a label column.
+    rapidcsv::LabelParams labelParams(-1, -1);
+    try {
+        std::stringstream stream = readLinesFromFile(filename, options);
+        rapidcsv::Document doc(
+            stream, labelParams, separatorParams, converterParams, lineReaderParams);
+        const size_t nbRows = doc.GetRowCount();
+        const size_t nbColumns = doc.GetColumnCount();
+        const size_t nbOutputColumns
+            = options.VariableNames.size() > nbColumns ? options.VariableNames.size() : nbColumns;
+        Dimensions dims(nbRows, nbOutputColumns);
+
+        // Only the cells really present in the file are read.
+        const ompIndexType nbAvailableElements = (ompIndexType)(nbColumns * nbRows);
+
+        auto readText = [&doc](size_t column, size_t row) {
+            return ArrayOf::characterArrayConstructor(doc.GetCell<std::string>(column, row));
+        };
+
+        switch (OutputType) {
+        case NLS_DOUBLE: {
+            if (doc.IsIJLastChar()) {
+                return convertCells<std::complex<double>>(doc, NLS_DCOMPLEX, dims, nbRows,
+                    nbAvailableElements, ConvertStringToDoubleComplex);
+            }
+            return convertCells<double>(
+                doc, NLS_DOUBLE, dims, nbRows, nbAvailableElements, ConvertStringToDouble);
+        }
+        case NLS_SINGLE: {
+            if (doc.IsIJLastChar()) {
+                return convertCells<std::complex<single>>(doc, NLS_SCOMPLEX, dims, nbRows,
+                    nbAvailableElements, ConvertStringToSingleComplex);
+            }
+            return convertCells<single>(
+                doc, NLS_SINGLE, dims, nbRows, nbAvailableElements, ConvertStringToSingle);
+        }
+        case NLS_STRING_ARRAY: {
+            return fillMatrix<ArrayOf>(
+                NLS_STRING_ARRAY, dims, nbRows, nbAvailableElements, readText);
+        }
+        case NLS_CELL_ARRAY: {
+            return fillMatrix<ArrayOf>(NLS_CELL_ARRAY, dims, nbRows, nbAvailableElements, readText);
+        }
+        case NLS_INT8: {
+            return convertCells<int8>(
+                doc, NLS_INT8, dims, nbRows, nbAvailableElements, ConvertStringToInt8);
+        }
+        case NLS_INT16: {
+            return convertCells<int16>(
+                doc, NLS_INT16, dims, nbRows, nbAvailableElements, ConvertStringToInt16);
+        }
+        case NLS_INT32: {
+            return convertCells<int32>(
+                doc, NLS_INT32, dims, nbRows, nbAvailableElements, ConvertStringToInt32);
+        }
+        case NLS_INT64: {
+            return convertCells<int64>(
+                doc, NLS_INT64, dims, nbRows, nbAvailableElements, ConvertStringToInt64);
+        }
+        case NLS_UINT8: {
+            return convertCells<uint8>(
+                doc, NLS_UINT8, dims, nbRows, nbAvailableElements, ConvertStringToUInt8);
+        }
+        case NLS_UINT16: {
+            return convertCells<uint16>(
+                doc, NLS_UINT16, dims, nbRows, nbAvailableElements, ConvertStringToUInt16);
+        }
+        case NLS_UINT32: {
+            return convertCells<uint32>(
+                doc, NLS_UINT32, dims, nbRows, nbAvailableElements, ConvertStringToUInt32);
+        }
+        case NLS_UINT64: {
+            return convertCells<uint64>(
+                doc, NLS_UINT64, dims, nbRows, nbAvailableElements, ConvertStringToUInt64);
+        }
+        default: {
+            // Do not hand back an empty value without telling the caller why.
+            errorMessage = "Unsupported type.";
+        } break;
+        }
+    } catch (const std::exception& e) {
+        errorMessage = e.what();
+    }
+    return {};
+}
+//=============================================================================
+} // namespace Nelson
+//=============================================================================
diff --git a/modules/spreadsheet/src/cpp/ReadTable.cpp b/modules/spreadsheet/src/cpp/ReadTable.cpp
index 6debdfc887..0a8381ec2d 100644
--- a/modules/spreadsheet/src/cpp/ReadTable.cpp
+++ b/modules/spreadsheet/src/cpp/ReadTable.cpp
@@ -17,6 +17,8 @@
 #if WITH_OPENMP
 #include <omp.h>
 #endif
+#include "CSVTypeConverters.hpp"
+#include "ReadLinesFromFile.hpp"
 //=============================================================================
 namespace Nelson {
 //=============================================================================
@@ -28,108 +30,6 @@ struct DoubleDoubleComplexString
     NelsonType nelsonType;
 };
 //=============================================================================
-struct ComplexPatterns
-{
-    // Regex for special values (Inf, NaN)
-    static inline const std::string special_re = R"((?:[Nn][Aa][Nn]|[Ii][Nn][Ff]))";
-
-    // Full regex patterns combining numbers and special values
-    static inline const std::regex full_complex { R"(([+-]?(?:\d*\.?\d+|)" + special_re
-            + R"())([+-](?:\d*\.?\d+|)" + special_re + R"())[ij])",
-        std::regex::optimize };
-    static inline const std::regex real_only {
-
R"(([+-]?(?:\d*\.?\d+|)" + special_re + R"())(?![ij]))", std::regex::optimize - }; - static inline const std::regex imag_only { R"(([+-]?(?:\d*\.?\d+|)" + special_re + R"())[ij])", - std::regex::optimize }; -}; -//============================================================================= -static bool -ConvertToDouble(const std::string& pStr, double& pVal) -{ - fast_float::parse_options options { fast_float::chars_format::fortran }; - - const char* first = pStr.data(); - const char* last = pStr.data() + pStr.size(); - if (!pStr.empty() && pStr.front() == '+') { - first += 1; - } - - auto answer = fast_float::from_chars_advanced(first, last, pVal, options); - - if (answer.ec != std::errc() || answer.ptr != last) { - return false; - } - return true; -} -//============================================================================= -static bool -ConvertToDoubleComplex(const std::string& str, std::complex& pVal) -{ - char lastChar = '\0'; - if (!str.empty()) { - lastChar = str.back(); - } - if ((lastChar != '\0') && lastChar == 'I' || lastChar == 'J' || lastChar == 'i' - || lastChar == 'j') { - std::smatch matches; - if (std::regex_match(str, matches, ComplexPatterns::full_complex)) { - bool isNegativeReal = false; - bool isNegativeImag = false; - std::string realStr = matches[1].str(); - std::string imagStr = matches[2].str(); - if (imagStr.front() == L'+' || imagStr.front() == L'-') { - if (imagStr.front() == L'-') { - isNegativeImag = true; - } - imagStr.erase(0, 1); - } - - double realPart, imagPart; - - bool res = ConvertToDouble(realStr, realPart); - if (!res) { - return res; - } - res = ConvertToDouble(imagStr, imagPart); - if (!res) { - return res; - } - if (isNegativeReal) { - realPart = -realPart; - } - if (isNegativeImag) { - imagPart = -imagPart; - } - pVal = { realPart, imagPart }; - return true; - } else if (std::regex_match(str, matches, ComplexPatterns::imag_only)) { - bool isNegativeImag = false; - std::string imagStr = matches[1].str(); - if 
(imagStr.front() == L'+' || imagStr.front() == L'-') { - if (imagStr.front() == L'-') { - isNegativeImag = true; - } - imagStr.erase(0, 1); - } - - double imagPart; - bool res = ConvertToDouble(imagStr, imagPart); - if (!res) { - return false; - } - if (isNegativeImag) { - imagPart = -imagPart; - } - pVal = { 0., imagPart }; - return true; - } else { - return false; - } - } - return false; -} -//============================================================================= static void ConvertToArrayOfCharacter(const std::string& pStr, struct DoubleDoubleComplexString& structValue) { @@ -153,44 +53,6 @@ ConvertToArrayOfCharacter(const std::string& pStr, struct DoubleDoubleComplexStr structValue.nelsonType = NLS_CHAR; } //============================================================================= -static std::stringstream -readLinesFromFile(const std::wstring& filename, const detectImportOptions& options) -{ - std::ifstream file; -#ifdef _MSC_VER - file.open(filename); -#else - file.open(wstring_to_utf8(filename)); -#endif - - std::string line; - int currentLine = 1; - std::stringstream normalizedStream; - - while (currentLine < (int)options.DataLines[0] && std::getline(file, line)) { - currentLine++; - } - - auto normalizeLineEnding = [](const std::string& inputLine) -> std::string { - std::string normalized = inputLine; - normalized.erase(std::remove(normalized.begin(), normalized.end(), '\r'), normalized.end()); - return normalized; - }; - - if (std::isinf(options.DataLines[1])) { - while (std::getline(file, line)) { - normalizedStream << normalizeLineEnding(line) << '\n'; - currentLine++; - } - } else { - while (currentLine <= (int)options.DataLines[1] && std::getline(file, line)) { - normalizedStream << normalizeLineEnding(line) << '\n'; - currentLine++; - } - } - return normalizedStream; -} -//============================================================================= ArrayOf ReadTable( const std::wstring& filename, const detectImportOptions& options, 
std::string& errorMessage) @@ -323,4 +185,4 @@ ReadTable( } //============================================================================= } // namespace Nelson - //============================================================================= +//============================================================================= diff --git a/modules/spreadsheet/src/include/ReadMatrix.hpp b/modules/spreadsheet/src/include/ReadMatrix.hpp new file mode 100644 index 0000000000..b25ae48ecb --- /dev/null +++ b/modules/spreadsheet/src/include/ReadMatrix.hpp @@ -0,0 +1,21 @@ +//============================================================================= +// Copyright (c) 2016-present Allan CORNET (Nelson) +//============================================================================= +// This file is part of the Nelson. +//============================================================================= +// LICENCE_BLOCK_BEGIN +// SPDX-License-Identifier: LGPL-3.0-or-later +// LICENCE_BLOCK_END +//============================================================================= +#pragma once +//============================================================================= +#include "ArrayOf.hpp" +#include "nlsSpreadsheet_exports.h" +#include "DetectImportOptions.hpp" +//============================================================================= +namespace Nelson { +NLSSPREADSHEET_IMPEXP ArrayOf +ReadMatrix(const std::wstring& filename, const detectImportOptions& options, + NelsonType OutputType, std::string& errorMessage); +}; +//============================================================================= diff --git a/modules/spreadsheet/tests/readmatrix_1.csv b/modules/spreadsheet/tests/readmatrix_1.csv new file mode 100644 index 0000000000..ec5b2a562d --- /dev/null +++ b/modules/spreadsheet/tests/readmatrix_1.csv @@ -0,0 +1,5 @@ +6,8,3,1 +5,4,7,3 +1,6,7,10 +4,2,8,2 +2,7,5,9 \ No newline at end of file diff --git a/modules/spreadsheet/tests/readmatrix_2.csv 
b/modules/spreadsheet/tests/readmatrix_2.csv new file mode 100644 index 0000000000..1baca66339 --- /dev/null +++ b/modules/spreadsheet/tests/readmatrix_2.csv @@ -0,0 +1,5 @@ +6,8,3,1 +5,4,7,3 +1,6,7,10 +4,2.4+5.6i,8,2 +2,7,5,9 \ No newline at end of file diff --git a/modules/spreadsheet/tests/readmatrix_3.csv b/modules/spreadsheet/tests/readmatrix_3.csv new file mode 100644 index 0000000000..c958cd1b08 --- /dev/null +++ b/modules/spreadsheet/tests/readmatrix_3.csv @@ -0,0 +1,6 @@ +Row,Age,Height,Weight,BloodPressure_1,BloodPressure_2 +Smith,38,71,176,124,93 +Johnson,43,69,163,109,77 +Williams,38,64,131,125,83 +Jones,40,67,133,117,75 +Brown,49,64,119,122,80 diff --git a/modules/spreadsheet/tests/readmatrix_4.csv b/modules/spreadsheet/tests/readmatrix_4.csv new file mode 100644 index 0000000000..1737bd79d0 --- /dev/null +++ b/modules/spreadsheet/tests/readmatrix_4.csv @@ -0,0 +1,6 @@ +Row,Age,Height,Weight,BloodPressure_1,BloodPressure_2 +Smith,38,71,176,124,93 +Johnson,43,69,163,109,77 +Williams,38,64,131,125,83 +Jones,40,67,133,117,75 +Brown,49,64,119i,122,80 diff --git a/modules/spreadsheet/tests/readmatrix_5.csv b/modules/spreadsheet/tests/readmatrix_5.csv new file mode 100644 index 0000000000..c958cd1b08 --- /dev/null +++ b/modules/spreadsheet/tests/readmatrix_5.csv @@ -0,0 +1,6 @@ +Row,Age,Height,Weight,BloodPressure_1,BloodPressure_2 +Smith,38,71,176,124,93 +Johnson,43,69,163,109,77 +Williams,38,64,131,125,83 +Jones,40,67,133,117,75 +Brown,49,64,119,122,80 diff --git a/modules/spreadsheet/tests/readmatrix_6.csv b/modules/spreadsheet/tests/readmatrix_6.csv new file mode 100644 index 0000000000..a2463cf24a --- /dev/null +++ b/modules/spreadsheet/tests/readmatrix_6.csv @@ -0,0 +1,2 @@ +Nan 1 Nan +-Inf Inf 4 diff --git a/modules/spreadsheet/tests/readmatrix_7.csv b/modules/spreadsheet/tests/readmatrix_7.csv new file mode 100644 index 0000000000..e9e4ac2c42 --- /dev/null +++ b/modules/spreadsheet/tests/readmatrix_7.csv @@ -0,0 +1,2 @@ +Nan 1.3i Nan +-Inf Inf -4e3 
diff --git a/modules/spreadsheet/tests/test_readmatrix.m b/modules/spreadsheet/tests/test_readmatrix.m new file mode 100644 index 0000000000..5cc57a8f76 --- /dev/null +++ b/modules/spreadsheet/tests/test_readmatrix.m @@ -0,0 +1,64 @@ +%============================================================================= +% Copyright (c) 2016-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. +%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +assert_isequal(nargin('readmatrix'), 1); +assert_isequal(nargout('readmatrix'), 1); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_1.csv']; +R = readmatrix(csv_filename); +REF = [ 6 8 3 1; +5 4 7 3; +1 6 7 10; +4 2 8 2; +2 7 5 9]; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_2.csv']; +R = readmatrix(csv_filename); +REF = [ 6.0000 + 0.0000i 8.0000 + 0.0000i 3.0000 + 0.0000i 1.0000 + 0.0000i; +5.0000 + 0.0000i 4.0000 + 0.0000i 7.0000 + 0.0000i 3.0000 + 0.0000i; +1.0000 + 0.0000i 6.0000 + 0.0000i 7.0000 + 0.0000i 10.0000 + 0.0000i; +4.0000 + 0.0000i 2.4000 + 5.6000i 8.0000 + 0.0000i 2.0000 + 0.0000i; +2.0000 + 0.0000i 7.0000 + 0.0000i 5.0000 + 0.0000i 9.0000 + 0.0000i]; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_3.csv']; +R = readmatrix(csv_filename); +REF = [NaN 38 71 176 124 93; + NaN 43 69 163 109 77; + NaN 38 64 131 125 83; + NaN 40 67 133 117 75; + NaN 49 64 119 122 80]; +assert_isequal(R, REF); 
+%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_4.csv']; +R = readmatrix(csv_filename); +REF = [complex(NaN, 0) 0.3800 + 0.0000i 0.7100 + 0.0000i 1.7600 + 0.0000i 1.2400 + 0.0000i 0.9300 + 0.0000i; +complex(NaN, 0) 0.4300 + 0.0000i 0.6900 + 0.0000i 1.6300 + 0.0000i 1.0900 + 0.0000i 0.7700 + 0.0000i; +complex(NaN, 0) 0.3800 + 0.0000i 0.6400 + 0.0000i 1.3100 + 0.0000i 1.2500 + 0.0000i 0.8300 + 0.0000i; +complex(NaN, 0) 0.4000 + 0.0000i 0.6700 + 0.0000i 1.3300 + 0.0000i 1.1700 + 0.0000i 0.7500 + 0.0000i; +complex(NaN, 0) 0.4900 + 0.0000i 0.6400 + 0.0000i 0.0000 + 1.1900i 1.2200 + 0.0000i 0.8000 + 0.0000i]; +assert_isequal(real(R)/ 1.0e+02 , real(REF)); +assert_isequal(imag(R)/ 1.0e+02 , imag(REF)); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_5.csv']; +R = readmatrix(csv_filename); +REF = [NaN 38 71 176 124 93; +NaN 43 69 163 109 77; +NaN 38 64 131 125 83; +NaN 40 67 133 117 75; +NaN 49 64 119 122 80]; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_6.csv']; +R = readmatrix(csv_filename); +REF = [ NaN 1 NaN; +-Inf Inf 4]; +assert_isequal(R, REF); +%============================================================================= From 8583a0b9d86ebc2c89bf6ab2d7a7e31ae685a1d7 Mon Sep 17 00:00:00 2001 From: Allan CORNET Date: Sun, 8 Dec 2024 19:41:01 +0100 Subject: [PATCH 4/4] readmatrix function --- CHANGELOG.md | 3 +- .../help/en_US/xml/detectImportOptions.xml | 16 +++ .../spreadsheet/help/en_US/xml/readmatrix.xml | 123 ++++++++++++++++++ .../spreadsheet/src/cpp/CSVTypeConverters.cpp | 9 +- .../spreadsheet/src/cpp/ReadLinesFromFile.cpp | 1 + .../tests/bench_write_read_matrix.m | 16 +++ modules/spreadsheet/tests/test_readmatrix.m | 57 ++++++++ 7 files changed, 221 
insertions(+), 4 deletions(-) create mode 100644 modules/spreadsheet/help/en_US/xml/readmatrix.xml create mode 100644 modules/spreadsheet/tests/bench_write_read_matrix.m diff --git a/CHANGELOG.md b/CHANGELOG.md index e2c48f9f56..4f3265ce42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,10 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `detectImportOptions`: Generate import options from the file's content. -- `readcell`: Read cell array from file. - `readtable`: Read table from file. - `writetable`: Write table to file. +- `readcell`: Read cell array from file. - `writecell`: write cell array to file. +- `readmatrix`: read matrix from file. - `writematrix`: write matrix to file. - `csvread`: Read comma-separated value (CSV) file. - `csvwrite`: Write comma-separated value (CSV) file. diff --git a/modules/spreadsheet/help/en_US/xml/detectImportOptions.xml b/modules/spreadsheet/help/en_US/xml/detectImportOptions.xml index 2d03e87a02..26b9966abd 100644 --- a/modules/spreadsheet/help/en_US/xml/detectImportOptions.xml +++ b/modules/spreadsheet/help/en_US/xml/detectImportOptions.xml @@ -35,6 +35,22 @@ >readmatrix to control how Nelson imports data as a table, cell array, or matrix.

The type of the returned options object depends on the file's extension.

+ +

+

Properties:

+

Delimiter: Field delimiter characters. example: {','}

+

LineEnding: End-of-line characters. example: {'\r\n'}

+

CommentStyle: Style of comments. example: {'#'}

+

EmptyLineRule: Procedure to handle empty lines. example: 'skip'

+

VariableNamesLine: Variable names location. example: 1

+

VariableNames: Variable names. example: {'Names' 'Age' 'Height' 'Weight'}

+

RowNamesColumn: Row names location. example: 0

+

DataLines: Data location. [l1 l2] indicates the range of lines containing the data: l1 is the first line with data and l2 is the last line. example: [2 Inf]

+ diff --git a/modules/spreadsheet/help/en_US/xml/readmatrix.xml b/modules/spreadsheet/help/en_US/xml/readmatrix.xml new file mode 100644 index 0000000000..e7133b79e5 --- /dev/null +++ b/modules/spreadsheet/help/en_US/xml/readmatrix.xml @@ -0,0 +1,123 @@ + + + SAME AS NELSON SOFTWARE + + en_US + readmatrix + Create matrix array from file. + + + M = readmatrix(filename) + M = readmatrix(filename, opts) + M = readmatrix(filename, opts, 'OutputType', type) + + + + + + filename + a string: an existing filename source. + + + + opts + DelimitedTextImportOptions object + + + + type + a string: 'double', 'single', 'char', 'string', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'. + + + + + + + M + a matrix. + + + + + +

M = readmatrix(filename) creates a matrix array by importing column-oriented data from a text or spreadsheet file.

+

M = readmatrix(filename, opts) creates a matrix array using the settings defined in the opts import options object. The import options object allows you to customize how readmatrix interprets the file, offering greater control, improved performance, and the ability to reuse the configuration compared to the default syntax.

+
+ + + + + + + + nelson + + +filename = [tempdir,'readmatrix_1.csv']; +Names = {'John'; 'Alice'; 'Bob'; 'Diana'}; +Age = [28; 34; 22; 30]; +Height = [175; 160; 180; 165]; +Weight = [70; 55; 80; 60]; +T = table(Names, Age, Height, Weight); +writetable(T, filename) +M = readmatrix(filename) + + + + + + nelson + + +filename = [tempdir,'readmatrix_2.csv']; +M = magic(6); +writematrix(M, filename) +options = detectImportOptions(filename) +options.DataLines = [2 4]; +M2 = readmatrix(filename, options, 'OutputType', 'int64') +M3 = readmatrix(filename, options, 'OutputType', 'char') + + + + + + + + + writematrix + + + detectImportOptions + + + writetable + + + readtable + + + fileread + + + + + + 1.10.0 + initial version + + + + + Allan CORNET + +
diff --git a/modules/spreadsheet/src/cpp/CSVTypeConverters.cpp b/modules/spreadsheet/src/cpp/CSVTypeConverters.cpp index e4ef415552..bcfb4041ed 100644 --- a/modules/spreadsheet/src/cpp/CSVTypeConverters.cpp +++ b/modules/spreadsheet/src/cpp/CSVTypeConverters.cpp @@ -64,7 +64,7 @@ ConvertStringToInteger(const std::string& pStr, T& Val) double dval; if (ConvertToDouble(pStr, dval)) { if (std::isnan(dval)) { - Val = std::numeric_limits::min(); + Val = static_cast(0.); return; } if (std::isinf(dval)) { @@ -86,7 +86,7 @@ ConvertStringToInteger(const std::string& pStr, T& Val) Val = static_cast(dval); return; } - Val = std::numeric_limits::min(); + Val = static_cast(0.); } //============================================================================= void @@ -156,11 +156,14 @@ struct ComplexPatterns bool ConvertToDouble(const std::string& pStr, double& pVal) { + if (pStr.empty()) { + return false; + } fast_float::parse_options options { fast_float::chars_format::fortran }; const char* first = pStr.data(); const char* last = pStr.data() + pStr.size(); - if (!pStr.empty() && pStr.front() == '+') { + if (pStr[0] == '+') { first += 1; } diff --git a/modules/spreadsheet/src/cpp/ReadLinesFromFile.cpp b/modules/spreadsheet/src/cpp/ReadLinesFromFile.cpp index 19e0f077fa..f183840869 100644 --- a/modules/spreadsheet/src/cpp/ReadLinesFromFile.cpp +++ b/modules/spreadsheet/src/cpp/ReadLinesFromFile.cpp @@ -8,6 +8,7 @@ // LICENCE_BLOCK_END //============================================================================= #include +#include #include "ReadLinesFromFile.hpp" #include "characters_encoding.hpp" //============================================================================= diff --git a/modules/spreadsheet/tests/bench_write_read_matrix.m b/modules/spreadsheet/tests/bench_write_read_matrix.m new file mode 100644 index 0000000000..9a7540d867 --- /dev/null +++ b/modules/spreadsheet/tests/bench_write_read_matrix.m @@ -0,0 +1,16 @@ 
+%============================================================================= +% Copyright (c) 2016-present Allan CORNET (Nelson) +%============================================================================= +% This file is part of the Nelson. +%============================================================================= +% LICENCE_BLOCK_BEGIN +% SPDX-License-Identifier: LGPL-3.0-or-later +% LICENCE_BLOCK_END +%============================================================================= +rng(5489); +filename = [tempdir(), 'bench_write_read_matrix.csv']; +A = rand(3000, 3000); +tic();writematrix(A, filename);toc() +options = detectImportOptions(filename); +tic();B = readmatrix(filename,options);toc() +isapprox(A, B, 1e-3) diff --git a/modules/spreadsheet/tests/test_readmatrix.m b/modules/spreadsheet/tests/test_readmatrix.m index 5cc57a8f76..0064708832 100644 --- a/modules/spreadsheet/tests/test_readmatrix.m +++ b/modules/spreadsheet/tests/test_readmatrix.m @@ -62,3 +62,60 @@ -Inf Inf 4]; assert_isequal(R, REF); %============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_7.csv']; +R = readmatrix(csv_filename); +REF = [ complex(NaN,0) 0.0000 + 0.0013i complex(NaN,0) +complex(-Inf, 0) complex(Inf,0) -4.0000 + 0.0000i] * 1e3; +assert_isequal(real(R), real(REF)); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_3.csv']; +options = detectImportOptions(csv_filename); +options.DataLines = [3 5]; +R = readmatrix(csv_filename, options); +REF = [NaN 43 69 163 109 77; +NaN 38 64 131 125 83; +NaN 40 67 133 117 75]; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_3.csv']; +options = detectImportOptions(csv_filename); +options.DataLines = [3 5]; +R = readmatrix(csv_filename, options, 
'OutputType', 'char'); +REF = {'Johnson', '43', '69', '163', '109', '77'; +'Williams', '38', '64', '131', '125', '83'; +'Jones', '40', '67', '133', '117', '75'}; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_3.csv']; +options = detectImportOptions(csv_filename); +options.DataLines = [3 5]; +R = readmatrix(csv_filename, options, 'OutputType', 'string'); +REF = ["Johnson" "43" "69" "163" "109" "77"; +"Williams" "38" "64" "131" "125" "83"; +"Jones" "40" "67" "133" "117" "75"]; +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_3.csv']; +options = detectImportOptions(csv_filename); +options.DataLines = [3 5]; +R = readmatrix(csv_filename, options, 'OutputType', 'int8'); +REF = [ 0 43 69 127 109 77; +0 38 64 127 125 83; +0 40 67 127 117 75]; +REF = int8(REF); +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_6.csv']; +R = readmatrix(csv_filename, 'OutputType', 'int8'); +REF = [ 0 1 0; +-128 127 4]; +REF = int8(REF); +assert_isequal(R, REF); +%============================================================================= +csv_filename = [modulepath('spreadsheet'), '/tests/readmatrix_6.csv']; +R = readmatrix(csv_filename, 'OutputType', 'uint8'); +REF = [0 1 0; +0 255 4]; +REF = uint8(REF); +assert_isequal(R, REF); +%=============================================================================