diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c60cf605..2483dc798 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ set(CMAKE_CONFIGURATION_TYPES Release) option(NOAVX OFF) option(NOSSE3 OFF) option(NONEON OFF) +option(NOZLIB OFF) #------------------------------------------------------------------------------- # SSE MACROS @@ -305,16 +306,25 @@ endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") # LIBCurl support #------------------------------------------------------------------------------- find_package(CURL) -if(${CURL_FOUND} AND NOT APPLE) - if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") - set(DEFAULT_LIBRARIES crypto curl ssl) - else(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") - set(DEFAULT_LIBRARIES dl crypto curl ssl) - endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") +if(${CURL_FOUND}) + #if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + # set(DEFAULT_LIBRARIES " crypto curl ssl") + #else(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + # set(DEFAULT_LIBRARIES "dl crypto curl ssl") + #endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + message(${CURL_LIBRARIES}) + string(APPEND DEFAULT_LIBRARIES " " ${CURL_LIBRARIES}) add_definitions (-D__HYPHYCURL__) -endif(${CURL_FOUND} AND NOT APPLE) - - +endif(${CURL_FOUND}) + +if(NOT NOZLIB) + find_package(ZLIB 1.2.9) + if(${ZLIB_FOUND}) + string(APPEND DEFAULT_LIBRARIES " " ${ZLIB_LIBRARIES}) + include_directories(${ZLIB_INCLUDE_DIRS}) + add_definitions (-D__ZLIB__) + endif(${ZLIB_FOUND}) +endif(NOT NOZLIB) #------------------------------------------------------------------------------- # uninstall target diff --git a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf index a6cb1601a..cbc1b80b2 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/FEL.bf @@ -286,7 +286,7 @@ selection.io.startTimer (fel.json [terms.json.timers], "FEL analysis", 2); function fel.apply_proportional_site_constraint (tree_name, node_name, alpha_parameter, beta_parameter, alpha_factor, beta_factor, branch_length) { fel.branch_length = (branch_length[terms.parameters.synonymous_rate])[terms.fit.MLE]; - + node_name = tree_name + "." + node_name; ExecuteCommands (" diff --git a/res/TemplateBatchFiles/libv3/tasks/trees.bf b/res/TemplateBatchFiles/libv3/tasks/trees.bf index 041214e3e..1e79ca8bc 100644 --- a/res/TemplateBatchFiles/libv3/tasks/trees.bf +++ b/res/TemplateBatchFiles/libv3/tasks/trees.bf @@ -383,6 +383,8 @@ lfunction trees.KillZeroBranches (tree, estimates, branch_set, zero_internal) { if ((estimates[branch])[^"terms.fit.MLE"] < 1e-10) { zero_internal + branch; } + } else { + zero_internal + branch; } } } diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index 052131f5a..3db3d105b 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -2656,7 +2656,7 @@ void _ElementaryCommand::ExecuteCase4 (_ExecutionList& chain) { void _ElementaryCommand::ExecuteCase5 (_ExecutionList& chain) { chain.currentCommand++; - FILE* df; + hyFile* df; _String fName (*GetIthParameter(1)); _DataSet*ds; @@ -2679,7 +2679,7 @@ void _ElementaryCommand::ExecuteCase5 (_ExecutionList& chain) { } SetStatusLine ("Loading Data"); - df = doFileOpen (fName.get_str(),"rb"); + df = hyFile::openFile (fName.get_str(),"rb"); if (df==nil) { // try reading this file as a string formula fName = GetStringFromFormula ((_String*)parameters(1),chain.nameSpacePrefix); @@ -2689,7 +2689,7 @@ void _ElementaryCommand::ExecuteCase5 (_ExecutionList& chain) { return; } - df = doFileOpen (fName.get_str(),"rb"); + df = hyFile::openFile (fName.get_str(),"rb"); if (df==nil) { HandleApplicationError ((_String ("Could not find source dataset file ") & ((_String*)parameters(1))->Enquote('"') & " (resolved to '" & fName & "')\nPath stack:\n\t" & GetPathStack ("\n\t"))); @@ -2697,7 +2697,10 @@ void _ElementaryCommand::ExecuteCase5 (_ExecutionList& chain) { } } ds = ReadDataSetFile (df,0,nil,nil,chain.nameSpacePrefix?chain.nameSpacePrefix->GetName():nil); - fclose (df); + if (df) { + df->close(); + delete df; + } } } @@ -5099,7 +5102,7 @@ void ReadBatchFile (_String& fName, _ExecutionList& target) { FetchVar(LocateVarByName (optimizationPrecision))->SetValue(&precvalue); #endif*/ - FILE *f = doFileOpen (fName.get_str (), "rb"); + hyFile *f = hyFile::openFile (fName.get_str (), "rb"); SetStatusLine ("Parsing File"); if (!f) { HandleApplicationError (_String("Could not read batch file '") & fName & "'.\nPath stack:\n\t" & GetPathStack("\n\t")); @@ -5112,8 +5115,9 @@ void ReadBatchFile (_String& fName, _ExecutionList& target) { target.BuildList (source_file); target.sourceFile = fName; } - fclose (f); + f->close(); } + if (f) delete f; } diff --git a/src/core/dataset.cpp b/src/core/dataset.cpp index 09c8b32a1..319f1f0d0 100644 --- a/src/core/dataset.cpp +++ b/src/core/dataset.cpp @@ -1614,7 +1614,7 @@ void FilterRawString (_String& s, FileState* fs, _DataSet & ds) { //_________________________________________________________________________________________________ -void ProcessTree (FileState *fState, FILE* f, _StringBuffer& CurrentLine) { +void ProcessTree (FileState *fState, hyFile * f, _StringBuffer& CurrentLine) { // TODO SLKP 20180921 this does extra work to read in the tree string multiple times; // the solution is to have a proper buffer wrapper, and to @@ -1622,7 +1622,7 @@ void ProcessTree (FileState *fState, FILE* f, _StringBuffer& CurrentLine) { class _MultilineBuffer : public _StringBuffer { public: - _MultilineBuffer (_String const& current_line, FileState *fs, FILE* f) : _StringBuffer (current_line) { + _MultilineBuffer (_String const& current_line, FileState *fs, hyFile* f) : _StringBuffer (current_line) { file_state = fs; file = f; } @@ -1642,7 +1642,7 @@ void ProcessTree (FileState *fState, FILE* f, _StringBuffer& CurrentLine) { } FileState *file_state; - FILE * file; + hyFile * file; }; @@ -1809,7 +1809,7 @@ bool SkipLine (_StringBuffer& theLine, FileState* fS) { //_________________________________________________________ -void ReadNextLine (FILE* fp, _StringBuffer *s, FileState* fs, bool, bool upCase) { +void ReadNextLine (hyFile * fp, _StringBuffer *s, FileState* fs, bool, bool upCase) { _StringBuffer tempBuffer (1024L); fs->currentFileLine ++; @@ -1817,7 +1817,7 @@ void ReadNextLine (FILE* fp, _StringBuffer *s, FileState* fs, bool, bool upCase) char lastc; if (fp) { - lastc = getc_unlocked (fp); + lastc = fp->getc(); } else { lastc = fs->pInSrctheSource->length()?fs->theSource->char_at(fs->pInSrc++):0; } @@ -1825,12 +1825,12 @@ void ReadNextLine (FILE* fp, _StringBuffer *s, FileState* fs, bool, bool upCase) if (fs->fileType != 3) { // not NEXUS - do not skip [..] if (fp) - while ( !feof_unlocked(fp) && lastc!=10 && lastc!=13 ) { + while ( !fp->feof() && lastc!=10 && lastc!=13 ) { if (lastc) { tempBuffer << lastc; } - lastc = getc_unlocked(fp); + lastc = fp->getc(); } else while (lastc && lastc!=10 && lastc!=13 ) { @@ -1843,7 +1843,7 @@ void ReadNextLine (FILE* fp, _StringBuffer *s, FileState* fs, bool, bool upCase) lastc = toupper(lastc); } - while (((fp&&(!feof_unlocked(fp)))||(fs->theSource&&(fs->pInSrc<=fs->theSource->length ()))) && lastc!='\r' && lastc!='\n') { + while (((fp&&!fp->feof())||(fs->theSource&&(fs->pInSrc<=fs->theSource->length ()))) && lastc!='\r' && lastc!='\n') { if (lastc=='[') { if (fs->isSkippingInNEXUS) { ReportWarning ("Nested comments in NEXUS really shouldn't be used."); @@ -1862,9 +1862,9 @@ void ReadNextLine (FILE* fp, _StringBuffer *s, FileState* fs, bool, bool upCase) if (fp) { if (upCase) { - lastc = toupper(fgetc(fp)); + lastc = toupper(fp->getc()); } else { - lastc = getc_unlocked(fp); + lastc = fp->getc(); } } else { if (upCase) { @@ -1883,7 +1883,7 @@ void ReadNextLine (FILE* fp, _StringBuffer *s, FileState* fs, bool, bool upCase) tempBuffer.TrimSpace(); - if ( (fp && feof_unlocked (fp)) || (fs->theSource && fs->pInSrc >= fs->theSource->length()) ) { + if ( (fp && fp->feof ()) || (fs->theSource && fs->pInSrc >= fs->theSource->length()) ) { if (tempBuffer.empty ()) { *s = ""; return; @@ -1918,7 +1918,7 @@ void TrimPhylipLine (_String& CurrentLine, _DataSet& ds) { //_________________________________________________________ -_DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, _String* namespaceID, _TranslationTable* dT, _ExecutionList* ex) { +_DataSet* ReadDataSetFile (hyFile *f, char execBF, _String* theS, _String* bfName, _String* namespaceID, _TranslationTable* dT, _ExecutionList* ex) { static const _String kNEXUS ("#NEXUS"), kDefSeqNamePrefix ("Species"); @@ -1928,7 +1928,8 @@ _DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, try { - if (f) flockfile (f); + //if (f) flockfile (f); + if (f) f->lock(); hy_env::EnvVariableSet(hy_env::data_file_tree_string, new _Matrix, false); @@ -1971,13 +1972,13 @@ _DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, fState.pInSrc = 0; fState.theNamespace = namespaceID; - if (!(f||theS)) { + if (! f && !theS) { throw _String ("ReadDataSetFile received null file AND string references. At least one must be specified"); } // done initializing if (f) { - rewind (f); + f->rewind (); } _StringBuffer CurrentLine; @@ -2029,7 +2030,7 @@ _DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, if (fState.fileType==1) { // PHYLIP if ((filePosition<0)&&(fState.autoDetect)) { filePosition = (f? - ftell (f) + f->tell () #ifdef __WINDOZE__ -1 #endif @@ -2086,7 +2087,7 @@ _DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, fState.autoDetect = true; if(f) { - fseek (f, filePosition, SEEK_SET); + f->seek (filePosition, SEEK_SET); } else { fState.pInSrc = filePosition; } @@ -2228,7 +2229,7 @@ _DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, { _TranslationTable *trialTable = new _TranslationTable (hy_default_translation_table); trialTable->baseLength = 2; - if (f) funlockfile (f); + if (f) f->unlock(); _DataSet * res2 = ReadDataSetFile (f, execBF, theS, bfName, namespaceID, trialTable); if (res2->GetNoTypes()) { DeleteObject (result); @@ -2298,11 +2299,11 @@ _DataSet* ReadDataSetFile (FILE*f, char execBF, _String* theS, _String* bfName, } } catch (const _String& err) { DeleteObject (result); - if (f) funlockfile (f); + if (f) f->unlock(); HandleApplicationError(err); result = nil; } - if (f) funlockfile (f); + if (f) f->unlock(); return result; } diff --git a/src/core/global_things.cpp b/src/core/global_things.cpp index 4f654ded3..b3ade5eba 100644 --- a/src/core/global_things.cpp +++ b/src/core/global_things.cpp @@ -54,6 +54,7 @@ #endif #endif + #include #include #include @@ -121,7 +122,7 @@ namespace hy_global { kErrorStringDatasetRefIndexError ("Dataset index reference out of range"), kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"), kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"), - kHyPhyVersion = _String ("2.5.30"), + kHyPhyVersion = _String ("2.5.31"), kNoneToken = "None", kNullToken = "null", @@ -988,4 +989,128 @@ namespace hy_global { return true; } + //____________________________________________________________________________________ + hyFile* hyFile::openFile (const char * file_path, const char * mode , bool error, long buffer) { + hyFile* f = new hyFile; +#ifdef __ZLIB__ + if (file_path) { + f->_fileReference = gzopen (file_path, mode); + if (!f->_fileReference && error) { + HandleApplicationError (_String("Could not open file '") & *file_path & "' with mode '" & *mode & "'."); + } + } +#else + f->_fileReference = doFileOpen(file_path, mode, error); +#endif + if (!f->_fileReference ) { + delete f; + f = nil; + } + return f; + + } + + //____________________________________________________________________________________ + void hyFile::close (void) { + if (valid()) { + #ifdef __ZLIB__ + gzclose (_fileReference); + #else + fclose (_fileReference); + #endif + _fileReference = NULL; + } + } + + //____________________________________________________________________________________ + void hyFile::lock (void) { + if (valid()) { + #ifdef __ZLIB__ + //gzclose (_fileReference); + #else + flockfile (_fileReference); + #endif + } + } + + //____________________________________________________________________________________ + void hyFile::unlock (void) { + if (valid()) { + #ifdef __ZLIB__ + //gzclose (_fileReference); + #else + funlockfile (_fileReference); + #endif + } + } + + //____________________________________________________________________________________ + void hyFile::rewind (void) { + if (valid()) { + #ifdef __ZLIB__ + gzrewind (_fileReference); + #else + ::rewind (_fileReference); + #endif + } + } + + //____________________________________________________________________________________ + void hyFile::seek (long pos, int whence) { + if (valid()) { + #ifdef __ZLIB__ + gzseek (_fileReference, pos, whence); + #else + fseek (_fileReference, pos, whence); + #endif + } + } + + //____________________________________________________________________________________ + + size_t hyFile::tell (void) { + if (valid()) { + #ifdef __ZLIB__ + return gztell (_fileReference); + #else + return ftell (_fileReference); + #endif + } + return 0; + } + //____________________________________________________________________________________ + bool hyFile::feof (void) { + if (valid()) { + #ifdef __ZLIB__ + return gzeof (_fileReference); + #else + return feof_unlocked (_fileReference); + #endif + } + return true; + } + //____________________________________________________________________________________ + int hyFile::getc (void) { + if (valid()) { + #ifdef __ZLIB__ + return gzgetc (_fileReference); + #else + return getc_unlocked (_fileReference); + #endif + } + return 0; + } + + //____________________________________________________________________________________ + unsigned long hyFile::read (void* buffer, unsigned long size, unsigned long items) { + if (valid()) { + #ifdef __ZLIB__ + return gzfread (buffer, size, items, _fileReference); + #else + return ::fread (buffer, size, items, _fileReference); + #endif + } + return 0; + } + } // namespace close diff --git a/src/core/include/dataset.h b/src/core/include/dataset.h index 6889f849e..3cb82ce07 100644 --- a/src/core/include/dataset.h +++ b/src/core/include/dataset.h @@ -162,7 +162,7 @@ class _DataSet : public _List // a complete data set _SimpleList const &DuplicateMap(void) const { return theMap; } friend class _DataSetFilter; - friend _DataSet *ReadDataSetFile(FILE *, char, _String *, _String *, + friend _DataSet *ReadDataSetFile(hyFile *, char, _String *, _String *, _String *, _TranslationTable *, _ExecutionList *); friend long ProcessLine(_String &s, FileState *fs, _DataSet &ds); @@ -200,17 +200,17 @@ class _DataSet : public _List // a complete data set bool useHorizontalRep; }; -void ReadNextLine(FILE *fp, _StringBuffer *s, FileState *fs, bool append = false, +void ReadNextLine(hyFile *fp, _StringBuffer *s, FileState *fs, bool append = false, bool upCase = true); -_DataSet *ReadDataSetFile(FILE *, char = 0, _String * = nil, _String * = nil, +_DataSet *ReadDataSetFile(hyFile *, char = 0, _String * = nil, _String * = nil, _String * = nil, _TranslationTable * = &hy_default_translation_table, _ExecutionList *target = nil); bool StoreADataSet(_DataSet *, _String *); -void ReadNexusFile (FileState& fState, FILE*f, _DataSet& result); +void ReadNexusFile (FileState& fState, hyFile*f, _DataSet& result); extern _StringBuffer nexusBFBody; diff --git a/src/core/include/global_things.h b/src/core/include/global_things.h index 6e8371ea7..94b69ec01 100644 --- a/src/core/include/global_things.h +++ b/src/core/include/global_things.h @@ -53,7 +53,11 @@ #include #ifdef __HYPHYMPI__ -#include + #include +#endif + +#ifdef __ZLIB__ + #include #endif class _Variable; // forward decl @@ -121,6 +125,30 @@ namespace hy_global { */ char get_platform_directory_char (void); + + /* pass-through structure for reading / writing from a file that may or may not be compressed */ + + class hyFile { + public: + hyFile (void) {_fileReference = NULL;} + static hyFile* openFile (const char * file_path, const char * mode , bool error = false, long buffer = 1024*128); + inline bool valid (void) const {return _fileReference != NULL;} + void lock (void); + void unlock (void); + void rewind (void); + void seek (long, int); + void close (); + bool feof (void); + unsigned long read (void* buffer, unsigned long size, unsigned long items); + size_t tell (); + int getc (); +#ifdef __ZLIB__ + gzFile _fileReference; +#else + FILE* _fileReference; +#endif + }; + /** Open the file located at file_path using mode 'mode' diff --git a/src/core/include/hy_string_buffer.h b/src/core/include/hy_string_buffer.h index 86e842071..5b85868c9 100644 --- a/src/core/include/hy_string_buffer.h +++ b/src/core/include/hy_string_buffer.h @@ -42,7 +42,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. //#pragma once #include "hy_strings.h" - +#include "global_things.h" +using namespace hy_global; #define HY_STRING_BUFFER_ALLOCATION_CHUNK 16UL #define kAppendAnAssignmentToBufferPlain 0x00 @@ -178,6 +179,15 @@ class _StringBuffer : public _String { - SLKP 20170920 initial implementation */ _StringBuffer(_String* buffer); + + /** + * A constructor that read a possibly compressed file + * @param file the source file + + * Revision history + - SLKP 20210411 initial implementation + */ + _StringBuffer(hyFile* file); /** * Default constructor initializer diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 8cf77943b..71b366460 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -1572,7 +1572,7 @@ _Matrix* _LikelihoodFunction::ConstructCategoryMatrix (const _SimpleList& whi } // all sites else { - vDim += BlockLength(i); + vDim += BlockLength(whichParts.get (i)); } // only unique patterns for now diff --git a/src/core/nexus.cpp b/src/core/nexus.cpp index 7d3e70264..c83186814 100644 --- a/src/core/nexus.cpp +++ b/src/core/nexus.cpp @@ -69,15 +69,15 @@ void PadLine (FileState& fState, _DataSet& result); void ISelector (FileState& fState, _StringBuffer& CurrentLine, _DataSet& result); bool SkipLine (_StringBuffer& theLine, FileState* fS); void TrimPhylipLine (_StringBuffer& CurrentLine, _DataSet& ds); -bool ProcessNexusData (FileState&, long, FILE*, _StringBuffer&, _DataSet&); -void ProcessNexusHYPHY (FileState&, long, FILE*, _StringBuffer&, _DataSet&); -void ProcessNexusAssumptions (FileState&, long, FILE*, _StringBuffer&, _DataSet&); -void ProcessNexusTaxa (FileState&,long, FILE*, _StringBuffer&, _DataSet&); -void ProcessNexusTrees (FileState&, long, FILE*, _StringBuffer&, _DataSet&); -bool FindNextNexusToken (FileState& fState, FILE* f, _StringBuffer& CurrentLine, long pos); -bool SkipUntilNexusBlockEnd (FileState& fState, FILE* f, _StringBuffer& CurrentLine, long pos); -bool ReadNextNexusStatement (FileState&, FILE* , _StringBuffer&, long, _StringBuffer&, bool, bool = true, bool = true, bool = false, bool = false, bool = false); -long ReadNextNexusEquate (FileState&, FILE* , _StringBuffer&, long, _String&, bool = false, bool = true); +bool ProcessNexusData (FileState&, long, hyFile*, _StringBuffer&, _DataSet&); +void ProcessNexusHYPHY (FileState&, long, hyFile*, _StringBuffer&, _DataSet&); +void ProcessNexusAssumptions (FileState&, long, hyFile*, _StringBuffer&, _DataSet&); +void ProcessNexusTaxa (FileState&,long, hyFile*, _StringBuffer&, _DataSet&); +void ProcessNexusTrees (FileState&, long, hyFile*, _StringBuffer&, _DataSet&); +bool FindNextNexusToken (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos); +bool SkipUntilNexusBlockEnd (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos); +bool ReadNextNexusStatement (FileState&, hyFile* , _StringBuffer&, long, _StringBuffer&, bool, bool = true, bool = true, bool = false, bool = false, bool = false); +long ReadNextNexusEquate (FileState&, hyFile* , _StringBuffer&, long, _String&, bool = false, bool = true); void NexusParseEqualStatement (_StringBuffer&); static auto error_conext = [] (_String const& buffer, long position) -> const _String {return (buffer.Cut (0,position) & " <=? " & buffer.Cut (position+1,kStringEnd)).Enquote();}; @@ -85,7 +85,7 @@ static auto error_conext = [] (_String const& buffer, long position) -> const _ //_________________________________________________________ -bool FindNextNexusToken (FileState& fState, FILE* f, _StringBuffer& CurrentLine, long pos) { +bool FindNextNexusToken (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos) { pos = CurrentLine.FirstNonSpaceIndex (pos,-1,kStringDirectionForward); if (pos==kNotFound) { @@ -102,7 +102,7 @@ bool FindNextNexusToken (FileState& fState, FILE* f, _StringBuffer& CurrentLi //_________________________________________________________ -bool SkipUntilNexusBlockEnd (FileState& fState, FILE* file, _StringBuffer& CurrentLine, long pos) { +bool SkipUntilNexusBlockEnd (FileState& fState, hyFile* file, _StringBuffer& CurrentLine, long pos) { static const _String endMark ("END"); pos = CurrentLine.Find (endMark,pos+1,kStringEnd); while (pos == kNotFound) { @@ -143,7 +143,7 @@ void NexusParseEqualStatement (_StringBuffer& source) } //_________________________________________________________ -bool ReadNextNexusStatement (FileState& fState, FILE* f, _StringBuffer& CurrentLine, long pos, _StringBuffer & blank, bool stopOnSpace, bool stopOnComma, bool stopOnQuote, bool NLonly, bool preserveSpaces, bool preserveQuotes) { +bool ReadNextNexusStatement (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos, _StringBuffer & blank, bool stopOnSpace, bool stopOnComma, bool stopOnQuote, bool NLonly, bool preserveSpaces, bool preserveQuotes) { bool done = false, insideLiteral = false, startedReading = false; @@ -229,7 +229,7 @@ bool ReadNextNexusStatement (FileState& fState, FILE* f, _StringBuffer& CurrentL //_________________________________________________________ -long ReadNextNexusEquate (FileState& fState, FILE* f, _StringBuffer& CurrentLine, long pos2, _String& blank, bool resetP, bool demandSemicolon) { +long ReadNextNexusEquate (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos2, _String& blank, bool resetP, bool demandSemicolon) { long pos = blank.Find ('=',pos2,-1), res; if (pos>=0) { if (poslength()<=fState.pInSrc))) { + if ((f&&f->feof())||(fState.theSource&&(fState.theSource->length()<=fState.pInSrc))) { break; } offset = 0; @@ -815,7 +815,7 @@ void ProcessNexusTrees (FileState& fState, long pos, FILE*f, _StringBuffer& C //_________________________________________________________ -void ProcessNexusHYPHY (FileState& fState, long pos, FILE*file, _StringBuffer& CurrentLine, _DataSet&) { +void ProcessNexusHYPHY (FileState& fState, long pos, hyFile*file, _StringBuffer& CurrentLine, _DataSet&) { static _String const endMark ("END;"); _StringBuffer bfBody (128UL); @@ -863,7 +863,7 @@ void ProcessNexusHYPHY (FileState& fState, long pos, FILE*file, _StringBuffer //_________________________________________________________ -bool ProcessNexusData (FileState& fState, long pos, FILE*f, _StringBuffer& CurrentLine, _DataSet& result) { +bool ProcessNexusData (FileState& fState, long pos, hyFile*f, _StringBuffer& CurrentLine, _DataSet& result) { static const _String key1 ("DIMENSIONS"), key11 ("NTAX"), key12 ("NCHAR"), key2 ("FORMAT"),key21 ("DATATYPE"), key22 ("MISSING"), key23 ("GAP"), key24 ("SYMBOLS"), key25 ("EQUATE"), key26 ("MATCHCHAR"), key27 ("NOLABELS"), key28 ("INTERLEAVE"), key3 ("MATRIX"), keyEnd ("END"); @@ -1217,7 +1217,7 @@ bool ProcessNexusData (FileState& fState, long pos, FILE*f, _StringBuffer& Cu break; // finished reading } - if ((f&&feof(f))||(fState.theSource&&(fState.theSource->length()<=fState.pInSrc))) { + if ((f&&f->feof())||(fState.theSource&&(fState.theSource->length()<=fState.pInSrc))) { break; } } @@ -1258,7 +1258,7 @@ bool ProcessNexusData (FileState& fState, long pos, FILE*f, _StringBuffer& Cu //_________________________________________________________ -void ReadNexusFile (FileState& fState, FILE*file, _DataSet& result) { +void ReadNexusFile (FileState& fState, hyFile*file, _DataSet& result) { bool dataRead = false, lookForEnd = false; long f,g, file_line = fState.currentFileLine; diff --git a/src/core/string_buffer.cpp b/src/core/string_buffer.cpp index 26588c592..821b5488d 100644 --- a/src/core/string_buffer.cpp +++ b/src/core/string_buffer.cpp @@ -210,6 +210,25 @@ _StringBuffer::~_StringBuffer (void ){ s_data = allocated_ptr; sa_length = 0L; } + +//============================================================= +_StringBuffer::_StringBuffer(hyFile* file): _String () { + const unsigned long buffer_size = 65535; + this->Initialize(); + char buffer [buffer_size+1L]; + unsigned long items_read; + _String buffer_str (buffer_size, buffer); + do { + items_read = file->read (buffer, 1, buffer_size); + if (items_read < buffer_size) break; + (*this) << buffer_str; + } while (items_read == buffer_size); + if (items_read) { + buffer[items_read] = 0; + (*this) << buffer; + } +} + /* ============================================================== Cloners and Copiers diff --git a/tests/hbltests/UnitTests/HBLCommands/DataSet.bf b/tests/hbltests/UnitTests/HBLCommands/DataSet.bf index 233fa7352..0974b8734 100644 --- a/tests/hbltests/UnitTests/HBLCommands/DataSet.bf +++ b/tests/hbltests/UnitTests/HBLCommands/DataSet.bf @@ -20,6 +20,7 @@ function runTest () { DataSet 2fas = ReadDataFile (PATH_TO_CURRENT_BF + '/../../data/2.fas'); DataSet cd2Phylip = ReadDataFile(PATH_TO_CURRENT_BF + '/../../data/CD2.phylip'); + //--------------------------------------------------------------------------------------------------------- // ERROR HANDLING @@ -34,7 +35,9 @@ function runTest () { assert (runCommandWithSoftErrors ('DataSet list_ds = ReadFromString(T1);', "The format of the sequence file has not been recognized and may be invalid"), "Failed error checking for trying to create a data set with ReadFromString(topology)"); assert (runCommandWithSoftErrors ('DataSet list_ds = ReadFromString(TT1);', "The format of the sequence file has not been recognized and may be invalid"), "Failed error checking for trying to create a data set with ReadFromString(tree)"); + assert (runCommandWithSoftErrors ("DataSet thisIsntAValidFilePath = ReadDataFile('./ThisFileDoesNotExist.txt');", "Could not find source dataset file"), "Failed error checking for trying to create a data set with ReadDataFile with an invalid path"); + assert (runCommandWithSoftErrors ("DataSet thisFileIsntInAValidFormat = ReadDataFile(PATH_TO_CURRENT_BF + '/assert.bf');", "The format of the sequence file has not been recognized and may be invalid"), "Failed error checking for trying to create a data set with ReadDataFile with a file in an invalid format"); assert (runCommandWithSoftErrors ("DataSet newickFile = ReadDataFile (PATH_TO_CURRENT_BF + '/../../data/CD2.newick');", "The format of the sequence file has not been recognized and may be invalid"), "Failed error checking for trying to create a data set with ReadDataFile with a file file containing a newick string but no sequences");