Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HPCC-33064 Check files size consistency before reading #19329

Merged
merged 1 commit into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions dali/base/dafdesc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,10 @@ class CFileDescriptorBase: public CInterface
virtual IFileDescriptor &querySelf() = 0;
virtual unsigned copyClusterNum(unsigned partidx, unsigned copy,unsigned *replicate=NULL) = 0;

IPropertyTree & queryAttributes() const
{
return *attr;
}
};

class CPartDescriptor : implements IPartDescriptor
Expand Down Expand Up @@ -819,6 +823,74 @@ protected: friend class CFileDescriptor;
return ismulti;
}

IPropertyTree & queryAttributes() const
{
return *props;
}

offset_t getFileSize(bool allowphysical,bool forcephysical)
{
offset_t ret = (offset_t)((forcephysical&&allowphysical)?-1:queryAttributes().getPropInt64("@size", -1));
if (allowphysical&&(ret==(offset_t)-1))
ret = getSize(true);
return ret;
}

offset_t getDiskSize(bool allowphysical,bool forcephysical)
{
if (!::isCompressed(parent.queryAttributes()))
return getFileSize(allowphysical, forcephysical);

if (forcephysical && allowphysical)
return getSize(false); // i.e. only if force, because all compressed should have @compressedSize attribute

// NB: compressSize is disk size
return queryAttributes().getPropInt64("@compressedSize", -1);
}

offset_t getSize(bool checkCompressed)
{
offset_t ret = (offset_t)-1;
StringBuffer firstname;
bool compressed = ::isCompressed(parent.queryAttributes());
unsigned nc=parent.numCopies(partIndex);
for (unsigned copy=0;copy<nc;copy++)
{
RemoteFilename rfn;
try
{
Owned<IFile> partfile = createIFile(getFilename(copy, rfn));
if (checkCompressed && compressed)
{
Owned<IFileIO> partFileIO = partfile->open(IFOread);
if (partFileIO)
{
Owned<ICompressedFileIO> compressedIO = createCompressedFileReader(partFileIO);
if (compressedIO)
ret = compressedIO->size();
else
throw makeStringExceptionV(DFSERR_PhysicalCompressedPartInvalid, "Compressed part is not in the valid format: %s", partfile->queryFilename());
}
}
else
ret = partfile->size();
if (ret!=(offset_t)-1)
return ret;
}
catch (IException *e)
{
StringBuffer s("CDistributedFilePart::getSize ");
rfn.getRemotePath(s);
EXCLOG(e, s.str());
e->Release();
}
if (copy==0)
rfn.getRemotePath(firstname);
}
throw makeStringExceptionV(DFSERR_CannotFindPartFileSize, "Cannot find physical file size for %s", firstname.str());;
}


RemoteMultiFilename &getMultiFilename(unsigned copy, RemoteMultiFilename &rmfn)
{
if (ismulti) {
Expand Down
3 changes: 3 additions & 0 deletions dali/base/dafdesc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ interface IPartDescriptor: extends IInterface
virtual const char *queryOverrideName() = 0; // for non-standard files
virtual unsigned copyClusterNum(unsigned copy,unsigned *replicate=NULL)=0; // map copy number to cluster (and optionally replicate number)
virtual IReplicatedFile *getReplicatedFile()=0;

virtual offset_t getFileSize(bool allowphysical,bool forcephysical)=0; // gets the part filesize (NB this will be the *expanded* size)
virtual offset_t getDiskSize(bool allowphysical,bool forcephysical)=0; // gets the part size on disk (NB this will be the compressed size)
};
typedef IArrayOf<IPartDescriptor> CPartDescriptorArray;
typedef IIteratorOf<IPartDescriptor> IPartDescriptorIterator;
Expand Down
11 changes: 11 additions & 0 deletions ecl/hthor/hthor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8735,6 +8735,17 @@ bool CHThorDiskReadBaseActivity::openNext()
{
inputfile.setown(createIFile(rfilename));

if (curPart)
{
offset_t expectedSize = curPart->getDiskSize(false, false);
if (expectedSize != unknownFileSize)
{
offset_t actualSize = inputfile->size();
if(actualSize != expectedSize)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trivial/formatting: space after if

throw MakeStringException(0, "File size mismatch: file %s was supposed to be %" I64F "d bytes but appears to be %" I64F "d bytes", inputfile->queryFilename(), expectedSize, actualSize);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trivial: makeStringExceptionV

}
}

if (compressed)
{
Owned<IExpander> eexp;
Expand Down
2 changes: 2 additions & 0 deletions roxie/ccd/ccdfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3909,6 +3909,8 @@ class CcdFileTest : public CppUnit::TestFixture
virtual const char *queryOverrideName() { UNIMPLEMENTED; }
virtual unsigned copyClusterNum(unsigned copy,unsigned *replicate=NULL) { UNIMPLEMENTED; }
virtual IReplicatedFile *getReplicatedFile() { UNIMPLEMENTED; }
virtual offset_t getFileSize(bool allowphysical,bool forcephysical) { UNIMPLEMENTED; }
virtual offset_t getDiskSize(bool allowphysical,bool forcephysical) { UNIMPLEMENTED; }
};

void testCopy()
Expand Down
8 changes: 8 additions & 0 deletions thorlcr/activities/diskread/thdiskreadslave.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,14 @@ void CDiskRecordPartHandler::open()

rwFlags |= DEFAULT_RWFLAGS;

offset_t expectedSize = partDesc->getDiskSize(false, false);
if (expectedSize != unknownFileSize)
{
offset_t actualSize = iFile->size();
if(actualSize != expectedSize)
throw MakeStringException(0, "File size mismatch: file %s was supposed to be %" I64F "d bytes but appears to be %" I64F "d bytes", iFile->queryFilename(), expectedSize, actualSize);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trivial/formatting: space between if/(actualSize
MakeStringException -> makeStringExceptionV

}

if (compressed)
{
rwFlags |= rw_compress;
Expand Down
Loading