Skip to content

Commit

Permalink
Fix for issue IBM#1003
Browse files Browse the repository at this point in the history
  • Loading branch information
dlherms-ibm committed Apr 30, 2021
1 parent 12d9fdf commit f53ec4d
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 17 deletions.
41 changes: 28 additions & 13 deletions bb/src/LVUtils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ namespace bs = boost::system;

char MINIMUM_LOGICAL_VOLUME_SIZE[4] = "16M";

typedef std::pair<uint32_t, filehandle*> FileHandleEntry;

extern thread_local uid_t threadLocaluid;
extern thread_local gid_t threadLocalgid;

Expand Down Expand Up @@ -1985,7 +1987,7 @@ int processContrib(const uint64_t pNumContrib, uint32_t pContrib[])
}


int setupTransfer(BBTransferDef* transfer, Uuid &lvuuid, const uint64_t pJobId, const uint64_t pHandle, const uint32_t pContribId, vector<struct stat*>& pStats, const uint32_t pPerformOperation)
int setupTransfer(BBTransferDef* transfer, Uuid &lvuuid, const uint64_t pJobId, const uint64_t pHandle, const uint32_t pContribId, vector<struct stat*>& pStats, vector<FileHandleEntry>* pFileHandlesAdded, const uint32_t pPerformOperation)
{
ENTRY(__FILE__,__FUNCTION__);

Expand Down Expand Up @@ -2197,18 +2199,28 @@ int setupTransfer(BBTransferDef* transfer, Uuid &lvuuid, const uint64_t pJobId,
// transfer processing.
srcfile_ptr->setRestartInProgress();
}
addFilehandle(srcfile_ptr, pJobId, pHandle, pContribId, e.sourceindex);

if (l_SourceFileIsLocal && (!l_SimulateFileStageIn))
if (!addFilehandle(srcfile_ptr, pJobId, pHandle, pContribId, e.sourceindex))
{
// Local cp or stageout processing...
// If not already done, get stats for the local source file on the SSD...
if (pStats[e.sourceindex] == 0)
pFileHandlesAdded->push_back(std::make_pair(e.sourceindex, srcfile_ptr));
if (l_SourceFileIsLocal && (!l_SimulateFileStageIn))
{
pStats[e.sourceindex] = new(struct stat);
srcfile_ptr->getstats(*pStats[e.sourceindex]);
// Local cp or stageout processing...
// If not already done, get stats for the local source file on the SSD...
if (pStats[e.sourceindex] == 0)
{
pStats[e.sourceindex] = new(struct stat);
srcfile_ptr->getstats(*pStats[e.sourceindex]);
}
}
}
else
{
LOG(bb,error) << "Adding of the filehandle to the registry for srcfile " << transfer->files[e.sourceindex] << " failed";
rc = -1;

delete srcfile_ptr;
break;
}
}
}
else
Expand Down Expand Up @@ -2270,6 +2282,7 @@ int setupTransfer(BBTransferDef* transfer, Uuid &lvuuid, const uint64_t pJobId,
if (dstfile_ptr->getfd() >= 0)
{
addFilehandle(dstfile_ptr, pJobId, pHandle, pContribId, e.targetindex);
pFileHandlesAdded->push_back(std::make_pair(e.targetindex, dstfile_ptr));
}
else
{
Expand Down Expand Up @@ -2549,6 +2562,7 @@ int setupTransfer(BBTransferDef* transfer, Uuid &lvuuid, const uint64_t pJobId,
if (dstfile_ptr->getfd() >= 0)
{
addFilehandle(dstfile_ptr, pJobId, pHandle, pContribId, e.targetindex);
pFileHandlesAdded->push_back(std::make_pair(e.targetindex, dstfile_ptr));
}
else
{
Expand Down Expand Up @@ -2759,6 +2773,7 @@ int startTransfer(BBTransferDef* transfer, const uint64_t pJobId, const uint64_t
uint32_t l_MarkFailed = 0;

vector<struct stat*> l_Stats;
vector<FileHandleEntry> l_FileHandlesAdded;
char l_Empty = '\0';
bool releaseFileHandles = false;

Expand All @@ -2777,7 +2792,7 @@ int startTransfer(BBTransferDef* transfer, const uint64_t pJobId, const uint64_t
if ((!l_PerformOperation) || transfer->files.size())
{
Uuid lv_uuid;
rc = setupTransfer(transfer, lv_uuid, pJobId, pHandle, pContribId, l_Stats, l_PerformOperation);
rc = setupTransfer(transfer, lv_uuid, pJobId, pHandle, pContribId, l_Stats, &l_FileHandlesAdded, l_PerformOperation);
if (rc)
{
// NOTE: errstate should already be filled in, but if not, set error text here...
Expand Down Expand Up @@ -2926,13 +2941,13 @@ int startTransfer(BBTransferDef* transfer, const uint64_t pJobId, const uint64_t

FileHandleRegistryLock();

for (unsigned int index=0; index<transfer->files.size(); index++)
for (auto& e: l_FileHandlesAdded)
{
LOOP_COUNT(__FILE__,__FUNCTION__,"released_handles");
try
{
filehandle* fh;
if (!removeFilehandle(fh, pJobId, pHandle, pContribId, index))
filehandle* fh = e.second;
if (!removeFilehandle(fh, pJobId, pHandle, pContribId, e.first))
{
string l_FileName = fh->getfn();
LOG(bb,info) << "Releasing filehandle '" << l_FileName << "'";
Expand Down
42 changes: 38 additions & 4 deletions bb/src/fh.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ map<filehandleLocator, filehandle*> fhregistry;

int addFilehandle(filehandle* fh, uint64_t jobid, uint64_t handle, uint32_t contrib, uint32_t index)
{
int rc = 0;

filehandleLocator fl;
fl.jobid = jobid;
fl.handle = handle;
Expand All @@ -178,13 +180,47 @@ int addFilehandle(filehandle* fh, uint64_t jobid, uint64_t handle, uint32_t cont
// FileHandleRegistryLock();

LOG(bb,debug) << "addFilehandle: fh=" << fh << " jobid=" << jobid << " handle=" << handle << " contribid=" << contrib << " index=" << index;

filehandle* fh2 = 0;
int rc2 = findFilehandle(fh2, jobid, handle, contrib, index);
#if BBSERVER
if (rc2 == 0)
{
LOG(bb,info) << "addFilehandle: Input fh=" << fh << " jobid=" << jobid << " handle=" << handle << " contribid=" << contrib << " index=" << index \
<< ". However, file handle " << fh << " to be added already exists as " << fh2 \
<< ". The existing file handle is being released and the associated fd closed. "
<< ". This is most likely due to an interrupted start transfer operation and an existing stored transfer definition being reused.";
try
{
delete fh2;
fh2 = 0;
}
catch(exception& e)
{
LOG(bb,error) << "addFilehandle: fh=" << fh2 << " jobid=" << jobid << " handle=" << handle << " contribid=" << contrib << " index=" << index \
<< ". Exception occurred when attempting to release the file handle, " << e.what() \
<< ". Processing will continue...";
}
rc2 = -1;
}
#endif
if (rc2 != 0)
{
// File handle entry does not already exist
fhregistry[fl] = fh;
}
else
{
// File handle entry already exists
rc = -1;
LOG(bb,error) << "addFilehandle: Input fh=" << fh << " jobid=" << jobid << " handle=" << handle << " contribid=" << contrib << " index=" << index \
<< ". However, file handle " << fh << " to be added already exists as " << fh2 \
<< ". This is mostly likely due to an incorrect handle and/or contribid and/or index value being specified for this transfer within the job.";
}

// FileHandleRegistryUnlock();

return 0;
return rc;
}

int numActiveFileTransfers(uint64_t jobid, uint64_t handle, uint64_t& count)
Expand Down Expand Up @@ -482,15 +518,13 @@ int filehandle::close()
::close(fd);
#if (BBSERVER || BBPROXY)
BB_GetTimeDifference(l_Time);
#endif
fd = -1;
#if (BBSERVER || BBPROXY)
LOG(bb,info) << "Closed file " << filename << ", fd=" << fd << ", time=" << (double)l_Time/(double)g_TimeBaseScale << " seconds";
FL_Write(FLProxy, CloseFile, "Close for filehandle %ld, ticks %ld",(uint64_t)fd,l_Time,0,0);
#else
LOG(bb,info) << "Closing file " << filename << ", fd=" << fd;
FL_Write(FLProxy, CloseFile_, "Close for filehandle %ld",(uint64_t)fd,0,0,0);
#endif
fd = -1;
}

return 0;
Expand Down

0 comments on commit f53ec4d

Please sign in to comment.