diff --git a/dali/base/dadfs.cpp b/dali/base/dadfs.cpp index ed5d250e581..859e2324226 100644 --- a/dali/base/dadfs.cpp +++ b/dali/base/dadfs.cpp @@ -245,6 +245,27 @@ extern da_decl cost_type calcDiskWriteCost(const StringArray & clusters, stat_ty return writeCost; } + +extern da_decl cost_type updateCostAndNumReads(IDistributedFile *file, stat_type numDiskReads, cost_type curReadCost) +{ + const IPropertyTree & fileAttr = file->queryAttributes(); + + if (!curReadCost) + curReadCost = calcFileAccessCost(file, 0, numDiskReads); + cost_type legacyReadCost = 0; + if (!fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost))) + { + if (!isFileKey(fileAttr)) + { + stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); + legacyReadCost = calcFileAccessCost(file, 0, prevDiskReads); + } + } + file->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost); + file->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), numDiskReads); + return curReadCost; +} + // JCSMORE - I suspect this function should be removed/deprecated. It does not deal with dirPerPart or striping. // makePhysicalPartName supports both, but does not deal with groups/endpoints) RemoteFilename &constructPartFilename(IGroup *grp,unsigned partno,unsigned partmax,const char *name,const char *partmask,const char *partdir,unsigned copy,ClusterPartDiskMapSpec &mspec,RemoteFilename &rfn) @@ -4985,49 +5006,29 @@ protected: friend class CDistributedFilePart; } virtual void getCost(const char * cluster, cost_type & atRestCost, cost_type & accessCost) override { + atRestCost = 0; + accessCost = 0; CDateTime dt; getModificationTime(dt); double fileAgeDays = difftime(time(nullptr), dt.getSimple())/(24*60*60); double sizeGB = getDiskSize(true, false) / ((double)1024 * 1024 * 1024); const IPropertyTree *attrs = root->queryPropTree("Attr"); - bool doLegacyAccessCostCalc = false; - __int64 numDiskWrites = 0, numDiskReads = 0; - if (attrs) - { - if (hasReadWriteCostFields(*attrs)) - { - // Newer files have readCost and writeCost attributes - accessCost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost)); - } - else - { - // Costs need to be calculated from numDiskReads and numDiskWrites for legacy files - numDiskWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites)); - doLegacyAccessCostCalc = true; - // NB: Costs of index reading can not be reliably estimated based on 'numDiskReads' - if (!isFileKey(*attrs)) - numDiskReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads)); - } - } + if (isEmptyString(cluster)) { StringArray clusterNames; unsigned countClusters = getClusterNames(clusterNames); for (unsigned i = 0; i < countClusters; i++) atRestCost += calcFileAtRestCost(clusterNames[i], sizeGB, fileAgeDays); - if (countClusters && doLegacyAccessCostCalc) - { - // NB: numDiskReads/numDiskWrites are stored at the file level, not per cluster. - // So cannot calculate accessCost per cluster, assume cost is based on 1st. - accessCost = calcFileAccessCost(clusterNames[0], numDiskWrites, numDiskReads); - } + if (countClusters) + cluster = clusterNames[0]; } else { - atRestCost += calcFileAtRestCost(cluster, sizeGB, fileAgeDays); - if (doLegacyAccessCostCalc) - accessCost = calcFileAccessCost(cluster, numDiskWrites, numDiskReads); + atRestCost = calcFileAtRestCost(cluster, sizeGB, fileAgeDays); } + if (attrs) + accessCost = getReadCost(*attrs, cluster) + getWriteCost(*attrs, cluster); } }; @@ -13456,25 +13457,10 @@ IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned nu cost_type atRestCost = calcFileAtRestCost(nodeGroup, sizeGB, fileAgeDays); file->setPropInt64(getDFUQResultFieldName(DFUQRFatRestCost), atRestCost); - // Dyamically calc and set the access cost field and for legacy files set read/write cost fields - cost_type accessCost = 0; - if (hasReadWriteCostFields(*file)) - { - accessCost = file->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + file->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost)); - } - else // Calc access cost from numDiskRead & numDiskWrites for Legacy files - { - cost_type legacyReadCost = getLegacyReadCost(*file, nodeGroup); - file->setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost); - - cost_type legacyWriteCost = getLegacyWriteCost(*file, nodeGroup); - file->setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), legacyWriteCost); - - accessCost = legacyReadCost + legacyWriteCost; - } + cost_type readCost = getReadCost(*file, nodeGroup, true); + cost_type writeCost = getWriteCost(*file, nodeGroup, true); + cost_type accessCost = readCost + writeCost; file->setPropInt64(getDFUQResultFieldName(DFUQRFaccessCost), accessCost); - - // Dymically calc and set the total cost field file->setPropInt64(getDFUQResultFieldName(DFUQRFcost), atRestCost + accessCost); } diff --git a/dali/base/dadfs.hpp b/dali/base/dadfs.hpp index 6cde9f34875..cae1ae13625 100644 --- a/dali/base/dadfs.hpp +++ b/dali/base/dadfs.hpp @@ -890,39 +890,93 @@ extern da_decl cost_type calcFileAtRestCost(const char * cluster, double sizeGB, extern da_decl cost_type calcFileAccessCost(const char * cluster, __int64 numDiskWrites, __int64 numDiskReads); extern da_decl cost_type calcFileAccessCost(IDistributedFile *f, __int64 numDiskWrites, __int64 numDiskReads); extern da_decl cost_type calcDiskWriteCost(const StringArray & clusters, stat_type numDiskWrites); +extern da_decl cost_type updateCostAndNumReads(IDistributedFile *file, stat_type numDiskReads, cost_type curReadCost=0); // Update readCost and numDiskReads - return calculated read cost constexpr bool defaultPrivilegedUser = true; constexpr bool defaultNonPrivilegedUser = false; extern da_decl void configurePreferredPlanes(); -inline bool hasReadWriteCostFields(const IPropertyTree & fileAttr) + +// Get read cost from readCost field or calculate legacy read cost +// - migrateLegacyCost: if true, update readCost field with legacy read cost +template +inline cost_type getReadCost(IPropertyTree & fileAttr, Source source, bool migrateLegacyCost = false) { - return fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost)) || fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost)); + if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost))) + return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFreadCost), 0); + else + { + // Calculate legacy read cost from numDiskReads + // (However, it is not possible to accurately calculate read cost for key + // files, as the reads may have been from page cache and not from disk.) + if (!isFileKey(fileAttr) && source) + { + stat_type numDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); + cost_type readCost = calcFileAccessCost(source, 0, numDiskReads); + if (migrateLegacyCost) + fileAttr.setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), readCost); + return readCost; + } + } + return 0; } +// Get read cost from readCost field or calculate legacy read cost template -inline cost_type getLegacyReadCost(const IPropertyTree & fileAttr, Source source) +inline cost_type getReadCost(const IPropertyTree & fileAttr, Source source) { - // Legacy files do not have @readCost attribute, so calculate from numDiskRead - // NB: Costs of index reading can not be reliably estimated based on 'numDiskReads' - if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads)) - && !isFileKey(fileAttr)) + if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost))) + return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFreadCost), 0); + else { - stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); - return calcFileAccessCost(source, 0, prevDiskReads); + // Calculate legacy read cost from numDiskReads + // (However, it is not possible to accurately calculate read cost for key + // files, as the reads may have been from page cache and not from disk.) + if (!isFileKey(fileAttr) && source) + { + stat_type numDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); + return calcFileAccessCost(source, 0, numDiskReads); + } } - else - return 0; + return 0; } + +// Get write cost from writeCost field or calculate legacy write cost +// - migrateLegacyCost: if true, update writeCost field with legacy write cost template -inline cost_type getLegacyWriteCost(const IPropertyTree & fileAttr, Source source) +inline cost_type getWriteCost(IPropertyTree & fileAttr, Source source, bool migrateLegacyCost = false) { - // Legacy files do not have @writeCost attribute, so calculate from numDiskWrites - if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites))) + if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost))) + return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), 0); + else { - stat_type prevDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0); - return calcFileAccessCost(source, prevDiskWrites, 0); + // Calculate legacy write cost from numDiskWrites + if (source) + { + stat_type numDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0); + cost_type writeCost = calcFileAccessCost(source, numDiskWrites, 0); + if (migrateLegacyCost) + fileAttr.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), writeCost); + return writeCost; + } } + return 0; +} + +// Get write cost from writeCost field or calculate legacy write cost +template +inline cost_type getWriteCost(const IPropertyTree & fileAttr, Source source) +{ + if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost))) + return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), 0); else - return 0; + { + // Calculate legacy write cost from numDiskWrites + if (source) + { + stat_type numDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0); + return calcFileAccessCost(source, numDiskWrites, 0); + } + } + return 0; } #endif diff --git a/dali/ft/filecopy.cpp b/dali/ft/filecopy.cpp index 3e13295dc45..47e953d2113 100644 --- a/dali/ft/filecopy.cpp +++ b/dali/ft/filecopy.cpp @@ -3851,10 +3851,10 @@ cost_type FileSprayer::updateSourceProperties() // Update file readCost and numReads in file properties and do the same for subfiles if (distributedSource) { - cost_type totalReadCost = 0; IDistributedSuperFile * superSrc = distributedSource->querySuperFile(); if (superSrc && superSrc->numSubFiles() > 0) { + cost_type totalReadCost = 0; Owned fDesc = superSrc->getFileDescriptor(); ISuperFileDescriptor *superFDesc = fDesc->querySuperFileDescriptor(); ForEachItemIn(idx, partition) @@ -3876,11 +3876,7 @@ cost_type FileSprayer::updateSourceProperties() // so query the first (and only) subfile subfile = &superSrc->querySubFile(0); } - cost_type curReadCost = calcFileAccessCost(subfile, 0, curProgress.numReads); - subfile->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curProgress.numReads); - cost_type legacyReadCost = getLegacyReadCost(subfile->queryAttributes(), subfile); - subfile->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost); - totalReadCost += curReadCost; + totalReadCost += updateCostAndNumReads(subfile, curProgress.numReads); } else { @@ -3888,15 +3884,12 @@ cost_type FileSprayer::updateSourceProperties() totalReadCost += calcFileAccessCost(distributedSource, 0, curProgress.numReads); } } + return updateCostAndNumReads(distributedSource, totalNumReads, totalReadCost); } else { - totalReadCost = calcFileAccessCost(distributedSource, 0, totalNumReads); + return updateCostAndNumReads(distributedSource, totalNumReads); } - distributedSource->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), totalNumReads); - cost_type legacyReadCost = getLegacyReadCost(distributedSource->queryAttributes(), distributedSource); - distributedSource->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + totalReadCost); - return totalReadCost; // return the total cost of this file operation (exclude previous and legacy read costs) } return 0; } diff --git a/ecl/hthor/hthor.cpp b/ecl/hthor/hthor.cpp index 428c18a29b9..f0ede6c0308 100644 --- a/ecl/hthor/hthor.cpp +++ b/ecl/hthor/hthor.cpp @@ -8547,12 +8547,7 @@ void CHThorDiskReadBaseActivity::closepart() dFile = &(super->querySubFile(subfile, true)); } } - IPropertyTree & fileAttr = dFile->queryAttributes(); - cost_type legacyReadCost = getLegacyReadCost(fileAttr, dFile); - cost_type curReadCost = calcFileAccessCost(dFile, 0, curDiskReads); - - dFile->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost); - dFile->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curDiskReads); + updateCostAndNumReads(dFile, curDiskReads); } numDiskReads += curDiskReads; } diff --git a/thorlcr/graph/thgraphmaster.cpp b/thorlcr/graph/thgraphmaster.cpp index a13db163620..a43aebba417 100644 --- a/thorlcr/graph/thgraphmaster.cpp +++ b/thorlcr/graph/thgraphmaster.cpp @@ -657,7 +657,6 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps) { StringBuffer clusterName; file->getClusterName(0, clusterName); - IPropertyTree & fileAttr = file->queryAttributes(); cost_type curReadCost = 0; stat_type curDiskReads = stats.getStatisticSum(StNumDiskReads); if(useJhtreeCacheStats) @@ -671,18 +670,7 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps) curReadCost = calcFileAccessCost(clusterName, 0, curDiskReads); if (updateFileProps) - { - cost_type legacyReadCost = 0; - // Legacy files will not have the readCost stored as an attribute - if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))) - { - // Legacy file: calculate readCost using prev disk reads and new disk reads - stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); - legacyReadCost = calcFileAccessCost(clusterName, 0, prevDiskReads); - } - file->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost); - file->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curDiskReads); - } + updateCostAndNumReads(file, curDiskReads); return curReadCost; }; cost_type readCost = 0;