
Merge pull request #526 from sstsimulator/devel
Automatically Merged using masterautomerge.py
sst-autotester authored Dec 2, 2016
2 parents 4a7bac9 + 71a3ae6 commit b676760
Showing 62 changed files with 3,473 additions and 1,911 deletions.
2 changes: 2 additions & 0 deletions src/sst/elements/memHierarchy/L1CoherenceController.cc
@@ -199,6 +199,8 @@ bool L1CoherenceController::isRetryNeeded(MemEvent * event, CacheLine * cacheLin
         case GetS:
         case GetX:
         case GetSEx:
+        case FlushLine:
+        case FlushLineInv:
            return true;
        case PutS:
        case PutE:
2 changes: 1 addition & 1 deletion src/sst/elements/memHierarchy/MESICoherenceController.cc
@@ -1101,7 +1101,7 @@ CacheAction MESIController::handleInv(MemEvent* event, CacheLine* cacheLine, boo
        case IS:
        case IM:
        case I_B:
-           return DONE;    // Eviction raced with Inv, IS/IM only happen if we don't use AckPuts
+           return IGNORE;  // Eviction raced with Inv, IS/IM only happen if we don't use AckPuts
        case S_B:
        case S:
            if (cacheLine->numSharers() > 0) {
18 changes: 17 additions & 1 deletion src/sst/elements/memHierarchy/Makefile.am
@@ -94,7 +94,6 @@ EXTRA_DIST = \
     Sieve/tests/ompsievetest.c \
     Sieve/tests/sieve-test.py \
     tests/example.py \
-    tests/exampleM5.xml \
     tests/sdl-1.py \
     tests/sdl2-1.py \
     tests/sdl-2.py \
@@ -110,6 +109,23 @@ EXTRA_DIST = \
     tests/sdl8-4.py \
     tests/sdl9-1.py \
     tests/sdl9-2.py \
+    tests/testBackendChaining.py \
+    tests/testBackendDelayBuffer.py \
+    tests/testBackendPagedMulti.py \
+    tests/testBackendReorderRow.py \
+    tests/testBackendReorderSimple.py \
+    tests/testBackendSimpleDRAM-1.py \
+    tests/testBackendSimpleDRAM-2.py \
+    tests/testBackendVaultSim.py \
+    tests/testDistributedCaches.py \
+    tests/testFlushes.py \
+    tests/testFlushes-2.py \
+    tests/testHashXor.py \
+    tests/testIncoherent.py \
+    tests/testNoninclusive-1.py \
+    tests/testNoninclusive-2.py \
+    tests/testPrefetchParams.py \
+    tests/testThroughputThrottling.py \
     tests/DDR3_micron_32M_8B_x4_sg125.ini \
     tests/system.ini

2 changes: 1 addition & 1 deletion src/sst/elements/memHierarchy/cacheArray.cc
@@ -190,7 +190,7 @@ unsigned int DualSetAssociativeArray::preReplaceCache(const Addr baseAddr) {
     int setBegin = set * cacheAssociativity_;

     for (unsigned int id = 0; id < cacheAssociativity_; id++) {
-        int dirIndex = dataLines_[id+setBegin]->getDirLine()->getIndex();
+        int dirIndex = dataLines_[id+setBegin]->getDirLine() ? dataLines_[id+setBegin]->getDirLine()->getIndex() : -1;
         if (dirIndex == -1) {
             cacheSetStates[id] = I;
             cacheSetSharers[id] = 0;
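A minimal standalone illustration of the guard added above (simplified stand-in types, not the SST CacheLine/DataLine classes): when a data line has no attached directory line, the lookup now falls back to -1, which the surrounding replacement logic already interprets as an empty (state I) way.

    #include <iostream>
    #include <vector>

    // Hypothetical stand-ins for the data/directory line pair; illustration only.
    struct DirLine  { int index; int getIndex() const { return index; } };
    struct DataLine {
        DirLine* dir = nullptr;                      // may legitimately be null (no directory entry attached)
        DirLine* getDirLine() const { return dir; }
    };

    int main() {
        DirLine d{7};
        std::vector<DataLine> dataLines(4);
        dataLines[2].dir = &d;                       // only one way has a directory line attached

        for (const DataLine& line : dataLines) {
            // The pattern from the hunk: fall back to -1 instead of dereferencing a null pointer.
            int dirIndex = line.getDirLine() ? line.getDirLine()->getIndex() : -1;
            std::cout << "dirIndex = " << dirIndex << '\n';   // -1 is then treated as an empty (state I) way
        }
    }
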
9 changes: 9 additions & 0 deletions src/sst/elements/memHierarchy/cacheController.cc
@@ -256,9 +256,18 @@ void Cache::processCacheFlush(MemEvent* event, Addr baseAddr, bool replay) {
         return;
     }

+
+
     MemEvent * origRequest = NULL;
     if (mshr_->exists(baseAddr)) origRequest = mshr_->lookupFront(baseAddr);

+    // Generally we should not nack this request without checking for races
+    // But if no possible races and handling this will fill MSHR, nack it
+    if (!origRequest && mshr_->isAlmostFull()) {
+        sendNACK(event);
+        return;
+    }
+
     CacheAction action = coherenceMgr->handleReplacement(event, line, origRequest, replay);

     /* Action returned is for the origRequest if it exists, otherwise for the flush */
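A rough sketch of the idea behind the new NACK guard, using an assumed MSHR interface (the capacity numbers and isAlmostFull() policy below are illustrative, not the SST implementation): a flush with no conflicting outstanding request can be NACKed early instead of consuming one of the last MSHR slots, which are kept in reserve so that in-flight races can still be resolved.

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <map>

    // Hypothetical MSHR with a small reserve, to illustrate the isAlmostFull() idea.
    class Mshr {
        std::map<uint64_t, int> entries_;   // baseAddr -> outstanding entry count (simplified)
        std::size_t maxSize_;
    public:
        explicit Mshr(std::size_t maxSize) : maxSize_(maxSize) {}
        bool exists(uint64_t addr) const { return entries_.count(addr) != 0; }
        // "Almost full" keeps one slot free so requests already in flight can still allocate.
        bool isAlmostFull() const { return entries_.size() + 1 >= maxSize_; }
        void insert(uint64_t addr) { entries_[addr]++; }
    };

    int main() {
        Mshr mshr(4);
        for (uint64_t a : {0x100, 0x140, 0x180}) mshr.insert(a);

        uint64_t flushAddr = 0x1C0;
        bool conflict = mshr.exists(flushAddr);      // corresponds to origRequest != NULL in the hunk
        if (!conflict && mshr.isAlmostFull())
            std::cout << "NACK flush for 0x" << std::hex << flushAddr << " (MSHR nearly full)\n";
        else
            std::cout << "accept flush\n";
    }
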
26 changes: 25 additions & 1 deletion src/sst/elements/memHierarchy/cacheEventProcessing.cc
@@ -325,6 +325,8 @@ void Cache::processNoncacheable(MemEvent* event, Command cmd, Addr baseAddr) {
         case GetS:
         case GetX:
         case GetSEx:
+        case FlushLine:
+        case FlushLineInv: // Note that noncacheable flushes currently ignore the cache - they just flush any buffers at memory
 #ifdef __SST_DEBUG_OUTPUT__
             if (cmd == GetSEx) d_->debug(_WARNING_, "WARNING: Noncachable atomics have undefined behavior; atomicity not preserved\n");
 #endif
@@ -338,14 +340,36 @@ void Cache::processNoncacheable(MemEvent* event, Command cmd, Addr baseAddr) {
         case GetSResp:
         case GetXResp:
             origRequest = mshrNoncacheable_->removeFront(baseAddr);
-            if (origRequest->getID().second != event->getResponseToID().second) {
+            if (origRequest->getID().first != event->getResponseToID().first || origRequest->getID().second != event->getResponseToID().second) {
                 d_->fatal(CALL_INFO, -1, "%s, Error: noncacheable response received does not match request at front of mshr. Resp cmd = %s, Resp addr = 0x%" PRIx64 ", Req cmd = %s, Req addr = 0x%" PRIx64 ", Time = %" PRIu64 "\n",
                         getName().c_str(),CommandString[cmd],baseAddr, CommandString[origRequest->getCmd()], origRequest->getBaseAddr(),getCurrentSimTimeNano());
             }
             coherenceMgr->sendResponseUp(origRequest, NULLST, &event->getPayload(), true, 0);
             delete origRequest;
             delete event;
             break;
+        case FlushLineResp: {
+            // Flushes can be returned out of order since they don't necessarily require a memory access so we need to actually search the MSHRs
+            vector<mshrType> * entries = mshrNoncacheable_->getAll(baseAddr);
+            for (vector<mshrType>::iterator it = entries->begin(); it != entries->end(); it++) {
+                MemEvent * candidate = boost::get<MemEvent*>(it->elem);
+                if (candidate->getCmd() == FlushLine || candidate->getCmd() == FlushLineInv) { // All entries are events so no checking for pointer vs event needed
+                    if (candidate->getID().first == event->getResponseToID().first && candidate->getID().second == event->getResponseToID().second) {
+                        origRequest = candidate;
+                        break;
+                    }
+                }
+            }
+            if (origRequest == nullptr) {
+                d_->fatal(CALL_INFO, -1, "%s, Error: noncacheable response received does not match any request in the mshr. Resp cmd = %s, Resp addr = 0x%" PRIx64 ", Time = %" PRIu64 "\n",
+                        getName().c_str(), CommandString[cmd], baseAddr, getCurrentSimTimeNano());
+            }
+            coherenceMgr->sendResponseUp(origRequest, NULLST, &event->getPayload(), true, 0);
+            mshrNoncacheable_->removeElement(baseAddr, origRequest);
+            delete origRequest;
+            delete event;
+            break;
+        }
         default:
             d_->fatal(CALL_INFO, -1, "Command does not exist. Command: %s, Src: %s\n", CommandString[cmd], event->getSrc().c_str());
     }
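A self-contained sketch of the matching rule the new FlushLineResp path relies on (simplified structs, not SST's MemEvent or MSHR types): because flush responses may return out of order, a response is matched against every outstanding entry by its full (component, sequence) event ID rather than only against the front of the queue.

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Hypothetical stand-in for MemEvent: an ID pair plus the ID of the request it answers.
    struct Event {
        std::pair<uint64_t, uint32_t> id;          // (component ID, sequence number)
        std::pair<uint64_t, uint32_t> responseTo;  // only meaningful for responses
    };

    // Search all outstanding requests for the one this response answers.
    const Event* findMatch(const std::vector<Event>& outstanding, const Event& resp) {
        for (const Event& req : outstanding) {
            if (req.id.first == resp.responseTo.first && req.id.second == resp.responseTo.second)
                return &req;                       // both halves of the ID must match, as in the hunk
        }
        return nullptr;                            // no match: the cache treats this as a fatal error
    }

    int main() {
        std::vector<Event> outstanding = { {{7, 1}, {}}, {{7, 2}, {}}, {{7, 3}, {}} };
        Event resp{{9, 99}, {7, 2}};               // answers the *second* request, not the front one
        const Event* req = findMatch(outstanding, resp);
        std::cout << (req ? "matched request seq " + std::to_string(req->id.second)
                          : std::string("no matching request")) << '\n';
    }
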
11 changes: 6 additions & 5 deletions src/sst/elements/memHierarchy/cacheFactory.cc
@@ -210,8 +210,9 @@ Cache::Cache(ComponentId_t id, Params &params, CacheConfig config) : Component(i
     errorChecking();

     d2_ = new Output();
-    d2_->init("", params.find<int>("debug_level", 1), 0,(Output::output_location_t)params.find<int>("debug", SST::Output::STDOUT));
-
+    d2_->init("", params.find<int>("debug_level", 1), 0,(Output::output_location_t)params.find<int>("debug", SST::Output::NONE));
+
+    Output out("", 1, 0, Output::STDOUT);

     int stats = params.find<int>("statistics", 0);
     accessLatency_ = params.find<uint64_t>("access_latency_cycles", 0);
@@ -248,8 +249,7 @@ Cache::Cache(ComponentId_t id, Params &params, CacheConfig config) : Component(i
                 this->Component::getName().c_str(), accessLatency_);

     if (stats != 0) {
-        SST::Output outputStd("",1,0,SST::Output::STDOUT);
-        outputStd.output("%s, **WARNING** The 'statistics' parameter is deprecated: memHierarchy statistics have been moved to the Statistics API. Please see sst-info for available statistics and update your configuration accordingly.\nNO statistics will be printed otherwise!\n", this->Component::getName().c_str());
+        out.output("%s, **WARNING** The 'statistics' parameter is deprecated: memHierarchy statistics have been moved to the Statistics API. Please see sst-info for available statistics and update your configuration accordingly.\nNO statistics will be printed otherwise!\n", this->Component::getName().c_str());
     }
     UnitAlgebra packetSize_ua(packetSize);
     if (!packetSize_ua.hasUnits("B")) {
@@ -603,7 +603,8 @@ void Cache::intrapolateMSHRLatency() {
     }
     mshrLatency_ = y[accessLatency_];

-    d2_->verbose(CALL_INFO, 1, 0, "%s: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to %" PRIu64 " cycles.\n", getName().c_str(), mshrLatency_);
+    Output out("", 1, 0, Output::STDOUT);
+    out.verbose(CALL_INFO, 1, 0, "%s: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to %" PRIu64 " cycles.\n", getName().c_str(), mshrLatency_);
 }

 }}
10 changes: 5 additions & 5 deletions src/sst/elements/memHierarchy/directoryController.cc
@@ -513,6 +513,7 @@ void DirectoryController::processPacket(MemEvent * ev) {
             break;
         case FlushLineInv:
             handleFlushLineInv(ev);
+            break;
         case FlushLine:
             handleFlushLine(ev);
             break;
@@ -602,7 +603,6 @@ void DirectoryController::handleGetX(MemEvent * ev) {
     }

     MemEvent * respEv;
-
     State state = entry->getState();
     switch (state) {
         case I:
@@ -745,7 +745,7 @@ void DirectoryController::handleFlushLine(MemEvent * ev) {
     bool inMSHR = mshr->elementIsHit(ev->getBaseAddr(), ev);
     bool mshrConflict = !inMSHR && mshr->isHit(ev->getBaseAddr());

-    int srcID = node_name_to_id(ev->getSrc());
+    int srcID = node_id(ev->getSrc());
     State state = entry->getState();

     switch(state) {
@@ -852,7 +852,7 @@ void DirectoryController::handleFlushLineInv(MemEvent * ev) {
     bool inMSHR = mshr->elementIsHit(ev->getBaseAddr(), ev);
     bool mshrConflict = !inMSHR && mshr->isHit(ev->getBaseAddr());

-    int srcID = node_name_to_id(ev->getSrc());
+    int srcID = node_id(ev->getSrc());
     State state = entry->getState();

     switch (state) {
@@ -873,6 +873,7 @@
             }
             if (entry->isSharer(srcID)) entry->removeSharer(srcID);
             if (entry->getSharerCount() == 0) {
+                entry->setState(I);
                 forwardFlushRequest(ev);
             } else {
                 entry->setState(S_Inv);
@@ -933,10 +934,9 @@
             } else if (!inMSHR && !mshr->insert(ev->getBaseAddr(), ev)) mshrNACKRequest(ev);
             break;
         default:
-            dbg.fatal(CALL_INFO, -1, "%s, Error: Directory received FlushLine but state is %s. Addr = 0x%" PRIx64 ", Src = %s. Time = %" PRIu64 "ns\n",
+            dbg.fatal(CALL_INFO, -1, "%s, Error: Directory received FlushLineInv but state is %s. Addr = 0x%" PRIx64 ", Src = %s. Time = %" PRIu64 "ns\n",
                     getName().c_str(), StateString[state], ev->getBaseAddr(), ev->getSrc().c_str(), getCurrentSimTimeNano());
     }
-
 }

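A minimal illustration, in generic C++ rather than the SST dispatch code, of the fall-through that the added break in the first hunk prevents: without it, a FlushLineInv command would also execute the FlushLine handler.

    #include <iostream>

    enum Command { FlushLine, FlushLineInv };

    void dispatch(Command cmd) {
        switch (cmd) {
            case FlushLineInv:
                std::cout << "handleFlushLineInv\n";
                break;                  // the added break; without it control falls into the next case
            case FlushLine:
                std::cout << "handleFlushLine\n";
                break;
        }
    }

    int main() {
        dispatch(FlushLineInv);         // with the break in place this prints only "handleFlushLineInv"
    }
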
14 changes: 7 additions & 7 deletions src/sst/elements/memHierarchy/hash.h
@@ -34,30 +34,30 @@ class HashFunction{
     HashFunction() {};
     virtual ~HashFunction() {};

-    virtual uint64_t hash(uint32_t _ID, uint64_t _value) = 0;
+    virtual uint64_t hash(uint32_t ID, uint64_t value) = 0;
 };

 /* Simplest ID hashing */
 class PureIdHashFunction : public HashFunction {
 public:
-    inline uint64_t hash(uint32_t _ID, uint64_t _value) {
-        return _value;
+    inline uint64_t hash(uint32_t ID, uint64_t value) {
+        return value;
     }
 };

 /* This function is taken from the C99 standard's RNG and should uniquely map
    each input to an output. */
 class LinearHashFunction : public HashFunction {
 public:
-    uint64_t hash(uint32_t _ID, uint64_t x) {
-        return 1103515245*x + 12345;
+    uint64_t hash(uint32_t ID, uint64_t x) {
+        return 1103515245*x + 12345;
     }
 };

 /* Just a simple xor-based hash. */
 class XorHashFunction : public HashFunction {
 public:
-    uint64_t hash(uint32_t _ID, uint64_t x) {
+    uint64_t hash(uint32_t ID, uint64_t x) {
         unsigned char b[8];
         for (unsigned i = 0; i < 8; ++i)
             b[i] = (x >> (i*8))&0xff;
@@ -68,7 +68,7 @@ class XorHashFunction : public HashFunction {
         uint64_t result = 0;
         for (unsigned i = 0; i < 8; ++i)
             result |= (b[i]<<(i*8));

         return result;
     }
 };
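A usage sketch for the interface above: the HashFunction and LinearHashFunction definitions are copied from this hunk, while the set-selection arithmetic around them (line size, set count, and the unused ID argument) is an illustrative assumption about how a cache might apply the hash.

    #include <cstdint>
    #include <iostream>

    class HashFunction {
    public:
        virtual ~HashFunction() {}
        virtual uint64_t hash(uint32_t ID, uint64_t value) = 0;
    };

    class LinearHashFunction : public HashFunction {
    public:
        uint64_t hash(uint32_t ID, uint64_t x) { return 1103515245 * x + 12345; }
    };

    int main() {
        LinearHashFunction h;
        const uint64_t lineSize = 64, numSets = 1024;     // assumed geometry, for illustration only
        uint64_t addr = 0x7ffc1234;
        // One typical use: hash the line address, then pick a set from the hashed value.
        uint64_t set = h.hash(0, addr / lineSize) % numSets;
        std::cout << "addr 0x" << std::hex << addr << " -> set " << std::dec << set << "\n";
    }
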
15 changes: 9 additions & 6 deletions src/sst/elements/memHierarchy/libmemHierarchy.cc
@@ -433,11 +433,15 @@ static const ElementInfoPort cpu_ports[] = {

 static const ElementInfoParam cpu_params[] = {
     {"verbose", "Determine how verbose the output from the CPU is", "1"},
     {"clock", "Clock frequency", "1GHz"},
     {"rngseed", "Set a seed for the random generation of addresses", "7"},
     {"commFreq", "How often to do a memory operation."},
     {"memSize", "Size of physical memory."},
+    {"lineSize", "Size of a cache line - used for flushes"},
+    {"maxOutstanding", "Maximum Number of Outstanding memory requests."},
+    {"reqsPerIssue", "Maximum number of requests to issue at a time"},
     {"do_write", "Enable writes to memory (versus just reads).", "1"},
+    {"do_flush", "Enable flushes", "0"},
     {"num_loadstore", "Stop after this many reads and writes.", "-1"},
     {"noncacheableRangeStart", "Beginning of range of addresses that are noncacheable.", "0x0"},
     {"noncacheableRangeEnd", "End of range of addresses that are noncacheable.", "0x0"},
@@ -473,7 +477,6 @@
     {"trace_file", "File name (optional) of a trace-file to generate.", ""},
     {"debug", "0 (default): No debugging, 1: STDOUT, 2: STDERR, 3: FILE.", "0"},
     {"debug_level", "Debugging level: 0 to 10", "0"},
-    {"debug_addr", "Optional, int - Address (in decimal) to be debugged, if not specified or specified as -1, debug output for all addresses will be printed","-1"},
     {"listenercount", "Counts the number of listeners attached to this controller, these are modules for tracing or components like prefetchers", "0"},
     {"listener%(listenercount)d", "Loads a listener module into the controller", ""},
     {"network_bw", "Network link bandwidth.", NULL},
@@ -515,10 +518,10 @@ static const ElementInfoStatistic memBackendConvertor_statistics[] = {
     { "requests_received_GetX", "Number of GetX (read) requests received", "requests", 1 },
     { "requests_received_PutM", "Number of PutM (write) requests received", "requests", 1 },
     { "outstanding_requests", "Total number of outstanding requests each cycle", "requests", 1 },
-    { "latency_GetS", "Total latency of handled GetS requests", "ns", 1 },
-    { "latency_GetSEx", "Total latency of handled GetSEx requests", "ns", 1 },
-    { "latency_GetX", "Total latency of handled GetX requests", "ns", 1 },
-    { "latency_PutM", "Total latency of handled PutM requests", "ns", 1 },
+    { "latency_GetS", "Total latency of handled GetS requests", "cycles", 1 },
+    { "latency_GetSEx", "Total latency of handled GetSEx requests", "cycles", 1 },
+    { "latency_GetX", "Total latency of handled GetX requests", "cycles", 1 },
+    { "latency_PutM", "Total latency of handled PutM requests", "cycles", 1 },
     { NULL, NULL, NULL, 0 }
 };

@@ -582,7 +585,7 @@ static SubComponent* create_Mem_DelayBuffer(Component * comp, Params& params) {
 }

 static const ElementInfoParam delayBuffer_params[] = {
-    {"verbose", "Sets teh verbosity of the backend output", "0" },
+    {"verbose", "Sets the verbosity of the backend output", "0" },
     {"backend", "Backend memory system", "memHierarchy.simpleMem"},
     {"request_delay", "Constant delay to be added to requests with units (e.g., 1us)", "0ns"},
     {NULL, NULL, NULL}
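A small worked example for the unit change in the memBackendConvertor latency statistics above: with the totals now accumulated in backend clock cycles rather than nanoseconds, recovering a time requires the backend clock; the 1.2 GHz clock and request counts below are made-up numbers.

    #include <cstdint>
    #include <iostream>

    int main() {
        // Example statistic dump (hypothetical values, not from a real run).
        uint64_t latency_GetS_cycles = 3600000;   // total latency of handled GetS requests, in cycles
        uint64_t getS_requests       = 10000;     // number of GetS requests received
        double   clock_ghz           = 1.2;       // backend clock; needed now that units are cycles, not ns

        double avg_cycles = double(latency_GetS_cycles) / getS_requests;   // 360 cycles per request
        double avg_ns     = avg_cycles / clock_ghz;                        // 360 / 1.2 = 300 ns
        std::cout << "average GetS latency: " << avg_cycles << " cycles = " << avg_ns << " ns\n";
    }
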