
Merge pull request #526 from sstsimulator/devel
Automatically Merged using masterautomerge.py
sst-autotester authored Dec 2, 2016
2 parents 4a7bac9 + 71a3ae6 commit b676760
Showing 62 changed files with 3,473 additions and 1,911 deletions.
2 changes: 2 additions & 0 deletions src/sst/elements/memHierarchy/L1CoherenceController.cc
@@ -199,6 +199,8 @@ bool L1CoherenceController::isRetryNeeded(MemEvent * event, CacheLine * cacheLin
         case GetS:
         case GetX:
         case GetSEx:
+        case FlushLine:
+        case FlushLineInv:
            return true;
        case PutS:
        case PutE:
2 changes: 1 addition & 1 deletion src/sst/elements/memHierarchy/MESICoherenceController.cc
@@ -1101,7 +1101,7 @@ CacheAction MESIController::handleInv(MemEvent* event, CacheLine* cacheLine, boo
        case IS:
        case IM:
        case I_B:
-           return DONE;    // Eviction raced with Inv, IS/IM only happen if we don't use AckPuts
+           return IGNORE;  // Eviction raced with Inv, IS/IM only happen if we don't use AckPuts
        case S_B:
        case S:
            if (cacheLine->numSharers() > 0) {
18 changes: 17 additions & 1 deletion src/sst/elements/memHierarchy/Makefile.am
@@ -94,7 +94,6 @@ EXTRA_DIST = \
     Sieve/tests/ompsievetest.c \
     Sieve/tests/sieve-test.py \
     tests/example.py \
-    tests/exampleM5.xml \
     tests/sdl-1.py \
     tests/sdl2-1.py \
     tests/sdl-2.py \
@@ -110,6 +109,23 @@ EXTRA_DIST = \
     tests/sdl8-4.py \
     tests/sdl9-1.py \
     tests/sdl9-2.py \
+    tests/testBackendChaining.py \
+    tests/testBackendDelayBuffer.py \
+    tests/testBackendPagedMulti.py \
+    tests/testBackendReorderRow.py \
+    tests/testBackendReorderSimple.py \
+    tests/testBackendSimpleDRAM-1.py \
+    tests/testBackendSimpleDRAM-2.py \
+    tests/testBackendVaultSim.py \
+    tests/testDistributedCaches.py \
+    tests/testFlushes.py \
+    tests/testFlushes-2.py \
+    tests/testHashXor.py \
+    tests/testIncoherent.py \
+    tests/testNoninclusive-1.py \
+    tests/testNoninclusive-2.py \
+    tests/testPrefetchParams.py \
+    tests/testThroughputThrottling.py \
     tests/DDR3_micron_32M_8B_x4_sg125.ini \
     tests/system.ini

2 changes: 1 addition & 1 deletion src/sst/elements/memHierarchy/cacheArray.cc
@@ -190,7 +190,7 @@ unsigned int DualSetAssociativeArray::preReplaceCache(const Addr baseAddr) {
     int setBegin = set * cacheAssociativity_;

     for (unsigned int id = 0; id < cacheAssociativity_; id++) {
-        int dirIndex = dataLines_[id+setBegin]->getDirLine()->getIndex();
+        int dirIndex = dataLines_[id+setBegin]->getDirLine() ? dataLines_[id+setBegin]->getDirLine()->getIndex() : -1;
         if (dirIndex == -1) {
             cacheSetStates[id] = I;
             cacheSetSharers[id] = 0;
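A minimal standalone illustration of the guard added above (simplified stand-in types, not the SST CacheLine/DataLine classes): when a data line has no attached directory line, the lookup now falls back to -1, which the surrounding replacement logic already interprets as an empty (state I) way.

    #include <iostream>
    #include <vector>

    // Hypothetical stand-ins for the data/directory line pair; illustration only.
    struct DirLine  { int index; int getIndex() const { return index; } };
    struct DataLine {
        DirLine* dir = nullptr;                      // may legitimately be null (no directory entry attached)
        DirLine* getDirLine() const { return dir; }
    };

    int main() {
        DirLine d{7};
        std::vector<DataLine> dataLines(4);
        dataLines[2].dir = &d;                       // only one way has a directory line attached

        for (const DataLine& line : dataLines) {
            // The pattern from the hunk: fall back to -1 instead of dereferencing a null pointer.
            int dirIndex = line.getDirLine() ? line.getDirLine()->getIndex() : -1;
            std::cout << "dirIndex = " << dirIndex << '\n';   // -1 is then treated as an empty (state I) way
        }
    }
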
9 changes: 9 additions & 0 deletions src/sst/elements/memHierarchy/cacheController.cc
@@ -256,9 +256,18 @@ void Cache::processCacheFlush(MemEvent* event, Addr baseAddr, bool replay) {
         return;
     }

+
+
     MemEvent * origRequest = NULL;
     if (mshr_->exists(baseAddr)) origRequest = mshr_->lookupFront(baseAddr);

+    // Generally we should not nack this request without checking for races
+    // But if no possible races and handling this will fill MSHR, nack it
+    if (!origRequest && mshr_->isAlmostFull()) {
+        sendNACK(event);
+        return;
+    }
+
     CacheAction action = coherenceMgr->handleReplacement(event, line, origRequest, replay);

     /* Action returned is for the origRequest if it exists, otherwise for the flush */
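A rough sketch of the idea behind the new NACK guard, using an assumed MSHR interface (the capacity numbers and isAlmostFull() policy below are illustrative, not the SST implementation): a flush with no conflicting outstanding request can be NACKed early instead of consuming one of the last MSHR slots, which are kept in reserve so that in-flight races can still be resolved.

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <map>

    // Hypothetical MSHR with a small reserve, to illustrate the isAlmostFull() idea.
    class Mshr {
        std::map<uint64_t, int> entries_;   // baseAddr -> outstanding entry count (simplified)
        std::size_t maxSize_;
    public:
        explicit Mshr(std::size_t maxSize) : maxSize_(maxSize) {}
        bool exists(uint64_t addr) const { return entries_.count(addr) != 0; }
        // "Almost full" keeps one slot free so requests already in flight can still allocate.
        bool isAlmostFull() const { return entries_.size() + 1 >= maxSize_; }
        void insert(uint64_t addr) { entries_[addr]++; }
    };

    int main() {
        Mshr mshr(4);
        for (uint64_t a : {0x100, 0x140, 0x180}) mshr.insert(a);

        uint64_t flushAddr = 0x1C0;
        bool conflict = mshr.exists(flushAddr);      // corresponds to origRequest != NULL in the hunk
        if (!conflict && mshr.isAlmostFull())
            std::cout << "NACK flush for 0x" << std::hex << flushAddr << " (MSHR nearly full)\n";
        else
            std::cout << "accept flush\n";
    }
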
26 changes: 25 additions & 1 deletion src/sst/elements/memHierarchy/cacheEventProcessing.cc
@@ -325,6 +325,8 @@ void Cache::processNoncacheable(MemEvent* event, Command cmd, Addr baseAddr) {
         case GetS:
         case GetX:
         case GetSEx:
+        case FlushLine:
+        case FlushLineInv: // Note that noncacheable flushes currently ignore the cache - they just flush any buffers at memory
 #ifdef __SST_DEBUG_OUTPUT__
             if (cmd == GetSEx) d_->debug(_WARNING_, "WARNING: Noncachable atomics have undefined behavior; atomicity not preserved\n");
 #endif
@@ -338,14 +340,36 @@ void Cache::processNoncacheable(MemEvent* event, Command cmd, Addr baseAddr) {
         case GetSResp:
         case GetXResp:
             origRequest = mshrNoncacheable_->removeFront(baseAddr);
-            if (origRequest->getID().second != event->getResponseToID().second) {
+            if (origRequest->getID().first != event->getResponseToID().first || origRequest->getID().second != event->getResponseToID().second) {
                 d_->fatal(CALL_INFO, -1, "%s, Error: noncacheable response received does not match request at front of mshr. Resp cmd = %s, Resp addr = 0x%" PRIx64 ", Req cmd = %s, Req addr = 0x%" PRIx64 ", Time = %" PRIu64 "\n",
                         getName().c_str(),CommandString[cmd],baseAddr, CommandString[origRequest->getCmd()], origRequest->getBaseAddr(),getCurrentSimTimeNano());
             }
             coherenceMgr->sendResponseUp(origRequest, NULLST, &event->getPayload(), true, 0);
             delete origRequest;
             delete event;
             break;
+        case FlushLineResp: {
+            // Flushes can be returned out of order since they don't necessarily require a memory access so we need to actually search the MSHRs
+            vector<mshrType> * entries = mshrNoncacheable_->getAll(baseAddr);
+            for (vector<mshrType>::iterator it = entries->begin(); it != entries->end(); it++) {
+                MemEvent * candidate = boost::get<MemEvent*>(it->elem);
+                if (candidate->getCmd() == FlushLine || candidate->getCmd() == FlushLineInv) { // All entries are events so no checking for pointer vs event needed
+                    if (candidate->getID().first == event->getResponseToID().first && candidate->getID().second == event->getResponseToID().second) {
+                        origRequest = candidate;
+                        break;
+                    }
+                }
+            }
+            if (origRequest == nullptr) {
+                d_->fatal(CALL_INFO, -1, "%s, Error: noncacheable response received does not match any request in the mshr. Resp cmd = %s, Resp addr = 0x%" PRIx64 ", Time = %" PRIu64 "\n",
+                        getName().c_str(), CommandString[cmd], baseAddr, getCurrentSimTimeNano());
+            }
+            coherenceMgr->sendResponseUp(origRequest, NULLST, &event->getPayload(), true, 0);
+            mshrNoncacheable_->removeElement(baseAddr, origRequest);
+            delete origRequest;
+            delete event;
+            break;
+        }
         default:
             d_->fatal(CALL_INFO, -1, "Command does not exist. Command: %s, Src: %s\n", CommandString[cmd], event->getSrc().c_str());
     }
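A self-contained sketch of the matching rule the new FlushLineResp path relies on (simplified structs, not SST's MemEvent or MSHR types): because flush responses may return out of order, a response is matched against every outstanding entry by its full (component, sequence) event ID rather than only against the front of the queue.

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Hypothetical stand-in for MemEvent: an ID pair plus the ID of the request it answers.
    struct Event {
        std::pair<uint64_t, uint32_t> id;          // (component ID, sequence number)
        std::pair<uint64_t, uint32_t> responseTo;  // only meaningful for responses
    };

    // Search all outstanding requests for the one this response answers.
    const Event* findMatch(const std::vector<Event>& outstanding, const Event& resp) {
        for (const Event& req : outstanding) {
            if (req.id.first == resp.responseTo.first && req.id.second == resp.responseTo.second)
                return &req;                       // both halves of the ID must match, as in the hunk
        }
        return nullptr;                            // no match: the cache treats this as a fatal error
    }

    int main() {
        std::vector<Event> outstanding = { {{7, 1}, {}}, {{7, 2}, {}}, {{7, 3}, {}} };
        Event resp{{9, 99}, {7, 2}};               // answers the *second* request, not the front one
        const Event* req = findMatch(outstanding, resp);
        std::cout << (req ? "matched request seq " + std::to_string(req->id.second)
                          : std::string("no matching request")) << '\n';
    }
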
11 changes: 6 additions & 5 deletions src/sst/elements/memHierarchy/cacheFactory.cc
@@ -210,8 +210,9 @@ Cache::Cache(ComponentId_t id, Params &params, CacheConfig config) : Component(i
     errorChecking();

     d2_ = new Output();
-    d2_->init("", params.find<int>("debug_level", 1), 0,(Output::output_location_t)params.find<int>("debug", SST::Output::STDOUT));
-
+    d2_->init("", params.find<int>("debug_level", 1), 0,(Output::output_location_t)params.find<int>("debug", SST::Output::NONE));
+
+    Output out("", 1, 0, Output::STDOUT);

     int stats = params.find<int>("statistics", 0);
     accessLatency_ = params.find<uint64_t>("access_latency_cycles", 0);
@@ -248,8 +249,7 @@ Cache::Cache(ComponentId_t id, Params &params, CacheConfig config) : Component(i
                 this->Component::getName().c_str(), accessLatency_);

     if (stats != 0) {
-        SST::Output outputStd("",1,0,SST::Output::STDOUT);
-        outputStd.output("%s, **WARNING** The 'statistics' parameter is deprecated: memHierarchy statistics have been moved to the Statistics API. Please see sst-info for available statistics and update your configuration accordingly.\nNO statistics will be printed otherwise!\n", this->Component::getName().c_str());
+        out.output("%s, **WARNING** The 'statistics' parameter is deprecated: memHierarchy statistics have been moved to the Statistics API. Please see sst-info for available statistics and update your configuration accordingly.\nNO statistics will be printed otherwise!\n", this->Component::getName().c_str());
     }
     UnitAlgebra packetSize_ua(packetSize);
     if (!packetSize_ua.hasUnits("B")) {
@@ -603,7 +603,8 @@ void Cache::intrapolateMSHRLatency() {
     }
     mshrLatency_ = y[accessLatency_];

-    d2_->verbose(CALL_INFO, 1, 0, "%s: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to %" PRIu64 " cycles.\n", getName().c_str(), mshrLatency_);
+    Output out("", 1, 0, Output::STDOUT);
+    out.verbose(CALL_INFO, 1, 0, "%s: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to %" PRIu64 " cycles.\n", getName().c_str(), mshrLatency_);
 }

 }}
10 changes: 5 additions & 5 deletions src/sst/elements/memHierarchy/directoryController.cc
@@ -513,6 +513,7 @@ void DirectoryController::processPacket(MemEvent * ev) {
             break;
         case FlushLineInv:
             handleFlushLineInv(ev);
+            break;
         case FlushLine:
             handleFlushLine(ev);
             break;
@@ -602,7 +603,6 @@ void DirectoryController::handleGetX(MemEvent * ev) {
     }

     MemEvent * respEv;
-
     State state = entry->getState();
     switch (state) {
         case I:
@@ -745,7 +745,7 @@ void DirectoryController::handleFlushLine(MemEvent * ev) {
     bool inMSHR = mshr->elementIsHit(ev->getBaseAddr(), ev);
     bool mshrConflict = !inMSHR && mshr->isHit(ev->getBaseAddr());

-    int srcID = node_name_to_id(ev->getSrc());
+    int srcID = node_id(ev->getSrc());
     State state = entry->getState();

     switch(state) {
@@ -852,7 +852,7 @@ void DirectoryController::handleFlushLineInv(MemEvent * ev) {
     bool inMSHR = mshr->elementIsHit(ev->getBaseAddr(), ev);
     bool mshrConflict = !inMSHR && mshr->isHit(ev->getBaseAddr());

-    int srcID = node_name_to_id(ev->getSrc());
+    int srcID = node_id(ev->getSrc());
     State state = entry->getState();

     switch (state) {
@@ -873,6 +873,7 @@
             }
             if (entry->isSharer(srcID)) entry->removeSharer(srcID);
             if (entry->getSharerCount() == 0) {
+                entry->setState(I);
                 forwardFlushRequest(ev);
             } else {
                 entry->setState(S_Inv);
@@ -933,10 +934,9 @@
             } else if (!inMSHR && !mshr->insert(ev->getBaseAddr(), ev)) mshrNACKRequest(ev);
             break;
         default:
-            dbg.fatal(CALL_INFO, -1, "%s, Error: Directory received FlushLine but state is %s. Addr = 0x%" PRIx64 ", Src = %s. Time = %" PRIu64 "ns\n",
+            dbg.fatal(CALL_INFO, -1, "%s, Error: Directory received FlushLineInv but state is %s. Addr = 0x%" PRIx64 ", Src = %s. Time = %" PRIu64 "ns\n",
                     getName().c_str(), StateString[state], ev->getBaseAddr(), ev->getSrc().c_str(), getCurrentSimTimeNano());
     }
-
 }

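A minimal illustration, in generic C++ rather than the SST dispatch code, of the fall-through that the added break in the first hunk prevents: without it, a FlushLineInv command would also execute the FlushLine handler.

    #include <iostream>

    enum Command { FlushLine, FlushLineInv };

    void dispatch(Command cmd) {
        switch (cmd) {
            case FlushLineInv:
                std::cout << "handleFlushLineInv\n";
                break;                  // the added break; without it control falls into the next case
            case FlushLine:
                std::cout << "handleFlushLine\n";
                break;
        }
    }

    int main() {
        dispatch(FlushLineInv);         // with the break in place this prints only "handleFlushLineInv"
    }
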
14 changes: 7 additions & 7 deletions src/sst/elements/memHierarchy/hash.h
@@ -34,30 +34,30 @@ class HashFunction{
     HashFunction() {};
     virtual ~HashFunction() {};

-    virtual uint64_t hash(uint32_t _ID, uint64_t _value) = 0;
+    virtual uint64_t hash(uint32_t ID, uint64_t value) = 0;
 };

 /* Simplest ID hashing */
 class PureIdHashFunction : public HashFunction {
 public:
-    inline uint64_t hash(uint32_t _ID, uint64_t _value) {
-        return _value;
+    inline uint64_t hash(uint32_t ID, uint64_t value) {
+        return value;
     }
 };

 /* This function is taken from the C99 standard's RNG and should uniquely map
    each input to an output. */
 class LinearHashFunction : public HashFunction {
 public:
-    uint64_t hash(uint32_t _ID, uint64_t x) {
-        return 1103515245*x + 12345;
+    uint64_t hash(uint32_t ID, uint64_t x) {
+        return 1103515245*x + 12345;
     }
 };

 /* Just a simple xor-based hash. */
 class XorHashFunction : public HashFunction {
 public:
-    uint64_t hash(uint32_t _ID, uint64_t x) {
+    uint64_t hash(uint32_t ID, uint64_t x) {
         unsigned char b[8];
         for (unsigned i = 0; i < 8; ++i)
             b[i] = (x >> (i*8))&0xff;
@@ -68,7 +68,7 @@ class XorHashFunction : public HashFunction {
         uint64_t result = 0;
         for (unsigned i = 0; i < 8; ++i)
             result |= (b[i]<<(i*8));

         return result;
     }
 };
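A usage sketch for the interface above: the HashFunction and LinearHashFunction definitions are copied from this hunk, while the set-selection arithmetic around them (line size, set count, and the unused ID argument) is an illustrative assumption about how a cache might apply the hash.

    #include <cstdint>
    #include <iostream>

    class HashFunction {
    public:
        virtual ~HashFunction() {}
        virtual uint64_t hash(uint32_t ID, uint64_t value) = 0;
    };

    class LinearHashFunction : public HashFunction {
    public:
        uint64_t hash(uint32_t ID, uint64_t x) { return 1103515245 * x + 12345; }
    };

    int main() {
        LinearHashFunction h;
        const uint64_t lineSize = 64, numSets = 1024;     // assumed geometry, for illustration only
        uint64_t addr = 0x7ffc1234;
        // One typical use: hash the line address, then pick a set from the hashed value.
        uint64_t set = h.hash(0, addr / lineSize) % numSets;
        std::cout << "addr 0x" << std::hex << addr << " -> set " << std::dec << set << "\n";
    }
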
15 changes: 9 additions & 6 deletions src/sst/elements/memHierarchy/libmemHierarchy.cc
@@ -433,11 +433,15 @@ static const ElementInfoPort cpu_ports[] = {

 static const ElementInfoParam cpu_params[] = {
     {"verbose", "Determine how verbose the output from the CPU is", "1"},
     {"clock", "Clock frequency", "1GHz"},
     {"rngseed", "Set a seed for the random generation of addresses", "7"},
     {"commFreq", "How often to do a memory operation."},
     {"memSize", "Size of physical memory."},
+    {"lineSize", "Size of a cache line - used for flushes"},
+    {"maxOutstanding", "Maximum Number of Outstanding memory requests."},
+    {"reqsPerIssue", "Maximum number of requests to issue at a time"},
     {"do_write", "Enable writes to memory (versus just reads).", "1"},
+    {"do_flush", "Enable flushes", "0"},
     {"num_loadstore", "Stop after this many reads and writes.", "-1"},
     {"noncacheableRangeStart", "Beginning of range of addresses that are noncacheable.", "0x0"},
     {"noncacheableRangeEnd", "End of range of addresses that are noncacheable.", "0x0"},
@@ -473,7 +477,6 @@
     {"trace_file", "File name (optional) of a trace-file to generate.", ""},
     {"debug", "0 (default): No debugging, 1: STDOUT, 2: STDERR, 3: FILE.", "0"},
     {"debug_level", "Debugging level: 0 to 10", "0"},
-    {"debug_addr", "Optional, int - Address (in decimal) to be debugged, if not specified or specified as -1, debug output for all addresses will be printed","-1"},
     {"listenercount", "Counts the number of listeners attached to this controller, these are modules for tracing or components like prefetchers", "0"},
     {"listener%(listenercount)d", "Loads a listener module into the controller", ""},
     {"network_bw", "Network link bandwidth.", NULL},
@@ -515,10 +518,10 @@ static const ElementInfoStatistic memBackendConvertor_statistics[] = {
     { "requests_received_GetX", "Number of GetX (read) requests received", "requests", 1 },
     { "requests_received_PutM", "Number of PutM (write) requests received", "requests", 1 },
     { "outstanding_requests", "Total number of outstanding requests each cycle", "requests", 1 },
-    { "latency_GetS", "Total latency of handled GetS requests", "ns", 1 },
-    { "latency_GetSEx", "Total latency of handled GetSEx requests", "ns", 1 },
-    { "latency_GetX", "Total latency of handled GetX requests", "ns", 1 },
-    { "latency_PutM", "Total latency of handled PutM requests", "ns", 1 },
+    { "latency_GetS", "Total latency of handled GetS requests", "cycles", 1 },
+    { "latency_GetSEx", "Total latency of handled GetSEx requests", "cycles", 1 },
+    { "latency_GetX", "Total latency of handled GetX requests", "cycles", 1 },
+    { "latency_PutM", "Total latency of handled PutM requests", "cycles", 1 },
     { NULL, NULL, NULL, 0 }
 };

@@ -582,7 +585,7 @@ static SubComponent* create_Mem_DelayBuffer(Component * comp, Params& params) {
 }

 static const ElementInfoParam delayBuffer_params[] = {
-    {"verbose", "Sets teh verbosity of the backend output", "0" },
+    {"verbose", "Sets the verbosity of the backend output", "0" },
     {"backend", "Backend memory system", "memHierarchy.simpleMem"},
     {"request_delay", "Constant delay to be added to requests with units (e.g., 1us)", "0ns"},
     {NULL, NULL, NULL}
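A small worked example for the unit change in the memBackendConvertor latency statistics above: with the totals now accumulated in backend clock cycles rather than nanoseconds, recovering a time requires the backend clock; the 1.2 GHz clock and request counts below are made-up numbers.

    #include <cstdint>
    #include <iostream>

    int main() {
        // Example statistic dump (hypothetical values, not from a real run).
        uint64_t latency_GetS_cycles = 3600000;   // total latency of handled GetS requests, in cycles
        uint64_t getS_requests       = 10000;     // number of GetS requests received
        double   clock_ghz           = 1.2;       // backend clock; needed now that units are cycles, not ns

        double avg_cycles = double(latency_GetS_cycles) / getS_requests;   // 360 cycles per request
        double avg_ns     = avg_cycles / clock_ghz;                        // 360 / 1.2 = 300 ns
        std::cout << "average GetS latency: " << avg_cycles << " cycles = " << avg_ns << " ns\n";
    }
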