diff -r 385fa35a0ed6 -r 7b245b85f5c3 src/cpu/base_dyn_inst_impl.hh --- a/src/cpu/base_dyn_inst_impl.hh Sat Nov 30 17:49:47 2013 -0600 +++ b/src/cpu/base_dyn_inst_impl.hh Sat Nov 30 17:51:31 2013 -0600 @@ -90,6 +90,7 @@ effAddr = 0; physEffAddr = 0; readyRegs = 0; + memReqFlags = 0; status.reset(); diff -r 385fa35a0ed6 -r 7b245b85f5c3 src/cpu/o3/lsq_unit.hh --- a/src/cpu/o3/lsq_unit.hh Sat Nov 30 17:49:47 2013 -0600 +++ b/src/cpu/o3/lsq_unit.hh Sat Nov 30 17:51:31 2013 -0600 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 ARM Limited + * Copyright (c) 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -361,7 +361,7 @@ /** Constructs a store queue entry for a given instruction. */ SQEntry(DynInstPtr &_inst) : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0), - isSplit(0), canWB(0), committed(0), completed(0) + isSplit(0), canWB(0), committed(0), completed(0), isAllZeros(0) { std::memset(data, 0, sizeof(data)); } @@ -384,6 +384,11 @@ bool committed; /** Whether or not the store is completed. */ bool completed; + /** Whether this request writes all zeros and thus doesn't + * have any data attached to it. Used for cache block zero + * style instructions (ARM DC ZVA; ALPHA WH64). + */ + bool isAllZeros; }; private: @@ -691,13 +696,18 @@ // Get shift amount for offset into the store's data. 
int shift_amt = req->getVaddr() - storeQueue[store_idx].inst->effAddr; - memcpy(data, storeQueue[store_idx].data + shift_amt, + if (storeQueue[store_idx].isAllZeros) + memset(data, 0, req->getSize()); + else + memcpy(data, storeQueue[store_idx].data + shift_amt, req->getSize()); assert(!load_inst->memData); - load_inst->memData = new uint8_t[64]; - - memcpy(load_inst->memData, + load_inst->memData = new uint8_t[req->getSize()]; + if (storeQueue[store_idx].isAllZeros) + memset(load_inst->memData, 0, req->getSize()); + else + memcpy(load_inst->memData, storeQueue[store_idx].data + shift_amt, req->getSize()); DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " @@ -777,7 +787,7 @@ load_inst->seqNum, load_inst->pcState()); assert(!load_inst->memData); - load_inst->memData = new uint8_t[64]; + load_inst->memData = new uint8_t[req->getSize()]; ++usedPorts; @@ -916,7 +926,9 @@ storeQueue[store_idx].sreqHigh = sreqHigh; unsigned size = req->getSize(); storeQueue[store_idx].size = size; - assert(size <= sizeof(storeQueue[store_idx].data)); + storeQueue[store_idx].isAllZeros = req->getFlags() & Request::CACHE_BLOCK_ZERO; + assert(size <= sizeof(storeQueue[store_idx].data) || + (req->getFlags() & Request::CACHE_BLOCK_ZERO)); // Split stores can only occur in ISAs with unaligned memory accesses. If // a store request has been split, sreqLow and sreqHigh will be non-null. @@ -924,7 +936,8 @@ storeQueue[store_idx].isSplit = true; } - memcpy(storeQueue[store_idx].data, data, size); + if (!(req->getFlags() & Request::CACHE_BLOCK_ZERO)) + memcpy(storeQueue[store_idx].data, data, size); // This function only writes the data to the store queue, so no fault // can happen here. 
diff -r 385fa35a0ed6 -r 7b245b85f5c3 src/cpu/o3/lsq_unit_impl.hh --- a/src/cpu/o3/lsq_unit_impl.hh Sat Nov 30 17:49:47 2013 -0600 +++ b/src/cpu/o3/lsq_unit_impl.hh Sat Nov 30 17:51:31 2013 -0600 @@ -816,9 +816,12 @@ storeQueue[storeWBIdx].committed = true; assert(!inst->memData); - inst->memData = new uint8_t[64]; + inst->memData = new uint8_t[req->getSize()]; - memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); + if (storeQueue[storeWBIdx].isAllZeros) + memset(inst->memData, 0, req->getSize()); + else + memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); MemCmd command = req->isSwap() ? MemCmd::SwapReq : diff -r 385fa35a0ed6 -r 7b245b85f5c3 src/cpu/simple/atomic.cc --- a/src/cpu/simple/atomic.cc Sat Nov 30 17:49:47 2013 -0600 +++ b/src/cpu/simple/atomic.cc Sat Nov 30 17:51:31 2013 -0600 @@ -399,6 +399,19 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr, unsigned flags, uint64_t *res) { + + static uint8_t zero_array[64] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0}; + + if (data == NULL) { + assert(size <= 64); + assert(flags & Request::CACHE_BLOCK_ZERO); + // This must be a cache block zeroing request + data = zero_array; + } + // use the CPU's statically allocated write request and packet objects Request *req = &data_write_req; diff -r 385fa35a0ed6 -r 7b245b85f5c3 src/cpu/simple/timing.cc --- a/src/cpu/simple/timing.cc Sat Nov 30 17:49:47 2013 -0600 +++ b/src/cpu/simple/timing.cc Sat Nov 30 17:51:31 2013 -0600 @@ -472,14 +472,20 @@ Addr addr, unsigned flags, uint64_t *res) { uint8_t *newData = new uint8_t[size]; - memcpy(newData, data, size); - const int asid = 0; const ThreadID tid = 0; const Addr pc = thread->instAddr(); unsigned block_size = cacheLineSize(); BaseTLB::Mode mode = BaseTLB::Write; + if (data == NULL) { + assert(flags & Request::CACHE_BLOCK_ZERO); + // This must be a cache block zeroing request + 
memset(newData, 0, size); + } else { + memcpy(newData, data, size); + } + if (traceData) { traceData->setAddr(addr); } diff -r 385fa35a0ed6 -r 7b245b85f5c3 src/mem/request.hh --- a/src/mem/request.hh Sat Nov 30 17:49:47 2013 -0600 +++ b/src/mem/request.hh Sat Nov 30 17:51:31 2013 -0600 @@ -114,6 +114,11 @@ /** This request is made in privileged mode. */ static const FlagsType PRIVILEGED = 0x00008000; + /** This is a write that targets and zeroes an entire cache block. + * There is no need for a read/modify/write. + */ + static const FlagsType CACHE_BLOCK_ZERO = 0x00010000; + /** The request should not cause a memory access. */ static const FlagsType NO_ACCESS = 0x00080000; /** This request will lock or unlock the accessed memory. When used with