# Node ID 04a8d16451227e575a793042c15093ea1354eaf9 # Parent f0661f2967528b1ad58c835a890468406d3f6d9e diff --git a/src/base/types.hh b/src/base/types.hh --- a/src/base/types.hh +++ b/src/base/types.hh @@ -200,6 +200,36 @@ constexpr decltype(nullptr) NoFault = nullptr; #endif +/** + * AtomicOpFunctors are primarily used to provide support for + * GPU-style RMW atomics. A pointer to an AtomicOpFunctor is + * kept in a request and - if the request is an RMW - the + * atomic functor will be called when the request reaches the + * memory controller. The request must be embedded in a + * MemCmd::SwapReq packet. + * + * These base functors define the API for atomic ops; any + * specific atomic RMWs, e.g., CAS, increment, XOR, etc., must + * derive from one of these functors and implement the necessary + * functionality. Doing so allows the memory system to simply + * call the atomic op functor and the implementation will take + * care of providing the proper functionality. + */ +class AtomicOpFunctor +{ + public: + virtual void operator()(uint8_t *p) = 0; + virtual ~AtomicOpFunctor() {} +}; + +template +class TypedAtomicOpFunctor : public AtomicOpFunctor +{ + public: + void operator()(uint8_t *p) { execute((T *)p); } + virtual void execute(T * p) = 0; +}; + enum ByteOrder { BigEndianByteOrder, LittleEndianByteOrder diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc --- a/src/mem/abstract_mem.cc +++ b/src/mem/abstract_mem.cc @@ -341,39 +341,46 @@ uint8_t *hostAddr = pmemAddr + pkt->getAddr() - range.start(); if (pkt->cmd == MemCmd::SwapReq) { - std::vector overwrite_val(pkt->getSize()); - uint64_t condition_val64; - uint32_t condition_val32; + if (pkt->isAtomic()) { + if (pmemAddr) { + memcpy(pkt->getPtr(), hostAddr, pkt->getSize()); + (*(pkt->getAtomicOp()))(hostAddr); + } + } else { + std::vector overwrite_val(pkt->getSize()); + uint64_t condition_val64; + uint32_t condition_val32; - if (!pmemAddr) - panic("Swap only works if there is real memory (i.e. 
null=False)"); + if (!pmemAddr) + panic("Swap only works if there is real memory (i.e. null=False)"); - bool overwrite_mem = true; - // keep a copy of our possible write value, and copy what is at the - // memory address into the packet - std::memcpy(&overwrite_val[0], pkt->getConstPtr(), - pkt->getSize()); - std::memcpy(pkt->getPtr(), hostAddr, pkt->getSize()); + bool overwrite_mem = true; + // keep a copy of our possible write value, and copy what is at the + // memory address into the packet + std::memcpy(&overwrite_val[0], pkt->getConstPtr(), + pkt->getSize()); + std::memcpy(pkt->getPtr(), hostAddr, pkt->getSize()); - if (pkt->req->isCondSwap()) { - if (pkt->getSize() == sizeof(uint64_t)) { - condition_val64 = pkt->req->getExtraData(); - overwrite_mem = !std::memcmp(&condition_val64, hostAddr, - sizeof(uint64_t)); - } else if (pkt->getSize() == sizeof(uint32_t)) { - condition_val32 = (uint32_t)pkt->req->getExtraData(); - overwrite_mem = !std::memcmp(&condition_val32, hostAddr, - sizeof(uint32_t)); - } else - panic("Invalid size for conditional read/write\n"); + if (pkt->req->isCondSwap()) { + if (pkt->getSize() == sizeof(uint64_t)) { + condition_val64 = pkt->req->getExtraData(); + overwrite_mem = !std::memcmp(&condition_val64, hostAddr, + sizeof(uint64_t)); + } else if (pkt->getSize() == sizeof(uint32_t)) { + condition_val32 = (uint32_t)pkt->req->getExtraData(); + overwrite_mem = !std::memcmp(&condition_val32, hostAddr, + sizeof(uint32_t)); + } else + panic("Invalid size for conditional read/write\n"); + } + + if (overwrite_mem) + std::memcpy(hostAddr, &overwrite_val[0], pkt->getSize()); + + assert(!pkt->req->isInstFetch()); + TRACE_PACKET("Read/Write"); + numOther[pkt->req->masterId()]++; } - - if (overwrite_mem) - std::memcpy(hostAddr, &overwrite_val[0], pkt->getSize()); - - assert(!pkt->req->isInstFetch()); - TRACE_PACKET("Read/Write"); - numOther[pkt->req->masterId()]++; } else if (pkt->isRead()) { assert(!pkt->isWrite()); if (pkt->isLLSC()) { diff --git 
a/src/mem/packet.hh b/src/mem/packet.hh --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -569,6 +569,12 @@ } /** + * Accessor function to atomic op. + */ + AtomicOpFunctor *getAtomicOp() const { return req->getAtomicOpFunctor(); } + bool isAtomicOp() const { return req->isAtomic(); } + + /** * It has been determined that the SC packet should successfully update * memory. Therefore, convert this SC packet to a normal write. */ diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm --- a/src/mem/protocol/RubySlicc_Exports.sm +++ b/src/mem/protocol/RubySlicc_Exports.sm @@ -145,7 +145,9 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") { LD, desc="Load"; ST, desc="Store"; - ATOMIC, desc="Atomic Load/Store"; + ATOMIC, desc="Atomic Load/Store -- depricated. use ATOMIC_RETURN or ATOMIC_NO_RETURN"; + ATOMIC_RETURN, desc="Atomic Load/Store, return data"; + ATOMIC_NO_RETURN, desc="Atomic Load/Store, do not return data"; IFETCH, desc="Instruction fetch"; IO, desc="I/O"; REPLACEMENT, desc="Replacement"; diff --git a/src/mem/request.hh b/src/mem/request.hh --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -160,6 +160,21 @@ /** The request should be marked with RELEASE. */ RELEASE = 0x00040000, + /** + * The request is an atomic RMW. In both cases the RMW operation + * is performed on the value at the address associated with the + * request, and as such both act as regular SwapReq mem commands. + * In the case of a RETURN_OP, however, the value at the address + * associated with the request, before the RMW operation, is loaded + * into the destination register. + * + * Requests that have one of these flags set must call the + * AtomicOpFunctor to perform the actual RMW when it reaches + * the memory controller. + */ + ATOMIC_RETURN_OP = 0x40000000, + ATOMIC_NO_RETURN_OP = 0x80000000, + /** The request should be marked with KERNEL. 
* Used to indicate a GPU kernel launch */ @@ -345,6 +360,13 @@ /** Sequence number of the instruction that creates the request */ InstSeqNum _reqInstSeqNum; + /** + * A generic base pointer to an atomic operation. See types.hh for more + * info. Primarily used for GPU-style RMWs, this functor - if set - will + * be called when the request reaches the memory controller. + */ + AtomicOpFunctor *atomicOpFunctor; + public: /** @@ -356,7 +378,8 @@ : _paddr(0), _size(0), _masterId(invldMasterId), _time(0), _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0), _extraData(0), _contextId(0), _threadId(0), _pc(0), - _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0) + _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0), + accessDelta(0), depth(0) {} Request(Addr paddr, unsigned size, Flags flags, MasterID mid, @@ -364,7 +387,8 @@ : _paddr(0), _size(0), _masterId(invldMasterId), _time(0), _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0), _extraData(0), _contextId(0), _threadId(0), _pc(0), - _reqInstSeqNum(seq_num), translateDelta(0), accessDelta(0), depth(0) + _reqInstSeqNum(seq_num), atomicOpFunctor(nullptr), translateDelta(0), + accessDelta(0), depth(0) { setPhys(paddr, size, flags, mid, curTick()); setThreadContext(cid, tid); @@ -380,7 +404,8 @@ : _paddr(0), _size(0), _masterId(invldMasterId), _time(0), _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0), _extraData(0), _contextId(0), _threadId(0), _pc(0), - _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0) + _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0), + accessDelta(0), depth(0) { setPhys(paddr, size, flags, mid, curTick()); } @@ -389,7 +414,8 @@ : _paddr(0), _size(0), _masterId(invldMasterId), _time(0), _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0), _extraData(0), _contextId(0), _threadId(0), _pc(0), - _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0) + _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0), + 
accessDelta(0), depth(0) { setPhys(paddr, size, flags, mid, time); } @@ -398,12 +424,12 @@ Addr pc) : _paddr(0), _size(0), _masterId(invldMasterId), _time(0), _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0), - _extraData(0), _contextId(0), _threadId(0), _pc(0), - _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0) + _extraData(0), _contextId(0), _threadId(0), _pc(pc), + _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0), + accessDelta(0), depth(0) { setPhys(paddr, size, flags, mid, time); privateFlags.set(VALID_PC); - _pc = pc; } Request(int asid, Addr vaddr, unsigned size, Flags flags, MasterID mid, @@ -411,13 +437,27 @@ : _paddr(0), _size(0), _masterId(invldMasterId), _time(0), _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0), _extraData(0), _contextId(0), _threadId(0), _pc(0), - _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0) + _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0), + accessDelta(0), depth(0) { setVirt(asid, vaddr, size, flags, mid, pc); setThreadContext(cid, tid); } - ~Request() {} + Request(int asid, Addr vaddr, int size, Flags flags, MasterID mid, Addr pc, + int cid, ThreadID tid, AtomicOpFunctor *atomic_op) + : atomicOpFunctor(atomic_op) + { + setVirt(asid, vaddr, size, flags, mid, pc); + setThreadContext(cid, tid); + } + + ~Request() + { + if (hasAtomicOpFunctor()) { + delete atomicOpFunctor; + } + } /** * Set up CPU and thread numbers. @@ -541,6 +581,22 @@ return _time; } + /** + * Accessor for atomic-op functor. + */ + bool + hasAtomicOpFunctor() const + { + return atomicOpFunctor != nullptr; + } + + AtomicOpFunctor * + getAtomicOpFunctor() const + { + assert(atomicOpFunctor != nullptr); + return atomicOpFunctor; + } + /** Accessor for flags. 
*/ Flags getFlags() @@ -749,6 +805,15 @@ bool isAcquire() const { return _flags.isSet(ACQUIRE); } bool isRelease() const { return _flags.isSet(RELEASE); } bool isKernel() const { return _flags.isSet(KERNEL); } + bool isAtomicReturn() const { return _flags.isSet(ATOMIC_RETURN_OP); } + bool isAtomicNoReturn() const { return _flags.isSet(ATOMIC_NO_RETURN_OP); } + + bool + isAtomic() const + { + return _flags.isSet(ATOMIC_RETURN_OP) || + _flags.isSet(ATOMIC_NO_RETURN_OP); + } /* * Accessor functions for extraFlags. Note that these are for testing