diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -340,7 +340,15 @@
 
     uint8_t *hostAddr = pmemAddr + pkt->getAddr() - range.start();
 
-    if (pkt->cmd == MemCmd::SwapReq) {
+    if (pkt->isAtomicOp()) {
+        if (pmemAddr) {
+            // Copy the current memory contents into the packet first, then
+            // apply the request's functor to the backing store, so the
+            // packet ends up holding the value from before the operation.
+            memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
+            (*(pkt->getAtomicOp()))(hostAddr);
+        }
+    } else if (pkt->cmd == MemCmd::SwapReq) {
         std::vector<uint8_t> overwrite_val(pkt->getSize());
         uint64_t condition_val64;
         uint32_t condition_val32;
# Node ID a2df1b31d43566ed684affb92de301e9b7cb908b
# Parent e9e01748776b647971831ac79374019fceec346a
diff --git a/src/base/types.hh b/src/base/types.hh
--- a/src/base/types.hh
+++ b/src/base/types.hh
@@ -200,6 +200,27 @@
 constexpr decltype(nullptr) NoFault = nullptr;
 #endif
 
+/**
+ * Type-erased atomic operation. operator() is handed a pointer to the
+ * bytes it should operate on.
+ */
+struct AtomicOpFunctor
+{
+    virtual void operator()(uint8_t *p) = 0;
+    virtual ~AtomicOpFunctor() {}
+};
+
+/**
+ * Typed convenience layer: casts the byte pointer to T* and forwards it
+ * to execute(), which subclasses implement.
+ */
+template <class T>
+struct TypedAtomicOpFunctor : public AtomicOpFunctor
+{
+    void operator()(uint8_t *p) { execute((T *)p); }
+    virtual void execute(T * p) = 0;
+};
+
 enum ByteOrder {
     BigEndianByteOrder,
     LittleEndianByteOrder
diff --git a/src/mem/request.hh b/src/mem/request.hh
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -160,6 +160,11 @@
         /** The request should be marked with RELEASE. */
         RELEASE = 0x00040000,
 
+        /** The request is an atomic that returns data. */
+        ATOMIC_RETURN_OP = 0x40000000,
+        /** The request is an atomic that does not return data. */
+        ATOMIC_NO_RETURN_OP = 0x80000000,
+
         /** The request should be marked with KERNEL.
          * Used to indicate a GPU kernel launch
          */
@@ -345,6 +350,9 @@
     /** Sequence number of the instruction that creates the request */
     InstSeqNum _reqInstSeqNum;
 
+    /** A pointer to an atomic operation; deleted in ~Request() if set */
+    AtomicOpFunctor *atomicOpFunctor;
+
   public:
 
     /**
@@ -356,7 +364,8 @@
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {}
 
     Request(Addr paddr, unsigned size, Flags flags, MasterID mid,
@@ -364,7 +373,8 @@
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(seq_num), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(seq_num), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setPhys(paddr, size, flags, mid, curTick());
         setThreadContext(cid, tid);
@@ -380,7 +390,8 @@
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setPhys(paddr, size, flags, mid, curTick());
     }
@@ -389,7 +400,8 @@
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setPhys(paddr, size, flags, mid, time);
     }
@@ -398,12 +410,12 @@
             Addr pc)
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
-          _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _extraData(0), _contextId(0), _threadId(0), _pc(pc),
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setPhys(paddr, size, flags, mid, time);
         privateFlags.set(VALID_PC);
-        _pc = pc;
     }
 
     Request(int asid, Addr vaddr, unsigned size, Flags flags, MasterID mid,
@@ -411,13 +423,41 @@
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _threadId(0), _pc(0),
-          _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+          _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+          accessDelta(0), depth(0)
     {
         setVirt(asid, vaddr, size, flags, mid, pc);
         setThreadContext(cid, tid);
     }
 
-    ~Request() {}
+    /**
+     * Virtually-addressed request carrying an atomic operation. The
+     * request keeps atomic_op and deletes it in ~Request().
+     */
+    Request(int asid, Addr vaddr, int size, Flags flags, MasterID mid, Addr pc,
+            int cid, ThreadID tid, AtomicOpFunctor *atomic_op)
+        : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
+          _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
+          _extraData(0), _contextId(0), _threadId(0), _pc(0),
+          _reqInstSeqNum(0), atomicOpFunctor(atomic_op), translateDelta(0),
+          accessDelta(0), depth(0)
+    {
+        setVirt(asid, vaddr, size, flags, mid, pc);
+        setThreadContext(cid, tid);
+    }
+
+    /**
+     * Free the atomic-op functor, if one was supplied; the request owns it.
+     * NOTE(review): no copy constructor/assignment is added by this patch,
+     * so copying a Request that holds a functor would leave two owners of
+     * the same pointer -- confirm such requests are never copied.
+     */
+    ~Request()
+    {
+        if (hasAtomicOpFunctor()) {
+            delete atomicOpFunctor;
+        }
+    }
 
     /**
      * Set up CPU and thread numbers.
@@ -541,6 +581,23 @@
         return _time;
     }
 
+    /** @return true if an atomic-op functor is attached to this request. */
+    bool
+    hasAtomicOpFunctor() const
+    {
+        return atomicOpFunctor != nullptr;
+    }
+
+    /**
+     * Accessor for the atomic-op functor; asserts that one is attached.
+     */
+    AtomicOpFunctor *
+    getAtomicOpFunctor() const
+    {
+        assert(atomicOpFunctor != nullptr);
+        return atomicOpFunctor;
+    }
+
     /** Accessor for flags. */
     Flags
     getFlags()
@@ -672,6 +729,10 @@
     {
         privateFlags.set(VALID_PC);
         _pc = pc;
+        // NOTE(review): this clears the pointer without deleting it, so a
+        // functor attached before this call is never freed by ~Request()
+        // and is no longer reachable through the request -- confirm intent.
+        atomicOpFunctor = nullptr;
     }
 
     bool
@@ -749,6 +810,16 @@
     bool isAcquire() const { return _flags.isSet(ACQUIRE); }
     bool isRelease() const { return _flags.isSet(RELEASE); }
     bool isKernel() const { return _flags.isSet(KERNEL); }
+    bool isAtomicReturn() const { return _flags.isSet(ATOMIC_RETURN_OP); }
+    bool isAtomicNoReturn() const { return _flags.isSet(ATOMIC_NO_RETURN_OP); }
+
+    /** @return true for either flavour of atomic (return or no-return). */
+    bool
+    isAtomic() const
+    {
+        return _flags.isSet(ATOMIC_RETURN_OP) ||
+               _flags.isSet(ATOMIC_NO_RETURN_OP);
+    }
 
     /*
      * Accessor functions for extraFlags. Note that these are for testing
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -145,12 +145,17 @@
 enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
   LD, desc="Load";
   ST, desc="Store";
-  ATOMIC, desc="Atomic Load/Store";
+  ATOMIC, desc="Atomic Load/Store -- deprecated. use ATOMIC_RETURN or ATOMIC_NO_RETURN";
+  ATOMIC_RETURN, desc="Atomic Load/Store, return data";
+  ATOMIC_NO_RETURN, desc="Atomic Load/Store, do not return data";
   IFETCH, desc="Instruction fetch";
   IO, desc="I/O";
   REPLACEMENT, desc="Replacement";
   Load_Linked, desc="";
   Store_Conditional, desc="";
+  GPU_ATOMIC, desc="";
+  GPU_ATOMIC_CAS, desc="";
+  GPU_ATOMIC_ADD, desc="";
   RMW_Read, desc="";
   RMW_Write, desc="";
   Locked_RMW_Read, desc="";
@@ -167,6 +172,8 @@
   Default, desc="Replace this with access_types passed to the DMA Ruby object";
   LD, desc="Load";
   ST, desc="Store";
+  ATOMIC, desc="Atomic Load/Store";
+  GPU_ATOMIC, desc="";
   REPLACEMENT, desc="Replacement";
   FLUSH, desc="Flush request type";
   NULL, desc="Invalid request type";
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -569,6 +569,12 @@
     }
 
     /**
+     * Accessor function to atomic op.
+     */
+    AtomicOpFunctor *getAtomicOp() const { return req->getAtomicOpFunctor(); }
+    bool isAtomicOp() const { return req->isAtomic(); }
+
+    /**
      * It has been determined that the SC packet should successfully update
      * memory. Therefore, convert this SC packet to a normal write.
      */