diff -r 5b9ea2a8b2d5 -r bfe6203ca3d5 src/arch/x86/decoder.hh --- a/src/arch/x86/decoder.hh Fri May 25 03:34:16 2012 -0700 +++ b/src/arch/x86/decoder.hh Fri May 25 03:45:20 2012 -0700 @@ -32,6 +32,7 @@ #define __ARCH_X86_DECODER_HH__ #include +#include #include "arch/x86/regs/misc.hh" #include "arch/x86/types.hh" @@ -58,9 +59,25 @@ static const uint8_t SizeTypeToSize[3][10]; protected: + struct InstBytes + { + StaticInstPtr si; + std::vector bytes; + std::vector masks; + int chunks; + int lastOffset; + + InstBytes() : chunks(0), lastOffset(0) + {} + }; + + static InstBytes dummy; + ThreadContext * tc; //The bytes to be predecoded MachInst fetchChunk; + InstBytes *instBytes; + int chunkIdx; //The pc of the start of fetchChunk Addr basePC; //The pc the current instruction started at @@ -69,9 +86,16 @@ int offset; //The extended machine instruction being generated ExtMachInst emi; - HandyM5Reg m5Reg; + //Predecoding state + X86Mode mode; + X86SubMode submode; + uint8_t altOp; + uint8_t defOp; + uint8_t altAddr; + uint8_t defAddr; + uint8_t stack; - inline uint8_t getNextByte() + uint8_t getNextByte() { return ((uint8_t *)&fetchChunk)[offset]; } @@ -99,24 +123,35 @@ consumeBytes(toGet); } - inline void consumeByte() + void updateOffsetState() + { + assert(offset <= sizeof(MachInst)); + if (offset == sizeof(MachInst)) { + DPRINTF(Decoder, "At the end of a chunk, idx = %d, chunks = %d.\n", + chunkIdx, instBytes->chunks); + chunkIdx++; + if (chunkIdx == instBytes->chunks) { + outOfBytes = true; + } else { + offset = 0; + fetchChunk = instBytes->bytes[chunkIdx]; + basePC += sizeof(MachInst); + } + } + } + + void consumeByte() { offset++; - assert(offset <= sizeof(MachInst)); - if(offset == sizeof(MachInst)) - outOfBytes = true; + updateOffsetState(); } - inline void consumeBytes(int numBytes) + void consumeBytes(int numBytes) { offset += numBytes; - assert(offset <= sizeof(MachInst)); - if(offset == sizeof(MachInst)) - outOfBytes = true; + updateOffsetState(); } - void 
doReset(); - //State machine state protected: //Whether or not we're out of bytes @@ -133,6 +168,7 @@ enum State { ResetState, + FromCacheState, PrefixState, OpcodeState, ModRMState, @@ -146,6 +182,8 @@ State state; //Functions to handle each of the states + State doResetState(); + State doFromCacheState(); State doPrefixState(uint8_t); State doOpcodeState(uint8_t); State doModRMState(uint8_t); @@ -153,15 +191,67 @@ State doDisplacementState(); State doImmediateState(); + protected: + /// Caching for decoded instruction objects. + + typedef MiscReg CacheKey; + + typedef DecodeCache::AddrMap DecodePages; + DecodePages *decodePages; + typedef m5::hash_map AddrCacheMap; + AddrCacheMap addrCacheMap; + + DecodeCache::InstMap *instMap; + typedef m5::hash_map InstCacheMap; + static InstCacheMap instCacheMap; + public: Decoder(ThreadContext * _tc) : tc(_tc), basePC(0), origPC(0), offset(0), outOfBytes(true), instDone(false), state(ResetState) { - emi.mode.mode = LongMode; - emi.mode.submode = SixtyFourBitMode; - m5Reg = 0; + mode = LongMode; + submode = SixtyFourBitMode; + emi.mode.mode = mode; + emi.mode.submode = submode; + altOp = 0; + defOp = 0; + altAddr = 0; + defAddr = 0; + stack = 0; + instBytes = &dummy; + decodePages = NULL; + instMap = NULL; + } + + void setM5Reg(HandyM5Reg m5Reg) + { + mode = (X86Mode)(uint64_t)m5Reg.mode; + submode = (X86SubMode)(uint64_t)m5Reg.submode; + emi.mode.mode = mode; + emi.mode.submode = submode; + altOp = m5Reg.altOp; + defOp = m5Reg.defOp; + altAddr = m5Reg.altAddr; + defAddr = m5Reg.defAddr; + stack = m5Reg.stack; + + AddrCacheMap::iterator amIter = addrCacheMap.find(m5Reg); + if (amIter != addrCacheMap.end()) { + decodePages = amIter->second; + } else { + decodePages = new DecodePages; + addrCacheMap[m5Reg] = decodePages; + } + + InstCacheMap::iterator imIter = instCacheMap.find(m5Reg); + if (imIter != instCacheMap.end()) { + instMap = imIter->second; + } else { + instMap = new DecodeCache::InstMap; + instCacheMap[m5Reg] = 
instMap; + } } void reset() @@ -217,11 +307,6 @@ } } - protected: - /// Caching for decoded instruction objects. - static DecodeCache::InstMap instMap; - static DecodeCache::AddrMap decodePages; - public: StaticInstPtr decodeInst(ExtMachInst mach_inst); @@ -229,16 +314,7 @@ /// @param mach_inst The binary instruction to decode. /// @retval A pointer to the corresponding StaticInst object. StaticInstPtr decode(ExtMachInst mach_inst, Addr addr); - - StaticInstPtr - decode(X86ISA::PCState &nextPC) - { - if (!instDone) - return NULL; - instDone = false; - updateNPC(nextPC); - return decode(emi, origPC); - } + StaticInstPtr decode(X86ISA::PCState &nextPC); }; } // namespace X86ISA diff -r 5b9ea2a8b2d5 -r bfe6203ca3d5 src/arch/x86/decoder.cc --- a/src/arch/x86/decoder.cc Fri May 25 03:34:16 2012 -0700 +++ b/src/arch/x86/decoder.cc Fri May 25 03:45:20 2012 -0700 @@ -38,10 +38,14 @@ namespace X86ISA { -void Decoder::doReset() + +Decoder::State Decoder::doResetState() { origPC = basePC + offset; DPRINTF(Decoder, "Setting origPC to %#x\n", origPC); + instBytes = &decodePages->lookup(origPC); + chunkIdx = 0; + emi.rex = 0; emi.legacy = 0; emi.opcode.num = 0; @@ -55,9 +59,14 @@ emi.modRM = 0; emi.sib = 0; - m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); - emi.mode.mode = m5Reg.mode; - emi.mode.submode = m5Reg.submode; + + if (instBytes->si) { + return FromCacheState; + } else { + instBytes->chunks = 0; + instBytes->bytes.clear(); + return PrefixState; + } } void Decoder::process() @@ -70,15 +79,19 @@ assert(!outOfBytes); assert(!instDone); + if (state == ResetState) + state = doResetState(); + if (state == FromCacheState) { + state = doFromCacheState(); + } else { + instBytes->chunks++; + instBytes->bytes.push_back(fetchChunk); + } + //While there's still something to do... 
- while(!instDone && !outOfBytes) - { + while (!instDone && !outOfBytes) { uint8_t nextByte = getNextByte(); - switch(state) - { - case ResetState: - doReset(); - state = PrefixState; + switch (state) { case PrefixState: state = doPrefixState(nextByte); break; @@ -105,6 +118,38 @@ } } +Decoder::State Decoder::doFromCacheState() +{ + DPRINTF(Decoder, "Looking at cache state.\n"); + if ((fetchChunk & instBytes->masks[chunkIdx]) != + instBytes->bytes[chunkIdx]) { + DPRINTF(Decoder, "Decode cache miss.\n"); + // The cached bytes didn't match what was fetched. Fall back to the + // predecoder. + instBytes->bytes[chunkIdx] = fetchChunk; + instBytes->chunks = chunkIdx + 1; + instBytes->bytes.resize(instBytes->chunks); + instBytes->si = NULL; + chunkIdx = 0; + fetchChunk = instBytes->bytes[0]; + offset = origPC % sizeof(MachInst); + basePC = origPC - offset; + return PrefixState; + } else if (chunkIdx == instBytes->chunks - 1) { + // We matched the cache, so use its value. + instDone = true; + offset = instBytes->lastOffset; + if (offset == sizeof(MachInst)) + outOfBytes = true; + return ResetState; + } else { + // We matched so far, but need to check more bytes. + chunkIdx++; + outOfBytes = true; + return FromCacheState; + } +} + //Either get a prefix and record it in the ExtMachInst, or send the //state machine on to get the opcode(s). Decoder::State Decoder::doPrefixState(uint8_t nextByte) @@ -194,9 +239,9 @@ if (emi.rex.w) logOpSize = 3; // 64 bit operand size else if (emi.legacy.op) - logOpSize = m5Reg.altOp; + logOpSize = altOp; else - logOpSize = m5Reg.defOp; + logOpSize = defOp; //Set the actual op size emi.opSize = 1 << logOpSize; @@ -205,16 +250,16 @@ //a fixed value at the decoder level. int logAddrSize; if(emi.legacy.addr) - logAddrSize = m5Reg.altAddr; + logAddrSize = altAddr; else - logAddrSize = m5Reg.defAddr; + logAddrSize = defAddr; //Set the actual address size emi.addrSize = 1 << logAddrSize; //Figure out the effective stack width. 
This can be overriden to //a fixed value at the decoder level. - emi.stackSize = 1 << m5Reg.stack; + emi.stackSize = 1 << stack; //Figure out how big of an immediate we'll retreive based //on the opcode. @@ -248,7 +293,7 @@ ModRM modRM; modRM = nextByte; DPRINTF(Decoder, "Found modrm byte %#x.\n", nextByte); - if (m5Reg.defOp == 1) { + if (defOp == 1) { //figure out 16 bit displacement size if ((modRM.mod == 0 && modRM.rm == 6) || modRM.mod == 2) displacementSize = 2; @@ -408,24 +453,59 @@ return nextState; } -DecodeCache::InstMap Decoder::instMap; -DecodeCache::AddrMap Decoder::decodePages; +Decoder::InstBytes Decoder::dummy; +Decoder::InstCacheMap Decoder::instCacheMap; StaticInstPtr Decoder::decode(ExtMachInst mach_inst, Addr addr) { - StaticInstPtr &si = decodePages.lookup(addr); - if (si && (si->machInst == mach_inst)) + DecodeCache::InstMap::iterator iter = instMap->find(mach_inst); + if (iter != instMap->end()) + return iter->second; + + StaticInstPtr si = decodeInst(mach_inst); + (*instMap)[mach_inst] = si; + return si; +} + +StaticInstPtr +Decoder::decode(PCState &nextPC) +{ + if (!instDone) + return NULL; + instDone = false; + updateNPC(nextPC); + + StaticInstPtr &si = instBytes->si; + if (si) return si; - DecodeCache::InstMap::iterator iter = instMap.find(mach_inst); - if (iter != instMap.end()) { - si = iter->second; - return si; + // We didn't match in the AddrMap, but we still populated an entry. Fix + // up its byte masks. + const int chunkSize = sizeof(MachInst); + + instBytes->lastOffset = offset; + + Addr firstBasePC = basePC - (instBytes->chunks - 1) * chunkSize; + Addr firstOffset = origPC - firstBasePC; + Addr totalSize = instBytes->lastOffset - firstOffset + + (instBytes->chunks - 1) * chunkSize; + int start = firstOffset; + instBytes->masks.clear(); + while (totalSize) { + int end = start + totalSize; + end = (chunkSize < end) ? 
chunkSize : end; + int size = end - start; + int idx = instBytes->masks.size(); + MachInst maskVal = mask(size * 8) << (start * 8); + assert(maskVal); + instBytes->masks.push_back(maskVal); + instBytes->bytes[idx] &= instBytes->masks[idx]; + totalSize -= size; + start = 0; } - si = decodeInst(mach_inst); - instMap[mach_inst] = si; + si = decode(emi, origPC); return si; } diff -r 5b9ea2a8b2d5 -r bfe6203ca3d5 src/arch/x86/isa.hh --- a/src/arch/x86/isa.hh Fri May 25 03:34:16 2012 -0700 +++ b/src/arch/x86/isa.hh Fri May 25 03:45:20 2012 -0700 @@ -50,7 +50,8 @@ protected: MiscReg regVal[NUM_MISCREGS]; void updateHandyM5Reg(Efer efer, CR0 cr0, - SegAttr csAttr, SegAttr ssAttr, RFLAGS rflags); + SegAttr csAttr, SegAttr ssAttr, RFLAGS rflags, + ThreadContext *tc); public: void clear(); diff -r 5b9ea2a8b2d5 -r bfe6203ca3d5 src/arch/x86/isa.cc --- a/src/arch/x86/isa.cc Fri May 25 03:34:16 2012 -0700 +++ b/src/arch/x86/isa.cc Fri May 25 03:45:20 2012 -0700 @@ -28,6 +28,7 @@ * Authors: Gabe Black */ +#include "arch/x86/decoder.hh" #include "arch/x86/isa.hh" #include "arch/x86/tlb.hh" #include "cpu/base.hh" @@ -39,7 +40,8 @@ void ISA::updateHandyM5Reg(Efer efer, CR0 cr0, - SegAttr csAttr, SegAttr ssAttr, RFLAGS rflags) + SegAttr csAttr, SegAttr ssAttr, RFLAGS rflags, + ThreadContext *tc) { HandyM5Reg m5reg = 0; if (efer.lma) { @@ -94,6 +96,8 @@ } regVal[MISCREG_M5_REG] = m5reg; + if (tc) + tc->getDecoderPtr()->setM5Reg(m5reg); } void @@ -177,7 +181,8 @@ newCR0, regVal[MISCREG_CS_ATTR], regVal[MISCREG_SS_ATTR], - regVal[MISCREG_RFLAGS]); + regVal[MISCREG_RFLAGS], + tc); } break; case MISCREG_CR2: @@ -218,7 +223,8 @@ regVal[MISCREG_CR0], newCSAttr, regVal[MISCREG_SS_ATTR], - regVal[MISCREG_RFLAGS]); + regVal[MISCREG_RFLAGS], + tc); } break; case MISCREG_SS_ATTR: @@ -226,7 +232,8 @@ regVal[MISCREG_CR0], regVal[MISCREG_CS_ATTR], val, - regVal[MISCREG_RFLAGS]); + regVal[MISCREG_RFLAGS], + tc); break; // These segments always actually use their bases, or in other words // their 
effective bases must stay equal to their actual bases. @@ -333,7 +340,8 @@ regVal[MISCREG_CR0], regVal[MISCREG_CS_ATTR], regVal[MISCREG_SS_ATTR], - regVal[MISCREG_RFLAGS]); + regVal[MISCREG_RFLAGS], + tc); return; default: break; @@ -356,7 +364,8 @@ regVal[MISCREG_CR0], regVal[MISCREG_CS_ATTR], regVal[MISCREG_SS_ATTR], - regVal[MISCREG_RFLAGS]); + regVal[MISCREG_RFLAGS], + NULL); } }