# Summary of this changeset. This is free-form commentary placed before the
# first "diff" header; it is not part of any hunk.
#
#  * src/cpu/kvm/X86KvmCPU.py
#      - adds the Bool parameter `useXSave` (default True).
#  * src/cpu/kvm/x86_cpu.hh
#      - declares updateKvmStateXSave() and updateThreadContextXSave();
#      - adds the `useXSave` member next to the `haveXSave` capability flag.
#  * src/cpu/kvm/x86_cpu.cc
#      - adds the packed struct FXSave (statically asserted to be 512 bytes),
#        which is overlaid on kvm_xsave::region via a cast;
#      - replaces the warn_once() stubs in updateKvmStateFPU() and
#        updateThreadContextFPU() with real implementations and adds XSave
#        variants; the shared work lives in function templates that accept
#        either kvm_fpu or FXSave;
#      - the constructor forces useXSave to false (with a warning) when
#        Kvm::capXSave() reports no support, and warns when the user has
#        disabled it;
#      - dump() and the FPU synchronization call sites select the XSave or
#        the plain FPU path depending on useXSave;
#      - the FP stack dump prints 10 bytes per register and labels each entry
#        as ST<i>/<register index>, where the index is (i + top) & 0x7.
#
# NOTE(review): leading whitespace of context and added lines is a
# best-effort reconstruction. If a hunk is rejected, retry with
# whitespace-insensitive matching (e.g. `patch -l`) -- TODO confirm against
# the target tree.

diff -r 53a406916a9c -r 2c15197c07e7 src/cpu/kvm/X86KvmCPU.py
--- a/src/cpu/kvm/X86KvmCPU.py	Tue Sep 10 15:22:23 2013 +0200
+++ b/src/cpu/kvm/X86KvmCPU.py	Tue Sep 10 15:22:37 2013 +0200
@@ -43,3 +43,5 @@
       void dumpXSave();
       void dumpVCpuEvents();
 ''')
+
+    useXSave = Param.Bool(True, "Use XSave to synchronize FPU/SIMD registers")
diff -r 53a406916a9c -r 2c15197c07e7 src/cpu/kvm/x86_cpu.hh
--- a/src/cpu/kvm/x86_cpu.hh	Tue Sep 10 15:22:23 2013 +0200
+++ b/src/cpu/kvm/x86_cpu.hh	Tue Sep 10 15:22:37 2013 +0200
@@ -135,11 +135,13 @@
     void updateKvmStateRegs();
     void updateKvmStateSRegs();
     void updateKvmStateFPU();
+    void updateKvmStateXSave();
     void updateKvmStateMSRs();
 
     void updateThreadContextRegs();
     void updateThreadContextSRegs();
     void updateThreadContextFPU();
+    void updateThreadContextXSave();
     void updateThreadContextMSRs();
 
     void updateCPUID();
@@ -164,6 +166,11 @@
     bool haveDebugRegs;
     /** Kvm::capXSave() available? */
     bool haveXSave;
+    /**
+     * Should the XSave interface be used to sync the FPU and SIMD
+     * registers?
+     */
+    bool useXSave;
     /** Kvm::capXCRs() available? */
     bool haveXCRs;
     /** @} */
diff -r 53a406916a9c -r 2c15197c07e7 src/cpu/kvm/x86_cpu.cc
--- a/src/cpu/kvm/x86_cpu.cc	Tue Sep 10 15:22:23 2013 +0200
+++ b/src/cpu/kvm/x86_cpu.cc	Tue Sep 10 15:22:37 2013 +0200
@@ -53,6 +53,39 @@
 #define IO_PCI_CONF_ADDR 0xCF8
 #define IO_PCI_CONF_DATA_BASE 0xCFC
 
+struct FXSave
+{
+    uint16_t fcw;
+    uint16_t fsw;
+    uint8_t ftwx;
+    uint8_t pad0;
+    uint16_t last_opcode;
+    union {
+        struct {
+            uint32_t fpu_ip;
+            uint16_t fpu_cs;
+            uint16_t pad1;
+            uint32_t fpu_dp;
+            uint16_t fpu_ds;
+            uint16_t pad2;
+        } ctrl32;
+
+        struct {
+            uint64_t fpu_ip;
+            uint64_t fpu_dp;
+        } ctrl64;
+    };
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+
+    uint8_t fpr[8][16];
+    uint8_t xmm[16][16];
+
+    uint64_t reserved[12];
+} M5_ATTR_PACKED;
+
+static_assert(sizeof(FXSave) == 512, "Unexpected size of FXSave");
+
 #define FOREACH_IREG() \
     do { \
         APPLY_IREG(rax, INTREG_RAX); \
@@ -193,24 +226,61 @@
 #endif
 
 static void
-dumpKvm(const struct kvm_fpu &fpu)
+dumpFpuSpec(const struct FXSave &xs)
 {
-    inform("FPU registers:\n");
+    inform("\tlast_ip: 0x%x\n", xs.ctrl64.fpu_ip);
+    inform("\tlast_dp: 0x%x\n", xs.ctrl64.fpu_dp);
+    inform("\tmxcsr_mask: 0x%x\n", xs.mxcsr_mask);
+}
+
+static void
+dumpFpuSpec(const struct kvm_fpu &fpu)
+{
+    inform("\tlast_ip: 0x%x\n", fpu.last_ip);
+    inform("\tlast_dp: 0x%x\n", fpu.last_dp);
+}
+
+template <typename T>
+static void
+dumpFpuCommon(const T &fpu)
+{
+    const unsigned top((fpu.fsw >> 11) & 0x7);
     inform("\tfcw: 0x%x\n", fpu.fcw);
-    inform("\tfsw: 0x%x\n", fpu.fsw);
+
+    inform("\tfsw: 0x%x (top: %i, "
+           "conditions: %s%s%s%s, exceptions: %s%s%s%s%s%s %s%s%s)\n",
+           fpu.fsw, top,
+
+           (fpu.fsw & CC0Bit) ? "C0" : "",
+           (fpu.fsw & CC1Bit) ? "C1" : "",
+           (fpu.fsw & CC2Bit) ? "C2" : "",
+           (fpu.fsw & CC3Bit) ? "C3" : "",
+
+           (fpu.fsw & IEBit) ? "I" : "",
+           (fpu.fsw & DEBit) ? "D" : "",
+           (fpu.fsw & ZEBit) ? "Z" : "",
+           (fpu.fsw & OEBit) ? "O" : "",
+           (fpu.fsw & UEBit) ? "U" : "",
+           (fpu.fsw & PEBit) ? "P" : "",
+
+           (fpu.fsw & StackFaultBit) ? "SF " : "",
+           (fpu.fsw & ErrSummaryBit) ? "ES " : "",
+           (fpu.fsw & BusyBit) ? "BUSY " : ""
+        );
     inform("\tftwx: 0x%x\n", fpu.ftwx);
     inform("\tlast_opcode: 0x%x\n", fpu.last_opcode);
-    inform("\tlast_ip: 0x%x\n", fpu.last_ip);
-    inform("\tlast_dp: 0x%x\n", fpu.last_dp);
+    dumpFpuSpec(fpu);
     inform("\tmxcsr: 0x%x\n", fpu.mxcsr);
     inform("\tFP Stack:\n");
     for (int i = 0; i < 8; ++i) {
-        const bool empty(!((fpu.ftwx >> i) & 0x1));
+        const unsigned reg_idx((i + top) & 0x7);
+        const bool empty(!((fpu.ftwx >> reg_idx) & 0x1));
         const double value(*(__float80 *)fpu.fpr[i]);
         char hex[33];
-        for (int j = 0; j < 16; ++j)
+        for (int j = 0; j < 10; ++j)
             snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]);
-        inform("\t\t%i: 0x%s (%f)%s\n", i, hex, value, empty ? " (e)" : "");
+        inform("\t\tST%i/%i: 0x%s (%f)%s\n", i, reg_idx,
+               hex, value, empty ? " (e)" : "");
     }
     inform("\tXMM registers:\n");
     for (int i = 0; i < 16; ++i) {
@@ -222,6 +292,20 @@
 }
 
 static void
+dumpKvm(const struct kvm_fpu &fpu)
+{
+    inform("FPU registers:\n");
+    dumpFpuCommon(fpu);
+}
+
+static void
+dumpKvm(const struct kvm_xsave &xsave)
+{
+    inform("FPU registers (XSave):\n");
+    dumpFpuCommon(*(FXSave *)xsave.region);
+}
+
+static void
 dumpKvm(const struct kvm_msrs &msrs)
 {
     inform("MSRs:\n");
@@ -247,15 +331,6 @@
 }
 
 static void
-dumpKvm(const struct kvm_xsave &xsave)
-{
-    inform("KVM XSAVE:\n");
-
-    Trace::dump((Tick)-1, "xsave.region",
-                xsave.region, sizeof(xsave.region));
-}
-
-static void
 dumpKvm(const struct kvm_vcpu_events &events)
 {
     inform("vCPU events:\n");
@@ -277,7 +352,8 @@
 }
 
 X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params)
-    : BaseKvmCPU(params)
+    : BaseKvmCPU(params),
+      useXSave(params->useXSave)
 {
     Kvm &kvm(vm.kvm);
 
@@ -293,6 +369,14 @@
     haveDebugRegs = kvm.capDebugRegs();
     haveXSave = kvm.capXSave();
     haveXCRs = kvm.capXCRs();
+
+    if (!haveXSave) {
+        warn("KVM: XSAVE not supported by host. MXCSR synchronization might be "
+             "unreliable due to kernel bugs.\n");
+        useXSave = false;
+    } else if (!useXSave) {
+        warn("KVM: XSave FPU/SIMD synchronization disabled by user.\n");
+    }
 }
 
 X86KvmCPU::~X86KvmCPU()
@@ -320,13 +404,15 @@
 X86KvmCPU::dump()
 {
     dumpIntRegs();
-    dumpFpuRegs();
+    if (useXSave)
+        dumpXSave();
+    else
+        dumpFpuRegs();
     dumpSpecRegs();
     dumpDebugRegs();
     dumpXCRs();
     dumpVCpuEvents();
     dumpMSRs();
-    dumpXSave();
 }
 
 void
@@ -424,7 +510,10 @@
 {
     updateKvmStateRegs();
     updateKvmStateSRegs();
-    updateKvmStateFPU();
+    if (useXSave)
+        updateKvmStateXSave();
+    else
+        updateKvmStateFPU();
     updateKvmStateMSRs();
 
     DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
@@ -509,10 +598,91 @@
     setSpecialRegisters(sregs);
 }
 
+
+template <typename T>
+static void
+updateKvmStateFPUCommon(ThreadContext *tc, T &fpu)
+{
+    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
+                  "Unexpected size of X86ISA::FloatRegBits");
+
+    fpu.mxcsr = tc->readMiscRegNoEffect(MISCREG_MXCSR);
+    fpu.fcw = tc->readMiscRegNoEffect(MISCREG_FCW);
+    // No need to rebuild from MISCREG_FSW and MISCREG_TOP if we read
+    // with effects.
+    fpu.fsw = tc->readMiscReg(MISCREG_FSW);
+
+    uint64_t ftw(tc->readMiscRegNoEffect(MISCREG_FTW));
+    fpu.ftwx = X86ISA::convX87TagsToXTags(ftw);
+
+    fpu.last_opcode = tc->readMiscRegNoEffect(MISCREG_FOP);
+
+    const unsigned top((fpu.fsw >> 11) & 0x7);
+    for (int i = 0; i < 8; ++i) {
+        const unsigned reg_idx((i + top) & 0x7);
+        const double value(tc->readFloatReg(FLOATREG_FPR(reg_idx)));
+        DPRINTF(KvmContext, "Setting KVM FP reg %i (st[%i]) := %f\n",
+                reg_idx, i, value);
+        *(__float80 *)fpu.fpr[i] = value;
+    }
+
+    // TODO: We should update the MMX state
+
+    for (int i = 0; i < 16; ++i) {
+        *(X86ISA::FloatRegBits *)&fpu.xmm[i][0] =
+            tc->readFloatRegBits(FLOATREG_XMM_LOW(i));
+        *(X86ISA::FloatRegBits *)&fpu.xmm[i][8] =
+            tc->readFloatRegBits(FLOATREG_XMM_HIGH(i));
+    }
+}
+
 void
 X86KvmCPU::updateKvmStateFPU()
 {
-    warn_once("X86KvmCPU::updateKvmStateFPU not implemented\n");
+    struct kvm_fpu fpu;
+
+    // There is some padding in the FP registers, so we'd better zero
+    // the whole struct.
+    memset(&fpu, 0, sizeof(fpu));
+
+    updateKvmStateFPUCommon(tc, fpu);
+
+    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
+        warn_once("MISCREG_FISEG is non-zero.\n");
+
+    fpu.last_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);
+
+    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
+        warn_once("MISCREG_FOSEG is non-zero.\n");
+
+    fpu.last_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);
+
+    setFPUState(fpu);
+}
+
+void
+X86KvmCPU::updateKvmStateXSave()
+{
+    struct kvm_xsave kxsave;
+    FXSave &xsave(*(FXSave *)kxsave.region);
+
+    // There is some padding and reserved fields in the structure, so
+    // we'd better zero the whole thing.
+    memset(&kxsave, 0, sizeof(kxsave));
+
+    updateKvmStateFPUCommon(tc, xsave);
+
+    if (tc->readMiscRegNoEffect(MISCREG_FISEG))
+        warn_once("MISCREG_FISEG is non-zero.\n");
+
+    xsave.ctrl64.fpu_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);
+
+    if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
+        warn_once("MISCREG_FOSEG is non-zero.\n");
+
+    xsave.ctrl64.fpu_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);
+
+    setXSave(kxsave);
 }
 
 void
@@ -546,7 +716,10 @@
 
     updateThreadContextRegs();
     updateThreadContextSRegs();
-    updateThreadContextFPU();
+    if (useXSave)
+        updateThreadContextXSave();
+    else
+        updateThreadContextFPU();
     updateThreadContextMSRs();
 
     // The M5 misc reg caches some values from other
@@ -631,10 +804,72 @@
 #undef APPLY_DTABLE
 }
 
+template <typename T>
+static void
+updateThreadContextFPUCommon(ThreadContext *tc, const T &fpu)
+{
+    const unsigned top((fpu.fsw >> 11) & 0x7);
+
+    static_assert(sizeof(X86ISA::FloatRegBits) == 8,
+                  "Unexpected size of X86ISA::FloatRegBits");
+
+    for (int i = 0; i < 8; ++i) {
+        const unsigned reg_idx((i + top) & 0x7);
+        const double value(*(__float80 *)fpu.fpr[i]);
+        DPRINTF(KvmContext, "Setting gem5 FP reg %i (st[%i]) := %f\n",
+                reg_idx, i, value);
+        tc->setFloatReg(FLOATREG_FPR(reg_idx), value);
+    }
+
+    // TODO: We should update the MMX state
+
+    tc->setMiscRegNoEffect(MISCREG_X87_TOP, top);
+    tc->setMiscRegNoEffect(MISCREG_MXCSR, fpu.mxcsr);
+    tc->setMiscRegNoEffect(MISCREG_FCW, fpu.fcw);
+    tc->setMiscRegNoEffect(MISCREG_FSW, fpu.fsw);
+
+    uint64_t ftw(convX87XTagsToTags(fpu.ftwx));
+    // TODO: Are these registers really the same?
+    tc->setMiscRegNoEffect(MISCREG_FTW, ftw);
+    tc->setMiscRegNoEffect(MISCREG_FTAG, ftw);
+
+    tc->setMiscRegNoEffect(MISCREG_FOP, fpu.last_opcode);
+
+    for (int i = 0; i < 16; ++i) {
+        tc->setFloatRegBits(FLOATREG_XMM_LOW(i),
+                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][0]);
+        tc->setFloatRegBits(FLOATREG_XMM_HIGH(i),
+                            *(X86ISA::FloatRegBits *)&fpu.xmm[i][8]);
+    }
+}
+
 void
 X86KvmCPU::updateThreadContextFPU()
 {
-    warn_once("X86KvmCPU::updateThreadContextFPU not implemented\n");
+    struct kvm_fpu fpu;
+    getFPUState(fpu);
+
+    updateThreadContextFPUCommon(tc, fpu);
+
+    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
+    tc->setMiscRegNoEffect(MISCREG_FIOFF, fpu.last_ip);
+    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
+    tc->setMiscRegNoEffect(MISCREG_FOOFF, fpu.last_dp);
+}
+
+void
+X86KvmCPU::updateThreadContextXSave()
+{
+    struct kvm_xsave kxsave;
+    FXSave &xsave(*(FXSave *)kxsave.region);
+    getXSave(kxsave);
+
+    updateThreadContextFPUCommon(tc, xsave);
+
+    tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
+    tc->setMiscRegNoEffect(MISCREG_FIOFF, xsave.ctrl64.fpu_ip);
+    tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
+    tc->setMiscRegNoEffect(MISCREG_FOOFF, xsave.ctrl64.fpu_dp);
 }
 
 void