diff -r 01122cdb2656 -r 46477709d58a src/arch/arm/isa.cc --- a/src/arch/arm/isa.cc Mon Nov 08 17:40:18 2010 -0600 +++ b/src/arch/arm/isa.cc Mon Nov 08 17:41:40 2010 -0600 @@ -205,7 +205,10 @@ warn("Not doing anyhting for read to miscreg %s\n", miscRegName[misc_reg]); break; - + case MISCREG_FPSCR_QC: + case MISCREG_FPSCR_EXC: + misc_reg = MISCREG_FPSCR; + break; } return readMiscRegNoEffect(misc_reg); } @@ -307,6 +310,20 @@ (miscRegs[MISCREG_FPSCR] & ~(uint32_t)fpscrMask); } break; + case MISCREG_FPSCR_QC: + { + const uint32_t fpscrQcMask = 0x08000000; + newVal = miscRegs[MISCREG_FPSCR] | (newVal & fpscrQcMask); + misc_reg = MISCREG_FPSCR; + } + break; + case MISCREG_FPSCR_EXC: + { + const uint32_t fpscrExcMask = 0x0000009F; + newVal = miscRegs[MISCREG_FPSCR] | (newVal & fpscrExcMask); + misc_reg = MISCREG_FPSCR; + } + break; case MISCREG_FPEXC: { const uint32_t fpexcMask = 0x60000000; diff -r 01122cdb2656 -r 46477709d58a src/arch/arm/isa/insts/fp.isa --- a/src/arch/arm/isa/insts/fp.isa Mon Nov 08 17:40:18 2010 -0600 +++ b/src/arch/arm/isa/insts/fp.isa Mon Nov 08 17:41:40 2010 -0600 @@ -208,7 +208,8 @@ vmsrFpscrIop = InstObjParams("vmsr", "VmsrFpscr", "FpRegRegOp", { "code": vmsrFpscrCode, "predicate_test": predicateTest, - "op_class": "SimdFloatMiscOp" }, []) + "op_class": "SimdFloatMiscOp" }, + ["IsSerializeAfter","IsNonSpeculative"]) header_output += FpRegRegOpDeclare.subst(vmsrFpscrIop); decoder_output += FpRegRegOpConstructor.subst(vmsrFpscrIop); exec_output += PredOpExecute.subst(vmsrFpscrIop); diff -r 01122cdb2656 -r 46477709d58a src/arch/arm/isa/insts/neon.isa --- a/src/arch/arm/isa/insts/neon.isa Mon Nov 08 17:40:18 2010 -0600 +++ b/src/arch/arm/isa/insts/neon.isa Mon Nov 08 17:41:40 2010 -0600 @@ -1632,12 +1632,12 @@ vqaddUCode = ''' destElem = srcElem1 + srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (destElem < srcElem1 || destElem < srcElem2) { destElem = (Element)(-1); fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode) threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode) @@ -1655,7 +1655,7 @@ vqaddSCode = ''' destElem = srcElem1 + srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; bool negDest = (destElem < 0); bool negSrc1 = (srcElem1 < 0); bool negSrc2 = (srcElem2 < 0); @@ -1665,26 +1665,26 @@ destElem -= 1; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode) threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode) vqsubUCode = ''' destElem = srcElem1 - srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (destElem > srcElem1) { destElem = 0; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode) threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode) vqsubSCode = ''' destElem = srcElem1 - srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; bool negDest = (destElem < 0); bool negSrc1 = (srcElem1 < 0); bool posSrc2 = (srcElem2 >= 0); @@ -1694,7 +1694,7 @@ destElem -= 1; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode) threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode) @@ -1779,7 +1779,7 @@ vqshlUCode = ''' int16_t shiftAmt = (int8_t)srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (shiftAmt < 0) { shiftAmt = -shiftAmt; if (shiftAmt >= sizeof(Element) * 8) { @@ -1808,14 +1808,14 @@ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode) threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode) vqshlSCode = ''' int16_t shiftAmt = (int8_t)srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (shiftAmt < 0) { shiftAmt = -shiftAmt; if (shiftAmt >= sizeof(Element) * 8) { @@ -1854,14 +1854,14 @@ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode) threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode) vqrshlUCode = ''' int16_t shiftAmt = (int8_t)srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (shiftAmt < 0) { shiftAmt = -shiftAmt; Element rBit = 0; @@ -1892,14 +1892,14 @@ } } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode) threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode) vqrshlSCode = ''' int16_t shiftAmt = (int8_t)srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (shiftAmt < 0) { shiftAmt = -shiftAmt; Element rBit = 0; @@ -1944,7 +1944,7 @@ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode) threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode) @@ -2002,7 +2002,7 @@ threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True) vqdmlalCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); Element halfNeg = maxNeg / 2; @@ -2022,12 +2022,12 @@ destElem = ~destElem; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True) vqdmlslCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); Element halfNeg = maxNeg / 2; @@ -2047,12 +2047,12 @@ destElem = ~destElem; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True) vqdmullCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); if (srcElem1 == srcElem2 && srcElem1 == (Element)((Element)1 << @@ -2060,7 +2060,7 @@ destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode) @@ -2099,7 +2099,7 @@ threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True) vqdmulhCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> (sizeof(Element) * 8); if (srcElem1 == srcElem2 && @@ -2108,13 +2108,13 @@ destElem = ~srcElem1; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode) threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode) vqrdmulhCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> (sizeof(Element) * 8); @@ -2130,7 +2130,7 @@ } fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqrdmulh", "VqrdmulhD", "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode) @@ -2138,7 +2138,7 @@ "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) vmaxfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { @@ -2147,13 +2147,13 @@ } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; } - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode) threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode) vminfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { @@ -2162,7 +2162,7 @@ } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; } - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode) threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode) @@ -2178,10 +2178,10 @@ 4, vminfpCode, pairwise=True) vaddfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode) threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode) @@ -2192,53 +2192,53 @@ 4, vaddfpCode, pairwise=True) vsubfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode) threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode) vmulfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode) threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode) vmlafpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, true, true, VfpRoundNearest); destReg = binaryOp(fpscr, mid, destReg, fpAddS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True) threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True) vmlsfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, true, true, VfpRoundNearest); destReg = binaryOp(fpscr, destReg, mid, fpSubS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True) threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True) vcgtfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",), 2, vcgtfpCode, toInt = True) @@ -2246,13 +2246,13 @@ 4, vcgtfpCode, toInt = True) vcgefpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",), 2, vcgefpCode, toInt = True) @@ -2260,13 +2260,13 @@ 4, vcgefpCode, toInt = True) vacgtfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",), 2, vacgtfpCode, toInt = True) @@ -2274,13 +2274,13 @@ 4, vacgtfpCode, toInt = True) vacgefpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",), 2, vacgefpCode, toInt = True) @@ -2288,13 +2288,13 @@ 4, vacgefpCode, toInt = True) vceqfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",), 2, vceqfpCode, toInt = True) @@ -2302,29 +2302,29 @@ 4, vceqfpCode, toInt = True) vrecpsCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode) threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode) vrsqrtsCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode) threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode) vabdfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, true, true, VfpRoundNearest); destReg = fabs(mid); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode) threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode) @@ -2441,7 +2441,7 @@ twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True) vqshlCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (imm >= sizeof(Element) * 8) { if (srcElem1 != 0) { destElem = (Element)1 << (sizeof(Element) * 8 - 1); @@ -2465,13 +2465,13 @@ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode) twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode) vqshluCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (imm >= sizeof(Element) * 8) { if (srcElem1 != 0) { destElem = mask(sizeof(Element) * 8); @@ -2491,13 +2491,13 @@ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode) twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode) vqshlusCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (imm >= sizeof(Element) * 8) { if (srcElem1 < 0) { destElem = 0; @@ -2528,7 +2528,7 @@ destElem = srcElem1; } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode) twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode) @@ -2555,7 +2555,7 @@ twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode) vqshrnCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0 && srcElem1 != -1) fpscr.qc = 1; @@ -2575,12 +2575,12 @@ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode) vqshrunCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0) fpscr.qc = 1; @@ -2596,13 +2596,13 @@ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqshrun", "NVqshrun", "SimdShiftOp", smallUnsignedTypes, vqshrunCode) vqshrunsCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0) fpscr.qc = 1; @@ -2623,13 +2623,13 @@ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqshrun", "NVqshruns", "SimdShiftOp", smallSignedTypes, vqshrunsCode) vqrshrnCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0 && srcElem1 != -1) fpscr.qc = 1; @@ -2659,13 +2659,13 @@ destElem = srcElem1; } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", "SimdShiftOp", smallSignedTypes, vqrshrnCode) vqrshrunCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0) fpscr.qc = 1; @@ -2689,13 +2689,13 @@ destElem = srcElem1; } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", "SimdShiftOp", smallUnsignedTypes, vqrshrunCode) vqrshrunsCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0) fpscr.qc = 1; @@ -2726,7 +2726,7 @@ destElem = srcElem1; } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", "SimdShiftOp", smallSignedTypes, vqrshrunsCode) @@ -2746,7 +2746,7 @@ twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode) vcvt2ufxCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; if (flushToZero(srcElem1)) fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); @@ -2754,7 +2754,7 @@ destReg = vfpFpSToFixed(srcElem1, false, false, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",), 2, vcvt2ufxCode, toInt = True) @@ -2762,7 +2762,7 @@ 4, vcvt2ufxCode, toInt = True) vcvt2sfxCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; if (flushToZero(srcElem1)) fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); @@ -2770,7 +2770,7 @@ destReg = vfpFpSToFixed(srcElem1, true, false, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",), 2, vcvt2sfxCode, toInt = True) @@ -2778,13 +2778,13 @@ 4, vcvt2sfxCode, toInt = True) vcvtu2fpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",), 2, vcvtu2fpCode, fromInt = True) @@ -2792,13 +2792,13 @@ 4, vcvtu2fpCode, fromInt = True) vcvts2fpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",), 2, vcvts2fpCode, fromInt = True) @@ -2806,7 +2806,7 @@ 4, vcvts2fpCode, fromInt = True) vcvts2hCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; float srcFp1 = bitsToFp(srcElem1, (float)0.0); if (flushToZero(srcFp1)) fpscr.idc = 1; @@ -2817,19 +2817,19 @@ fpscr.ahp, srcFp1); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode) vcvth2sCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem) : "m" (srcElem1), "m" (destElem)); destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode) @@ -2840,11 +2840,11 @@ twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode) vrsqrtefpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; if (flushToZero(srcReg1)) fpscr.idc = 1; destReg = fprSqrtEstimate(fpscr, srcReg1); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode) twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode) @@ -2856,11 +2856,11 @@ twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode) vrecpefpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = FpscrExc; if (flushToZero(srcReg1)) fpscr.idc = 1; destReg = fpRecipEstimate(fpscr, srcReg1); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode) twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode) @@ -2954,7 +2954,7 @@ twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) vqabsCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { fpscr.qc = 1; destElem = ~srcElem1; @@ -2963,20 +2963,20 @@ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode) twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode) vqnegCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { fpscr.qc = 1; destElem = ~srcElem1; } else { destElem = -srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode) twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode) @@ -3019,13 +3019,13 @@ twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode) twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode) vcgtfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",), 2, vcgtfpCode, toInt = True) @@ -3036,13 +3036,13 @@ twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode) twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode) vcgefpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",), 2, vcgefpCode, toInt = True) @@ -3053,13 +3053,13 @@ twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode) twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode) vceqfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",), 2, vceqfpCode, toInt = True) @@ -3070,13 +3070,13 @@ twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode) twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode) vclefpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",), 2, vclefpCode, toInt = True) @@ -3087,13 +3087,13 @@ twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode) twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode) vcltfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",), 2, vcltfpCode, toInt = True) @@ -3203,7 +3203,7 @@ oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True) vqmovnCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; destElem = srcElem1; if ((BigElement)destElem != srcElem1) { fpscr.qc = 1; @@ -3211,24 +3211,24 @@ if (srcElem1 < 0) destElem = ~destElem; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode) vqmovunCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; destElem = srcElem1; if ((BigElement)destElem != srcElem1) { fpscr.qc = 1; destElem = mask(sizeof(Element) * 8); } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowMiscInst("vqmovun", "NVqmovun", "SimdMiscOp", smallUnsignedTypes, vqmovunCode) vqmovunsCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR)FpscrQc; destElem = srcElem1; if (srcElem1 < 0 || ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { @@ -3237,7 +3237,7 @@ if (srcElem1 < 0) destElem = ~destElem; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowMiscInst("vqmovun", "NVqmovuns", "SimdMiscOp", smallSignedTypes, vqmovunsCode) diff -r 01122cdb2656 -r 46477709d58a src/arch/arm/isa/operands.isa --- a/src/arch/arm/isa/operands.isa Mon Nov 08 17:40:18 2010 -0600 +++ b/src/arch/arm/isa/operands.isa Mon Nov 08 17:41:40 2010 -0600 @@ -203,6 +203,8 @@ 'Fpsr': ('ControlReg', 'uw', 'MISCREG_FPSR', None, 3), 'Fpsid': ('ControlReg', 'uw', 'MISCREG_FPSID', None, 3), 'Fpscr': ('ControlReg', 'uw', 'MISCREG_FPSCR', None, 3), + 'FpscrQc': ('ControlReg', 'uw', 'MISCREG_FPSCR_QC', None, 3), + 'FpscrExc': ('ControlReg', 'uw', 'MISCREG_FPSCR_EXC', None, 3), 'Cpacr': ('ControlReg', 'uw', 'MISCREG_CPACR', (None, None, 'IsControl'), 3), 'Fpexc': ('ControlReg', 'uw', 'MISCREG_FPEXC', None, 3), 'Sctlr': ('ControlReg', 'uw', 'MISCREG_SCTLR', None, 3), diff -r 01122cdb2656 -r 46477709d58a src/arch/arm/miscregs.hh --- a/src/arch/arm/miscregs.hh Mon Nov 08 17:40:18 2010 -0600 +++ b/src/arch/arm/miscregs.hh Mon Nov 08 17:41:40 2010 -0600 @@ -78,6 +78,8 @@ MISCREG_FPSR, MISCREG_FPSID, MISCREG_FPSCR, + MISCREG_FPSCR_QC, // Cumulative saturation flag + MISCREG_FPSCR_EXC, // Cumulative FP exception flags MISCREG_FPEXC, MISCREG_MVFR0, MISCREG_MVFR1, diff -r 01122cdb2656 -r 46477709d58a src/cpu/o3/commit_impl.hh --- a/src/cpu/o3/commit_impl.hh Mon Nov 08 17:40:18 2010 -0600 +++ b/src/cpu/o3/commit_impl.hh Mon Nov 08 17:41:40 2010 -0600 @@ -917,6 +917,9 @@ cpu->instDone(tid); } + // Updates misc. registers. + head_inst->updateMiscRegs(); + TheISA::advancePC(pc[tid], head_inst->staticInst); int count = 0; diff -r 01122cdb2656 -r 46477709d58a src/cpu/o3/dyn_inst.hh --- a/src/cpu/o3/dyn_inst.hh Mon Nov 08 17:40:18 2010 -0600 +++ b/src/cpu/o3/dyn_inst.hh Mon Nov 08 17:41:40 2010 -0600 @@ -111,6 +111,17 @@ /** Initializes variables. */ void initVars(); + protected: + /** Indexes of the destination misc. registers. They are needed to defer + * the write accesses to the misc. registers until the commit stage, when + * the instruction is out of its speculative state. + */ + int _destMiscRegIdx[MaxInstDestRegs]; + /** Values to be written to the destination misc. registers. */ + MiscReg _destMiscRegVal[MaxInstDestRegs]; + /** Number of destination misc. registers. */ + int _numDestMiscRegs; + public: /** Reads a miscellaneous register. */ MiscReg readMiscRegNoEffect(int misc_reg) @@ -130,7 +141,12 @@ void setMiscRegNoEffect(int misc_reg, const MiscReg &val) { this->instResult.integer = val; - return this->cpu->setMiscRegNoEffect(misc_reg, val, this->threadNumber); + /** Writes to misc. registers are recorded and deferred until the + * commit stage, when updateMiscRegs() is called. + */ + _destMiscRegIdx[_numDestMiscRegs] = misc_reg; + _destMiscRegVal[_numDestMiscRegs] = val; + _numDestMiscRegs++; } /** Sets a misc. register, including any side-effects the write @@ -138,8 +154,12 @@ */ void setMiscReg(int misc_reg, const MiscReg &val) { - return this->cpu->setMiscReg(misc_reg, val, - this->threadNumber); + /** Writes to misc. registers are recorded and deferred until the + * commit stage, when updateMiscRegs() is called. + */ + _destMiscRegIdx[_numDestMiscRegs] = misc_reg; + _destMiscRegVal[_numDestMiscRegs] = val; + _numDestMiscRegs++; } /** Reads a miscellaneous register. */ @@ -164,9 +184,14 @@ void setMiscRegOperandNoEffect(const StaticInst * si, int idx, const MiscReg &val) { this->instResult.integer = val; - return this->cpu->setMiscRegNoEffect( - si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag, - val, this->threadNumber); + + /** Writes to misc. registers are recorded and deferred until the + * commit stage, when updateMiscRegs() is called. + */ + _destMiscRegIdx[_numDestMiscRegs] = si->destRegIdx(idx) - + TheISA::Ctrl_Base_DepTag; + _destMiscRegVal[_numDestMiscRegs] = val; + _numDestMiscRegs++; } /** Sets a misc. register, including any side-effects the write @@ -175,9 +200,30 @@ void setMiscRegOperand(const StaticInst *si, int idx, const MiscReg &val) { - return this->cpu->setMiscReg( - si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag, - val, this->threadNumber); + /** Writes to misc. registers are recorded and deferred until the + * commit stage, when updateMiscRegs() is called. + */ + _destMiscRegIdx[_numDestMiscRegs] = si->destRegIdx(idx) - + TheISA::Ctrl_Base_DepTag; + _destMiscRegVal[_numDestMiscRegs] = val; + _numDestMiscRegs++; + } + + /** Called at the commit stage to update the misc. registers. */ + void updateMiscRegs() + { + // @todo: Pretty convoluted way to avoid squashing from happening when + // using the TC during an instruction's execution (specifically for + // instructions that have side-effects that use the TC). Fix this. + // See cpu/o3/dyn_inst_impl.hh. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + for (int i = 0; i < _numDestMiscRegs; i++) + this->cpu->setMiscReg( + _destMiscRegIdx[i], _destMiscRegVal[i], this->threadNumber); + + this->thread->inSyscall = in_syscall; } void forwardOldRegs() diff -r 01122cdb2656 -r 46477709d58a src/cpu/o3/dyn_inst_impl.hh --- a/src/cpu/o3/dyn_inst_impl.hh Mon Nov 08 17:40:18 2010 -0600 +++ b/src/cpu/o3/dyn_inst_impl.hh Mon Nov 08 17:41:40 2010 -0600 @@ -71,6 +71,8 @@ this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i); this->_readySrcRegIdx[i] = 0; } + + _numDestMiscRegs = 0; } template