diff -r 7a963b8b0b30 -r 68578047307f src/arch/x86/isa/decoder/two_byte_opcodes.isa
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa Thu Mar 17 15:59:49 2011 -0700
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa Tue Nov 03 16:55:16 2009 -0500
@@ -680,7 +680,7 @@
                 }
                 // repne (0xF2)
                 0x8: decode OPCODE_OP_BOTTOM3 {
-                    0x4: WarnUnimpl::haddps_Vo_Wo();
+                    0x4: HADDPS(Vo,Wo);
                     0x5: WarnUnimpl::hsubps_Vo_Wo();
                     default: UD2();
                 }
diff -r 7a963b8b0b30 -r 68578047307f src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py Thu Mar 17 15:59:49 2011 -0700
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py Tue Nov 03 16:55:16 2009 -0500
@@ -36,7 +36,120 @@
 # Authors: Gabe Black
 
 microcode = '''
-# HADDPS
+# HADDPS -- horizontal add of packed single-precision floats:
+#   dest[31:0]   = dest[63:32]  + dest[31:0]
+#   dest[63:32]  = dest[127:96] + dest[95:64]
+#   dest[95:64]  = src[63:32]   + src[31:0]
+#   dest[127:96] = src[127:96]  + src[95:64]
+# Every 64-bit half is reduced the same way: one copy is shifted right by 32
+# so its upper float lands in the low lane, then the low lanes are added with
+# size=4, ext=1.  A sum destined for an upper lane is shifted left by 32
+# first; the code relies on the next size=4, ext=1 add into the same register
+# replacing only the low 32 bits.
+
+def macroop HADDPS_XMM_XMM {
+    # Upper lane of ufp3: sum of the two floats in xmmh.
+    movfp ufp1, xmmh
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmh
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+    mslli ufp3, ufp3, 32, size=8, ext=0
+
+    # Lower lane of ufp3: sum of the two floats in xmml.
+    movfp ufp1, xmml
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmml
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+
+    # Upper lane of ufp4: sum of the two floats in xmmhm.
+    movfp ufp1, xmmhm
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmhm
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+    mslli ufp4, ufp4, 32, size=8, ext=0
+
+    # Lower lane of ufp4: sum of the two floats in xmmlm.
+    movfp ufp1, xmmlm
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmlm
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+
+    # Every source has been read by now, so source == destination is safe.
+    movfp xmml, ufp3
+    movfp xmmh, ufp4
+};
+
+def macroop HADDPS_XMM_M {
+    # Both loads are issued before xmml/xmmh are written.  If a load faults
+    # and the instruction is restarted, the destination register still holds
+    # its original value.
+
+    # Upper lane of ufp4: sum of the two floats at DISPLACEMENT+8.
+    ldfp ufp1, seg, sib, "DISPLACEMENT+8", dataSize=8
+    movfp ufp2, ufp1
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+    mslli ufp4, ufp4, 32, size=8, ext=0
+
+    # Lower lane of ufp4: sum of the two floats at disp.
+    ldfp ufp1, seg, sib, disp, dataSize=8
+    movfp ufp2, ufp1
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+
+    # Upper lane of ufp3: sum of the two floats in xmmh.
+    movfp ufp1, xmmh
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmh
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+    mslli ufp3, ufp3, 32, size=8, ext=0
+
+    # Lower lane of ufp3: sum of the two floats in xmml.
+    movfp ufp1, xmml
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmml
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+
+    movfp xmml, ufp3
+    movfp xmmh, ufp4
+};
+
+def macroop HADDPS_XMM_P {
+    rdip t7
+
+    # Both loads are issued before xmml/xmmh are written.  If a load faults
+    # and the instruction is restarted, the destination register still holds
+    # its original value.
+
+    # Upper lane of ufp4: sum of the two floats at DISPLACEMENT+8.
+    ldfp ufp1, seg, riprel, "DISPLACEMENT+8", dataSize=8
+    movfp ufp2, ufp1
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+    mslli ufp4, ufp4, 32, size=8, ext=0
+
+    # Lower lane of ufp4: sum of the two floats at disp.
+    ldfp ufp1, seg, riprel, disp, dataSize=8
+    movfp ufp2, ufp1
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+
+    # Upper lane of ufp3: sum of the two floats in xmmh.
+    movfp ufp1, xmmh
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmh
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+    mslli ufp3, ufp3, 32, size=8, ext=0
+
+    # Lower lane of ufp3: sum of the two floats in xmml.
+    movfp ufp1, xmml
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmml
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+
+    movfp xmml, ufp3
+    movfp xmmh, ufp4
+};
 
 def macroop HADDPD_XMM_XMM {
     maddf ufp1, xmmh , xmml, size=8, ext=Scalar