diff -r 878f2f30b12d build_opts/X86_MESI_Three_Level --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build_opts/X86_MESI_Three_Level Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,3 @@ +TARGET_ISA = 'x86' +CPU_MODELS = 'TimingSimpleCPU,O3CPU,AtomicSimpleCPU' +PROTOCOL = 'MESI_Three_Level' diff -r 878f2f30b12d build_opts/X86_MESI_Txn_Three_Level --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build_opts/X86_MESI_Txn_Three_Level Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,3 @@ +TARGET_ISA = 'x86' +CPU_MODELS = 'TimingSimpleCPU,O3CPU,AtomicSimpleCPU' +PROTOCOL = 'MESI_Txn_Three_Level' diff -r 878f2f30b12d configs/common/Options.py --- a/configs/common/Options.py Wed Jul 02 13:19:13 2014 -0400 +++ b/configs/common/Options.py Fri Jul 04 22:00:13 2014 -0500 @@ -104,6 +104,10 @@ parser.add_option("--num-dirs", type="int", default=1) parser.add_option("--num-l2caches", type="int", default=1) parser.add_option("--num-l3caches", type="int", default=1) + parser.add_option("--l0d_size", type="string", default="4kB") + parser.add_option("--l0i_size", type="string", default="4kB") + parser.add_option("--l0d_assoc", type="int", default=1) + parser.add_option("--l0i_assoc", type="int", default=1) parser.add_option("--l1d_size", type="string", default="64kB") parser.add_option("--l1i_size", type="string", default="32kB") parser.add_option("--l2_size", type="string", default="2MB") diff -r 878f2f30b12d configs/ruby/MESI_Txn_Three_Level.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/configs/ruby/MESI_Txn_Three_Level.py Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,209 @@ +# Copyright (c) 2006-2007 The Regents of The University of Michigan +# Copyright (c) 2009 Advanced Micro Devices, Inc. +# Copyright (c) 2013 Mark D. Hill and David A. Wood +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Brad Beckmann +# Nilay Vaish + +import math +import m5 +from m5.objects import * +from m5.defines import buildEnv +from Ruby import create_topology + +# +# Note: the L1 Cache latency is only used by the sequencer on fast path hits +# +class L0Cache(RubyCache): + latency = 1 + +class L1Cache(RubyCache): + latency = 5 + +# +# Note: the L2 Cache latency is not currently used +# +class L2Cache(RubyCache): + latency = 15 + +def define_options(parser): + parser.add_option("--num-clusters", type="int", default=1, + help="number of clusters in a design in which there are shared\ + caches private to clusters") + return + +def create_system(options, system, dma_ports, ruby_system): + + if buildEnv['PROTOCOL'] != 'MESI_Txn_Three_Level': + fatal("This script requires the MESI_Txn_Three_Level protocol to be built.") + + cpu_sequencers = [] + + # + # The ruby network creation expects the list of nodes in the system to be + # consistent with the NetDest list. Therefore the l1 controller nodes must be + # listed before the directory nodes and directory nodes before dma nodes, etc. 
+ # + l0_cntrl_nodes = [] + l1_cntrl_nodes = [] + l2_cntrl_nodes = [] + dir_cntrl_nodes = [] + dma_cntrl_nodes = [] + + assert (options.num_cpus % options.num_clusters == 0) + num_cpus_per_cluster = options.num_cpus / options.num_clusters + + assert (options.num_l2caches % options.num_clusters == 0) + num_l2caches_per_cluster = options.num_l2caches / options.num_clusters + + l2_bits = int(math.log(num_l2caches_per_cluster, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + l2_index_start = block_size_bits + l2_bits + + # + # Must create the individual controllers before the network to ensure the + # controller constructors are called before the network constructor + # + for i in xrange(options.num_clusters): + for j in xrange(num_cpus_per_cluster): + # + # First create the Ruby objects associated with this cpu + # + l0i_cache = L0Cache(size = options.l0i_size, + assoc = options.l0i_assoc, is_icache = True, + start_index_bit = block_size_bits, replacement_policy="LRU") + + l0d_cache = L0Cache(size = options.l0d_size, + assoc = options.l0d_assoc, is_icache = False, + start_index_bit = block_size_bits, replacement_policy="LRU") + + l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j, + Icache = l0i_cache, Dcache = l0d_cache, + send_evictions = (options.cpu_type == "detailed"), + ruby_system = ruby_system) + + cpu_seq = RubySequencer(version = i, icache = l0i_cache, + dcache = l0d_cache, ruby_system = ruby_system) + + l0_cntrl.sequencer = cpu_seq + + l1_cache = L1Cache(size = options.l1d_size, assoc = options.l1d_assoc, + start_index_bit = block_size_bits, is_icache = False) + + l1_cntrl = L1Cache_Controller(version = i*num_cpus_per_cluster+j, + cache = l1_cache, l2_select_num_bits = l2_bits, + cluster_id = i, ruby_system = ruby_system) + + exec("ruby_system.l0_cntrl%d = l0_cntrl" % ( + i*num_cpus_per_cluster+j)) + exec("ruby_system.l1_cntrl%d = l1_cntrl" % ( + i*num_cpus_per_cluster+j)) + + # + # Add controllers and sequencers to the 
appropriate lists + # + cpu_sequencers.append(cpu_seq) + l0_cntrl_nodes.append(l0_cntrl) + l1_cntrl_nodes.append(l1_cntrl) + l0_cntrl.peer = l1_cntrl + + for j in xrange(num_l2caches_per_cluster): + l2_cache = L2Cache(size = options.l2_size, + assoc = options.l2_assoc, + start_index_bit = l2_index_start) + + l2_cntrl = L2Cache_Controller( + version = i * num_l2caches_per_cluster + j, + L2cache = l2_cache, cluster_id = i, + transitions_per_cycle=options.ports, + ruby_system = ruby_system) + + exec("ruby_system.l2_cntrl%d = l2_cntrl" % ( + i * num_l2caches_per_cluster + j)) + l2_cntrl_nodes.append(l2_cntrl) + + phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) + assert(phys_mem_size % options.num_dirs == 0) + mem_module_size = phys_mem_size / options.num_dirs + + # Run each of the ruby memory controllers at a ratio of the frequency of + # the ruby system + # clk_divider value is a fix to pass regression. + ruby_system.memctrl_clk_domain = DerivedClockDomain( + clk_domain=ruby_system.clk_domain, + clk_divider=3) + + for i in xrange(options.num_dirs): + # + # Create the Ruby objects associated with the directory controller + # + + mem_cntrl = RubyMemoryControl( + clk_domain = ruby_system.memctrl_clk_domain, + version = i, + ruby_system = ruby_system) + + dir_size = MemorySize('0B') + dir_size.value = mem_module_size + + dir_cntrl = Directory_Controller(version = i, + directory = \ + RubyDirectoryMemory(version = i, + size = dir_size, + use_map = + options.use_map), + memBuffer = mem_cntrl, + transitions_per_cycle = options.ports, + ruby_system = ruby_system) + + exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) + dir_cntrl_nodes.append(dir_cntrl) + + for i, dma_port in enumerate(dma_ports): + # + # Create the Ruby objects associated with the dma controller + # + dma_seq = DMASequencer(version = i, + ruby_system = ruby_system) + + dma_cntrl = DMA_Controller(version = i, + dma_sequencer = dma_seq, + transitions_per_cycle = options.ports, + ruby_system = 
ruby_system) + + exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) + exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i) + dma_cntrl_nodes.append(dma_cntrl) + + all_cntrls = l0_cntrl_nodes + \ + l1_cntrl_nodes + \ + l2_cntrl_nodes + \ + dir_cntrl_nodes + \ + dma_cntrl_nodes + + topology = create_topology(all_cntrls, options) + return (cpu_sequencers, dir_cntrl_nodes, topology) diff -r 878f2f30b12d src/arch/x86/faults.hh --- a/src/arch/x86/faults.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/faults.hh Fri Jul 04 22:00:13 2014 -0500 @@ -124,6 +124,18 @@ StaticInstPtr inst = StaticInst::nullStaticInstPtr); }; + // Base class of handling x86 transactional abort + class X86TxnAbort : public X86FaultBase + { + public: + X86TxnAbort() + : X86FaultBase("", "", 0, 0) + {} + + void invoke(ThreadContext * tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + }; + // Base class for x86 aborts which seem to be catastrophic failures. class X86Abort : public X86FaultBase { diff -r 878f2f30b12d src/arch/x86/faults.cc --- a/src/arch/x86/faults.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/faults.cc Fri Jul 04 22:00:13 2014 -0500 @@ -116,6 +116,18 @@ pc.uEnd(); } + void X86TxnAbort::invoke(ThreadContext * tc, StaticInstPtr inst) + { + PCState pcState = tc->pcState(); + + using namespace X86ISAInst::RomLabels; + MicroPC entry = extern_label_txnAbortInterrupt; + + pcState.upc(romMicroPC(entry)); + pcState.nupc(romMicroPC(entry) + 1); + tc->pcState(pcState); + } + void X86Abort::invoke(ThreadContext * tc, StaticInstPtr inst) { panic("Abort exception!"); diff -r 878f2f30b12d src/arch/x86/isa/decoder/locked_ele_opcodes.isa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/arch/x86/isa/decoder/locked_ele_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,183 @@ +// Copyright (c) 2009 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Decode opcodes with the lock prefix. Opcodes which shouldn't have that +// prefix should effectively decode to UD2. +// + +// All the memory references in these instructions happen through modrm bytes. +// We therefore only need to make sure the modrm byte encodes a memory +// reference to make sure these are the memory forms of these instructions. 
+0x1: decode MODRM_MOD { + format Inst { + 0x3: UD2(); + default: decode OPCODE_NUM { + 0x1: decode OPCODE_OP_TOP5 { + // 0x00: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::ADD_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x01: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::OR_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x02: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::ADC_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x03: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::SBB_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x04: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::AND_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x05: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::SUB_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x06: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::XOR_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x10: decode OPCODE_OP_BOTTOM3 { + // 0x0: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mb,Ib); + // 0x1: OR_LOCKED(Mb,Ib); + // 0x2: ADC_LOCKED(Mb,Ib); + // 0x3: SBB_LOCKED(Mb,Ib); + // 0x4: AND_LOCKED(Mb,Ib); + // 0x5: SUB_LOCKED(Mb,Ib); + // 0x6: XOR_LOCKED(Mb,Ib); + // } + // 0x1: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mv,Iz); + // 0x1: OR_LOCKED(Mv,Iz); + // 0x2: ADC_LOCKED(Mv,Iz); + // 0x3: SBB_LOCKED(Mv,Iz); + // 0x4: AND_LOCKED(Mv,Iz); + // 0x5: SUB_LOCKED(Mv,Iz); + // 0x6: XOR_LOCKED(Mv,Iz); + // } + // 0x2: decode MODE_SUBMODE { + // 0x0: UD2(); + // default: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mb,Ib); + // 0x1: OR_LOCKED(Mb,Ib); + // 0x2: ADC_LOCKED(Mb,Ib); + // 0x3: SBB_LOCKED(Mb,Ib); + // 0x4: AND_LOCKED(Mb,Ib); + // 0x5: SUB_LOCKED(Mb,Ib); + // 0x6: XOR_LOCKED(Mb,Ib); + // } + // } + // //0x3: group1_Ev_Ib(); + // 0x3: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mv,Ib); + // 0x1: OR_LOCKED(Mv,Ib); + // 0x2: ADC_LOCKED(Mv,Ib); + // 0x3: SBB_LOCKED(Mv,Ib); + // 0x4: AND_LOCKED(Mv,Ib); + 
// 0x5: SUB_LOCKED(Mv,Ib); + // 0x6: XOR_LOCKED(Mv,Ib); + // } + // 0x6: XCHG_LOCKED(Mb,Gb); + // 0x7: XCHG_LOCKED(Mv,Gv); + // } + // 0x1E: decode OPCODE_OP_BOTTOM3 { + // //0x6: group3_Eb(); + // 0x6: decode MODRM_REG { + // 0x2: NOT_LOCKED(Mb); + // 0x3: NEG_LOCKED(Mb); + // } + // //0x7: group3_Ev(); + // 0x7: decode MODRM_REG { + // 0x2: NOT_LOCKED(Mv); + // 0x3: NEG_LOCKED(Mv); + // } + // } + // 0x1F: decode OPCODE_OP_BOTTOM3 { + // 0x6: decode MODRM_REG { + // 0x0: INC_LOCKED(Mb); + // 0x1: DEC_LOCKED(Mb); + // default: UD2(); + // } + // //0x7: group5(); + // 0x7: decode MODRM_REG { + // 0x0: INC_LOCKED(Mv); + // 0x1: DEC_LOCKED(Mv); + // } + // } + default: M5InternalError::error( + {{"Unimpemented Elesion end!"}}); + } + 0x2: decode OPCODE_PREFIXA { + 0x0F: decode OPCODE_OP_TOP5 { + // 0x04: decode OPCODE_OP_BOTTOM3 { + // 0x0: WarnUnimpl::mov_Rd_CR8D(); + // 0x2: WarnUnimpl::mov_CR8D_Rd(); + // } + // 0x15: decode OPCODE_OP_BOTTOM3 { + // 0x3: BTS_LOCKED(Mv,Gv); + // } + // 0x16: decode OPCODE_OP_BOTTOM3 { + // 0x0: CMPXCHG_LOCKED(Mb,Gb); + // 0x1: CMPXCHG_LOCKED(Mv,Gv); + // 0x3: BTR_LOCKED(Mv,Gv); + // } + // 0x17: decode OPCODE_OP_BOTTOM3 { + // 0x2: decode MODRM_REG { + // 0x5: BTS_LOCKED(Mv,Ib); + // 0x6: BTR_LOCKED(Mv,Ib); + // 0x7: BTC_LOCKED(Mv,Ib); + // } + // 0x3: BTC_LOCKED(Mv,Gv); + // } + 0x18: decode OPCODE_OP_BOTTOM3 { + 0x0: XADD_LOCK_ELE(Mb,Gb); + 0x1: XADD_LOCK_ELE(Mv,Gv); + //0x7: group9(); + // 0x7: decode MODRM_REG { + // //Also CMPXCHG16B + // 0x1: CMPXCHG8B_LOCKED(Mdp); + // } + default: M5InternalError::error( + {{"Unimplemented Elesion end!"}}); + } + default: M5InternalError::error( + {{"Unimplemented Elesion end!"}}); + } + default: M5InternalError::error( + {{"Unexpected first opcode byte in two byte opcode!"}}); + } + } + } +} diff -r 878f2f30b12d src/arch/x86/isa/decoder/locked_els_opcodes.isa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/arch/x86/isa/decoder/locked_els_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ 
-0,0 +1,183 @@ +// Copyright (c) 2009 The Regents of The University of Michigan +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Decode opcodes with the lock prefix. Opcodes which shouldn't have that +// prefix should effectively decode to UD2. +// + +// All the memory references in these instructions happen through modrm bytes. 
+// We therefore only need to make sure the modrm byte encodes a memory +// reference to make sure these are the memory forms of these instructions. +0x1: decode MODRM_MOD { + format Inst { + 0x3: UD2(); + default: decode OPCODE_NUM { + 0x1: decode OPCODE_OP_TOP5 { + // 0x00: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::ADD_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x01: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::OR_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x02: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::ADC_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x03: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::SBB_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x04: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::AND_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x05: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::SUB_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x06: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::XOR_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x10: decode OPCODE_OP_BOTTOM3 { + // 0x0: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mb,Ib); + // 0x1: OR_LOCKED(Mb,Ib); + // 0x2: ADC_LOCKED(Mb,Ib); + // 0x3: SBB_LOCKED(Mb,Ib); + // 0x4: AND_LOCKED(Mb,Ib); + // 0x5: SUB_LOCKED(Mb,Ib); + // 0x6: XOR_LOCKED(Mb,Ib); + // } + // 0x1: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mv,Iz); + // 0x1: OR_LOCKED(Mv,Iz); + // 0x2: ADC_LOCKED(Mv,Iz); + // 0x3: SBB_LOCKED(Mv,Iz); + // 0x4: AND_LOCKED(Mv,Iz); + // 0x5: SUB_LOCKED(Mv,Iz); + // 0x6: XOR_LOCKED(Mv,Iz); + // } + // 0x2: decode MODE_SUBMODE { + // 0x0: UD2(); + // default: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mb,Ib); + // 0x1: OR_LOCKED(Mb,Ib); + // 0x2: ADC_LOCKED(Mb,Ib); + // 0x3: SBB_LOCKED(Mb,Ib); + // 0x4: AND_LOCKED(Mb,Ib); + // 0x5: SUB_LOCKED(Mb,Ib); + // 0x6: XOR_LOCKED(Mb,Ib); + // } + // } + // //0x3: group1_Ev_Ib(); + // 0x3: decode MODRM_REG 
{ + // 0x0: ADD_LOCKED(Mv,Ib); + // 0x1: OR_LOCKED(Mv,Ib); + // 0x2: ADC_LOCKED(Mv,Ib); + // 0x3: SBB_LOCKED(Mv,Ib); + // 0x4: AND_LOCKED(Mv,Ib); + // 0x5: SUB_LOCKED(Mv,Ib); + // 0x6: XOR_LOCKED(Mv,Ib); + // } + // 0x6: XCHG_LOCKED(Mb,Gb); + // 0x7: XCHG_LOCKED(Mv,Gv); + // } + // 0x1E: decode OPCODE_OP_BOTTOM3 { + // //0x6: group3_Eb(); + // 0x6: decode MODRM_REG { + // 0x2: NOT_LOCKED(Mb); + // 0x3: NEG_LOCKED(Mb); + // } + // //0x7: group3_Ev(); + // 0x7: decode MODRM_REG { + // 0x2: NOT_LOCKED(Mv); + // 0x3: NEG_LOCKED(Mv); + // } + // } + // 0x1F: decode OPCODE_OP_BOTTOM3 { + // 0x6: decode MODRM_REG { + // 0x0: INC_LOCKED(Mb); + // 0x1: DEC_LOCKED(Mb); + // default: UD2(); + // } + // //0x7: group5(); + // 0x7: decode MODRM_REG { + // 0x0: INC_LOCKED(Mv); + // 0x1: DEC_LOCKED(Mv); + // } + // } + default: M5InternalError::error( + {{"Unimplemented Elesion start!"}}); + } + 0x2: decode OPCODE_PREFIXA { + 0x0F: decode OPCODE_OP_TOP5 { + // 0x04: decode OPCODE_OP_BOTTOM3 { + // 0x0: WarnUnimpl::mov_Rd_CR8D(); + // 0x2: WarnUnimpl::mov_CR8D_Rd(); + // } + // 0x15: decode OPCODE_OP_BOTTOM3 { + // 0x3: BTS_LOCKED(Mv,Gv); + // } + // 0x16: decode OPCODE_OP_BOTTOM3 { + // 0x0: CMPXCHG_LOCKED(Mb,Gb); + // 0x1: CMPXCHG_LOCKED(Mv,Gv); + // 0x3: BTR_LOCKED(Mv,Gv); + // } + // 0x17: decode OPCODE_OP_BOTTOM3 { + // 0x2: decode MODRM_REG { + // 0x5: BTS_LOCKED(Mv,Ib); + // 0x6: BTR_LOCKED(Mv,Ib); + // 0x7: BTC_LOCKED(Mv,Ib); + // } + // 0x3: BTC_LOCKED(Mv,Gv); + // } + 0x18: decode OPCODE_OP_BOTTOM3 { + 0x0: XADD_LOCK_ELB(Mb,Gb); + 0x1: XADD_LOCK_ELB(Mv,Gv); + //0x7: group9(); + // 0x7: decode MODRM_REG { + // //Also CMPXCHG16B + // 0x1: CMPXCHG8B_LOCKED(Mdp); + // } + default: M5InternalError::error( + {{"Unimplemented Elesion start!"}}); + } + default: M5InternalError::error( + {{"Unimplemented Elesion start!"}}); + } + default: M5InternalError::error( + {{"Unexpected first opcode byte in two byte opcode!"}}); + } + } + } +} diff -r 878f2f30b12d 
src/arch/x86/isa/decoder/locked_nel_opcodes.isa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/arch/x86/isa/decoder/locked_nel_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,177 @@ +// Copyright (c) 2009 The Regents of The University of Michigan +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Decode opcodes with the lock prefix. 
Opcodes which shouldn't have that +// prefix should effectively decode to UD2. +// + +// All the memory references in these instructions happen through modrm bytes. +// We therefore only need to make sure the modrm byte encodes a memory +// reference to make sure these are the memory forms of these instructions. +0x0: decode MODRM_MOD { + format Inst { + 0x3: UD2(); + default: decode OPCODE_NUM { + 0x1: decode OPCODE_OP_TOP5 { + 0x00: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::ADD_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x01: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::OR_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x02: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::ADC_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x03: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::SBB_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x04: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::AND_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x05: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::SUB_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x06: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::XOR_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x10: decode OPCODE_OP_BOTTOM3 { + 0x0: decode MODRM_REG { + 0x0: ADD_LOCKED(Mb,Ib); + 0x1: OR_LOCKED(Mb,Ib); + 0x2: ADC_LOCKED(Mb,Ib); + 0x3: SBB_LOCKED(Mb,Ib); + 0x4: AND_LOCKED(Mb,Ib); + 0x5: SUB_LOCKED(Mb,Ib); + 0x6: XOR_LOCKED(Mb,Ib); + } + 0x1: decode MODRM_REG { + 0x0: ADD_LOCKED(Mv,Iz); + 0x1: OR_LOCKED(Mv,Iz); + 0x2: ADC_LOCKED(Mv,Iz); + 0x3: SBB_LOCKED(Mv,Iz); + 0x4: AND_LOCKED(Mv,Iz); + 0x5: SUB_LOCKED(Mv,Iz); + 0x6: XOR_LOCKED(Mv,Iz); + } + 0x2: decode MODE_SUBMODE { + 0x0: UD2(); + default: decode MODRM_REG { + 0x0: ADD_LOCKED(Mb,Ib); + 0x1: OR_LOCKED(Mb,Ib); + 0x2: ADC_LOCKED(Mb,Ib); + 0x3: SBB_LOCKED(Mb,Ib); + 0x4: AND_LOCKED(Mb,Ib); + 0x5: SUB_LOCKED(Mb,Ib); + 0x6: XOR_LOCKED(Mb,Ib); + } + } + //0x3: group1_Ev_Ib(); + 0x3: decode MODRM_REG { + 0x0: 
ADD_LOCKED(Mv,Ib); + 0x1: OR_LOCKED(Mv,Ib); + 0x2: ADC_LOCKED(Mv,Ib); + 0x3: SBB_LOCKED(Mv,Ib); + 0x4: AND_LOCKED(Mv,Ib); + 0x5: SUB_LOCKED(Mv,Ib); + 0x6: XOR_LOCKED(Mv,Ib); + } + 0x6: XCHG_LOCKED(Mb,Gb); + 0x7: XCHG_LOCKED(Mv,Gv); + } + 0x1E: decode OPCODE_OP_BOTTOM3 { + //0x6: group3_Eb(); + 0x6: decode MODRM_REG { + 0x2: NOT_LOCKED(Mb); + 0x3: NEG_LOCKED(Mb); + } + //0x7: group3_Ev(); + 0x7: decode MODRM_REG { + 0x2: NOT_LOCKED(Mv); + 0x3: NEG_LOCKED(Mv); + } + } + 0x1F: decode OPCODE_OP_BOTTOM3 { + 0x6: decode MODRM_REG { + 0x0: INC_LOCKED(Mb); + 0x1: DEC_LOCKED(Mb); + default: UD2(); + } + //0x7: group5(); + 0x7: decode MODRM_REG { + 0x0: INC_LOCKED(Mv); + 0x1: DEC_LOCKED(Mv); + } + } + } + 0x2: decode OPCODE_PREFIXA { + 0x0F: decode OPCODE_OP_TOP5 { + 0x04: decode OPCODE_OP_BOTTOM3 { + 0x0: WarnUnimpl::mov_Rd_CR8D(); + 0x2: WarnUnimpl::mov_CR8D_Rd(); + } + 0x15: decode OPCODE_OP_BOTTOM3 { + 0x3: BTS_LOCKED(Mv,Gv); + } + 0x16: decode OPCODE_OP_BOTTOM3 { + 0x0: CMPXCHG_LOCKED(Mb,Gb); + 0x1: CMPXCHG_LOCKED(Mv,Gv); + 0x3: BTR_LOCKED(Mv,Gv); + } + 0x17: decode OPCODE_OP_BOTTOM3 { + 0x2: decode MODRM_REG { + 0x5: BTS_LOCKED(Mv,Ib); + 0x6: BTR_LOCKED(Mv,Ib); + 0x7: BTC_LOCKED(Mv,Ib); + } + 0x3: BTC_LOCKED(Mv,Gv); + } + 0x18: decode OPCODE_OP_BOTTOM3 { + 0x0: XADD_LOCKED(Mb,Gb); + 0x1: XADD_LOCKED(Mv,Gv); + //0x7: group9(); + 0x7: decode MODRM_REG { + //Also CMPXCHG16B + 0x1: CMPXCHG8B_LOCKED(Mdp); + } + } + } + default: M5InternalError::error( + {{"Unexpected first opcode byte in two byte opcode!"}}); + } + } + } +} diff -r 878f2f30b12d src/arch/x86/isa/decoder/locked_opcodes.isa --- a/src/arch/x86/isa/decoder/locked_opcodes.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/decoder/locked_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -35,143 +35,17 @@ // All the memory references in these instructions happen through modrm bytes. 
// We therefore only need to make sure the modrm byte encodes a memory // reference to make sure these are the memory forms of these instructions. -0x1: decode MODRM_MOD { - format Inst { - 0x3: UD2(); - default: decode OPCODE_NUM { - 0x1: decode OPCODE_OP_TOP5 { - 0x00: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::ADD_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x01: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::OR_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x02: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::ADC_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x03: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::SBB_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x04: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::AND_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x05: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::SUB_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x06: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::XOR_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x10: decode OPCODE_OP_BOTTOM3 { - 0x0: decode MODRM_REG { - 0x0: ADD_LOCKED(Mb,Ib); - 0x1: OR_LOCKED(Mb,Ib); - 0x2: ADC_LOCKED(Mb,Ib); - 0x3: SBB_LOCKED(Mb,Ib); - 0x4: AND_LOCKED(Mb,Ib); - 0x5: SUB_LOCKED(Mb,Ib); - 0x6: XOR_LOCKED(Mb,Ib); - } - 0x1: decode MODRM_REG { - 0x0: ADD_LOCKED(Mv,Iz); - 0x1: OR_LOCKED(Mv,Iz); - 0x2: ADC_LOCKED(Mv,Iz); - 0x3: SBB_LOCKED(Mv,Iz); - 0x4: AND_LOCKED(Mv,Iz); - 0x5: SUB_LOCKED(Mv,Iz); - 0x6: XOR_LOCKED(Mv,Iz); - } - 0x2: decode MODE_SUBMODE { - 0x0: UD2(); - default: decode MODRM_REG { - 0x0: ADD_LOCKED(Mb,Ib); - 0x1: OR_LOCKED(Mb,Ib); - 0x2: ADC_LOCKED(Mb,Ib); - 0x3: SBB_LOCKED(Mb,Ib); - 0x4: AND_LOCKED(Mb,Ib); - 0x5: SUB_LOCKED(Mb,Ib); - 0x6: XOR_LOCKED(Mb,Ib); - } - } - //0x3: group1_Ev_Ib(); - 0x3: decode MODRM_REG { - 0x0: ADD_LOCKED(Mv,Ib); - 0x1: OR_LOCKED(Mv,Ib); - 0x2: ADC_LOCKED(Mv,Ib); - 0x3: SBB_LOCKED(Mv,Ib); - 0x4: AND_LOCKED(Mv,Ib); - 0x5: SUB_LOCKED(Mv,Ib); - 0x6: XOR_LOCKED(Mv,Ib); - 
} - 0x6: XCHG_LOCKED(Mb,Gb); - 0x7: XCHG_LOCKED(Mv,Gv); - } - 0x1E: decode OPCODE_OP_BOTTOM3 { - //0x6: group3_Eb(); - 0x6: decode MODRM_REG { - 0x2: NOT_LOCKED(Mb); - 0x3: NEG_LOCKED(Mb); - } - //0x7: group3_Ev(); - 0x7: decode MODRM_REG { - 0x2: NOT_LOCKED(Mv); - 0x3: NEG_LOCKED(Mv); - } - } - 0x1F: decode OPCODE_OP_BOTTOM3 { - 0x6: decode MODRM_REG { - 0x0: INC_LOCKED(Mb); - 0x1: DEC_LOCKED(Mb); - default: UD2(); - } - //0x7: group5(); - 0x7: decode MODRM_REG { - 0x0: INC_LOCKED(Mv); - 0x1: DEC_LOCKED(Mv); - } - } - } - 0x2: decode OPCODE_PREFIXA { - 0x0F: decode OPCODE_OP_TOP5 { - 0x04: decode OPCODE_OP_BOTTOM3 { - 0x0: WarnUnimpl::mov_Rd_CR8D(); - 0x2: WarnUnimpl::mov_CR8D_Rd(); - } - 0x15: decode OPCODE_OP_BOTTOM3 { - 0x3: BTS_LOCKED(Mv,Gv); - } - 0x16: decode OPCODE_OP_BOTTOM3 { - 0x0: CMPXCHG_LOCKED(Mb,Gb); - 0x1: CMPXCHG_LOCKED(Mv,Gv); - 0x3: BTR_LOCKED(Mv,Gv); - } - 0x17: decode OPCODE_OP_BOTTOM3 { - 0x2: decode MODRM_REG { - 0x5: BTS_LOCKED(Mv,Ib); - 0x6: BTR_LOCKED(Mv,Ib); - 0x7: BTC_LOCKED(Mv,Ib); - } - 0x3: BTC_LOCKED(Mv,Gv); - } - 0x18: decode OPCODE_OP_BOTTOM3 { - 0x0: XADD_LOCKED(Mb,Gb); - 0x1: XADD_LOCKED(Mv,Gv); - //0x7: group9(); - 0x7: decode MODRM_REG { - //Also CMPXCHG16B - 0x1: CMPXCHG8B_LOCKED(Mdp); - } - } - } - default: M5InternalError::error( - {{"Unexpected first opcode byte in two byte opcode!"}}); - } - } +0x1: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + // non elided lock + ##include "locked_nel_opcodes.isa" + // ellided lock - start transaction + ##include "locked_els_opcodes.isa" + //0x1: M5InternalError::error({{"TxnStart!"}}); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); } -} + // ellided lock - end transaction + ##include "locked_ele_opcodes.isa" + //0x1: M5InternalError::error({{"TxnEnd!"}}); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); +} \ No newline at end of file diff -r 878f2f30b12d src/arch/x86/isa/decoder/one_byte_opcodes.isa --- 
a/src/arch/x86/isa/decoder/one_byte_opcodes.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -266,12 +266,38 @@ } 0x4: TEST(Eb,Gb); 0x5: TEST(Ev,Gv); - 0x6: XCHG(Eb,Gb); - 0x7: XCHG(Ev,Gv); + 0x6: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: XCHG(Eb,Gb); + 0x1: XCHG_ELB(Eb,Gb); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: XCHG_ELE(Eb,Gb); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x7: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: XCHG(Ev,Gv); + 0x1: XCHG_ELB(Ev,Gv); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: XCHG_ELE(Ev,Gv); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } } 0x11: decode OPCODE_OP_BOTTOM3 { - 0x0: MOV(Eb,Gb); - 0x1: MOV(Ev,Gv); + 0x0: decode LEGACY_REP { + 0x0: MOV(Eb,Gb); + default: M5InternalError::error({{"XRelease on MOV!"}}); + //0x1: MOV_ELE(Eb,Gb); + //default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x1: decode LEGACY_REP { + 0x0: MOV(Ev,Gv); + default: M5InternalError::error({{"XRelease on MOV!"}}); + //0x1: MOV_ELE(Ev,Gv); + //default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } 0x2: MOV(Gb,Eb); 0x3: MOV(Gv,Ev); 0x4: decode MODRM_REG { @@ -297,7 +323,15 @@ } 0x12: decode OPCODE_OP_BOTTOM3 { 0x0: NopInst::NOP(); //XXX repe makes this a "pause" - default: XCHG(Bv,rAv); + default: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: XCHG(Bv,rAv); + 0x1: XCHG_ELB(Bv,rAv); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: XCHG_ELE(Bv,rAv); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } } 0x13: decode OPCODE_OP_BOTTOM3 { 0x0: CDQE(rAv); @@ -335,8 +369,16 @@ 0x6: StringTestInst::SCAS(Yb); 0x7: StringTestInst::SCAS(Yv); } - 0x16: MOV(Bb,Ib); - 0x17: MOV(Bv,Iv); + 0x16: decode LEGACY_REP { + 0x0: MOV(Bb,Ib); + 0x1: MOV_ELE(Bb,Ib); + default: 
M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x17: decode LEGACY_REP { + 0x0: MOV(Bv,Iv); + 0x1: MOV_ELE(Bv,Iv); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } 0x18: decode OPCODE_OP_BOTTOM3 { //0x0: group2_Eb_Ib(); 0x0: decode MODRM_REG { @@ -372,12 +414,22 @@ } //0x6: group12_Eb_Ib(); 0x6: decode MODRM_REG { - 0x0: MOV(Eb,Ib); + 0x0: decode LEGACY_REP { + 0x0: MOV(Eb,Ib); + 0x1: MOV_ELE(Eb,Ib); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x7: XABORT(Ib); default: UD2(); } //0x7: group12_Ev_Iz(); 0x7: decode MODRM_REG { - 0x0: MOV(Ev,Iz); + 0x0: decode LEGACY_REP { + 0x0: MOV(Ev,Iz); + 0x1: MOV_ELE(Ev,Iz); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x7: XBEGIN(Jz); default: UD2(); } } diff -r 878f2f30b12d src/arch/x86/isa/decoder/two_byte_opcodes.isa --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -81,6 +81,7 @@ 0x3: decode MODRM_RM { 0x0: xgetbv(); 0x1: xsetbv(); + 0x5: Inst::XEND(); } default: decode MODE_SUBMODE { 0x0: Inst::LGDT(M); @@ -784,8 +785,24 @@ } format Inst { 0x16: decode OPCODE_OP_BOTTOM3 { - 0x0: CMPXCHG(Eb,Gb); - 0x1: CMPXCHG(Ev,Gv); + 0x0: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: CMPXCHG(Eb,Gb); + 0x1: CMPXCHG_ELB(Eb,Gb); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: CMPXCHG_ELE(Eb,Gb); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x1: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: CMPXCHG(Ev,Gv); + 0x1: CMPXCHG_ELB(Ev,Gv); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: CMPXCHG_ELE(Ev,Gv); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } 0x2: WarnUnimpl::lss_Gz_Mp(); 0x3: BTR(Ev,Gv); 0x4: WarnUnimpl::lfs_Gz_Mp(); diff -r 878f2f30b12d src/arch/x86/isa/insts/general_purpose/data_transfer/move.py --- 
a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py Fri Jul 04 22:00:13 2014 -0500 @@ -370,8 +370,244 @@ stfp xmml, seg, riprel, disp, dataSize=dsz }; +def macroop XBEGIN_I { + # Make the default data size of pops 64 bits in 64 bit mode + #.adjust_env oszIn64Override + + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + + mfence + + wrval "InstRegIndex(MISCREG_TRAX)", rax, dataSize=8 + wrval "InstRegIndex(MISCREG_TRCX)", rcx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDX)", rdx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBX)", rbx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSP)", rsp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBP)", rbp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSI)", rsi, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDI)", rdi, dataSize=8 + + wrval "InstRegIndex(MISCREG_TR8)", r8, dataSize=8 + wrval "InstRegIndex(MISCREG_TR9)", r9, dataSize=8 + wrval "InstRegIndex(MISCREG_TR10)", r10, dataSize=8 + wrval "InstRegIndex(MISCREG_TR11)", r11, dataSize=8 + wrval "InstRegIndex(MISCREG_TR12)", r12, dataSize=8 + wrval "InstRegIndex(MISCREG_TR13)", r13, dataSize=8 + wrval "InstRegIndex(MISCREG_TR14)", r14, dataSize=8 + wrval "InstRegIndex(MISCREG_TR15)", r15, dataSize=8 + + rdip t3, dataSize=8 + limm t2, imm, dataSize=8 + + wrval "InstRegIndex(MISCREG_TRIP)", t3, dataSize=8 + wrval "InstRegIndex(MISCREG_TIMM)", t2, dataSize=8 + + xor t1, t1, t1, dataSize=8 + wrval "InstRegIndex(MISCREG_THLE)", t1, dataSize=8 + + xbegin +end: + fault "NoFault" +}; + +def macroop XABORT_I { + # Make the default data size of pops 64 bits in 64 bit mode + #.adjust_env oszIn64Override + + mfence + xabort + + rdval rax, "InstRegIndex(MISCREG_TRAX)", dataSize=8 + rdval rcx, "InstRegIndex(MISCREG_TRCX)", 
dataSize=8 + rdval rdx, "InstRegIndex(MISCREG_TRDX)", dataSize=8 + rdval rbx, "InstRegIndex(MISCREG_TRBX)", dataSize=8 + rdval rsp, "InstRegIndex(MISCREG_TRSP)", dataSize=8 + rdval rbp, "InstRegIndex(MISCREG_TRBP)", dataSize=8 + rdval rsi, "InstRegIndex(MISCREG_TRSI)", dataSize=8 + rdval rdi, "InstRegIndex(MISCREG_TRDI)", dataSize=8 + + rdval r8, "InstRegIndex(MISCREG_TR8)", dataSize=8 + rdval r9, "InstRegIndex(MISCREG_TR9)", dataSize=8 + rdval r10, "InstRegIndex(MISCREG_TR10)", dataSize=8 + rdval r11, "InstRegIndex(MISCREG_TR11)", dataSize=8 + rdval r12, "InstRegIndex(MISCREG_TR12)", dataSize=8 + rdval r13, "InstRegIndex(MISCREG_TR13)", dataSize=8 + rdval r14, "InstRegIndex(MISCREG_TR14)", dataSize=8 + rdval r15, "InstRegIndex(MISCREG_TR15)", dataSize=8 + + rdval t3, "InstRegIndex(MISCREG_TRIP)", dataSize=8 + rdval t2, "InstRegIndex(MISCREG_TIMM)", dataSize=8 + + #check if hle - do not write rax if it is + rdval t1, "InstRegIndex(MISCREG_THLE)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("skip_abort_code"), flags=(nCEZF,) + + limm rax, imm, dataSize=8 + br label("end_abort_code") + +skip_abort_code: + # set HLE abort register + limm t1, 1, dataSize=8 + wrval "InstRegIndex(MISCREG_THABRT)", t1, dataSize=8 + +end_abort_code: + wrip t3, t2, dataSize=8 + limm t0, 0, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t0, dataSize=8 +}; + +def rom +{ + extern txnAbortInterrupt: + + mfence + xabort + + rdval rax, "InstRegIndex(MISCREG_TRAX)", dataSize=8 + rdval rcx, "InstRegIndex(MISCREG_TRCX)", dataSize=8 + rdval rdx, "InstRegIndex(MISCREG_TRDX)", dataSize=8 + rdval rbx, "InstRegIndex(MISCREG_TRBX)", dataSize=8 + rdval rsp, "InstRegIndex(MISCREG_TRSP)", dataSize=8 + rdval rbp, "InstRegIndex(MISCREG_TRBP)", dataSize=8 + rdval rsi, "InstRegIndex(MISCREG_TRSI)", dataSize=8 + rdval rdi, "InstRegIndex(MISCREG_TRDI)", dataSize=8 + + rdval r8, "InstRegIndex(MISCREG_TR8)", dataSize=8 + rdval r9, "InstRegIndex(MISCREG_TR9)", dataSize=8 + rdval r10, 
"InstRegIndex(MISCREG_TR10)", dataSize=8 + rdval r11, "InstRegIndex(MISCREG_TR11)", dataSize=8 + rdval r12, "InstRegIndex(MISCREG_TR12)", dataSize=8 + rdval r13, "InstRegIndex(MISCREG_TR13)", dataSize=8 + rdval r14, "InstRegIndex(MISCREG_TR14)", dataSize=8 + rdval r15, "InstRegIndex(MISCREG_TR15)", dataSize=8 + + rdval t3, "InstRegIndex(MISCREG_TRIP)", dataSize=8 + rdval t2, "InstRegIndex(MISCREG_TIMM)", dataSize=8 + + #check if xbegin by hle - do not write rax if it is + rdval t1, "InstRegIndex(MISCREG_THLE)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br rom_local_label("skip_abort_code"), flags=(nCEZF,) + + rdval rax, "InstRegIndex(MISCREG_TRET)", dataSize=8 + br rom_local_label("end_abort_code") + +skip_abort_code: + # set HLE abort register + limm t1, 1, dataSize=8 + wrval "InstRegIndex(MISCREG_THABRT)", t1, dataSize=8 + +end_abort_code: + wrip t3, t2, dataSize=8 + limm t0, 0, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t0, dataSize=8 + eret +}; + +def macroop XEND { + # Make the default data size of pops 64 bits in 64 bit mode + #.adjust_env oszIn64Override + + mfence + + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + + subi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + + and t0, t1, t1, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + + xend + +end: + fault "NoFault" +}; + ''' + +eleCode = ''' +def macroop %(instr)s_ELE_%(suffix)s +{ + %(rdip)s + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("commit"), flags=(nCEZF,) + + # lock was not elided + %(rellock)s + br label("end") + +commit: + %(eldlock)s + mfence + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + subi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + xend + +end: + fault "NoFault" +}; +''' + +################################## +# XRelease 
implementations +################################## + +microcode += eleCode % { + "instr": "MOV", + "suffix": "M_I", + "rdip": "", + "eldlock": ''' + limm t1, imm + stele t1, seg, sib, disp + ''', + "rellock": ''' + limm t1, imm + st t1, seg, sib, disp + ''' +} + +microcode += eleCode % { + "instr": "MOV", + "suffix": "P_I", + "rdip": "rdip t7", + "eldlock": ''' + limm t1, imm + stele t1, seg, riprel, disp + ''', + "rellock": ''' + limm t1, imm + st t1, seg, riprel, disp + ''' +} + +microcode += eleCode % { + "instr": "MOV", + "suffix": "R_I", + "rdip": "", + "eldlock": ''' + limm reg, imm + ''', + "rellock": ''' + limm reg, imm + ''' +} + +################################## +# End XRelease implementations +################################## + #let {{ # class MOVD(Inst): # "GenFault ${new UnimpInstFault}" -#}}; +#}}; \ No newline at end of file diff -r 878f2f30b12d src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py Fri Jul 04 22:00:13 2014 -0500 @@ -105,3 +105,320 @@ mov reg, reg, t1 }; ''' +elbCode = ''' +def macroop %(instr)s_ELB_%(suffix)s +{ + %(rdip)s + mfence + + # check if call after abort + rdval t1, "InstRegIndex(MISCREG_THABRT)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("acqlck"), flags=(nCEZF,) + + # start a transaction + rdval t4, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t4, t4, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + + mfence + + wrval "InstRegIndex(MISCREG_TRAX)", rax, dataSize=8 + wrval "InstRegIndex(MISCREG_TRCX)", rcx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDX)", rdx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBX)", rbx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSP)", rsp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBP)", rbp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSI)", rsi, dataSize=8 + wrval 
"InstRegIndex(MISCREG_TRDI)", rdi, dataSize=8 + + wrval "InstRegIndex(MISCREG_TR8)", r8, dataSize=8 + wrval "InstRegIndex(MISCREG_TR9)", r9, dataSize=8 + wrval "InstRegIndex(MISCREG_TR10)", r10, dataSize=8 + wrval "InstRegIndex(MISCREG_TR11)", r11, dataSize=8 + wrval "InstRegIndex(MISCREG_TR12)", r12, dataSize=8 + wrval "InstRegIndex(MISCREG_TR13)", r13, dataSize=8 + wrval "InstRegIndex(MISCREG_TR14)", r14, dataSize=8 + wrval "InstRegIndex(MISCREG_TR15)", r15, dataSize=8 + + rdip t3, dataSize=8 + limm t2, -5, dataSize=8 + + wrval "InstRegIndex(MISCREG_TRIP)", t3, dataSize=8 + wrval "InstRegIndex(MISCREG_TIMM)", t2, dataSize=8 + + xor t1, t1, t1, dataSize=8 + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_THLE)", t1, dataSize=8 + + xbegin + +end: + addi t4, t4, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t4, dataSize=8 + %(eldlock)s + br label("end2") + +# if call after abort, get lock +acqlck: + # increment unelided xacquire + rdval t1, "InstRegIndex(MISCREG_TNELXQ)", dataSize=8 + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TNELXQ)", t1, dataSize=8 + %(getlock)s + +end2: + fault "NoFault" +}; +''' + +################################## +# XAcquire implementations +################################## + +microcode += elbCode % { + "instr": "XCHG", + "suffix": "R_M", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + stelo t1, seg, sib, disp + stelb reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, sib, disp + stul reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XCHG", + "suffix": "R_P", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + stelo t1, seg, riprel, disp + stelb reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, riprel, disp + stul reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XCHG", 
+ "suffix": "M_R", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + stelo t1, seg, sib, disp + stelb reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, sib, disp + stul reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XCHG", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + stelo t1, seg, riprel, disp + stelb reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, riprel, disp + stul reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XCHG", + "suffix": "R_R", + "rdip": "", + "eldlock": ''' + xor reg, reg, regm + xor regm, regm, reg + xor reg, reg, regm + ''', + "getlock": ''' + xor reg, reg, regm + xor regm, regm, reg + xor reg, reg, regm + ''' +} + +################################## +# End XAcquire implementations +################################## + +eleCode = ''' +def macroop %(instr)s_ELE_%(suffix)s +{ + %(rdip)s + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("commit"), flags=(nCEZF,) + + # lock was not elided + %(rellock)s + + #decrement unelided xacquire + rdval t1, "InstRegIndex(MISCREG_TNELXQ)", dataSize=8 + subi t1, t1, 0x1, flags=(EZF,), dataSize=8 + br label("skip"), flags=(nCEZF,) + + #set abort to 0 if unelided xacquire is 0 + xor t2, t2, t2, dataSize=8 + wrval "InstRegIndex(MISCREG_THABRT)", t2, dataSize=8 + +skip: + wrval "InstRegIndex(MISCREG_TNELXQ)", t1, dataSize=8 + br label("end") + +commit: + %(eldlock)s + mfence + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + subi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + xend + +end: + fault "NoFault" +}; +''' + +################################## +# XRelease 
implementations +################################## + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "R_M", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + stele reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, sib, disp + stul reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "R_P", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + stele reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, riprel, disp + stul reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + stele reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, sib, disp + stul reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + stele reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, riprel, disp + stul reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "R_R", + "rdip": "", + "eldlock": ''' + xor reg, reg, regm + xor regm, regm, reg + xor reg, reg, regm + ''', + "rellock": ''' + xor reg, reg, regm + xor regm, regm, reg + xor reg, reg, regm + ''' +} + +################################## +# End XRelease implementations +################################## \ No newline at end of file diff -r 878f2f30b12d src/arch/x86/isa/insts/general_purpose/semaphores.py --- a/src/arch/x86/isa/insts/general_purpose/semaphores.py Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/insts/general_purpose/semaphores.py Fri Jul 04 22:00:13 2014 -0500 @@ -165,6 +165,332 @@ 
"l": "l", "ul": "ul", "suffix": "LOCKED_P"} +elbCode = ''' +def macroop %(instr)s_ELB_%(suffix)s +{ + %(rdip)s + mfence + + # check if call after abort + rdval t1, "InstRegIndex(MISCREG_THABRT)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("acqlck"), flags=(nCEZF,) + + # start a transaction + rdval t4, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t4, t4, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + + mfence + + wrval "InstRegIndex(MISCREG_TRAX)", rax, dataSize=8 + wrval "InstRegIndex(MISCREG_TRCX)", rcx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDX)", rdx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBX)", rbx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSP)", rsp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBP)", rbp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSI)", rsi, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDI)", rdi, dataSize=8 + + wrval "InstRegIndex(MISCREG_TR8)", r8, dataSize=8 + wrval "InstRegIndex(MISCREG_TR9)", r9, dataSize=8 + wrval "InstRegIndex(MISCREG_TR10)", r10, dataSize=8 + wrval "InstRegIndex(MISCREG_TR11)", r11, dataSize=8 + wrval "InstRegIndex(MISCREG_TR12)", r12, dataSize=8 + wrval "InstRegIndex(MISCREG_TR13)", r13, dataSize=8 + wrval "InstRegIndex(MISCREG_TR14)", r14, dataSize=8 + wrval "InstRegIndex(MISCREG_TR15)", r15, dataSize=8 + + rdip t3, dataSize=8 + limm t2, -5, dataSize=8 + + wrval "InstRegIndex(MISCREG_TRIP)", t3, dataSize=8 + wrval "InstRegIndex(MISCREG_TIMM)", t2, dataSize=8 + + xor t1, t1, t1, dataSize=8 + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_THLE)", t1, dataSize=8 + + xbegin + +end: + addi t4, t4, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t4, dataSize=8 + %(eldlock)s + br label("end2") + +# if call after abort, get lock +acqlck: + # increment unelided xacquire + rdval t1, "InstRegIndex(MISCREG_TNELXQ)", dataSize=8 + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TNELXQ)", t1, dataSize=8 + %(getlock)s + +end2: + fault "NoFault" +}; 
+''' + +################################## +# XAcquire implementations +################################## + +microcode += elbCode % { + "instr": "XADD_LOCK", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stelo t1, seg, sib, disp + stelb t2, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, sib, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stul t2, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XADD_LOCK", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stelo t1, seg, riprel, disp + stelb t2, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, riprel, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stul t2, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "CMPXCHG", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + ld t1, seg, sib, disp + stelo t1, seg, sib, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + stelb t1, seg, sib, disp + mov rax, rax, t1, flags=(nCZF,) + ''', + "getlock": ''' + ldst t1, seg, sib, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + st t1, seg, sib, disp + mov rax, rax, t1, flags=(nCZF,) + ''' +} + +microcode += elbCode % { + "instr": "CMPXCHG", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + ld t1, seg, riprel, disp + stelo t1, seg, riprel, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + stelb t1, seg, riprel, disp + mov rax, rax, t1, flags=(nCZF,) + ''', + "getlock": ''' + ldst t1, seg, riprel, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + st t1, seg, riprel, disp + mov rax, rax, t1, flags=(nCZF,) + ''' +} + 
+microcode += elbCode % { + "instr": "CMPXCHG", + "suffix": "R_R", + "rdip": "", + "eldlock": ''' + sub t0, rax, reg, flags=(OF, SF, ZF, AF, PF, CF) + mov reg, reg, regm, flags=(CZF,) + mov rax, rax, reg, flags=(nCZF,) + ''', + "getlock": ''' + sub t0, rax, reg, flags=(OF, SF, ZF, AF, PF, CF) + mov reg, reg, regm, flags=(CZF,) + mov rax, rax, reg, flags=(nCZF,) + ''' +} + +################################## +# End XAcquire implementations +################################## + +eleCode = ''' +def macroop %(instr)s_ELE_%(suffix)s +{ + %(rdip)s + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("commit"), flags=(nCEZF,) + + # lock was not elided + %(rellock)s + + #decrement unelided xacquire + rdval t1, "InstRegIndex(MISCREG_TNELXQ)", dataSize=8 + subi t1, t1, 0x1, flags=(EZF,), dataSize=8 + br label("skip"), flags=(nCEZF,) + + #set abort to 0 if unelided xacquire is 0 + xor t2, t2, t2, dataSize=8 + wrval "InstRegIndex(MISCREG_THABRT)", t2, dataSize=8 + +skip: + wrval "InstRegIndex(MISCREG_TNELXQ)", t1, dataSize=8 + br label("end") + +commit: + %(eldlock)s + mfence + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + subi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + xend + +end: + fault "NoFault" +}; +''' + +################################## +# XRelease implementations +################################## + +microcode += eleCode % { + "instr": "XADD_LOCK", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stele t2, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, sib, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stul t2, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XADD_LOCK", + "suffix": "P_R", + "rdip": "rdip t7", + 
"eldlock": ''' + mfence + ld t1, seg, riprel, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stele t2, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, riprel, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stul t2, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "CMPXCHG", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + ld t1, seg, sib, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + stele t1, seg, sib, disp + mov rax, rax, t1, flags=(nCZF,) + ''', + "rellock": ''' + ldst t1, seg, sib, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + st t1, seg, sib, disp + mov rax, rax, t1, flags=(nCZF,) + ''' +} + +microcode += eleCode % { + "instr": "CMPXCHG", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + ld t1, seg, riprel, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + stele t1, seg, riprel, disp + mov rax, rax, t1, flags=(nCZF,) + ''', + "rellock": ''' + ldst t1, seg, riprel, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + st t1, seg, riprel, disp + mov rax, rax, t1, flags=(nCZF,) + ''' +} + +microcode += eleCode % { + "instr": "CMPXCHG", + "suffix": "R_R", + "rdip": "", + "eldlock": ''' + sub t0, rax, reg, flags=(OF, SF, ZF, AF, PF, CF) + mov reg, reg, regm, flags=(CZF,) + mov rax, rax, reg, flags=(nCZF,) + ''', + "rellock": ''' + sub t0, rax, reg, flags=(OF, SF, ZF, AF, PF, CF) + mov reg, reg, regm, flags=(CZF,) + mov rax, rax, reg, flags=(nCZF,) + ''' +} + +################################## +# End XRelease implementations +################################## + #let {{ # class XCHG(Inst): # "GenFault ${new UnimpInstFault}" diff -r 878f2f30b12d src/arch/x86/isa/microops/ldstop.isa --- a/src/arch/x86/isa/microops/ldstop.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/microops/ldstop.isa 
Fri Jul 04 22:00:13 2014 -0500 @@ -273,7 +273,8 @@ let {{ class LdStOp(X86Microop): def __init__(self, data, segment, addr, disp, - dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec): + dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec, + el_beg, el_end, el_orig): self.data = data [self.scale, self.index, self.base] = addr self.disp = disp @@ -289,6 +290,12 @@ self.instFlags += " | (1ULL << StaticInst::IsDataPrefetch)" if nonSpec: self.instFlags += " | (1ULL << StaticInst::IsNonSpeculative)" + if el_beg: + self.instFlags += " | (1ULL << StaticInst::IsElisionBegin)" + if el_end: + self.instFlags += " | (1ULL << StaticInst::IsElisionEnd)" + if el_orig: + self.instFlags += " | (1ULL << StaticInst::IsElisionOrig)" self.memFlags += " | (machInst.legacy.addr ? " + \ "(AddrSizeFlagBit << FlagShift) : 0)" @@ -309,7 +316,8 @@ class BigLdStOp(X86Microop): def __init__(self, data, segment, addr, disp, - dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec): + dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec, + el_beg=False, el_end=False, el_orig=False): self.data = data [self.scale, self.index, self.base] = addr self.disp = disp @@ -325,6 +333,12 @@ self.instFlags += " | (1ULL << StaticInst::IsDataPrefetch)" if nonSpec: self.instFlags += " | (1ULL << StaticInst::IsNonSpeculative)" + if el_beg: + self.instFlags += " | (1ULL << StaticInst::IsElisionBegin)" + if el_end: + self.instFlags += " | (1ULL << StaticInst::IsElisionEnd)" + if el_orig: + self.instFlags += " | (1ULL << StaticInst::IsElisionOrig)" self.memFlags += " | (machInst.legacy.addr ? 
" + \ "(AddrSizeFlagBit << FlagShift) : 0)" @@ -396,7 +410,7 @@ atCPL0=False, prefetch=False, nonSpec=False): super(LoadOp, self).__init__(data, segment, addr, disp, dataSize, addressSize, mem_flags, - atCPL0, prefetch, nonSpec) + atCPL0, prefetch, nonSpec, False, False, False) self.className = Name self.mnemonic = name @@ -427,7 +441,8 @@ } ''', big = False) - def defineMicroStoreOp(mnemonic, code, completeCode="", mem_flags="0"): + def defineMicroStoreOp(mnemonic, code, completeCode="", mem_flags="0", + el_beg=False, el_end=False, el_orig=False): global header_output global decoder_output global exec_output @@ -453,7 +468,7 @@ atCPL0=False, nonSpec=False): super(StoreOp, self).__init__(data, segment, addr, disp, dataSize, addressSize, mem_flags, atCPL0, False, - nonSpec) + nonSpec, el_beg, el_end, el_orig) self.className = Name self.mnemonic = name @@ -463,6 +478,13 @@ defineMicroStoreOp('Stul', 'Mem = pick(Data, 2, dataSize);', mem_flags="Request::LOCKED") + defineMicroStoreOp('StElB', 'Mem = pick(Data, 2, dataSize);', + mem_flags="Request::XELIDE", el_beg=True) + defineMicroStoreOp('StElE', 'Mem = pick(Data, 2, dataSize);', + mem_flags="Request::XELIDE", el_end=True) + defineMicroStoreOp('StElO', 'Mem = pick(Data, 2, dataSize);', + mem_flags="Request::XELIDE", el_orig=True) + defineMicroStoreOp('Stfp', code='Mem = FpData_uqw;') defineMicroStoreOp('Stfp87', code=''' @@ -495,7 +517,8 @@ def __init__(self, data, segment, addr, disp = 0, dataSize="env.dataSize", addressSize="env.addressSize"): super(LeaOp, self).__init__(data, segment, addr, disp, - dataSize, addressSize, "0", False, False, False) + dataSize, addressSize, "0", False, False, False, + False, False, False) self.className = "Lea" self.mnemonic = "lea" @@ -515,7 +538,7 @@ addressSize="env.addressSize"): super(TiaOp, self).__init__("InstRegIndex(NUM_INTREGS)", segment, addr, disp, dataSize, addressSize, "0", False, False, - False) + False, False, False, False) self.className = "Tia" self.mnemonic = "tia" 
@@ -527,7 +550,7 @@ addressSize="env.addressSize", atCPL0=False): super(CdaOp, self).__init__("InstRegIndex(NUM_INTREGS)", segment, addr, disp, dataSize, addressSize, "Request::NO_ACCESS", - atCPL0, False, False) + atCPL0, False, False, False, False, False) self.className = "Cda" self.mnemonic = "cda" diff -r 878f2f30b12d src/arch/x86/isa/microops/microops.isa --- a/src/arch/x86/isa/microops/microops.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/microops/microops.isa Fri Jul 04 22:00:13 2014 -0500 @@ -59,5 +59,8 @@ //Miscellaneous microop definitions ##include "specop.isa" +//Transaction microop definitions +##include "txnop.isa" + //Microops for printing out debug messages through M5 ##include "debug.isa" diff -r 878f2f30b12d src/arch/x86/isa/microops/txnop.isa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/arch/x86/isa/microops/txnop.isa Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,203 @@ +// Copyright (c) 2007-2008 The Hewlett-Packard Development Company +// All rights reserved. +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Copyright (c) 2008 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +////////////////////////////////////////////////////////////////////////// +// +// Txn Microop templates +// +////////////////////////////////////////////////////////////////////////// + + +// Txn templates + +def template MicroTxnExecute {{ + Fault %(class_name)s::execute(CPU_EXEC_CONTEXT *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Addr EA; + + %(evdec)s; + %(vardec)s; + %(code_exe)s; + + DPRINTF(X86, "%s : %s: The address is %#x\n", instMnem, mnemonic, EA); + fault = readMemAtomic(xc, traceData, EA, Mem, dataSize, memFlags); + + return NoFault; + } +}}; + +def template MicroTxnInitiateAcc {{ + Fault %(class_name)s::initiateAcc(CPU_EXEC_CONTEXT * xc, + Trace::InstRecord * traceData) const + { + Fault fault = NoFault; + Addr EA; + + %(evdec)s; + %(vardec)s; + + DPRINTF(X86, "%s : %s: The address is %#x\n", instMnem, mnemonic, EA); + fault = readMemAtomic(xc, traceData, EA, Mem, dataSize, memFlags); + + return NoFault; + } +}}; + +def template MicroTxnCompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + CPU_EXEC_CONTEXT * xc, + Trace::InstRecord * traceData) const + { + Fault fault = NoFault; + + %(vardec)s; + + Mem = getMem(pkt, dataSize, traceData); + + %(code_cmp)s; + return NoFault; + } +}}; + +def template MicroTxnOpDeclare {{ + class %(class_name)s : public X86ISA::X86MicroopBase + { + protected: + const Request::FlagsType memFlags; + + public: + %(class_name)s(ExtMachInst _machInst, + const char * instMnem, + uint64_t setFlags, + Request::FlagsType _memFlags); + + %(BasicExecDeclare)s + + %(InitiateAccDeclare)s + + %(CompleteAccDeclare)s + }; +}}; + +def template MicroTxnOpConstructor {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, const char * instMnem, uint64_t setFlags, + Request::FlagsType _memFlags) : + %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags, MemReadOp), + memFlags(_memFlags) + { + _numSrcRegs = 0; + _numDestRegs = 0; + _numFPDestRegs = 0; + 
_numIntDestRegs = 0; + _numCCDestRegs = 0; + flags[IsMemRef] = true; + flags[IsLoad] = true; + _destRegIdx[_numDestRegs++] = (InstRegIndex(MISCREG_TRET)).idx + Misc_Reg_Base; + flags[IsInteger] = true; + } +}}; + +let {{ + + # Make these empty strings so that concatenating onto + # them will always work. + header_output = "" + decoder_output = "" + exec_output = "" + + def defineMicroTxnOp(mnemonic, mem_flags="0"): + global header_output + global decoder_output + global exec_output + global microopClasses + Name = mnemonic + name = mnemonic.lower() + + # Build up the all register version of this micro op + iops = [InstObjParams(name, Name, 'X86ISA::X86MicroopBase', + {"evdec" : 'EA = 0', + "vardec" : ''' + uint8_t dataSize = 8; + uint64_t Mem = 0; + ''', + "code_exe": '', + "code_cmp": ''' + if(fault == NoFault) + { + xc->setMiscRegOperand(this, 0, Mem); + if (traceData) { traceData->setData(Mem); }; + } + '''} + )] + + for iop in iops: + header_output += MicroTxnOpDeclare.subst(iop) + decoder_output += MicroTxnOpConstructor.subst(iop) + exec_output += MicroTxnExecute.subst(iop) + exec_output += MicroTxnInitiateAcc.subst(iop) + exec_output += MicroTxnCompleteAcc.subst(iop) + + class TxnOp(X86Microop): + def __init__(self): + self.className = Name + self.mnemonic = name + self.memFlags = mem_flags + + def getAllocator(self, microFlags): + allocator = '''new %(class_name)s(machInst, macrocodeBlock, + %(flags)s, %(memFlags)s)''' % { + "class_name" : self.className, + "mnemonic" : self.mnemonic, + "flags" : self.microFlagsText(microFlags), + "memFlags" : self.memFlags} + return allocator + + microopClasses[name] = TxnOp + + defineMicroTxnOp('Xbegin', mem_flags="Request::XBEGIN") + + defineMicroTxnOp('Xabort', mem_flags="Request::XABORT") + + defineMicroTxnOp('Xend', mem_flags="Request::XEND") +}}; + diff -r 878f2f30b12d src/arch/x86/linux/syscalls.cc --- a/src/arch/x86/linux/syscalls.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/linux/syscalls.cc Fri Jul 04 
22:00:13 2014 -0500 @@ -241,7 +241,7 @@ /* 25 */ SyscallDesc("mremap", mremapFunc), /* 26 */ SyscallDesc("msync", unimplementedFunc), /* 27 */ SyscallDesc("mincore", unimplementedFunc), - /* 28 */ SyscallDesc("madvise", unimplementedFunc), + /* 28 */ SyscallDesc("madvise", ignoreFunc), /* 29 */ SyscallDesc("shmget", unimplementedFunc), /* 30 */ SyscallDesc("shmat", unimplementedFunc), /* 31 */ SyscallDesc("shmctl", unimplementedFunc), @@ -711,7 +711,7 @@ /* 216 */ SyscallDesc("setfsgid32", unimplementedFunc), /* 217 */ SyscallDesc("pivot_root", unimplementedFunc), /* 218 */ SyscallDesc("mincore", unimplementedFunc), - /* 219 */ SyscallDesc("madvise", unimplementedFunc), + /* 219 */ SyscallDesc("madvise", ignoreFunc), /* 220 */ SyscallDesc("madvise1", unimplementedFunc), /* 221 */ SyscallDesc("getdents64", unimplementedFunc), /* 222 */ SyscallDesc("fcntl64", unimplementedFunc), diff -r 878f2f30b12d src/arch/x86/regs/misc.hh --- a/src/arch/x86/regs/misc.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/regs/misc.hh Fri Jul 04 22:00:13 2014 -0500 @@ -396,6 +396,40 @@ // "Fake" MSRs for internally implemented devices MISCREG_PCI_CONFIG_ADDRESS, + // Saving integer registers for transactions + MISCREG_TDEPTH, + MISCREG_TRAX, + MISCREG_TRCX, + MISCREG_TRDX, + MISCREG_TRBX, + MISCREG_TRSP, + MISCREG_TRBP, + MISCREG_TRSI, + MISCREG_TRDI, + + MISCREG_TR8, + MISCREG_TR9, + MISCREG_TR10, + MISCREG_TR11, + MISCREG_TR12, + MISCREG_TR13, + MISCREG_TR14, + MISCREG_TR15, + + MISCREG_TRIP, + MISCREG_TIMM, + + // registers to help maintain state + // used to specify that there was an hle abort + MISCREG_THABRT, + // check if the outermost xbring was an xacquire + MISCREG_THLE, + // keep track of nested xacq depth + // used to stop elision after an abort + MISCREG_TNELXQ, + // implicit return register for txn instructions + MISCREG_TRET, + NUM_MISCREGS }; diff -r 878f2f30b12d src/cpu/StaticInstFlags.py --- a/src/cpu/StaticInstFlags.py Wed Jul 02 13:19:13 2014 -0400 +++ 
b/src/cpu/StaticInstFlags.py Fri Jul 04 22:00:13 2014 -0500 @@ -108,5 +108,10 @@ 'IsMicroBranch', # This microop branches within the microcode for # a macroop 'IsDspOp', - 'IsSquashAfter' # Squash all uncommitted state after executed + 'IsSquashAfter', # Squash all uncommitted state after executed + + 'IsElisionBegin', # Start eliding the write to a memory location + 'IsElisionEnd', # Stop eliding the write to a memory location + 'IsElisionOrig' # The original value of an elided memory location + ] diff -r 878f2f30b12d src/cpu/base_dyn_inst.hh --- a/src/cpu/base_dyn_inst.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/base_dyn_inst.hh Fri Jul 04 22:00:13 2014 -0500 @@ -553,6 +553,11 @@ bool isFirstMicroop() const { return staticInst->isFirstMicroop(); } bool isMicroBranch() const { return staticInst->isMicroBranch(); } + /* Adding Elision detection */ + bool isElisionBegin() const { return staticInst->isElisionBegin(); } + bool isElisionEnd() const { return staticInst->isElisionEnd(); } + bool isElisionOrig() const { return staticInst->isElisionOrig(); } + /** Temporarily sets this instruction as a serialize before instruction. */ void setSerializeBefore() { status.set(SerializeBefore); } @@ -897,6 +902,17 @@ req->taskId(cpu->taskId()); + if(req->isTxn()) { + // forcing effective address as valid without translating + // shouldn't be a problem since the address is not used + // downstream + effAddr = 0; + effSize = 0; + instFlags[EffAddrValid] = true; + fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); + return fault; + } + + // Only split the request if the ISA supports unaligned accesses. 
if (TheISA::HasUnalignedMemAcc) { splitRequest(req, sreqLow, sreqHigh); diff -r 878f2f30b12d src/cpu/o3/commit_impl.hh --- a/src/cpu/o3/commit_impl.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/commit_impl.hh Fri Jul 04 22:00:13 2014 -0500 @@ -998,6 +998,43 @@ if (interrupt != NoFault) handleInterrupt(); + /* + //////////////////////////////////// + // Handling implicit transaction abort interrupt + // - Used to separate detection and handling + // - Currently not being used since handling and + // detection takes place at the same time + // TODO: + // - Check if handling and detection can be separated + //////////////////////////////////// + + if (!FullSystem && + canHandleInterrupts && + cpu->instList.empty() && + (cpu->tcBase(0))->isTxnImplicitAbort()) + { + DPRINTF(Commit, "Implicit TxnAbort being handled."); + //handle implicit TxnAbort + using namespace TheISA; + ThreadContext *tc = cpu->tcBase(0); + Fault x = new X86TxnAbort(); + x->invoke(tc); + //thread->decoder.reset(); + tc->setTxnImplicitAbort(false); + } + else if(!FullSystem && + (cpu->tcBase(0))->isTxnImplicitAbort()) { + DPRINTF(Commit, "Implicit TxnAbort pending: instruction is %s" + "in flight, ROB is %sempty\n", + canHandleInterrupts ? "not " : "", + cpu->instList.empty() ? "" : "not " ); + } + + //////////////////////////////////// + // end implicit TxnAbort handling + //////////////////////////////////// +*/ + int commit_thread = getCommittingThread(); if (commit_thread == -1 || !rob->isHeadReady(commit_thread)) @@ -1183,6 +1220,17 @@ // Check if the instruction caused a fault. If so, trap. 
Fault inst_fault = head_inst->getFault(); + // if txn abort, we are going to ignore all faults (since they + // may have been caused due to aborted memory accesses and their + // effects will anyway be wiped out by abort) + ThreadContext *tc = cpu->tcBase(0); + if (tc->isTxnImplicitAbort() && inst_fault != NoFault) + { + DPRINTF(Commit, "Inst [sn:%lli] PC %s removing fault\n", + head_inst->seqNum, head_inst->pcState()); + inst_fault = NoFault; + } + // Stores mark themselves as completed. if (!head_inst->isStore() && inst_fault == NoFault) { head_inst->setCompleted(); diff -r 878f2f30b12d src/cpu/o3/fetch_impl.hh --- a/src/cpu/o3/fetch_impl.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/fetch_impl.hh Fri Jul 04 22:00:13 2014 -0500 @@ -378,6 +378,51 @@ DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid); assert(!cpu->switchedOut()); + ///////////////////////////// + // handle implicit abort in instruction cache access + ///////////////////////////// + + bool impAbort = false; + + if (pkt && (pkt->cmd == MemCmd::TxnAbortResp)) + { + impAbort = true; + ThreadContext *tc = cpu->tcBase(0); + + if (pkt->req->isXabort()) + { + // There should never be a XAbort request to ICache + panic("XAbort request in ICache! 
Not expected!\n"); + } + else + { + if (tc->isTxnImplicitAbort()) { + DPRINTF(Fetch, "Consequent Abort in ICache\n"); + } + else { + DPRINTF(Fetch, "Implicit Abort in ICache\n"); + // the first one to see the abort sets the implicitAbort + // flag and says that abort hasn't been called yet + tc->setTxnImplicitAbort(true); + tc->setTxnAbortCalled(false); + } + + if(!interruptPending && !tc->isTxnAbortCalled()) { + tc->setTxnAbortCalled(true); + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + else if(interruptPending && !tc->isTxnAbortCalled()) { + DPRINTF(Fetch, "Deferring abort invoke because of pending interrupt.\n"); + } + } + } + + ///////////////////////////// + // end handling implicit abort in instruction cache access + ///////////////////////////// + // Only change the status if it's still waiting on the icache access // to return. if (fetchStatus[tid] != IcacheWaitResponse || @@ -388,8 +433,10 @@ return; } - memcpy(fetchBuffer[tid], pkt->getPtr(), fetchBufferSize); - fetchBufferValid[tid] = true; + if(!impAbort) { + memcpy(fetchBuffer[tid], pkt->getPtr(), fetchBufferSize); + fetchBufferValid[tid] = true; + } // Wake up the CPU (if it went to sleep and was waiting on // this completion event). 
diff -r 878f2f30b12d src/cpu/o3/inst_queue_impl.hh --- a/src/cpu/o3/inst_queue_impl.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/inst_queue_impl.hh Fri Jul 04 22:00:13 2014 -0500 @@ -1151,8 +1151,9 @@ } if (!squashed_inst->isIssued() || - (squashed_inst->isMemRef() && - !squashed_inst->memOpDone())) { + ((squashed_inst->isMemRef() || squashed_inst->isMemBarrier() || + squashed_inst->isWriteBarrier()) && + !squashed_inst->memOpDone())) { DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %s squashed.\n", tid, squashed_inst->seqNum, squashed_inst->pcState()); @@ -1202,11 +1203,14 @@ // nonSpecInsts already when they are ready, and so we // cannot always expect to find them if (ns_inst_it == nonSpecInsts.end()) { + ThreadContext *tc = cpu->tcBase(0); + // loads that became ready but stalled on a + // blocked cache are already removed from + // nonSpecInsts, and have not faulted assert(squashed_inst->getFault() != NoFault || - squashed_inst->isMemRef()); + squashed_inst->isMemRef() || + tc->isTxnImplicitAbort()); } else { (*ns_inst_it).second = NULL; diff -r 878f2f30b12d src/cpu/o3/lsq_unit.hh --- a/src/cpu/o3/lsq_unit.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/lsq_unit.hh Fri Jul 04 22:00:13 2014 -0500 @@ -156,6 +156,9 @@ /** Writes back stores. */ void writebackStores(); + /** Checks for implicit abort in the data access response. */ + bool checkImplicitAbort(PacketPtr pkt, DynInstPtr &inst, const char* rw); + /** Completes the data access that has been returned from the * memory system. */ void completeDataAccess(PacketPtr pkt); @@ -274,6 +277,9 @@ /** Decrements the given load index (circular queue). */ inline void decrLdIdx(int &load_idx) const; + /** Checking if the instruction is an eliding store. */ + void checkStoreElision(int storeWBIdx); + public: /** Debugging function to dump instructions in the LSQ. */ void dumpInsts() const; @@ -405,6 +411,10 @@ /** The load queue. 
*/ std::vector loadQueue; + /** The elision buffers - modelled as CAM */ + std::map elsBuffOrig; + std::map elsBuffCurr; + /** The number of LQ entries, plus a sentinel entry (circular queue). * @todo: Consider having var that records the true number of LQ entries. */ @@ -660,6 +670,41 @@ return NoFault; } + // check in elision buffer before looking for store forwarding + // currently only does full match - probably what is required + Addr a = req->getVaddr(); + if(elsBuffCurr.find(a) != elsBuffCurr.end()) { + assert(!load_inst->memData); + load_inst->memData = new uint8_t[64]; + + // memset required? maybe not + memcpy(load_inst->memData, elsBuffCurr[a], req->getSize()); + + DPRINTF(LSQUnit, "Forwarding from elision buffer to load to " + "addr %#x, data %#x\n", req->getVaddr(), data); + + PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq); + data_pkt->dataStatic(load_inst->memData); + + WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // @todo: Need to make this a parameter. + cpu->schedule(wb, curTick()); + + // Don't need to do anything special for split loads. + if (TheISA::HasUnalignedMemAcc && sreqLow) { + delete sreqLow; + delete sreqHigh; + } + + ++lsqForwLoads; + return NoFault; + } + + // end elision forwarding + while (store_idx != -1) { // End once we've reached the top of the LSQ if (store_idx == storeWBIdx) { @@ -798,7 +843,17 @@ bool completedFirst = false; if (!lsq->cacheBlocked()) { MemCmd command = - req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq; + req->isLLSC() ? MemCmd::LoadLockedReq : + req->isXbegin() ? MemCmd::TxnStartReq : + req->isXabort() ? MemCmd::TxnAbortReq : + req->isXend() ? 
MemCmd::TxnCommitReq : MemCmd::ReadReq; + + // if abort clear the two elision buffers + if(req->isXabort()) { + elsBuffOrig.clear(); + elsBuffCurr.clear(); + } + PacketPtr data_pkt = new Packet(req, command); PacketPtr fst_data_pkt = NULL; PacketPtr snd_data_pkt = NULL; diff -r 878f2f30b12d src/cpu/o3/lsq_unit_impl.hh --- a/src/cpu/o3/lsq_unit_impl.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/lsq_unit_impl.hh Fri Jul 04 22:00:13 2014 -0500 @@ -90,6 +90,73 @@ return "Store writeback"; } +//////////////////////////////////// +// Handling implicit transaction abort interrupt +//////////////////////////////////// + +template +bool +LSQUnit::checkImplicitAbort(PacketPtr pkt, DynInstPtr &inst, const char* rw) +{ + if (pkt->cmd == MemCmd::TxnAbortResp) { + ThreadContext *tc = cpu->tcBase(0); + + if (pkt->req->isXabort()) + { + DPRINTF(LSQUnit, "Clearing Implicit abort %s: sn[%lli]\n", + rw, inst->seqNum); + + // Explicit abort has been seen - set implicitAbort flag + // to false + // This has to be actually set during commit of the + // abort instruction. Think about how. + tc->setTxnImplicitAbort(false); + } + else + { + if (tc->isTxnImplicitAbort()) { + DPRINTF(LSQUnit, "Consequent Abort in Data %s: sn[%lli]\n", + rw, inst->seqNum); + } + else { + DPRINTF(LSQUnit, "Implicit Abort in Data %s: sn[%lli]\n", + rw, inst->seqNum); + + // the first one to see the abort sets the implicitAbort + // flag and says that abort hasn't been called yet + tc->setTxnImplicitAbort(true); + tc->setTxnAbortCalled(true); + + // BRILIANT! + // why not let fetch handle all implicit aborts? + // if() { + + // How should the implicit abort be handled? + // - as an interrupt? 
(no flush) - CURRENT + // - combination of misprediction and interrupt (flush) + + // tc->setTxnImplicitAbort(true); + // tc->setTxnAbortCalled(false); + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + // } + // else { + // DPRINTF(IEW, "Deferring abort invoke because of pending interrupt.\n"); + // } + } + } + + return true; + } + + return false; +} + +//////////////////////////////////// +// end implicit TxnAbort handling +//////////////////////////////////// + template void LSQUnit::completeDataAccess(PacketPtr pkt) @@ -122,6 +189,9 @@ } if (inst->isStore()) { + // check if there was an implicit abort + checkImplicitAbort(pkt, inst, "Write"); + completeStore(state->idx); } } @@ -774,6 +844,100 @@ template void +LSQUnit::checkStoreElision(int storeWBIdx) +{ + if(storeQueue[storeWBIdx].inst->isElisionOrig() || + storeQueue[storeWBIdx].inst->isElisionBegin() || + storeQueue[storeWBIdx].inst->isElisionEnd() + ) + { + //panic("To handle: Elision instructions.\n"); + ThreadContext *tc = cpu->tcBase(0); + + // on orig, add address and data to elsBuffOrig + if(storeQueue[storeWBIdx].inst->isElisionOrig()) { + Addr a = storeQueue[storeWBIdx].req->getVaddr(); + char *d = new char[16]; + memcpy(d, storeQueue[storeWBIdx].data, 16); + + if(elsBuffOrig.find(a) == elsBuffOrig.end()) { + DPRINTF(LSQUnit, "Adding to elsBuffOrig: " + "Addr: %#x, Data:'%s'\n", a, d); + elsBuffOrig[a] = d; + } + else { + // eliding already elided address - abort + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + } + + // on begin, add address and data to elsBuffCurr + // verify that address is in orig, else abort + if(storeQueue[storeWBIdx].inst->isElisionBegin()) { + Addr a = storeQueue[storeWBIdx].req->getVaddr(); + char *d = new char[16]; + memcpy(d, storeQueue[storeWBIdx].data, 16*sizeof(char)); + + if(elsBuffOrig.find(a) == elsBuffOrig.end()) { + DPRINTF(LSQUnit, "Addr: %#x not found in " + "elsBuffOrig. 
Aborting\n", a); + + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + else { + DPRINTF(LSQUnit, "Adding to elsBuffCurr: " + "Addr: %#x, Data:'%s'\n", a, d); + elsBuffCurr[a] = d; + } + } + + // on end, remove address from elisionBuffer(s) + // if not present, or data different from orig, abort + if(storeQueue[storeWBIdx].inst->isElisionEnd()) { + Addr a = storeQueue[storeWBIdx].req->getVaddr(); + + // checking on orig for address + if(elsBuffOrig.find(a) == elsBuffOrig.end()) { + DPRINTF(LSQUnit, "Addr: %#x not found in " + "elsBuffOrig. Aborting\n", a); + + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + else { + char *d1 = storeQueue[storeWBIdx].data; + char *d2 = elsBuffOrig[a]; + if(memcmp(d1, d2, 16*sizeof(char)) == 0) { + DPRINTF(LSQUnit, "Removing Addr: %#x from " + "elision buffers\n", a); + elsBuffCurr.erase(a); + elsBuffOrig.erase(a); + } + else { + DPRINTF(LSQUnit, "Addr: %#x Data: '%s' not found " + "in elsBuffOrig ('%s'). Aborting\n", a, d1, d2); + + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + + } + } + + // set size to 0 so that the instruction doesnot write to + // cache and completes + storeQueue[storeWBIdx].size = 0; + } +} + +template +void LSQUnit::writebackStores() { // First writeback the second packet from any split store that didn't @@ -795,6 +959,8 @@ break; } + checkStoreElision(storeWBIdx); + // Store didn't write any data so no need to write it back to // memory. if (storeQueue[storeWBIdx].size == 0) { @@ -1139,8 +1305,12 @@ if (!inst->isExecuted()) { inst->setExecuted(); - // Complete access to copy data to proper place. - inst->completeAcc(pkt); + // check if there was an implicit abort and set instruction fault + bool isImplicitAbort = checkImplicitAbort(pkt, inst, "Read"); + if (!isImplicitAbort) { + // Complete access to copy data to proper place. 
+ inst->completeAcc(pkt); + } } // Need to insert instruction into queue to commit diff -r 878f2f30b12d src/cpu/static_inst.hh --- a/src/cpu/static_inst.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/static_inst.hh Fri Jul 04 22:00:13 2014 -0500 @@ -179,6 +179,10 @@ bool isMicroBranch() const { return flags[IsMicroBranch]; } //@} + bool isElisionBegin() const { return flags[IsElisionBegin]; } + bool isElisionEnd() const { return flags[IsElisionEnd]; } + bool isElisionOrig() const { return flags[IsElisionOrig]; } + void setLastMicroop() { flags[IsLastMicroop] = true; } void setDelayedCommit() { flags[IsDelayedCommit] = true; } void setFlag(Flags f) { flags[f] = true; } diff -r 878f2f30b12d src/cpu/thread_context.hh --- a/src/cpu/thread_context.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/thread_context.hh Fri Jul 04 22:00:13 2014 -0500 @@ -99,6 +99,9 @@ typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; typedef TheISA::MiscReg MiscReg; + bool isAbort; + bool abortCalled; + public: enum Status @@ -297,6 +300,22 @@ virtual void setCCRegFlat(int idx, CCReg val) = 0; /** @} */ + /* Transaction functions */ + bool isTxnImplicitAbort() { + return isAbort; + } + + void setTxnImplicitAbort(bool aborted) { + isAbort = aborted; + } + + bool isTxnAbortCalled() { + return abortCalled; + } + + void setTxnAbortCalled(bool called) { + abortCalled = called; + } }; /** @@ -314,7 +333,7 @@ { public: ProxyThreadContext(TC *actual_tc) - { actualTC = actual_tc; } + { actualTC = actual_tc; this->setTxnImplicitAbort(false); } private: TC *actualTC; diff -r 878f2f30b12d src/mem/packet.hh --- a/src/mem/packet.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/packet.hh Fri Jul 04 22:00:13 2014 -0500 @@ -128,6 +128,13 @@ PrintReq, // Print state matching address FlushReq, //request for a cache flush InvalidationReq, // request for address to be invalidated from lsq + // Transaction commands + TxnStartReq, // request to start a transaction + TxnStartResp, + TxnCommitReq, 
// request to commit a transaction + TxnCommitResp, + TxnAbortReq, // request to abort a transaction (explicit) + TxnAbortResp, // response to transaction abort (implicit/explicit) NUM_MEM_CMDS }; @@ -616,6 +623,11 @@ flags.set(VALID_ADDR); _isSecure = req->isSecure(); } + + if (req->isTxn()) { + flags.set(VALID_ADDR); + } + if (req->hasSize()) { size = req->getSize(); flags.set(VALID_SIZE); @@ -732,6 +744,35 @@ clearSrc(); } + /** + * Take a request packet and modify it in place to be suitable for + * returning as a response to that request. The source field is + * turned into the destination, and subsequently cleared. Note + * that the latter is not necessary for atomic requests, but + * causes no harm as neither field is valid. + */ + void + makeTxnResponse(bool isImplicitAbort) + { + assert(needsResponse()); + assert(isRequest()); + origCmd = cmd; + + if(isImplicitAbort) { + cmd = MemCmd::TxnAbortResp; + } + else { + cmd = cmd.responseCommand(); + } + + // responses are never express, even if the snoop that + // triggered them was + flags.clear(EXPRESS_SNOOP); + + dest = src; + clearSrc(); + } + void makeAtomicResponse() { diff -r 878f2f30b12d src/mem/packet.cc --- a/src/mem/packet.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/packet.cc Fri Jul 04 22:00:13 2014 -0500 @@ -169,6 +169,18 @@ /* Invalidation Request */ { SET3(NeedsExclusive, IsInvalidate, IsRequest), InvalidCmd, "InvalidationReq" }, + /* TxnStartReq */ + { SET3(IsRead, IsRequest, NeedsResponse), TxnStartResp, "TxnStartReq" }, + /* TxnStartResp */ + { SET2(IsRead, IsResponse), InvalidCmd, "TxnStartResp" }, + /* TxnCommitReq */ + { SET3(IsRead, IsRequest, NeedsResponse), TxnCommitResp, "TxnCommitReq" }, + /* TxnCommitResp */ + { SET2(IsRead, IsResponse), InvalidCmd, "TxnCommitResp" }, + /* TxnAbortReq */ + { SET3(IsRead, IsRequest, NeedsResponse), TxnAbortResp, "TxnAbortReq" }, + /* TxnAbortResp */ + { SET2(IsRead, IsResponse), InvalidCmd, "TxnAbortResp" }, }; bool diff -r 878f2f30b12d 
src/mem/protocol/MESI_Three_Level-msg.sm --- a/src/mem/protocol/MESI_Three_Level-msg.sm Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/protocol/MESI_Three_Level-msg.sm Fri Jul 04 22:00:13 2014 -0500 @@ -35,6 +35,7 @@ GET_INSTR, desc="Get Instruction"; INV, desc="INValidate"; PUTX, desc="Replacement message"; + PUTX_COPY, desc="Data block to be copied in L1. L0 will still be in M state"; WB_ACK, desc="Writeback ack"; @@ -51,6 +52,10 @@ // shared block before it got the data. So the L0 cache can use the data // but not store it. STALE_DATA; + + // This is used by L0 to tell L1 that it cannot provide the latest + // value + NAK; } // Class for messages sent between the L0 and the L1 controllers. diff -r 878f2f30b12d src/mem/protocol/MESI_Txn_Three_Level-L0cache.sm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/mem/protocol/MESI_Txn_Three_Level-L0cache.sm Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,1088 @@ +/* + * Copyright (c) 2013 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +machine(L0Cache, "MESI Directory L0 Cache") + : Sequencer * sequencer, + CacheMemory * Icache, + CacheMemory * Dcache, + Cycles request_latency = 2, + Cycles response_latency = 2, + bool send_evictions, +{ + // Transaction specific variables + bool isTransaction, default="false"; + bool abortTransaction, default="false"; + int txnDepth, default="0"; + int abortCode, default="0"; + + // NODE L0 CACHE + // From this node's L0 cache to the network + MessageBuffer requestFromCache, network="To", physical_network="0", ordered="true"; + + // To this node's L0 cache FROM the network + MessageBuffer bufferToCache, network="From", physical_network="0", ordered="true"; + + // Message queue between this controller and the processor + MessageBuffer mandatoryQueue, ordered="false"; + + // STATES + state_declaration(State, desc="Cache states", default="L0Cache_State_I") { + // Base states + + // The cache entry has not been allocated. + NP, AccessPermission:Invalid, desc="Not present in either cache"; + + // The cache entry has been allocated, but is not in use. + I, AccessPermission:Invalid; + + // The cache entry is in shared mode. The processor can read this entry + // but it cannot write to it. + S, AccessPermission:Read_Only; + + // The cache entry is in exclusive mode. The processor can read this + // entry. It can write to this entry without informing the directory. + // On writing, the entry moves to M state. 
+ E, AccessPermission:Read_Only; + + // The processor has read and write permissions on this entry. + M, AccessPermission:Read_Write; + + // Transient States + + // The cache controller has requested that this entry be fetched in + // shared state so that the processor can read it. + IS, AccessPermission:Busy; + + // The cache controller has requested that this entry be fetched in + // modify state so that the processor can read/write it. + IM, AccessPermission:Busy; + + // The cache controller had read permission over the entry. But now the + // processor needs to write to it. So, the controller has requested for + // write permission. + SM, AccessPermission:Read_Only; + } + + // EVENTS + enumeration(Event, desc="Cache events") { + // L0 events + Load, desc="Load request from the home processor"; + Ifetch, desc="I-fetch request from the home processor"; + Store, desc="Store request from the home processor"; + + Inv, desc="Invalidate request from L2 bank"; + + // internal generated request + L0_Replacement, desc="L0 Replacement", format="!r"; + + // other requests + Fwd_GETX, desc="GETX from other processor"; + Fwd_GETS, desc="GETS from other processor"; + Fwd_GET_INSTR, desc="GET_INSTR from other processor"; + + // other requests - transaction + Fwd_GETS_Txn, desc="GETS from other processor during txn"; + Fwd_GET_INSTR_Txn, desc="GET_INSTR from other processor during txn"; + + // + Data, desc="Data for processor"; + Data_Exclusive, desc="Data for processor"; + Data_Stale, desc="Data for processor, but not for storage"; + + Ack, desc="Ack for processor"; + Ack_all, desc="Last ack for processor"; + + WB_Ack, desc="Ack for replacement"; + + // internal request for transactional pop of request + Txn_PopQ, desc="Fake signal to pop the mandatory queue"; + Txn_NAK, desc="Signal to send a NAK after aborting"; + Txn_InvAck, desc="Signal to send an Inv ack after aborting txn"; + + // Transaction events: check if necessary. 
o/w delete + Start_Txn, desc="Start transaction request from home processor"; + Abort_Txn, desc="Abort transaction request from home processor"; + End_Txn, desc="End transaction request from home processor"; + Abort_Ack, desc="Abort ack to processor"; + Commit_Ack, desc="Commit ack to processor"; + } + + // TYPES + + // CacheEntry + structure(Entry, desc="...", interface="AbstractCacheEntry" ) { + State CacheState, desc="cache state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, default="false", desc="data is dirty"; + + void setReadSet(), external="yes"; + void setWriteSet(), external="yes"; + bool getWriteSet(), external="yes"; + bool getReadSet(), external="yes"; + + // override invalidateEntry + void invalidateEntry(bool dummy) { + CacheState := State:I; + Dirty := false; + } + } + + // TBE fields + structure(TBE, desc="...") { + Address Addr, desc="Physical address for this TBE"; + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Buffer for the data block"; + bool Dirty, default="false", desc="data is dirty"; + int pendingAcks, default="0", desc="number of pending acks"; + } + + structure(TBETable, external="yes") { + TBE lookup(Address); + TBE getNullEntry(); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } + + TBETable TBEs, template="", constructor="m_number_of_TBEs"; + + GenericBloomFilter TxnBF, constructor="m_bloom_filter_id"; + + void set_cache_entry(AbstractCacheEntry a); + void unset_cache_entry(); + void set_tbe(TBE a); + void unset_tbe(); + void wakeUpBuffers(Address a); + void wakeUpAllBuffers(Address a); + void profileMsgDelay(int virtualNetworkType, Cycles c); + void clearBlockMap(); + + // inclusive cache returns L0 entries only + Entry getCacheEntry(Address addr), return_by_pointer="yes" { + Entry Dcache_entry := static_cast(Entry, "pointer", Dcache[addr]); + if(is_valid(Dcache_entry)) { + return Dcache_entry; + } + + Entry Icache_entry := static_cast(Entry, "pointer", 
Icache[addr]); + return Icache_entry; + } + + Entry getDCacheEntry(Address addr), return_by_pointer="yes" { + Entry Dcache_entry := static_cast(Entry, "pointer", Dcache[addr]); + return Dcache_entry; + } + + Entry getICacheEntry(Address addr), return_by_pointer="yes" { + Entry Icache_entry := static_cast(Entry, "pointer", Icache[addr]); + return Icache_entry; + } + + State getState(TBE tbe, Entry cache_entry, Address addr) { + assert((Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)) == false); + + if(is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:NP; + } + + void setState(TBE tbe, Entry cache_entry, Address addr, State state) { + assert((Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)) == false); + + // MUST CHANGE + if(is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + AccessPermission getAccessPermission(Address addr) { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + DPRINTF(RubySlicc, "%s\n", L0Cache_State_to_permission(tbe.TBEState)); + return L0Cache_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + DPRINTF(RubySlicc, "%s\n", L0Cache_State_to_permission(cache_entry.CacheState)); + return L0Cache_State_to_permission(cache_entry.CacheState); + } + + DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent); + return AccessPermission:NotPresent; + } + + DataBlock getDataBlock(Address addr), return_by_ref="yes" { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + return tbe.DataBlk; + } + + return getCacheEntry(addr).DataBlk; + } + + void setAccessPermission(Entry cache_entry, Address addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(L0Cache_State_to_permission(state)); + } + } + + Event mandatory_request_type_to_event(RubyRequestType type) { + if (type == RubyRequestType:LD) { + return Event:Load; + 
} else if (type == RubyRequestType:IFETCH) { + return Event:Ifetch; + } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) { + return Event:Store; + } else { + error("Invalid RubyRequestType"); + } + } + + int getPendingAcks(TBE tbe) { + return tbe.pendingAcks; + } + + out_port(requestNetwork_out, CoherenceMsg, requestFromCache); + + // Messages for this L0 cache from the L1 cache + in_port(messgeBuffer_in, CoherenceMsg, bufferToCache, rank = 1) { + if (messgeBuffer_in.isReady()) { + peek(messgeBuffer_in, CoherenceMsg, block_on="Addr") { + assert(in_msg.Destination == machineID); + + DPRINTF(RubySlicc, "DEBUG: Trying next request from L1Q: %s\n", + in_msg.Addr); + + Entry cache_entry := getCacheEntry(in_msg.Addr); + TBE tbe := TBEs[in_msg.Addr]; + + if(in_msg.Class == CoherenceClass:DATA_EXCLUSIVE) { + trigger(Event:Data_Exclusive, in_msg.Addr, cache_entry, tbe); + } else if(in_msg.Class == CoherenceClass:DATA) { + trigger(Event:Data, in_msg.Addr, cache_entry, tbe); + } else if(in_msg.Class == CoherenceClass:STALE_DATA) { + trigger(Event:Data_Stale, in_msg.Addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:ACK) { + trigger(Event:Ack, in_msg.Addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:WB_ACK) { + trigger(Event:WB_Ack, in_msg.Addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:INV) { + // if isTransaction and cache entry NULL + // and bloom filter match, then abort + if(isTransaction == true && + is_valid(cache_entry) == false && + TxnBF.isSet(in_msg.Addr) == true) + { + Entry nce := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE ntbe := TBEs.getNullEntry(); + trigger(Event:Txn_InvAck, in_msg.Addr, nce, ntbe); + } + else { + trigger(Event:Inv, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Class == CoherenceClass:GETX || + in_msg.Class == CoherenceClass:UPGRADE) { + // if isTransaction and cache entry NULL + // and bloom filter match, then abort + 
if(isTransaction == true && + is_valid(cache_entry) == false && + TxnBF.isSet(in_msg.Addr) == true) + { + Entry nce := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE ntbe := TBEs.getNullEntry(); + trigger(Event:Txn_NAK, in_msg.Addr, nce, ntbe); + } + else { + // upgrade transforms to GETX due to race + trigger(Event:Fwd_GETX, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Class == CoherenceClass:GETS) { + // if isTransaction and cache entry NULL + // and bloom filter match, then abort + if(isTransaction == true && + is_valid(cache_entry) == false && + TxnBF.isSet(in_msg.Addr) == true) + { + Entry nce := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE ntbe := TBEs.getNullEntry(); + trigger(Event:Txn_NAK, in_msg.Addr, nce, ntbe); + } + else if(cache_entry.getWriteSet() == true && + isTransaction == true) { + assert(cache_entry.CacheState == State:M); + trigger(Event:Fwd_GETS_Txn, in_msg.Addr, cache_entry, tbe); + } + else { + trigger(Event:Fwd_GETS, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Class == CoherenceClass:GET_INSTR) { + // if isTransaction and cache entry NULL + // and bloom filter match, then abort + if(isTransaction == true && + is_valid(cache_entry) == false && + TxnBF.isSet(in_msg.Addr) == true) + { + Entry nce := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE ntbe := TBEs.getNullEntry(); + trigger(Event:Txn_NAK, in_msg.Addr, nce, ntbe); + } + else if(cache_entry.getWriteSet() == true && + isTransaction == true) { + assert(cache_entry.CacheState == State:M); + trigger(Event:Fwd_GET_INSTR_Txn, in_msg.Addr, cache_entry, tbe); + } + else { + trigger(Event:Fwd_GET_INSTR, in_msg.Addr, cache_entry, tbe); + } + } else { + error("Invalid forwarded request type"); + } + } + } + else { + DPRINTF(RubySlicc, "DEBUG: L1Q not ready\n"); + } + } + + // Mandatory Queue betweens Node's CPU and it's L0 caches + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) { + if 
(mandatoryQueue_in.isReady()) { + DPRINTF(RubySlicc, "DEBUG: Trying next request from MDQ\n"); + peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { + + if(in_msg.Type == RubyRequestType:Txn_Start) { + DPRINTF(RubySlicc, "DEBUG: Starting transaction: %s\n", + txnDepth); + isTransaction := true; + txnDepth := txnDepth + 1; + sequencer.txnCallback(in_msg.LineAddress, false, false, abortCode); + DPRINTF(RubySlicc, "DEBUG: Started transaction: %s\n", + txnDepth); + + // trigger call to deque the mandatoryQueue + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + // both cache_entry and tbe should be NULL here + assert(is_invalid(cache_entry) && is_invalid(tbe)); + trigger(Event:Txn_PopQ, in_msg.LineAddress, cache_entry, tbe); + //mandatoryQueue_in.dequeue(); + } + else if(in_msg.Type == RubyRequestType:Txn_Commit) { + if(abortTransaction) { + sequencer.txnCallback(in_msg.LineAddress, false, true, abortCode); + } + else { + DPRINTF(RubySlicc, "DEBUG: Committing transaction: %s\n", + txnDepth); + txnDepth := txnDepth - 1; + DPRINTF(RubySlicc, "DEBUG: Committed transaction: %s\n", + txnDepth); + if(txnDepth == 0) { + TxnBF.clear(); + Icache.commitTransaction(); + Dcache.commitTransaction(); + isTransaction := false; + abortTransaction := false; + txnDepth := 0; + DPRINTF(RubySlicc, "DEBUG: Committed full transaction: %s\n", + txnDepth); + } + + sequencer.txnCallback(in_msg.LineAddress, false, false, abortCode); + } + + // trigger call to deque the mandatoryQueue + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + // both cache_entry and tbe should be NULL here + assert(is_invalid(cache_entry) && is_invalid(tbe)); + trigger(Event:Txn_PopQ, in_msg.LineAddress, cache_entry, tbe); + //mandatoryQueue_in.dequeue(); + } + else if(in_msg.Type == RubyRequestType:Txn_Abort) { + DPRINTF(RubySlicc, "DEBUG: Aborting transaction: %s\n", + txnDepth); + 
TxnBF.clear(); + Icache.abortTransaction(); + Dcache.abortTransaction(); + clearBlockMap(); + isTransaction := false; + abortTransaction := false; + txnDepth := 0; + sequencer.txnCallback(in_msg.LineAddress, false, false, abortCode); + DPRINTF(RubySlicc, "DEBUG: Aborted transaction: %s\n", + txnDepth); + + // trigger call to deque the mandatoryQueue + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + // both cache_entry and tbe should be NULL here + assert(is_invalid(cache_entry) && is_invalid(tbe)); + trigger(Event:Txn_PopQ, in_msg.LineAddress, cache_entry, tbe); + //mandatoryQueue_in.dequeue(); + } + else if(abortTransaction) { + if ((in_msg.Type == RubyRequestType:ST) || + (in_msg.Type == RubyRequestType:ATOMIC) || + (in_msg.Type == RubyRequestType:RMW_Read) || + (in_msg.Type == RubyRequestType:RMW_Write) || + (in_msg.Type == RubyRequestType:Load_Linked) || + (in_msg.Type == RubyRequestType:Store_Conditional) || + (in_msg.Type == RubyRequestType:Locked_RMW_Read) || + (in_msg.Type == RubyRequestType:Locked_RMW_Write) || + (in_msg.Type == RubyRequestType:FLUSH)) { + sequencer.txnCallback(in_msg.LineAddress, true, true, abortCode); + } + else { + sequencer.txnCallback(in_msg.LineAddress, false, true, abortCode); + } + + // trigger call to deque the mandatoryQueue + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + // both cache_entry and tbe should be NULL here + assert(is_invalid(cache_entry) && is_invalid(tbe)); + trigger(Event:Txn_PopQ, in_msg.LineAddress, cache_entry, tbe); + //mandatoryQueue_in.dequeue(); + } + else { + // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache + + if (in_msg.Type == RubyRequestType:IFETCH) { + // ** INSTRUCTION ACCESS *** + + Entry Icache_entry := getICacheEntry(in_msg.LineAddress); + if (is_valid(Icache_entry)) { + // The tag matches for the L0, so the L0 asks the L2 for it. 
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+ Icache_entry, TBEs[in_msg.LineAddress]);
+ } else {
+
+ // Check to see if it is in the OTHER L0
+ Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress);
+ if (is_valid(Dcache_entry)) {
+ // The block is in the wrong L0, put the request on the queue to the shared L2
+ trigger(Event:L0_Replacement, in_msg.LineAddress,
+ Dcache_entry, TBEs[in_msg.LineAddress]);
+ }
+
+ if (Icache.cacheAvail(in_msg.LineAddress)) {
+ // L0 doesn't have the line, but we have space for it
+ // in the L0 so let's see if the L2 has it
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+ Icache_entry, TBEs[in_msg.LineAddress]);
+ } else {
+ // No room in the L0, so we need to make room in the L0
+ trigger(Event:L0_Replacement, Icache.cacheProbe(in_msg.LineAddress),
+ getICacheEntry(Icache.cacheProbe(in_msg.LineAddress)),
+ TBEs[Icache.cacheProbe(in_msg.LineAddress)]);
+ }
+ }
+ } else {
+ // *** DATA ACCESS ***
+ Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress);
+ if (is_valid(Dcache_entry)) {
+ // The tag matches for the L0, so the L0 asks the L1 for it
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+ Dcache_entry, TBEs[in_msg.LineAddress]);
+ } else {
+ // Check to see if it is in the OTHER L0
+ Entry Icache_entry := getICacheEntry(in_msg.LineAddress);
+ if (is_valid(Icache_entry)) {
+ // The block is in the wrong L0, put the request on the queue to the private L1
+ trigger(Event:L0_Replacement, in_msg.LineAddress,
+ Icache_entry, TBEs[in_msg.LineAddress]);
+ }
+
+ if (Dcache.cacheAvail(in_msg.LineAddress)) {
+ // L0 doesn't have the line, but we have space for it
+ // in the L0 so let's see if the L1 has it
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+ Dcache_entry, TBEs[in_msg.LineAddress]);
+ } else {
+ // No room in the L0, so we need to make room in the L0
+ trigger(Event:L0_Replacement,
Dcache.cacheProbe(in_msg.LineAddress), + getDCacheEntry(Dcache.cacheProbe(in_msg.LineAddress)), + TBEs[Dcache.cacheProbe(in_msg.LineAddress)]); + } + } + } + } + } + } + else { + DPRINTF(RubySlicc, "DEBUG: Mandatory Queue not ready!\n"); + } + } + + //***************************************************** + // ACTIONS + //***************************************************** + + action(a_issueGETS, "a", desc="Issue GETS") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:GETS; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(b_issueGETX, "b", desc="Issue GETX") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:GETX; + out_msg.Sender := machineID; + DPRINTF(RubySlicc, "%s\n", machineID); + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(c_issueUPGRADE, "c", desc="Issue GETX") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:UPGRADE; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + 
action(fn_abortsendNakToL1, "fnan", desc="abort and send NAK to the L2 cache saying that it has latest data") { + abortTransaction := true; + + // send NAK to L1 + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:NAK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(fn_abortsendInvAckToL1, "fnai", desc="abort and send Inv ACK to the L2 cache saying that it has latest data") { + abortTransaction := true; + + // send NAK to L1 + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:INV_ACK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(fn_sendNakToL1, "fn", desc="send NAK to the L2 cache saying that it has latest data") { + // send NAK to L1 + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:NAK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(f_sendDataToL1, "f", desc="send data to the L2 cache") { + if(isTransaction == true && + cache_entry.getWriteSet() == true && + cache_entry.CacheState == State:M) + { + // send NAK to L1 + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:NAK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + else { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + 
assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:INV_DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + } + } + + action(fi_sendInvAck, "fi", desc="send data to the L2 cache") { + peek(messgeBuffer_in, CoherenceMsg) { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:INV_ACK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { + if(isTransaction == true && + (cache_entry.CacheState == State:S) && + cache_entry.getReadSet() == true) + { + // Do not send eviction to cpu + // The cache entry is added to the Bloom Filter because it is in the read set of the transaction + } + else { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer.evictionCallback(address); + } + } + } + + action(g_issuePUTX, "g", desc="send data to the L2 cache") { + if(isTransaction == true && + cache_entry.getWriteSet() == true) + { } + else { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:PUTX; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender:= machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + + if (cache_entry.Dirty) { + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } else { + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + } + + action(h_load_hit, "h", desc="If not prefetch, notify 
sequencer the load completed.") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + sequencer.readCallback(address, cache_entry.DataBlk); + } + + action(hh_store_hit, "\h", desc="If not prefetch, notify sequencer that store completed.") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + sequencer.writeCallback(address, cache_entry.DataBlk); + cache_entry.Dirty := true; + } + + action(i_allocateTBE, "i", desc="Allocate TBE (number of invalidates=0)") { + check_allocate(TBEs); + assert(is_valid(cache_entry)); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.Dirty := cache_entry.Dirty; + tbe.DataBlk := cache_entry.DataBlk; + } + + action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") { + mandatoryQueue_in.dequeue(); + } + + action(l_popRequestQueue, "l", + desc="Pop incoming request queue and profile the delay within this virtual network") { + profileMsgDelay(2, messgeBuffer_in.dequeue()); + } + + action(o_popIncomingResponseQueue, "o", + desc="Pop Incoming Response queue and profile the delay within this virtual network") { + profileMsgDelay(1, messgeBuffer_in.dequeue()); + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(u_writeDataToCache, "u", desc="Write data to cache") { + peek(messgeBuffer_in, CoherenceMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(ff_deallocateCacheBlock, "\f", + desc="Deallocate L1 cache block. 
Sets the cache to not present, allowing a replacement in parallel with a fetch.") { + if (Dcache.isTagPresent(address)) { + Dcache.deallocate(address); + } else { + Icache.deallocate(address); + } + unset_cache_entry(); + } + + action(oo_allocateDCacheBlock, "\o", desc="Set L1 D-cache tag equal to tag of block B.") { + if (is_invalid(cache_entry)) { + set_cache_entry(Dcache.allocate(address, new Entry)); + } + } + + action(pp_allocateICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") { + if (is_invalid(cache_entry)) { + set_cache_entry(Icache.allocate(address, new Entry)); + } + } + + action(z_stallAndWaitMandatoryQueue, "\z", desc="recycle cpu request queue") { + DPRINTF(RubySlicc, "DEBUG: Stalling access: %s\n", address); + stall_and_wait(mandatoryQueue_in, address); + DPRINTF(RubySlicc, "DEBUG: Completing stall call: %s\n", address); + } + + action(kd_wakeUpDependents, "kd", desc="wake-up dependents") { + DPRINTF(RubySlicc, "DEBUG: Waking up all buffers stalled on: %s\n", address); + wakeUpAllBuffers(address); + DPRINTF(RubySlicc, "DEBUG: Completed wake up all buffers stalled on: %s\n", address); + } + + action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") { + ++Icache.demand_misses; + } + + action(uu_profileInstHit, "\uih", desc="Profile the demand miss") { + ++Icache.demand_hits; + } + + action(uu_profileDataMiss, "\ud", desc="Profile the demand miss") { + ++Dcache.demand_misses; + } + + action(uu_profileDataHit, "\udh", desc="Profile the demand miss") { + ++Dcache.demand_hits; + } + + action(rs_addToReadSet, "rs", desc="Add to read set") { + if (isTransaction) { + // set read set bit + cache_entry.setReadSet(); + } + } + + action(rw_addToWriteSet, "rw", desc="Add to write set") { + if (isTransaction) { + // ON DEMAND write-back (may be easier) + // if dirty and not in write set + // write back and retain M state + if((cache_entry.getWriteSet() == false) && (cache_entry.CacheState == State:M)){ + if(cache_entry.Dirty) { + // 
code copied from issuePUTX + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:PUTX_COPY; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender:= machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + } + } + + // set write set bit + cache_entry.setWriteSet(); + } + } + + action(abf_addToBloomFilter, "abf", desc="Add to Bloom Filter") { + if(isTransaction == true && + (cache_entry.CacheState == State:S) && + cache_entry.getReadSet() == true) + { + TxnBF.set(address); + } + } + + // action(stx_startTransaction, "stx", desc="Start a transaction") { + // } + + // action(ctx_commitTransaction, "ctx", desc="Commit a transaction") { + // } + + action(atx_abortTransaction, "atx", desc="Abort a transaction") { + if(isTransaction && (cache_entry.getWriteSet() || cache_entry.getReadSet())) { + DPRINTF(RubySlicc, "Implicit abort of a transaction: %s, %s, %s\n", + cache_entry.getReadSet(), cache_entry.getWriteSet(), + address); + abortTransaction := true; + } + } + + + + //***************************************************** + // TRANSITIONS + //***************************************************** + + // False Transition to just pop the mandatory queue + // making sure all mandatory queue ends with a trigger + // Currently in testing phase + transition(NP, Txn_PopQ) { + k_popMandatoryQueue; + } + + transition(NP, Txn_NAK) { + fn_abortsendNakToL1; + l_popRequestQueue; + } + + transition(NP, Txn_InvAck) { + fn_abortsendInvAckToL1; + l_popRequestQueue; + } + + // Transitions for Load/Store/Replacement/WriteBack from transient states + transition({IS, IM, SM}, {Load, Ifetch, Store, L0_Replacement}) { + z_stallAndWaitMandatoryQueue; + } + + // Transitions from Idle + transition({NP,I}, L0_Replacement) { + ff_deallocateCacheBlock; + } + + 
transition({NP,I}, Load, IS) { + oo_allocateDCacheBlock; + i_allocateTBE; + rs_addToReadSet; // TSX_Support + a_issueGETS; + uu_profileDataMiss; + k_popMandatoryQueue; + } + + transition({NP,I}, Ifetch, IS) { + pp_allocateICacheBlock; + i_allocateTBE; + rs_addToReadSet; // TSX_Support + a_issueGETS; + uu_profileInstMiss; + k_popMandatoryQueue; + } + + transition({NP,I}, Store, IM) { + oo_allocateDCacheBlock; + i_allocateTBE; + rw_addToWriteSet; // TSX_Support + b_issueGETX; + uu_profileDataMiss; + k_popMandatoryQueue; + } + + transition({NP, I}, Inv) { + fi_sendInvAck; + l_popRequestQueue; + } + + transition({IS, IM}, Inv) { + fi_sendInvAck; + atx_abortTransaction; // TSX_Support + l_popRequestQueue; + } + + transition(SM, Inv, IM) { + fi_sendInvAck; + atx_abortTransaction; // TSX_Support + l_popRequestQueue; + } + + // Transitions from Shared + transition({S,E,M}, Load) { + h_load_hit; + rs_addToReadSet; // TSX_Support + uu_profileDataHit; + k_popMandatoryQueue; + } + + transition({S,E,M}, Ifetch) { + h_load_hit; + rs_addToReadSet; // TSX_Support + uu_profileInstHit; + k_popMandatoryQueue; + } + + transition(S, Store, SM) { + i_allocateTBE; + rw_addToWriteSet; // TSX_Support + c_issueUPGRADE; + uu_profileDataMiss; + k_popMandatoryQueue; + } + + transition(S, L0_Replacement, I) { + // Do not abort. 
Just add entry in BloomFilter TxnBF + // atx_abortTransaction; // TSX_Support + abf_addToBloomFilter; // TSX_Support + forward_eviction_to_cpu; + ff_deallocateCacheBlock; + } + + transition(S, Inv, I) { + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + // Transitions from Exclusive + transition({E,M}, Store, M) { + rw_addToWriteSet; // TSX_Support + hh_store_hit; + uu_profileDataHit; + k_popMandatoryQueue; + } + + transition(E, L0_Replacement, I) { + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + g_issuePUTX; + ff_deallocateCacheBlock; + } + + transition(E, {Inv, Fwd_GETX}, I) { + // don't send data + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + transition(E, {Fwd_GETS, Fwd_GET_INSTR}, S) { + f_sendDataToL1; + l_popRequestQueue; + } + + // Transitions from Modified + transition(M, L0_Replacement, I) { + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + g_issuePUTX; + ff_deallocateCacheBlock; + } + + transition(M, {Inv, Fwd_GETX}, I) { + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + f_sendDataToL1; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + transition(M, {Fwd_GETS, Fwd_GET_INSTR}, S) { + //atx_abortTransaction; // TSX_Support + f_sendDataToL1; + l_popRequestQueue; + } + + // If transaction, abort transaction and go to I + transition(M, {Fwd_GETS_Txn, Fwd_GET_INSTR_Txn}, I) { + atx_abortTransaction; // TSX_Support + f_sendDataToL1; + l_popRequestQueue; + } + + transition(IS, Data, S) { + u_writeDataToCache; + h_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(IS, Data_Exclusive, E) { + u_writeDataToCache; + h_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(IS, Data_Stale, I) { + atx_abortTransaction; // TSX_Support + 
u_writeDataToCache; + h_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition({IM,SM}, Data_Exclusive, M) { + u_writeDataToCache; + hh_store_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(I, {Fwd_GETS, Fwd_GET_INSTR, Fwd_GETX}) { + fn_sendNakToL1; + l_popRequestQueue; + } +} diff -r 878f2f30b12d src/mem/protocol/MESI_Txn_Three_Level-L1cache.sm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/mem/protocol/MESI_Txn_Three_Level-L1cache.sm Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,1093 @@ +/* + * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +machine(L1Cache, "MESI Directory L1 Cache CMP") + : CacheMemory * cache, + int l2_select_num_bits, + Cycles l1_request_latency = 4, + Cycles l1_response_latency = 4, + Cycles to_l2_latency = 1, +{ + // From this node's L1 cache TO the network + // a local L1 -> this L2 bank, currently ordered with directory forwarded requests + MessageBuffer requestFromCache, network="To", virtual_network="0", ordered="false", vnet_type="request"; + // a local L1 -> this L2 bank + MessageBuffer responseFromCache, network="To", virtual_network="1", ordered="false", vnet_type="response"; + MessageBuffer unblockFromCache, network="To", virtual_network="2", ordered="false", vnet_type="unblock"; + + + // To this node's L1 cache FROM the network + // a L2 bank -> this L1 + MessageBuffer requestToCache, network="From", virtual_network="0", ordered="false", vnet_type="request"; + // a L2 bank -> this L1 + MessageBuffer responseToCache, network="From", virtual_network="1", ordered="false", vnet_type="response"; + + // Message Buffers between the L1 and the L0 Cache + // From the L1 cache to the L0 cache + MessageBuffer bufferFromL1ToL0, network="To", physical_network="0", ordered="true"; + // From the L0 cache to the L1 cache + MessageBuffer bufferToL1FromL0, network="From", physical_network="0", ordered="true"; + + // STATES + state_declaration(State, desc="Cache states", default="L1Cache_State_I") { + // Base states + NP, AccessPermission:Invalid, 
desc="Not present in either cache"; + I, AccessPermission:Invalid, desc="a L1 cache entry Idle"; + S, AccessPermission:Read_Only, desc="a L1 cache entry Shared"; + SS, AccessPermission:Read_Only, desc="a L1 cache entry Shared"; + E, AccessPermission:Read_Only, desc="a L1 cache entry Exclusive"; + EE, AccessPermission:Read_Write, desc="a L1 cache entry Exclusive"; + M, AccessPermission:Maybe_Stale, desc="a L1 cache entry Modified", format="!b"; + MM, AccessPermission:Read_Write, desc="a L1 cache entry Modified", format="!b"; + + // Transient States + IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet"; + IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet"; + SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet"; + IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit"; + + M_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK"; + SINK_WB_ACK, AccessPermission:Busy, desc="This is to sink WB_Acks from L2"; + + // For all of the following states, invalidate + // message has been sent to L0 cache. The response + // from the L0 cache has not been seen yet. + S_IL0, AccessPermission:Busy; + E_IL0, AccessPermission:Busy; + M_IL0, AccessPermission:Busy; + MM_IL0, AccessPermission:Read_Write; + SM_IL0, AccessPermission:Busy; + } + + // EVENTS + enumeration(Event, desc="Cache events") { + // Requests from the L0 cache + Load, desc="Load request"; + Store, desc="Store request"; + WriteBack, desc="Writeback request"; + L0_DataCopy, desc="Data Block from L0. Should remain in M state."; + + // Responses from the L0 Cache + // L0 cache received the invalidation message + // and has sent the data. + L0_DataAck; + + // L0 cache received the invalidation message + // and has sent a NAK (because of txn abort) + // saying that the data in L1 is the latest value. 
+ L0_DataNak; + + Inv, desc="Invalidate request from L2 bank"; + + // internal generated request + // Invalidate the line in L0 due to own requirements + L0_Invalidate_Own; + // Invalidate the line in L0 due to some other cache's requirements + L0_Invalidate_Else; + // Invalidate the line in the cache due to some one else / space needs. + L1_Replacement; + + // other requests + Fwd_GETX, desc="GETX from other processor"; + Fwd_GETS, desc="GETS from other processor"; + + Data, desc="Data for processor"; + Data_Exclusive, desc="Data for processor"; + DataS_fromL1, desc="data for GETS request, need to unblock directory"; + Data_all_Acks, desc="Data for processor, all acks"; + + L0_Ack, desc="Ack for processor"; + Ack, desc="Ack for processor"; + Ack_all, desc="Last ack for processor"; + + WB_Ack, desc="Ack for replacement"; + } + + // TYPES + + // CacheEntry + structure(Entry, desc="...", interface="AbstractCacheEntry" ) { + State CacheState, desc="cache state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, default="false", desc="data is dirty"; + } + + // TBE fields + structure(TBE, desc="...") { + Address Addr, desc="Physical address for this TBE"; + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Buffer for the data block"; + bool Dirty, default="false", desc="data is dirty"; + int pendingAcks, default="0", desc="number of pending acks"; + } + + structure(TBETable, external="yes") { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } + + TBETable TBEs, template="", constructor="m_number_of_TBEs"; + + int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + + void set_cache_entry(AbstractCacheEntry a); + void unset_cache_entry(); + void set_tbe(TBE a); + void unset_tbe(); + void wakeUpBuffers(Address a); + void wakeUpAllBuffers(Address a); + void profileMsgDelay(int virtualNetworkType, Cycles c); + + // inclusive cache returns L1 entries only + Entry 
getCacheEntry(Address addr), return_by_pointer="yes" { + Entry cache_entry := static_cast(Entry, "pointer", cache[addr]); + return cache_entry; + } + + State getState(TBE tbe, Entry cache_entry, Address addr) { + if(is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:NP; + } + + void setState(TBE tbe, Entry cache_entry, Address addr, State state) { + // MUST CHANGE + if(is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + AccessPermission getAccessPermission(Address addr) { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState)); + return L1Cache_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState)); + return L1Cache_State_to_permission(cache_entry.CacheState); + } + + DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent); + return AccessPermission:NotPresent; + } + + DataBlock getDataBlock(Address addr), return_by_ref="yes" { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + return tbe.DataBlk; + } + + return getCacheEntry(addr).DataBlk; + } + + void setAccessPermission(Entry cache_entry, Address addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(L1Cache_State_to_permission(state)); + } + } + + Event mandatory_request_type_to_event(CoherenceClass type) { + if (type == CoherenceClass:GETS) { + return Event:Load; + } else if ((type == CoherenceClass:GETX) || + (type == CoherenceClass:UPGRADE)) { + return Event:Store; + } else if (type == CoherenceClass:PUTX) { + return Event:WriteBack; + } else { + error("Invalid RequestType"); + } + } + + int getPendingAcks(TBE tbe) { + return tbe.pendingAcks; + } + + bool inL0Cache(State state) { + if (state == State:S || state == State:E || state 
== State:M || + state == State:S_IL0 || state == State:E_IL0 || + state == State:M_IL0 || state == State:SM_IL0) { + return true; + } + + return false; + } + + out_port(requestNetwork_out, RequestMsg, requestFromCache); + out_port(responseNetwork_out, ResponseMsg, responseFromCache); + out_port(unblockNetwork_out, ResponseMsg, unblockFromCache); + out_port(bufferToL0_out, CoherenceMsg, bufferFromL1ToL0); + + // Response From the L2 Cache to this L1 cache + in_port(responseNetwork_in, ResponseMsg, responseToCache, rank = 3) { + if (responseNetwork_in.isReady()) { + peek(responseNetwork_in, ResponseMsg) { + assert(in_msg.Destination.isElement(machineID)); + + Entry cache_entry := getCacheEntry(in_msg.Addr); + TBE tbe := TBEs[in_msg.Addr]; + + if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { + trigger(Event:Data_Exclusive, in_msg.Addr, cache_entry, tbe); + } else if(in_msg.Type == CoherenceResponseType:DATA) { + if ((getState(tbe, cache_entry, in_msg.Addr) == State:IS || + getState(tbe, cache_entry, in_msg.Addr) == State:IS_I) && + machineIDToMachineType(in_msg.Sender) == MachineType:L1Cache) { + + trigger(Event:DataS_fromL1, in_msg.Addr, cache_entry, tbe); + + } else if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) { + trigger(Event:Data_all_Acks, in_msg.Addr, cache_entry, tbe); + } else { + trigger(Event:Data, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:ACK) { + if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) { + trigger(Event:Ack_all, in_msg.Addr, cache_entry, tbe); + } else { + trigger(Event:Ack, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:WB_ACK) { + trigger(Event:WB_Ack, in_msg.Addr, cache_entry, tbe); + } else { + error("Invalid L1 response type"); + } + } + } + } + + // Request to this L1 cache from the shared L2 + in_port(requestNetwork_in, RequestMsg, requestToCache, rank = 2) { + if(requestNetwork_in.isReady()) { + peek(requestNetwork_in, RequestMsg) { + 
assert(in_msg.Destination.isElement(machineID)); + Entry cache_entry := getCacheEntry(in_msg.Addr); + TBE tbe := TBEs[in_msg.Addr]; + + if (in_msg.Type == CoherenceRequestType:INV) { + if (is_valid(cache_entry) && inL0Cache(cache_entry.CacheState)) { + trigger(Event:L0_Invalidate_Else, in_msg.Addr, + cache_entry, tbe); + } else { + trigger(Event:Inv, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:GETX || + in_msg.Type == CoherenceRequestType:UPGRADE) { + if (is_valid(cache_entry) && inL0Cache(cache_entry.CacheState)) { + trigger(Event:L0_Invalidate_Else, in_msg.Addr, + cache_entry, tbe); + } else { + trigger(Event:Fwd_GETX, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:GETS) { + if (is_valid(cache_entry) && inL0Cache(cache_entry.CacheState)) { + trigger(Event:L0_Invalidate_Else, in_msg.Addr, + cache_entry, tbe); + } else { + trigger(Event:Fwd_GETS, in_msg.Addr, cache_entry, tbe); + } + } else { + error("Invalid forwarded request type"); + } + } + } + } + + // Requests to this L1 cache from the L0 cache. 
+ in_port(messageBufferFromL0_in, CoherenceMsg, bufferToL1FromL0, rank = 0) { + if (messageBufferFromL0_in.isReady()) { + peek(messageBufferFromL0_in, CoherenceMsg) { + Entry cache_entry := getCacheEntry(in_msg.Addr); + TBE tbe := TBEs[in_msg.Addr]; + + if(in_msg.Class == CoherenceClass:INV_DATA) { + trigger(Event:L0_DataAck, in_msg.Addr, cache_entry, tbe); + } + else if (in_msg.Class == CoherenceClass:NAK) { + trigger(Event:L0_DataNak, in_msg.Addr, cache_entry, tbe); + } + else if (in_msg.Class == CoherenceClass:PUTX_COPY) { + trigger(Event:L0_DataCopy, in_msg.Addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:INV_ACK) { + trigger(Event:L0_Ack, in_msg.Addr, cache_entry, tbe); + } else { + if (is_valid(cache_entry)) { + trigger(mandatory_request_type_to_event(in_msg.Class), + in_msg.Addr, cache_entry, tbe); + } else { + if (cache.cacheAvail(in_msg.Addr)) { + // L1 doesn't have the line, but we have space for it + // in the L1 let's see if the L2 has it + trigger(mandatory_request_type_to_event(in_msg.Class), + in_msg.Addr, cache_entry, tbe); + } else { + // No room in the L1, so we need to make room in the L1 + Entry victim_entry := + getCacheEntry(cache.cacheProbe(in_msg.Addr)); + TBE victim_tbe := TBEs[cache.cacheProbe(in_msg.Addr)]; + + if (is_valid(victim_entry) && inL0Cache(victim_entry.CacheState)) { + trigger(Event:L0_Invalidate_Own, + cache.cacheProbe(in_msg.Addr), + victim_entry, victim_tbe); + } else { + trigger(Event:L1_Replacement, + cache.cacheProbe(in_msg.Addr), + victim_entry, victim_tbe); + } + } + } + } + } + } + + // ACTIONS + action(a_issueGETS, "a", desc="Issue GETS") { + peek(messageBufferFromL0_in, CoherenceMsg) { + enqueue(requestNetwork_out, RequestMsg, l1_request_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceRequestType:GETS; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + 
DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(b_issueGETX, "b", desc="Issue GETX") { + peek(messageBufferFromL0_in, CoherenceMsg) { + enqueue(requestNetwork_out, RequestMsg, l1_request_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceRequestType:GETX; + out_msg.Requestor := machineID; + DPRINTF(RubySlicc, "%s\n", machineID); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(c_issueUPGRADE, "c", desc="Issue GETX") { + peek(messageBufferFromL0_in, CoherenceMsg) { + enqueue(requestNetwork_out, RequestMsg, l1_request_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceRequestType:UPGRADE; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(d_sendDataToRequestor, "d", desc="send data to requestor") { + peek(requestNetwork_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of 
M downgrade") { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") { + peek(requestNetwork_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(tbe)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(tbe)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") { + peek(requestNetwork_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:ACK; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + action(f_sendDataToL2, "f", desc="send data to 
the L2 cache") { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + } + + action(ft_sendDataToL2_fromTBE, "ft", desc="send data to the L2 cache") { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(tbe)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + } + + action(fi_sendInvAck, "fi", desc="send data to the L2 cache") { + peek(requestNetwork_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:ACK; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.AckCount := 1; + } + } + } + + action(forward_eviction_to_L0, "\cc", desc="sends eviction information to the processor") { + enqueue(bufferToL0_out, CoherenceMsg, l1_request_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:INV; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L0Cache, version); + out_msg.MessageSize := MessageSizeType:Control; + } + } + + action(g_issuePUTX, "g", desc="send data to the L2 cache") { + enqueue(requestNetwork_out, RequestMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + 
out_msg.Addr := address; + out_msg.Type := CoherenceRequestType:PUTX; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Requestor:= machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + if (cache_entry.Dirty) { + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } else { + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(j_sendUnblock, "j", desc="send unblock to the L2 cache") { + enqueue(unblockNetwork_out, ResponseMsg, to_l2_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:UNBLOCK; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Response_Control; + DPRINTF(RubySlicc, "%s\n", address); + } + } + + action(jj_sendExclusiveUnblock, "\j", desc="send unblock to the L2 cache") { + enqueue(unblockNetwork_out, ResponseMsg, to_l2_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:EXCLUSIVE_UNBLOCK; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Response_Control; + DPRINTF(RubySlicc, "%s\n", address); + + } + } + + action(h_data_to_l0, "h", desc="If not prefetch, send data to the L0 cache.") { + enqueue(bufferToL0_out, CoherenceMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + + out_msg.Addr := address; + out_msg.Class := CoherenceClass:DATA; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L0Cache, version); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(h_stale_data_to_l0, "hs", desc="If not prefetch, 
send data to the L0 cache.") { + enqueue(bufferToL0_out, CoherenceMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + + out_msg.Addr := address; + out_msg.Class := CoherenceClass:STALE_DATA; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L0Cache, version); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(hh_xdata_to_l0, "\h", desc="If not prefetch, notify sequencer that store completed.") { + enqueue(bufferToL0_out, CoherenceMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + + out_msg.Addr := address; + out_msg.Class := CoherenceClass:DATA_EXCLUSIVE; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L0Cache, version); + + DPRINTF(RubySlicc, "DEBUG: address: %s, destination: %s\n", + address, out_msg.Destination); + + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.MessageSize := MessageSizeType:Response_Data; + + cache_entry.Dirty := true; + } + } + + action(i_allocateTBE, "i", desc="Allocate TBE (number of invalidates=0)") { + check_allocate(TBEs); + assert(is_valid(cache_entry)); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.Dirty := cache_entry.Dirty; + tbe.DataBlk := cache_entry.DataBlk; + } + + action(k_popL0RequestQueue, "k", desc="Pop mandatory queue.") { + messageBufferFromL0_in.dequeue(); + } + + action(l_popL2RequestQueue, "l", + desc="Pop incoming request queue and profile the delay within this virtual network") { + profileMsgDelay(2, requestNetwork_in.dequeue()); + } + + action(o_popL2ResponseQueue, "o", + desc="Pop Incoming Response queue and profile the delay within this virtual network") { + profileMsgDelay(1, responseNetwork_in.dequeue()); + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(u_writeDataFromL0Request, "ureql0", 
desc="Write data to cache") { + peek(messageBufferFromL0_in, CoherenceMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(u_writeDataFromL2Response, "uresl2", desc="Write data to cache") { + peek(responseNetwork_in, ResponseMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(u_writeDataFromL0Response, "uresl0", desc="Write data to cache") { + peek(messageBufferFromL0_in, CoherenceMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(q_updateAckCount, "q", desc="Update ack count") { + peek(responseNetwork_in, ResponseMsg) { + assert(is_valid(tbe)); + tbe.pendingAcks := tbe.pendingAcks - in_msg.AckCount; + APPEND_TRANSITION_COMMENT(in_msg.AckCount); + APPEND_TRANSITION_COMMENT(" p: "); + APPEND_TRANSITION_COMMENT(tbe.pendingAcks); + } + } + + action(ff_deallocateCacheBlock, "\f", + desc="Deallocate L1 cache block. 
Sets the cache to not present, allowing a replacement in parallel with a fetch.") { + if (cache.isTagPresent(address)) { + cache.deallocate(address); + } + unset_cache_entry(); + } + + action(oo_allocateCacheBlock, "\o", desc="Set cache tag equal to tag of block B.") { + if (is_invalid(cache_entry)) { + set_cache_entry(cache.allocate(address, new Entry)); + } + } + + action(z0_stallAndWaitL0Queue, "\z0", desc="recycle L0 request queue") { + stall_and_wait(messageBufferFromL0_in, address); + } + + action(z2_stallAndWaitL2Queue, "\z2", desc="recycle L2 request queue") { + stall_and_wait(requestNetwork_in, address); + } + + action(kd_wakeUpDependents, "kd", desc="wake-up dependents") { + wakeUpAllBuffers(address); + } + + action(uu_profileMiss, "\um", desc="Profile the demand miss") { + ++cache.demand_misses; + } + + action(uu_profileHit, "\uh", desc="Profile the demand hit") { + ++cache.demand_hits; + } + + + //***************************************************** + // TRANSITIONS + //***************************************************** + + // Transitions for Load/Store/Replacement/WriteBack from transient states + transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK, S_IL0, M_IL0, E_IL0, MM_IL0}, + {Load, Store, L1_Replacement}) { + z0_stallAndWaitL0Queue; + } + + // Transitions from Idle + transition({NP,I}, L1_Replacement) { + ff_deallocateCacheBlock; + } + + transition({NP,I}, Load, IS) { + oo_allocateCacheBlock; + i_allocateTBE; + a_issueGETS; + uu_profileMiss; + k_popL0RequestQueue; + } + + transition({NP,I}, Store, IM) { + oo_allocateCacheBlock; + i_allocateTBE; + b_issueGETX; + uu_profileMiss; + k_popL0RequestQueue; + } + + transition({NP, I}, Inv) { + fi_sendInvAck; + l_popL2RequestQueue; + } + + // Transitions from Shared + transition({S,SS}, Load, S) { + h_data_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + // if transaction has aborted, L0 could re-request data which is in E state in L1 + // Therefore support Load from L0 in both E and EE state. 
+ transition({EE,E}, Load, E) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + // if transaction has aborted, L0 could re-request data which is in M state in L1 + // Therefore support Load from L0 in both M and MM state. + transition({MM,M}, Load, M) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + transition({S,SS}, Store, SM) { + i_allocateTBE; + c_issueUPGRADE; + uu_profileMiss; + k_popL0RequestQueue; + } + + transition(SS, L1_Replacement, I) { + ff_deallocateCacheBlock; + } + + transition(S, {L0_Invalidate_Own, L0_Invalidate_Else}, S_IL0) { + forward_eviction_to_L0; + } + + transition(SS, Inv, I) { + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popL2RequestQueue; + } + + // Transitions from Exclusive + + // if transaction has aborted, L0 could re-request data which is in M state in L1 + // Therefore support Load from L0 in both M and MM state. + transition({E,M}, Store, M) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + transition({EE,MM}, Store, M) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + transition(EE, L1_Replacement, M_I) { + // silent E replacement?? 
+ i_allocateTBE; + g_issuePUTX; // send data, but hold in case forwarded request + ff_deallocateCacheBlock; + } + + transition(EE, Inv, I) { + // don't send data + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popL2RequestQueue; + } + + transition(EE, Fwd_GETX, I) { + d_sendDataToRequestor; + ff_deallocateCacheBlock; + l_popL2RequestQueue; + } + + transition(EE, Fwd_GETS, SS) { + d_sendDataToRequestor; + d2_sendDataToL2; + l_popL2RequestQueue; + } + + transition(E, {L0_Invalidate_Own, L0_Invalidate_Else}, E_IL0) { + forward_eviction_to_L0; + } + + // Transitions from Modified + transition(MM, L1_Replacement, M_I) { + i_allocateTBE; + g_issuePUTX; // send data, but hold in case forwarded request + ff_deallocateCacheBlock; + } + + transition({M,E}, WriteBack, MM) { + u_writeDataFromL0Request; + k_popL0RequestQueue; + } + + transition({M,E}, L0_DataCopy, M) { + u_writeDataFromL0Request; + k_popL0RequestQueue; + } + + transition(M_I, WB_Ack, I) { + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition(MM, Inv, I) { + f_sendDataToL2; + ff_deallocateCacheBlock; + l_popL2RequestQueue; + } + + transition(M_I, Inv, SINK_WB_ACK) { + ft_sendDataToL2_fromTBE; + l_popL2RequestQueue; + } + + transition(MM, Fwd_GETX, I) { + d_sendDataToRequestor; + l_popL2RequestQueue; + } + + transition(MM, Fwd_GETS, SS) { + d_sendDataToRequestor; + d2_sendDataToL2; + l_popL2RequestQueue; + } + + transition(M, {L0_Invalidate_Own, L0_Invalidate_Else}, M_IL0) { + forward_eviction_to_L0; + } + + transition(M_I, Fwd_GETX, SINK_WB_ACK) { + dt_sendDataToRequestor_fromTBE; + l_popL2RequestQueue; + } + + transition(M_I, Fwd_GETS, SINK_WB_ACK) { + dt_sendDataToRequestor_fromTBE; + d2t_sendDataToL2_fromTBE; + l_popL2RequestQueue; + } + + // Transitions from IS + transition({IS,IS_I}, Inv, IS_I) { + fi_sendInvAck; + l_popL2RequestQueue; + } + + transition(IS, Data_all_Acks, S) { + u_writeDataFromL2Response; + h_data_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + 
kd_wakeUpDependents; + } + + transition(IS_I, Data_all_Acks, I) { + u_writeDataFromL2Response; + h_stale_data_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition(IS, DataS_fromL1, S) { + u_writeDataFromL2Response; + j_sendUnblock; + h_data_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition(IS_I, DataS_fromL1, I) { + u_writeDataFromL2Response; + j_sendUnblock; + h_stale_data_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + // directory is blocked when sending exclusive data + transition(IS, Data_Exclusive, E) { + u_writeDataFromL2Response; + hh_xdata_to_l0; + jj_sendExclusiveUnblock; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + // directory is blocked when sending exclusive data + transition(IS_I, Data_Exclusive, E) { + u_writeDataFromL2Response; + hh_xdata_to_l0; + jj_sendExclusiveUnblock; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + // Transitions from IM + transition({IM,SM}, Inv, IM) { + fi_sendInvAck; + l_popL2RequestQueue; + } + + transition(IM, Data, SM) { + u_writeDataFromL2Response; + q_updateAckCount; + o_popL2ResponseQueue; + } + + transition(IM, Data_all_Acks, M) { + u_writeDataFromL2Response; + hh_xdata_to_l0; + jj_sendExclusiveUnblock; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition({SM, IM}, Ack) { + q_updateAckCount; + o_popL2ResponseQueue; + } + + transition(SM, Ack_all, M) { + jj_sendExclusiveUnblock; + hh_xdata_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition(SM, L0_Invalidate_Else, SM_IL0) { + forward_eviction_to_L0; + } + + transition(SINK_WB_ACK, Inv){ + fi_sendInvAck; + l_popL2RequestQueue; + } + + transition(SINK_WB_ACK, WB_Ack, I){ + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition({M_IL0, E_IL0}, WriteBack, MM_IL0) { + u_writeDataFromL0Request; + 
k_popL0RequestQueue; + kd_wakeUpDependents; + } + + // a transaction may have tried to modify a cache block + // in M state jut before it was evicted from L0 + // transition to M_IL0 since data is now modified + transition({M_IL0, E_IL0}, L0_DataCopy, M_IL0) { + u_writeDataFromL0Request; + k_popL0RequestQueue; + } + + transition({M_IL0, E_IL0}, L0_DataAck, MM) { + u_writeDataFromL0Response; + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + // transition on NAK from + transition({M_IL0, E_IL0}, L0_DataNak, MM) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition({M_IL0, MM_IL0}, L0_Ack, MM) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition(E_IL0, L0_Ack, EE) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition(S_IL0, L0_Ack, SS) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition(SM_IL0, L0_Ack, IM) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition({S_IL0, M_IL0, E_IL0, SM_IL0, SM}, L0_Invalidate_Own) { + z0_stallAndWaitL0Queue; + } + + transition({S_IL0, M_IL0, E_IL0, SM_IL0}, L0_Invalidate_Else) { + z2_stallAndWaitL2Queue; + } + + transition({S_IL0, M_IL0, E_IL0, MM_IL0}, {Inv, Fwd_GETX, Fwd_GETS}) { + z2_stallAndWaitL2Queue; + } +} diff -r 878f2f30b12d src/mem/protocol/MESI_Txn_Three_Level.slicc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/mem/protocol/MESI_Txn_Three_Level.slicc Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,9 @@ +protocol "MESI_Three_Level"; +include "RubySlicc_interfaces.slicc"; +include "MESI_Two_Level-msg.sm"; +include "MESI_Three_Level-msg.sm"; +include "MESI_Txn_Three_Level-L0cache.sm"; +include "MESI_Txn_Three_Level-L1cache.sm"; +include "MESI_Two_Level-L2cache.sm"; +include "MESI_Two_Level-dir.sm"; +include "MESI_Two_Level-dma.sm"; diff -r 878f2f30b12d src/mem/protocol/RubySlicc_Exports.sm --- a/src/mem/protocol/RubySlicc_Exports.sm Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/protocol/RubySlicc_Exports.sm Fri Jul 04 22:00:13 2014 -0500 @@ -135,6 +135,9 
@@ COMMIT, desc="Commit version"; NULL, desc="Invalid request type"; FLUSH, desc="Flush request type"; + Txn_Start, desc="Start transaction request type"; + Txn_Commit, desc="Commit transaction request type"; + Txn_Abort, desc="Abort transaction request type"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { diff -r 878f2f30b12d src/mem/protocol/RubySlicc_Types.sm --- a/src/mem/protocol/RubySlicc_Types.sm Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/protocol/RubySlicc_Types.sm Fri Jul 04 22:00:13 2014 -0500 @@ -112,6 +112,9 @@ void recordRequestType(SequencerRequestType); bool checkResourceAvailable(CacheResourceType, Address); void invalidateSC(Address); + + // adding callback for abort transaction + void txnCallback(Address, bool, bool, int); } structure(RubyRequest, desc="...", interface="Message", external="yes") { @@ -153,6 +156,11 @@ void recordRequestType(CacheRequestType); bool checkResourceAvailable(CacheResourceType, Address); + // added functions for transactions + void commitTransaction(); + void abortTransaction(); + AbstractCacheEntry getNullEntry(); + Scalar demand_misses; Scalar demand_hits; } @@ -180,14 +188,14 @@ } structure (GenericBloomFilter, external = "yes") { - void clear(int); - void increment(Address, int); - void decrement(Address, int); - void set(Address, int); - void unset(Address, int); + void clear(); + void increment(Address); + void decrement(Address); + void set(Address); + void unset(Address); - bool isSet(Address, int); - int getCount(Address, int); + bool isSet(Address); + int getCount(Address); } structure (Prefetcher, external = "yes") { diff -r 878f2f30b12d src/mem/protocol/SConsopts --- a/src/mem/protocol/SConsopts Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/protocol/SConsopts Fri Jul 04 22:00:13 2014 -0500 @@ -35,6 +35,7 @@ all_protocols.extend([ 'MESI_Two_Level', 'MESI_Three_Level', + 'MESI_Txn_Three_Level', 'MI_example', 'MOESI_CMP_directory', 'MOESI_CMP_token', diff -r 878f2f30b12d 
src/mem/request.hh --- a/src/mem/request.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/request.hh Fri Jul 04 22:00:13 2014 -0500 @@ -150,6 +150,18 @@ /** The request is a page table walk */ static const FlagsType PT_WALK = 0x20000000; + /** The request is for Transaction Begin */ + static const FlagsType XBEGIN = 0x00020000; + + /** The request is for Transaction Abort */ + static const FlagsType XABORT = 0x00040000; + + /** The request is for Transaction End */ + static const FlagsType XEND = 0x40000000; + + /** The request is for Elided Memory ops */ + static const FlagsType XELIDE = 0x80000000; + /** These flags are *not* cleared when a Request object is reused (assigned a new address). */ static const FlagsType STICKY_FLAGS = INST_FETCH; @@ -631,6 +643,10 @@ bool isClearLL() const { return _flags.isSet(CLEAR_LL); } bool isSecure() const { return _flags.isSet(SECURE); } bool isPTWalk() const { return _flags.isSet(PT_WALK); } + bool isXbegin() const { return _flags.isSet(XBEGIN); } + bool isXabort() const { return _flags.isSet(XABORT); } + bool isXend() const { return _flags.isSet(XEND); } + bool isTxn() const { return (_flags.isSet(XBEGIN) || _flags.isSet(XABORT) || _flags.isSet(XEND)); } }; #endif // __MEM_REQUEST_HH__ diff -r 878f2f30b12d src/mem/ruby/slicc_interface/AbstractCacheEntry.hh --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh Fri Jul 04 22:00:13 2014 -0500 @@ -53,6 +53,16 @@ Address m_Address; // Address of this block, required by CacheMemory int m_locked; // Holds info whether the address is locked, // required for implementing LL/SC + + bool readSet; // set if the cache entry is part of read set of transaction + bool writeSet; // set if the cache entry is part of write set of transaction + + void setReadSet(); + void setWriteSet(); + bool getReadSet(); + bool getWriteSet(); + + virtual void invalidateEntry(const bool& param_dummy); }; inline 
std::ostream& diff -r 878f2f30b12d src/mem/ruby/slicc_interface/AbstractCacheEntry.cc --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc Fri Jul 04 22:00:13 2014 -0500 @@ -33,6 +33,8 @@ m_Permission = AccessPermission_NotPresent; m_Address.setAddress(0); m_locked = -1; + readSet = false; + writeSet = false; } AbstractCacheEntry::~AbstractCacheEntry() @@ -48,3 +50,32 @@ m_locked = -1; } } + +void +AbstractCacheEntry::setReadSet() +{ + readSet = true; +} + +void +AbstractCacheEntry::setWriteSet() +{ + writeSet = true; +} + +bool +AbstractCacheEntry::getReadSet() +{ + return readSet; +} + +bool +AbstractCacheEntry::getWriteSet() +{ + return writeSet; +} + +void +AbstractCacheEntry::invalidateEntry(const bool& param_dummy) +{ +} diff -r 878f2f30b12d src/mem/ruby/slicc_interface/AbstractController.hh --- a/src/mem/ruby/slicc_interface/AbstractController.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/slicc_interface/AbstractController.hh Fri Jul 04 22:00:13 2014 -0500 @@ -128,6 +128,7 @@ void wakeUpBuffers(Address addr); void wakeUpAllBuffers(Address addr); void wakeUpAllBuffers(); + void clearBlockMap(); protected: NodeID m_version; @@ -148,6 +149,7 @@ int m_transitions_per_cycle; unsigned int m_buffer_size; Cycles m_recycle_latency; + std::string m_bloom_filter_id; //! Map from physical network number to the Message Buffer. 
std::map peerQueueMap; diff -r 878f2f30b12d src/mem/ruby/slicc_interface/AbstractController.cc --- a/src/mem/ruby/slicc_interface/AbstractController.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/slicc_interface/AbstractController.cc Fri Jul 04 22:00:13 2014 -0500 @@ -40,6 +40,7 @@ m_buffer_size = p->buffer_size; m_recycle_latency = p->recycle_latency; m_number_of_TBEs = p->number_of_TBEs; + m_bloom_filter_id = "H3_1024_2_Regular"; m_is_blocking = false; if (m_version == 0) { @@ -194,3 +195,10 @@ m_is_blocking = false; } } + +void +AbstractController::clearBlockMap() +{ + m_block_map.clear(); + m_is_blocking = false; +} diff -r 878f2f30b12d src/mem/ruby/system/CacheMemory.hh --- a/src/mem/ruby/system/CacheMemory.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/CacheMemory.hh Fri Jul 04 22:00:13 2014 -0500 @@ -110,6 +110,11 @@ bool checkResourceAvailable(CacheResourceType res, Address addr); void recordRequestType(CacheRequestType requestType); + // Transaction functions + void abortTransaction(); + void commitTransaction(); + AbstractCacheEntry* getNullEntry(); + public: Stats::Scalar m_demand_hits; Stats::Scalar m_demand_misses; diff -r 878f2f30b12d src/mem/ruby/system/CacheMemory.cc --- a/src/mem/ruby/system/CacheMemory.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/CacheMemory.cc Fri Jul 04 22:00:13 2014 -0500 @@ -563,3 +563,62 @@ return true; } } + +// Transaction functions +void +CacheMemory::abortTransaction() +{ + // std::vector > m_cache; + for (std::vector >::iterator i = m_cache.begin(); + i != m_cache.end(); + ++i) + { + std::vector temp = *i; + + for (std::vector::iterator j = temp.begin(); + j != temp.end(); + ++j) + { + AbstractCacheEntry *ace = *j; + + if(ace != NULL) { + if(ace->writeSet) { + // set state as invalid + ace->invalidateEntry(true); + } + + ace->writeSet = false; + ace->readSet = false; + } + } + } +} + +void +CacheMemory::commitTransaction() +{ + // std::vector > m_cache; + for (std::vector >::iterator i = 
m_cache.begin(); + i != m_cache.end(); + ++i) + { + std::vector temp = *i; + + for (std::vector::iterator j = temp.begin(); + j != temp.end(); + ++j) + { + AbstractCacheEntry *ace = *j; + if(ace != NULL) { + ace->writeSet = false; + ace->readSet = false; + } + } + } +} + +AbstractCacheEntry* +CacheMemory::getNullEntry() +{ + return NULL; +} \ No newline at end of file diff -r 878f2f30b12d src/mem/ruby/system/RubyPort.hh --- a/src/mem/ruby/system/RubyPort.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/RubyPort.hh Fri Jul 04 22:00:13 2014 -0500 @@ -82,6 +82,7 @@ MemSlavePort(const std::string &_name, RubyPort *_port, RubySystem*_system, bool _access_phys_mem, PortID id); void hitCallback(PacketPtr pkt); + void txnCallback(PacketPtr pkt, bool isImplicitAbort); void evictionCallback(const Address& address); protected: @@ -165,6 +166,7 @@ protected: void ruby_hit_callback(PacketPtr pkt); + void ruby_txn_callback(PacketPtr pkt, bool isImplicitAbort); void testDrainComplete(); void ruby_eviction_callback(const Address& address); diff -r 878f2f30b12d src/mem/ruby/system/RubyPort.cc --- a/src/mem/ruby/system/RubyPort.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/RubyPort.cc Fri Jul 04 22:00:13 2014 -0500 @@ -225,7 +225,7 @@ // Check for pio requests and directly send them to the dedicated // pio port. 
- if (!isPhysMemAddress(pkt->getAddr())) { + if (!isPhysMemAddress(pkt->getAddr()) && !pkt->req->isTxn()) { assert(ruby_port->memMasterPort.isConnected()); DPRINTF(RubyPort, "Request address %#x assumed to be a pio address\n", pkt->getAddr()); @@ -243,8 +243,10 @@ // Save the port id to be used later to route the response pkt->setSrc(id); - assert(Address(pkt->getAddr()).getOffset() + pkt->getSize() <= - RubySystem::getBlockSizeBytes()); + if(!pkt->req->isTxn()) { + assert(Address(pkt->getAddr()).getOffset() + pkt->getSize() <= + RubySystem::getBlockSizeBytes()); + } // Submit the ruby request RequestStatus requestStatus = ruby_port->makeRequest(pkt); @@ -563,3 +565,72 @@ r.pioSlavePort.sendRangeChange(); } } + +void +RubyPort::ruby_txn_callback(PacketPtr pkt, bool isImplicitAbort) +{ + DPRINTF(RubyPort, "Txn callback for %s\n", pkt->cmdString()); + + // The packet has not yet been turned into a response + assert(pkt->isRequest()); + + // As it has not yet been turned around, the source field tells us + // which port it came from. + assert(pkt->getSrc() < slave_ports.size()); + + slave_ports[pkt->getSrc()]->txnCallback(pkt, isImplicitAbort); + + // + // If we had to stall the MemSlavePorts, wake them up because the sequencer + // likely has free resources now. + // + if (!retryList.empty()) { + // + // Record the current list of ports to retry on a temporary list before + // calling sendRetry on those ports. sendRetry will cause an + // immediate retry, which may result in the ports being put back on the + // list. Therefore we want to clear the retryList before calling + // sendRetry. + // + std::vector curRetryList(retryList); + + retryList.clear(); + + for (auto i = curRetryList.begin(); i != curRetryList.end(); ++i) { + DPRINTF(RubyPort, + "Sequencer may now be free. 
SendRetry to port %s\n", + (*i)->name()); + (*i)->sendRetry(); + } + } + + testDrainComplete(); +} + +void +RubyPort::MemSlavePort::txnCallback(PacketPtr pkt, bool isImplicitAbort) +{ + bool needsResponse = pkt->needsResponse(); + + DPRINTF(RubyPort, "Txn callback: isImplicitAbort=%d, xbegin=%d, xabort=%d, xend=%d, needsResponse = %d\n", + isImplicitAbort, pkt->req->isXbegin(), pkt->req->isXabort(), pkt->req->isXend(), needsResponse); + + if (needsResponse) { + pkt->makeTxnResponse(isImplicitAbort); + } + + // turn packet around to go back to requester if response expected + if (needsResponse) { + DPRINTF(RubyPort, "Sending packet back over port\n"); + // send next cycle + schedTimingResp(pkt, curTick() + g_system_ptr->clockPeriod()); + } else { + // ADDING DOUBLE DELETE DETECTION // + bool b = pkt->req->isTxn(); + if(b) + DPRINTF(RubyPort, "Deleting packet with pointer address7: %p\n", pkt); + // ENDING DOUBLE DELETE DETECTION // + delete pkt; + } + DPRINTF(RubyPort, "Txn callback done!\n"); +} \ No newline at end of file diff -r 878f2f30b12d src/mem/ruby/system/Sequencer.hh --- a/src/mem/ruby/system/Sequencer.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/Sequencer.hh Fri Jul 04 22:00:13 2014 -0500 @@ -84,6 +84,10 @@ const Cycles forwardRequestTime = Cycles(0), const Cycles firstResponseTime = Cycles(0)); + // callback to abort a transaction + void txnCallback(const Address& address, bool isWrite, + bool isImplicitAbort, int abortCode); + RequestStatus makeRequest(PacketPtr pkt); bool empty() const; int outstandingCount() const { return m_outstanding_count; } diff -r 878f2f30b12d src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/Sequencer.cc Fri Jul 04 22:00:13 2014 -0500 @@ -629,7 +629,20 @@ if (pkt->isRead()) { if (pkt->req->isInstFetch()) { primary_type = secondary_type = RubyRequestType_IFETCH; - } else { + } + else if(pkt->req->isTxn()) { + if(pkt->req->isXbegin()) { 
+ primary_type = secondary_type = RubyRequestType_Txn_Start; + } + else if(pkt->req->isXabort()) { + primary_type = secondary_type = RubyRequestType_Txn_Abort; + } + else if(pkt->req->isXend()) { + primary_type = secondary_type = RubyRequestType_Txn_Commit; + } + + } + else { #if THE_ISA == X86_ISA uint32_t flags = pkt->req->getFlags(); bool storeCheck = flags & @@ -756,6 +769,69 @@ } void +Sequencer::txnCallback(const Address& address, bool isWrite, + bool isImplicitAbort, int abortCode) +{ + assert(address == line_address(address)); + SequencerRequest* request; + + DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n", + curTick(), m_version, "Seq", + "TxnCallBackDone", "", "", + address, 0); + + if(isWrite) { + assert(m_writeRequestTable.count(line_address(address))); + + RequestTable::iterator i = m_writeRequestTable.find(address); + assert(i != m_writeRequestTable.end()); + request = i->second; + + m_writeRequestTable.erase(i); + markRemoved(); + + // unblocking if any of the previous requests lead to blocking + if (request->m_type == RubyRequestType_Locked_RMW_Write) { + m_controller->unblock(address); + } + + assert((request->m_type == RubyRequestType_ST) || + (request->m_type == RubyRequestType_ATOMIC) || + (request->m_type == RubyRequestType_RMW_Read) || + (request->m_type == RubyRequestType_RMW_Write) || + (request->m_type == RubyRequestType_Load_Linked) || + (request->m_type == RubyRequestType_Store_Conditional) || + (request->m_type == RubyRequestType_Locked_RMW_Read) || + (request->m_type == RubyRequestType_Locked_RMW_Write) || + (request->m_type == RubyRequestType_FLUSH)); + } + else { + assert(m_readRequestTable.count(line_address(address))); + + RequestTable::iterator i = m_readRequestTable.find(address); + assert(i != m_readRequestTable.end()); + request = i->second; + + m_readRequestTable.erase(i); + markRemoved(); + + assert((request->m_type == RubyRequestType_LD) || + (request->m_type == RubyRequestType_IFETCH) || + 
(request->m_type == RubyRequestType_Txn_Start) || + (request->m_type == RubyRequestType_Txn_Commit) || + (request->m_type == RubyRequestType_Txn_Abort)); + } + + PacketPtr pkt = request->pkt; + + // copy abort code to pkt's data + uint64_t abc = abortCode; // changing to 8 bytes + memcpy(pkt->getPtr(), &abc, pkt->getSize()); + + ruby_txn_callback(pkt, isImplicitAbort); +} + +void Sequencer::regStats() { m_store_waiting_on_load diff -r 878f2f30b12d src/mem/ruby/system/TBETable.hh --- a/src/mem/ruby/system/TBETable.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/TBETable.hh Fri Jul 04 22:00:13 2014 -0500 @@ -54,6 +54,9 @@ ENTRY* lookup(const Address& address); + // addition for transaction + ENTRY* getNullEntry(); + // Print cache contents void print(std::ostream& out) const; @@ -114,6 +117,13 @@ return NULL; } +// addition for transactions +template +inline ENTRY* +TBETable::getNullEntry() +{ + return NULL; +} template inline void