diff -r 878f2f30b12d build_opts/X86_MESI_Three_Level --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build_opts/X86_MESI_Three_Level Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,3 @@ +TARGET_ISA = 'x86' +CPU_MODELS = 'TimingSimpleCPU,O3CPU,AtomicSimpleCPU' +PROTOCOL = 'MESI_Three_Level' diff -r 878f2f30b12d build_opts/X86_MESI_Txn_Three_Level --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build_opts/X86_MESI_Txn_Three_Level Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,3 @@ +TARGET_ISA = 'x86' +CPU_MODELS = 'TimingSimpleCPU,O3CPU,AtomicSimpleCPU' +PROTOCOL = 'MESI_Txn_Three_Level' diff -r 878f2f30b12d configs/common/Options.py --- a/configs/common/Options.py Wed Jul 02 13:19:13 2014 -0400 +++ b/configs/common/Options.py Fri Jul 04 22:00:13 2014 -0500 @@ -104,6 +104,10 @@ parser.add_option("--num-dirs", type="int", default=1) parser.add_option("--num-l2caches", type="int", default=1) parser.add_option("--num-l3caches", type="int", default=1) + parser.add_option("--l0d_size", type="string", default="4kB") + parser.add_option("--l0i_size", type="string", default="4kB") + parser.add_option("--l0d_assoc", type="int", default=1) + parser.add_option("--l0i_assoc", type="int", default=1) parser.add_option("--l1d_size", type="string", default="64kB") parser.add_option("--l1i_size", type="string", default="32kB") parser.add_option("--l2_size", type="string", default="2MB") diff -r 878f2f30b12d configs/ruby/MESI_Txn_Three_Level.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/configs/ruby/MESI_Txn_Three_Level.py Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,209 @@ +# Copyright (c) 2006-2007 The Regents of The University of Michigan +# Copyright (c) 2009 Advanced Micro Devices, Inc. +# Copyright (c) 2013 Mark D. Hill and David A. Wood +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Brad Beckmann +# Nilay Vaish + +import math +import m5 +from m5.objects import * +from m5.defines import buildEnv +from Ruby import create_topology + +# +# Note: the L1 Cache latency is only used by the sequencer on fast path hits +# +class L0Cache(RubyCache): + latency = 1 + +class L1Cache(RubyCache): + latency = 5 + +# +# Note: the L2 Cache latency is not currently used +# +class L2Cache(RubyCache): + latency = 15 + +def define_options(parser): + parser.add_option("--num-clusters", type="int", default=1, + help="number of clusters in a design in which there are shared\ + caches private to clusters") + return + +def create_system(options, system, dma_ports, ruby_system): + + if buildEnv['PROTOCOL'] != 'MESI_Txn_Three_Level': + fatal("This script requires the MESI_Txn_Three_Level protocol to be built.") + + cpu_sequencers = [] + + # + # The ruby network creation expects the list of nodes in the system to be + # consistent with the NetDest list. Therefore the l1 controller nodes must be + # listed before the directory nodes and directory nodes before dma nodes, etc. 
+ # + l0_cntrl_nodes = [] + l1_cntrl_nodes = [] + l2_cntrl_nodes = [] + dir_cntrl_nodes = [] + dma_cntrl_nodes = [] + + assert (options.num_cpus % options.num_clusters == 0) + num_cpus_per_cluster = options.num_cpus / options.num_clusters + + assert (options.num_l2caches % options.num_clusters == 0) + num_l2caches_per_cluster = options.num_l2caches / options.num_clusters + + l2_bits = int(math.log(num_l2caches_per_cluster, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + l2_index_start = block_size_bits + l2_bits + + # + # Must create the individual controllers before the network to ensure the + # controller constructors are called before the network constructor + # + for i in xrange(options.num_clusters): + for j in xrange(num_cpus_per_cluster): + # + # First create the Ruby objects associated with this cpu + # + l0i_cache = L0Cache(size = options.l0i_size, + assoc = options.l0i_assoc, is_icache = True, + start_index_bit = block_size_bits, replacement_policy="LRU") + + l0d_cache = L0Cache(size = options.l0d_size, + assoc = options.l0d_assoc, is_icache = False, + start_index_bit = block_size_bits, replacement_policy="LRU") + + l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j, + Icache = l0i_cache, Dcache = l0d_cache, + send_evictions = (options.cpu_type == "detailed"), + ruby_system = ruby_system) + + cpu_seq = RubySequencer(version = i, icache = l0i_cache, + dcache = l0d_cache, ruby_system = ruby_system) + + l0_cntrl.sequencer = cpu_seq + + l1_cache = L1Cache(size = options.l1d_size, assoc = options.l1d_assoc, + start_index_bit = block_size_bits, is_icache = False) + + l1_cntrl = L1Cache_Controller(version = i*num_cpus_per_cluster+j, + cache = l1_cache, l2_select_num_bits = l2_bits, + cluster_id = i, ruby_system = ruby_system) + + exec("ruby_system.l0_cntrl%d = l0_cntrl" % ( + i*num_cpus_per_cluster+j)) + exec("ruby_system.l1_cntrl%d = l1_cntrl" % ( + i*num_cpus_per_cluster+j)) + + # + # Add controllers and sequencers to the 
appropriate lists + # + cpu_sequencers.append(cpu_seq) + l0_cntrl_nodes.append(l0_cntrl) + l1_cntrl_nodes.append(l1_cntrl) + l0_cntrl.peer = l1_cntrl + + for j in xrange(num_l2caches_per_cluster): + l2_cache = L2Cache(size = options.l2_size, + assoc = options.l2_assoc, + start_index_bit = l2_index_start) + + l2_cntrl = L2Cache_Controller( + version = i * num_l2caches_per_cluster + j, + L2cache = l2_cache, cluster_id = i, + transitions_per_cycle=options.ports, + ruby_system = ruby_system) + + exec("ruby_system.l2_cntrl%d = l2_cntrl" % ( + i * num_l2caches_per_cluster + j)) + l2_cntrl_nodes.append(l2_cntrl) + + phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) + assert(phys_mem_size % options.num_dirs == 0) + mem_module_size = phys_mem_size / options.num_dirs + + # Run each of the ruby memory controllers at a ratio of the frequency of + # the ruby system + # clk_divider value is a fix to pass regression. + ruby_system.memctrl_clk_domain = DerivedClockDomain( + clk_domain=ruby_system.clk_domain, + clk_divider=3) + + for i in xrange(options.num_dirs): + # + # Create the Ruby objects associated with the directory controller + # + + mem_cntrl = RubyMemoryControl( + clk_domain = ruby_system.memctrl_clk_domain, + version = i, + ruby_system = ruby_system) + + dir_size = MemorySize('0B') + dir_size.value = mem_module_size + + dir_cntrl = Directory_Controller(version = i, + directory = \ + RubyDirectoryMemory(version = i, + size = dir_size, + use_map = + options.use_map), + memBuffer = mem_cntrl, + transitions_per_cycle = options.ports, + ruby_system = ruby_system) + + exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) + dir_cntrl_nodes.append(dir_cntrl) + + for i, dma_port in enumerate(dma_ports): + # + # Create the Ruby objects associated with the dma controller + # + dma_seq = DMASequencer(version = i, + ruby_system = ruby_system) + + dma_cntrl = DMA_Controller(version = i, + dma_sequencer = dma_seq, + transitions_per_cycle = options.ports, + ruby_system = 
ruby_system) + + exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) + exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i) + dma_cntrl_nodes.append(dma_cntrl) + + all_cntrls = l0_cntrl_nodes + \ + l1_cntrl_nodes + \ + l2_cntrl_nodes + \ + dir_cntrl_nodes + \ + dma_cntrl_nodes + + topology = create_topology(all_cntrls, options) + return (cpu_sequencers, dir_cntrl_nodes, topology) diff -r 878f2f30b12d src/arch/x86/faults.hh --- a/src/arch/x86/faults.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/faults.hh Fri Jul 04 22:00:13 2014 -0500 @@ -124,6 +124,18 @@ StaticInstPtr inst = StaticInst::nullStaticInstPtr); }; + // Base class of handling x86 transactional abort + class X86TxnAbort : public X86FaultBase + { + public: + X86TxnAbort() + : X86FaultBase("", "", 0, 0) + {} + + void invoke(ThreadContext * tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + }; + // Base class for x86 aborts which seem to be catastrophic failures. class X86Abort : public X86FaultBase { diff -r 878f2f30b12d src/arch/x86/faults.cc --- a/src/arch/x86/faults.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/faults.cc Fri Jul 04 22:00:13 2014 -0500 @@ -116,6 +116,18 @@ pc.uEnd(); } + void X86TxnAbort::invoke(ThreadContext * tc, StaticInstPtr inst) + { + PCState pcState = tc->pcState(); + + using namespace X86ISAInst::RomLabels; + MicroPC entry = extern_label_txnAbortInterrupt; + + pcState.upc(romMicroPC(entry)); + pcState.nupc(romMicroPC(entry) + 1); + tc->pcState(pcState); + } + void X86Abort::invoke(ThreadContext * tc, StaticInstPtr inst) { panic("Abort exception!"); diff -r 878f2f30b12d src/arch/x86/isa/decoder/locked_ele_opcodes.isa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/arch/x86/isa/decoder/locked_ele_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,183 @@ +// Copyright (c) 2009 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Decode opcodes with the lock prefix. Opcodes which shouldn't have that +// prefix should effectively decode to UD2. +// + +// All the memory references in these instructions happen through modrm bytes. +// We therefore only need to make sure the modrm byte encodes a memory +// reference to make sure these are the memory forms of these instructions. 
+0x1: decode MODRM_MOD { + format Inst { + 0x3: UD2(); + default: decode OPCODE_NUM { + 0x1: decode OPCODE_OP_TOP5 { + // 0x00: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::ADD_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x01: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::OR_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x02: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::ADC_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x03: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::SBB_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x04: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::AND_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x05: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::SUB_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x06: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::XOR_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x10: decode OPCODE_OP_BOTTOM3 { + // 0x0: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mb,Ib); + // 0x1: OR_LOCKED(Mb,Ib); + // 0x2: ADC_LOCKED(Mb,Ib); + // 0x3: SBB_LOCKED(Mb,Ib); + // 0x4: AND_LOCKED(Mb,Ib); + // 0x5: SUB_LOCKED(Mb,Ib); + // 0x6: XOR_LOCKED(Mb,Ib); + // } + // 0x1: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mv,Iz); + // 0x1: OR_LOCKED(Mv,Iz); + // 0x2: ADC_LOCKED(Mv,Iz); + // 0x3: SBB_LOCKED(Mv,Iz); + // 0x4: AND_LOCKED(Mv,Iz); + // 0x5: SUB_LOCKED(Mv,Iz); + // 0x6: XOR_LOCKED(Mv,Iz); + // } + // 0x2: decode MODE_SUBMODE { + // 0x0: UD2(); + // default: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mb,Ib); + // 0x1: OR_LOCKED(Mb,Ib); + // 0x2: ADC_LOCKED(Mb,Ib); + // 0x3: SBB_LOCKED(Mb,Ib); + // 0x4: AND_LOCKED(Mb,Ib); + // 0x5: SUB_LOCKED(Mb,Ib); + // 0x6: XOR_LOCKED(Mb,Ib); + // } + // } + // //0x3: group1_Ev_Ib(); + // 0x3: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mv,Ib); + // 0x1: OR_LOCKED(Mv,Ib); + // 0x2: ADC_LOCKED(Mv,Ib); + // 0x3: SBB_LOCKED(Mv,Ib); + // 0x4: AND_LOCKED(Mv,Ib); + 
// 0x5: SUB_LOCKED(Mv,Ib); + // 0x6: XOR_LOCKED(Mv,Ib); + // } + // 0x6: XCHG_LOCKED(Mb,Gb); + // 0x7: XCHG_LOCKED(Mv,Gv); + // } + // 0x1E: decode OPCODE_OP_BOTTOM3 { + // //0x6: group3_Eb(); + // 0x6: decode MODRM_REG { + // 0x2: NOT_LOCKED(Mb); + // 0x3: NEG_LOCKED(Mb); + // } + // //0x7: group3_Ev(); + // 0x7: decode MODRM_REG { + // 0x2: NOT_LOCKED(Mv); + // 0x3: NEG_LOCKED(Mv); + // } + // } + // 0x1F: decode OPCODE_OP_BOTTOM3 { + // 0x6: decode MODRM_REG { + // 0x0: INC_LOCKED(Mb); + // 0x1: DEC_LOCKED(Mb); + // default: UD2(); + // } + // //0x7: group5(); + // 0x7: decode MODRM_REG { + // 0x0: INC_LOCKED(Mv); + // 0x1: DEC_LOCKED(Mv); + // } + // } + default: M5InternalError::error( + {{"Unimpemented Elesion end!"}}); + } + 0x2: decode OPCODE_PREFIXA { + 0x0F: decode OPCODE_OP_TOP5 { + // 0x04: decode OPCODE_OP_BOTTOM3 { + // 0x0: WarnUnimpl::mov_Rd_CR8D(); + // 0x2: WarnUnimpl::mov_CR8D_Rd(); + // } + // 0x15: decode OPCODE_OP_BOTTOM3 { + // 0x3: BTS_LOCKED(Mv,Gv); + // } + // 0x16: decode OPCODE_OP_BOTTOM3 { + // 0x0: CMPXCHG_LOCKED(Mb,Gb); + // 0x1: CMPXCHG_LOCKED(Mv,Gv); + // 0x3: BTR_LOCKED(Mv,Gv); + // } + // 0x17: decode OPCODE_OP_BOTTOM3 { + // 0x2: decode MODRM_REG { + // 0x5: BTS_LOCKED(Mv,Ib); + // 0x6: BTR_LOCKED(Mv,Ib); + // 0x7: BTC_LOCKED(Mv,Ib); + // } + // 0x3: BTC_LOCKED(Mv,Gv); + // } + 0x18: decode OPCODE_OP_BOTTOM3 { + 0x0: XADD_LOCK_ELE(Mb,Gb); + 0x1: XADD_LOCK_ELE(Mv,Gv); + //0x7: group9(); + // 0x7: decode MODRM_REG { + // //Also CMPXCHG16B + // 0x1: CMPXCHG8B_LOCKED(Mdp); + // } + default: M5InternalError::error( + {{"Unimplemented Elesion end!"}}); + } + default: M5InternalError::error( + {{"Unimplemented Elesion end!"}}); + } + default: M5InternalError::error( + {{"Unexpected first opcode byte in two byte opcode!"}}); + } + } + } +} diff -r 878f2f30b12d src/arch/x86/isa/decoder/locked_els_opcodes.isa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/arch/x86/isa/decoder/locked_els_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ 
-0,0 +1,183 @@ +// Copyright (c) 2009 The Regents of The University of Michigan +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Decode opcodes with the lock prefix. Opcodes which shouldn't have that +// prefix should effectively decode to UD2. +// + +// All the memory references in these instructions happen through modrm bytes. 
+// We therefore only need to make sure the modrm byte encodes a memory +// reference to make sure these are the memory forms of these instructions. +0x1: decode MODRM_MOD { + format Inst { + 0x3: UD2(); + default: decode OPCODE_NUM { + 0x1: decode OPCODE_OP_TOP5 { + // 0x00: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::ADD_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x01: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::OR_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x02: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::ADC_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x03: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::SBB_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x04: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::AND_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x05: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::SUB_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x06: decode OPCODE_OP_BOTTOM3 { + // default: MultiInst::XOR_LOCKED(OPCODE_OP_BOTTOM3, + // [Mb,Gb], [Mv,Gv]); + // } + // 0x10: decode OPCODE_OP_BOTTOM3 { + // 0x0: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mb,Ib); + // 0x1: OR_LOCKED(Mb,Ib); + // 0x2: ADC_LOCKED(Mb,Ib); + // 0x3: SBB_LOCKED(Mb,Ib); + // 0x4: AND_LOCKED(Mb,Ib); + // 0x5: SUB_LOCKED(Mb,Ib); + // 0x6: XOR_LOCKED(Mb,Ib); + // } + // 0x1: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mv,Iz); + // 0x1: OR_LOCKED(Mv,Iz); + // 0x2: ADC_LOCKED(Mv,Iz); + // 0x3: SBB_LOCKED(Mv,Iz); + // 0x4: AND_LOCKED(Mv,Iz); + // 0x5: SUB_LOCKED(Mv,Iz); + // 0x6: XOR_LOCKED(Mv,Iz); + // } + // 0x2: decode MODE_SUBMODE { + // 0x0: UD2(); + // default: decode MODRM_REG { + // 0x0: ADD_LOCKED(Mb,Ib); + // 0x1: OR_LOCKED(Mb,Ib); + // 0x2: ADC_LOCKED(Mb,Ib); + // 0x3: SBB_LOCKED(Mb,Ib); + // 0x4: AND_LOCKED(Mb,Ib); + // 0x5: SUB_LOCKED(Mb,Ib); + // 0x6: XOR_LOCKED(Mb,Ib); + // } + // } + // //0x3: group1_Ev_Ib(); + // 0x3: decode MODRM_REG 
{ + // 0x0: ADD_LOCKED(Mv,Ib); + // 0x1: OR_LOCKED(Mv,Ib); + // 0x2: ADC_LOCKED(Mv,Ib); + // 0x3: SBB_LOCKED(Mv,Ib); + // 0x4: AND_LOCKED(Mv,Ib); + // 0x5: SUB_LOCKED(Mv,Ib); + // 0x6: XOR_LOCKED(Mv,Ib); + // } + // 0x6: XCHG_LOCKED(Mb,Gb); + // 0x7: XCHG_LOCKED(Mv,Gv); + // } + // 0x1E: decode OPCODE_OP_BOTTOM3 { + // //0x6: group3_Eb(); + // 0x6: decode MODRM_REG { + // 0x2: NOT_LOCKED(Mb); + // 0x3: NEG_LOCKED(Mb); + // } + // //0x7: group3_Ev(); + // 0x7: decode MODRM_REG { + // 0x2: NOT_LOCKED(Mv); + // 0x3: NEG_LOCKED(Mv); + // } + // } + // 0x1F: decode OPCODE_OP_BOTTOM3 { + // 0x6: decode MODRM_REG { + // 0x0: INC_LOCKED(Mb); + // 0x1: DEC_LOCKED(Mb); + // default: UD2(); + // } + // //0x7: group5(); + // 0x7: decode MODRM_REG { + // 0x0: INC_LOCKED(Mv); + // 0x1: DEC_LOCKED(Mv); + // } + // } + default: M5InternalError::error( + {{"Unimplemented Elesion start!"}}); + } + 0x2: decode OPCODE_PREFIXA { + 0x0F: decode OPCODE_OP_TOP5 { + // 0x04: decode OPCODE_OP_BOTTOM3 { + // 0x0: WarnUnimpl::mov_Rd_CR8D(); + // 0x2: WarnUnimpl::mov_CR8D_Rd(); + // } + // 0x15: decode OPCODE_OP_BOTTOM3 { + // 0x3: BTS_LOCKED(Mv,Gv); + // } + // 0x16: decode OPCODE_OP_BOTTOM3 { + // 0x0: CMPXCHG_LOCKED(Mb,Gb); + // 0x1: CMPXCHG_LOCKED(Mv,Gv); + // 0x3: BTR_LOCKED(Mv,Gv); + // } + // 0x17: decode OPCODE_OP_BOTTOM3 { + // 0x2: decode MODRM_REG { + // 0x5: BTS_LOCKED(Mv,Ib); + // 0x6: BTR_LOCKED(Mv,Ib); + // 0x7: BTC_LOCKED(Mv,Ib); + // } + // 0x3: BTC_LOCKED(Mv,Gv); + // } + 0x18: decode OPCODE_OP_BOTTOM3 { + 0x0: XADD_LOCK_ELB(Mb,Gb); + 0x1: XADD_LOCK_ELB(Mv,Gv); + //0x7: group9(); + // 0x7: decode MODRM_REG { + // //Also CMPXCHG16B + // 0x1: CMPXCHG8B_LOCKED(Mdp); + // } + default: M5InternalError::error( + {{"Unimplemented Elesion start!"}}); + } + default: M5InternalError::error( + {{"Unimplemented Elesion start!"}}); + } + default: M5InternalError::error( + {{"Unexpected first opcode byte in two byte opcode!"}}); + } + } + } +} diff -r 878f2f30b12d 
src/arch/x86/isa/decoder/locked_nel_opcodes.isa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/arch/x86/isa/decoder/locked_nel_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,177 @@ +// Copyright (c) 2009 The Regents of The University of Michigan +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Decode opcodes with the lock prefix. 
Opcodes which shouldn't have that +// prefix should effectively decode to UD2. +// + +// All the memory references in these instructions happen through modrm bytes. +// We therefore only need to make sure the modrm byte encodes a memory +// reference to make sure these are the memory forms of these instructions. +0x0: decode MODRM_MOD { + format Inst { + 0x3: UD2(); + default: decode OPCODE_NUM { + 0x1: decode OPCODE_OP_TOP5 { + 0x00: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::ADD_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x01: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::OR_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x02: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::ADC_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x03: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::SBB_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x04: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::AND_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x05: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::SUB_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x06: decode OPCODE_OP_BOTTOM3 { + default: MultiInst::XOR_LOCKED(OPCODE_OP_BOTTOM3, + [Mb,Gb], [Mv,Gv]); + } + 0x10: decode OPCODE_OP_BOTTOM3 { + 0x0: decode MODRM_REG { + 0x0: ADD_LOCKED(Mb,Ib); + 0x1: OR_LOCKED(Mb,Ib); + 0x2: ADC_LOCKED(Mb,Ib); + 0x3: SBB_LOCKED(Mb,Ib); + 0x4: AND_LOCKED(Mb,Ib); + 0x5: SUB_LOCKED(Mb,Ib); + 0x6: XOR_LOCKED(Mb,Ib); + } + 0x1: decode MODRM_REG { + 0x0: ADD_LOCKED(Mv,Iz); + 0x1: OR_LOCKED(Mv,Iz); + 0x2: ADC_LOCKED(Mv,Iz); + 0x3: SBB_LOCKED(Mv,Iz); + 0x4: AND_LOCKED(Mv,Iz); + 0x5: SUB_LOCKED(Mv,Iz); + 0x6: XOR_LOCKED(Mv,Iz); + } + 0x2: decode MODE_SUBMODE { + 0x0: UD2(); + default: decode MODRM_REG { + 0x0: ADD_LOCKED(Mb,Ib); + 0x1: OR_LOCKED(Mb,Ib); + 0x2: ADC_LOCKED(Mb,Ib); + 0x3: SBB_LOCKED(Mb,Ib); + 0x4: AND_LOCKED(Mb,Ib); + 0x5: SUB_LOCKED(Mb,Ib); + 0x6: XOR_LOCKED(Mb,Ib); + } + } + //0x3: group1_Ev_Ib(); + 0x3: decode MODRM_REG { + 0x0: 
ADD_LOCKED(Mv,Ib); + 0x1: OR_LOCKED(Mv,Ib); + 0x2: ADC_LOCKED(Mv,Ib); + 0x3: SBB_LOCKED(Mv,Ib); + 0x4: AND_LOCKED(Mv,Ib); + 0x5: SUB_LOCKED(Mv,Ib); + 0x6: XOR_LOCKED(Mv,Ib); + } + 0x6: XCHG_LOCKED(Mb,Gb); + 0x7: XCHG_LOCKED(Mv,Gv); + } + 0x1E: decode OPCODE_OP_BOTTOM3 { + //0x6: group3_Eb(); + 0x6: decode MODRM_REG { + 0x2: NOT_LOCKED(Mb); + 0x3: NEG_LOCKED(Mb); + } + //0x7: group3_Ev(); + 0x7: decode MODRM_REG { + 0x2: NOT_LOCKED(Mv); + 0x3: NEG_LOCKED(Mv); + } + } + 0x1F: decode OPCODE_OP_BOTTOM3 { + 0x6: decode MODRM_REG { + 0x0: INC_LOCKED(Mb); + 0x1: DEC_LOCKED(Mb); + default: UD2(); + } + //0x7: group5(); + 0x7: decode MODRM_REG { + 0x0: INC_LOCKED(Mv); + 0x1: DEC_LOCKED(Mv); + } + } + } + 0x2: decode OPCODE_PREFIXA { + 0x0F: decode OPCODE_OP_TOP5 { + 0x04: decode OPCODE_OP_BOTTOM3 { + 0x0: WarnUnimpl::mov_Rd_CR8D(); + 0x2: WarnUnimpl::mov_CR8D_Rd(); + } + 0x15: decode OPCODE_OP_BOTTOM3 { + 0x3: BTS_LOCKED(Mv,Gv); + } + 0x16: decode OPCODE_OP_BOTTOM3 { + 0x0: CMPXCHG_LOCKED(Mb,Gb); + 0x1: CMPXCHG_LOCKED(Mv,Gv); + 0x3: BTR_LOCKED(Mv,Gv); + } + 0x17: decode OPCODE_OP_BOTTOM3 { + 0x2: decode MODRM_REG { + 0x5: BTS_LOCKED(Mv,Ib); + 0x6: BTR_LOCKED(Mv,Ib); + 0x7: BTC_LOCKED(Mv,Ib); + } + 0x3: BTC_LOCKED(Mv,Gv); + } + 0x18: decode OPCODE_OP_BOTTOM3 { + 0x0: XADD_LOCKED(Mb,Gb); + 0x1: XADD_LOCKED(Mv,Gv); + //0x7: group9(); + 0x7: decode MODRM_REG { + //Also CMPXCHG16B + 0x1: CMPXCHG8B_LOCKED(Mdp); + } + } + } + default: M5InternalError::error( + {{"Unexpected first opcode byte in two byte opcode!"}}); + } + } + } +} diff -r 878f2f30b12d src/arch/x86/isa/decoder/locked_opcodes.isa --- a/src/arch/x86/isa/decoder/locked_opcodes.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/decoder/locked_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -35,143 +35,17 @@ // All the memory references in these instructions happen through modrm bytes. 
// We therefore only need to make sure the modrm byte encodes a memory // reference to make sure these are the memory forms of these instructions. -0x1: decode MODRM_MOD { - format Inst { - 0x3: UD2(); - default: decode OPCODE_NUM { - 0x1: decode OPCODE_OP_TOP5 { - 0x00: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::ADD_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x01: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::OR_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x02: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::ADC_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x03: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::SBB_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x04: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::AND_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x05: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::SUB_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x06: decode OPCODE_OP_BOTTOM3 { - default: MultiInst::XOR_LOCKED(OPCODE_OP_BOTTOM3, - [Mb,Gb], [Mv,Gv]); - } - 0x10: decode OPCODE_OP_BOTTOM3 { - 0x0: decode MODRM_REG { - 0x0: ADD_LOCKED(Mb,Ib); - 0x1: OR_LOCKED(Mb,Ib); - 0x2: ADC_LOCKED(Mb,Ib); - 0x3: SBB_LOCKED(Mb,Ib); - 0x4: AND_LOCKED(Mb,Ib); - 0x5: SUB_LOCKED(Mb,Ib); - 0x6: XOR_LOCKED(Mb,Ib); - } - 0x1: decode MODRM_REG { - 0x0: ADD_LOCKED(Mv,Iz); - 0x1: OR_LOCKED(Mv,Iz); - 0x2: ADC_LOCKED(Mv,Iz); - 0x3: SBB_LOCKED(Mv,Iz); - 0x4: AND_LOCKED(Mv,Iz); - 0x5: SUB_LOCKED(Mv,Iz); - 0x6: XOR_LOCKED(Mv,Iz); - } - 0x2: decode MODE_SUBMODE { - 0x0: UD2(); - default: decode MODRM_REG { - 0x0: ADD_LOCKED(Mb,Ib); - 0x1: OR_LOCKED(Mb,Ib); - 0x2: ADC_LOCKED(Mb,Ib); - 0x3: SBB_LOCKED(Mb,Ib); - 0x4: AND_LOCKED(Mb,Ib); - 0x5: SUB_LOCKED(Mb,Ib); - 0x6: XOR_LOCKED(Mb,Ib); - } - } - //0x3: group1_Ev_Ib(); - 0x3: decode MODRM_REG { - 0x0: ADD_LOCKED(Mv,Ib); - 0x1: OR_LOCKED(Mv,Ib); - 0x2: ADC_LOCKED(Mv,Ib); - 0x3: SBB_LOCKED(Mv,Ib); - 0x4: AND_LOCKED(Mv,Ib); - 0x5: SUB_LOCKED(Mv,Ib); - 0x6: XOR_LOCKED(Mv,Ib); - 
} - 0x6: XCHG_LOCKED(Mb,Gb); - 0x7: XCHG_LOCKED(Mv,Gv); - } - 0x1E: decode OPCODE_OP_BOTTOM3 { - //0x6: group3_Eb(); - 0x6: decode MODRM_REG { - 0x2: NOT_LOCKED(Mb); - 0x3: NEG_LOCKED(Mb); - } - //0x7: group3_Ev(); - 0x7: decode MODRM_REG { - 0x2: NOT_LOCKED(Mv); - 0x3: NEG_LOCKED(Mv); - } - } - 0x1F: decode OPCODE_OP_BOTTOM3 { - 0x6: decode MODRM_REG { - 0x0: INC_LOCKED(Mb); - 0x1: DEC_LOCKED(Mb); - default: UD2(); - } - //0x7: group5(); - 0x7: decode MODRM_REG { - 0x0: INC_LOCKED(Mv); - 0x1: DEC_LOCKED(Mv); - } - } - } - 0x2: decode OPCODE_PREFIXA { - 0x0F: decode OPCODE_OP_TOP5 { - 0x04: decode OPCODE_OP_BOTTOM3 { - 0x0: WarnUnimpl::mov_Rd_CR8D(); - 0x2: WarnUnimpl::mov_CR8D_Rd(); - } - 0x15: decode OPCODE_OP_BOTTOM3 { - 0x3: BTS_LOCKED(Mv,Gv); - } - 0x16: decode OPCODE_OP_BOTTOM3 { - 0x0: CMPXCHG_LOCKED(Mb,Gb); - 0x1: CMPXCHG_LOCKED(Mv,Gv); - 0x3: BTR_LOCKED(Mv,Gv); - } - 0x17: decode OPCODE_OP_BOTTOM3 { - 0x2: decode MODRM_REG { - 0x5: BTS_LOCKED(Mv,Ib); - 0x6: BTR_LOCKED(Mv,Ib); - 0x7: BTC_LOCKED(Mv,Ib); - } - 0x3: BTC_LOCKED(Mv,Gv); - } - 0x18: decode OPCODE_OP_BOTTOM3 { - 0x0: XADD_LOCKED(Mb,Gb); - 0x1: XADD_LOCKED(Mv,Gv); - //0x7: group9(); - 0x7: decode MODRM_REG { - //Also CMPXCHG16B - 0x1: CMPXCHG8B_LOCKED(Mdp); - } - } - } - default: M5InternalError::error( - {{"Unexpected first opcode byte in two byte opcode!"}}); - } - } +0x1: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + // non elided lock + ##include "locked_nel_opcodes.isa" + // ellided lock - start transaction + ##include "locked_els_opcodes.isa" + //0x1: M5InternalError::error({{"TxnStart!"}}); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); } -} + // ellided lock - end transaction + ##include "locked_ele_opcodes.isa" + //0x1: M5InternalError::error({{"TxnEnd!"}}); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); +} \ No newline at end of file diff -r 878f2f30b12d src/arch/x86/isa/decoder/one_byte_opcodes.isa --- 
a/src/arch/x86/isa/decoder/one_byte_opcodes.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -266,12 +266,38 @@ } 0x4: TEST(Eb,Gb); 0x5: TEST(Ev,Gv); - 0x6: XCHG(Eb,Gb); - 0x7: XCHG(Ev,Gv); + 0x6: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: XCHG(Eb,Gb); + 0x1: XCHG_ELB(Eb,Gb); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: XCHG_ELE(Eb,Gb); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x7: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: XCHG(Ev,Gv); + 0x1: XCHG_ELB(Ev,Gv); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: XCHG_ELE(Ev,Gv); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } } 0x11: decode OPCODE_OP_BOTTOM3 { - 0x0: MOV(Eb,Gb); - 0x1: MOV(Ev,Gv); + 0x0: decode LEGACY_REP { + 0x0: MOV(Eb,Gb); + default: M5InternalError::error({{"XRelease on MOV!"}}); + //0x1: MOV_ELE(Eb,Gb); + //default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x1: decode LEGACY_REP { + 0x0: MOV(Ev,Gv); + default: M5InternalError::error({{"XRelease on MOV!"}}); + //0x1: MOV_ELE(Ev,Gv); + //default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } 0x2: MOV(Gb,Eb); 0x3: MOV(Gv,Ev); 0x4: decode MODRM_REG { @@ -297,7 +323,15 @@ } 0x12: decode OPCODE_OP_BOTTOM3 { 0x0: NopInst::NOP(); //XXX repe makes this a "pause" - default: XCHG(Bv,rAv); + default: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: XCHG(Bv,rAv); + 0x1: XCHG_ELB(Bv,rAv); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: XCHG_ELE(Bv,rAv); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } } 0x13: decode OPCODE_OP_BOTTOM3 { 0x0: CDQE(rAv); @@ -335,8 +369,16 @@ 0x6: StringTestInst::SCAS(Yb); 0x7: StringTestInst::SCAS(Yv); } - 0x16: MOV(Bb,Ib); - 0x17: MOV(Bv,Iv); + 0x16: decode LEGACY_REP { + 0x0: MOV(Bb,Ib); + 0x1: MOV_ELE(Bb,Ib); + default: 
M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x17: decode LEGACY_REP { + 0x0: MOV(Bv,Iv); + 0x1: MOV_ELE(Bv,Iv); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } 0x18: decode OPCODE_OP_BOTTOM3 { //0x0: group2_Eb_Ib(); 0x0: decode MODRM_REG { @@ -372,12 +414,22 @@ } //0x6: group12_Eb_Ib(); 0x6: decode MODRM_REG { - 0x0: MOV(Eb,Ib); + 0x0: decode LEGACY_REP { + 0x0: MOV(Eb,Ib); + 0x1: MOV_ELE(Eb,Ib); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x7: XABORT(Ib); default: UD2(); } //0x7: group12_Ev_Iz(); 0x7: decode MODRM_REG { - 0x0: MOV(Ev,Iz); + 0x0: decode LEGACY_REP { + 0x0: MOV(Ev,Iz); + 0x1: MOV_ELE(Ev,Iz); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x7: XBEGIN(Jz); default: UD2(); } } diff -r 878f2f30b12d src/arch/x86/isa/decoder/two_byte_opcodes.isa --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa Fri Jul 04 22:00:13 2014 -0500 @@ -81,6 +81,7 @@ 0x3: decode MODRM_RM { 0x0: xgetbv(); 0x1: xsetbv(); + 0x5: Inst::XEND(); } default: decode MODE_SUBMODE { 0x0: Inst::LGDT(M); @@ -784,8 +785,24 @@ } format Inst { 0x16: decode OPCODE_OP_BOTTOM3 { - 0x0: CMPXCHG(Eb,Gb); - 0x1: CMPXCHG(Ev,Gv); + 0x0: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: CMPXCHG(Eb,Gb); + 0x1: CMPXCHG_ELB(Eb,Gb); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: CMPXCHG_ELE(Eb,Gb); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } + 0x1: decode LEGACY_REP { + 0x0: decode LEGACY_REPNE { + 0x0: CMPXCHG(Ev,Gv); + 0x1: CMPXCHG_ELB(Ev,Gv); + default: M5InternalError::error({{"REPNE should be 0 or 1!"}}); + } + 0x1: CMPXCHG_ELE(Ev,Gv); + default: M5InternalError::error({{"REP should be 0 or 1!"}}); + } 0x2: WarnUnimpl::lss_Gz_Mp(); 0x3: BTR(Ev,Gv); 0x4: WarnUnimpl::lfs_Gz_Mp(); diff -r 878f2f30b12d src/arch/x86/isa/insts/general_purpose/data_transfer/move.py --- 
a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py Fri Jul 04 22:00:13 2014 -0500 @@ -370,8 +370,244 @@ stfp xmml, seg, riprel, disp, dataSize=dsz }; +def macroop XBEGIN_I { + # Make the default data size of pops 64 bits in 64 bit mode + #.adjust_env oszIn64Override + + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + + mfence + + wrval "InstRegIndex(MISCREG_TRAX)", rax, dataSize=8 + wrval "InstRegIndex(MISCREG_TRCX)", rcx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDX)", rdx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBX)", rbx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSP)", rsp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBP)", rbp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSI)", rsi, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDI)", rdi, dataSize=8 + + wrval "InstRegIndex(MISCREG_TR8)", r8, dataSize=8 + wrval "InstRegIndex(MISCREG_TR9)", r9, dataSize=8 + wrval "InstRegIndex(MISCREG_TR10)", r10, dataSize=8 + wrval "InstRegIndex(MISCREG_TR11)", r11, dataSize=8 + wrval "InstRegIndex(MISCREG_TR12)", r12, dataSize=8 + wrval "InstRegIndex(MISCREG_TR13)", r13, dataSize=8 + wrval "InstRegIndex(MISCREG_TR14)", r14, dataSize=8 + wrval "InstRegIndex(MISCREG_TR15)", r15, dataSize=8 + + rdip t3, dataSize=8 + limm t2, imm, dataSize=8 + + wrval "InstRegIndex(MISCREG_TRIP)", t3, dataSize=8 + wrval "InstRegIndex(MISCREG_TIMM)", t2, dataSize=8 + + xor t1, t1, t1, dataSize=8 + wrval "InstRegIndex(MISCREG_THLE)", t1, dataSize=8 + + xbegin +end: + fault "NoFault" +}; + +def macroop XABORT_I { + # Make the default data size of pops 64 bits in 64 bit mode + #.adjust_env oszIn64Override + + mfence + xabort + + rdval rax, "InstRegIndex(MISCREG_TRAX)", dataSize=8 + rdval rcx, "InstRegIndex(MISCREG_TRCX)", 
dataSize=8 + rdval rdx, "InstRegIndex(MISCREG_TRDX)", dataSize=8 + rdval rbx, "InstRegIndex(MISCREG_TRBX)", dataSize=8 + rdval rsp, "InstRegIndex(MISCREG_TRSP)", dataSize=8 + rdval rbp, "InstRegIndex(MISCREG_TRBP)", dataSize=8 + rdval rsi, "InstRegIndex(MISCREG_TRSI)", dataSize=8 + rdval rdi, "InstRegIndex(MISCREG_TRDI)", dataSize=8 + + rdval r8, "InstRegIndex(MISCREG_TR8)", dataSize=8 + rdval r9, "InstRegIndex(MISCREG_TR9)", dataSize=8 + rdval r10, "InstRegIndex(MISCREG_TR10)", dataSize=8 + rdval r11, "InstRegIndex(MISCREG_TR11)", dataSize=8 + rdval r12, "InstRegIndex(MISCREG_TR12)", dataSize=8 + rdval r13, "InstRegIndex(MISCREG_TR13)", dataSize=8 + rdval r14, "InstRegIndex(MISCREG_TR14)", dataSize=8 + rdval r15, "InstRegIndex(MISCREG_TR15)", dataSize=8 + + rdval t3, "InstRegIndex(MISCREG_TRIP)", dataSize=8 + rdval t2, "InstRegIndex(MISCREG_TIMM)", dataSize=8 + + #check if hle - do not write rax if it is + rdval t1, "InstRegIndex(MISCREG_THLE)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("skip_abort_code"), flags=(nCEZF,) + + limm rax, imm, dataSize=8 + br label("end_abort_code") + +skip_abort_code: + # set HLE abort register + limm t1, 1, dataSize=8 + wrval "InstRegIndex(MISCREG_THABRT)", t1, dataSize=8 + +end_abort_code: + wrip t3, t2, dataSize=8 + limm t0, 0, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t0, dataSize=8 +}; + +def rom +{ + extern txnAbortInterrupt: + + mfence + xabort + + rdval rax, "InstRegIndex(MISCREG_TRAX)", dataSize=8 + rdval rcx, "InstRegIndex(MISCREG_TRCX)", dataSize=8 + rdval rdx, "InstRegIndex(MISCREG_TRDX)", dataSize=8 + rdval rbx, "InstRegIndex(MISCREG_TRBX)", dataSize=8 + rdval rsp, "InstRegIndex(MISCREG_TRSP)", dataSize=8 + rdval rbp, "InstRegIndex(MISCREG_TRBP)", dataSize=8 + rdval rsi, "InstRegIndex(MISCREG_TRSI)", dataSize=8 + rdval rdi, "InstRegIndex(MISCREG_TRDI)", dataSize=8 + + rdval r8, "InstRegIndex(MISCREG_TR8)", dataSize=8 + rdval r9, "InstRegIndex(MISCREG_TR9)", dataSize=8 + rdval r10, 
"InstRegIndex(MISCREG_TR10)", dataSize=8 + rdval r11, "InstRegIndex(MISCREG_TR11)", dataSize=8 + rdval r12, "InstRegIndex(MISCREG_TR12)", dataSize=8 + rdval r13, "InstRegIndex(MISCREG_TR13)", dataSize=8 + rdval r14, "InstRegIndex(MISCREG_TR14)", dataSize=8 + rdval r15, "InstRegIndex(MISCREG_TR15)", dataSize=8 + + rdval t3, "InstRegIndex(MISCREG_TRIP)", dataSize=8 + rdval t2, "InstRegIndex(MISCREG_TIMM)", dataSize=8 + + #check if xbegin by hle - do not write rax if it is + rdval t1, "InstRegIndex(MISCREG_THLE)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br rom_local_label("skip_abort_code"), flags=(nCEZF,) + + rdval rax, "InstRegIndex(MISCREG_TRET)", dataSize=8 + br rom_local_label("end_abort_code") + +skip_abort_code: + # set HLE abort register + limm t1, 1, dataSize=8 + wrval "InstRegIndex(MISCREG_THABRT)", t1, dataSize=8 + +end_abort_code: + wrip t3, t2, dataSize=8 + limm t0, 0, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t0, dataSize=8 + eret +}; + +def macroop XEND { + # Make the default data size of pops 64 bits in 64 bit mode + #.adjust_env oszIn64Override + + mfence + + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + + subi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + + and t0, t1, t1, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + + xend + +end: + fault "NoFault" +}; + ''' + +eleCode = ''' +def macroop %(instr)s_ELE_%(suffix)s +{ + %(rdip)s + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("commit"), flags=(nCEZF,) + + # lock was not elided + %(rellock)s + br label("end") + +commit: + %(eldlock)s + mfence + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + subi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + xend + +end: + fault "NoFault" +}; +''' + +################################## +# XRelease 
implementations +################################## + +microcode += eleCode % { + "instr": "MOV", + "suffix": "M_I", + "rdip": "", + "eldlock": ''' + limm t1, imm + stele t1, seg, sib, disp + ''', + "rellock": ''' + limm t1, imm + st t1, seg, sib, disp + ''' +} + +microcode += eleCode % { + "instr": "MOV", + "suffix": "P_I", + "rdip": "rdip t7", + "eldlock": ''' + limm t1, imm + stele t1, seg, riprel, disp + ''', + "rellock": ''' + limm t1, imm + st t1, seg, riprel, disp + ''' +} + +microcode += eleCode % { + "instr": "MOV", + "suffix": "R_I", + "rdip": "", + "eldlock": ''' + limm reg, imm + ''', + "rellock": ''' + limm reg, imm + ''' +} + +################################## +# End XRelease implementations +################################## + #let {{ # class MOVD(Inst): # "GenFault ${new UnimpInstFault}" -#}}; +#}}; \ No newline at end of file diff -r 878f2f30b12d src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py Fri Jul 04 22:00:13 2014 -0500 @@ -105,3 +105,320 @@ mov reg, reg, t1 }; ''' +elbCode = ''' +def macroop %(instr)s_ELB_%(suffix)s +{ + %(rdip)s + mfence + + # check if call after abort + rdval t1, "InstRegIndex(MISCREG_THABRT)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("acqlck"), flags=(nCEZF,) + + # start a transaction + rdval t4, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t4, t4, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + + mfence + + wrval "InstRegIndex(MISCREG_TRAX)", rax, dataSize=8 + wrval "InstRegIndex(MISCREG_TRCX)", rcx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDX)", rdx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBX)", rbx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSP)", rsp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBP)", rbp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSI)", rsi, dataSize=8 + wrval 
"InstRegIndex(MISCREG_TRDI)", rdi, dataSize=8 + + wrval "InstRegIndex(MISCREG_TR8)", r8, dataSize=8 + wrval "InstRegIndex(MISCREG_TR9)", r9, dataSize=8 + wrval "InstRegIndex(MISCREG_TR10)", r10, dataSize=8 + wrval "InstRegIndex(MISCREG_TR11)", r11, dataSize=8 + wrval "InstRegIndex(MISCREG_TR12)", r12, dataSize=8 + wrval "InstRegIndex(MISCREG_TR13)", r13, dataSize=8 + wrval "InstRegIndex(MISCREG_TR14)", r14, dataSize=8 + wrval "InstRegIndex(MISCREG_TR15)", r15, dataSize=8 + + rdip t3, dataSize=8 + limm t2, -5, dataSize=8 + + wrval "InstRegIndex(MISCREG_TRIP)", t3, dataSize=8 + wrval "InstRegIndex(MISCREG_TIMM)", t2, dataSize=8 + + xor t1, t1, t1, dataSize=8 + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_THLE)", t1, dataSize=8 + + xbegin + +end: + addi t4, t4, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t4, dataSize=8 + %(eldlock)s + br label("end2") + +# if call after abort, get lock +acqlck: + # increment unelided xacquire + rdval t1, "InstRegIndex(MISCREG_TNELXQ)", dataSize=8 + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TNELXQ)", t1, dataSize=8 + %(getlock)s + +end2: + fault "NoFault" +}; +''' + +################################## +# XAcquire implementations +################################## + +microcode += elbCode % { + "instr": "XCHG", + "suffix": "R_M", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + stelo t1, seg, sib, disp + stelb reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, sib, disp + stul reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XCHG", + "suffix": "R_P", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + stelo t1, seg, riprel, disp + stelb reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, riprel, disp + stul reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XCHG", 
+ "suffix": "M_R", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + stelo t1, seg, sib, disp + stelb reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, sib, disp + stul reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XCHG", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + stelo t1, seg, riprel, disp + stelb reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, riprel, disp + stul reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XCHG", + "suffix": "R_R", + "rdip": "", + "eldlock": ''' + xor reg, reg, regm + xor regm, regm, reg + xor reg, reg, regm + ''', + "getlock": ''' + xor reg, reg, regm + xor regm, regm, reg + xor reg, reg, regm + ''' +} + +################################## +# End XAcquire implementations +################################## + +eleCode = ''' +def macroop %(instr)s_ELE_%(suffix)s +{ + %(rdip)s + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("commit"), flags=(nCEZF,) + + # lock was not elided + %(rellock)s + + #decrement unelided xacquire + rdval t1, "InstRegIndex(MISCREG_TNELXQ)", dataSize=8 + subi t1, t1, 0x1, flags=(EZF,), dataSize=8 + br label("skip"), flags=(nCEZF,) + + #set abort to 0 if unelided xacquire is 0 + xor t2, t2, t2, dataSize=8 + wrval "InstRegIndex(MISCREG_THABRT)", t2, dataSize=8 + +skip: + wrval "InstRegIndex(MISCREG_TNELXQ)", t1, dataSize=8 + br label("end") + +commit: + %(eldlock)s + mfence + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + subi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + xend + +end: + fault "NoFault" +}; +''' + +################################## +# XRelease 
implementations +################################## + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "R_M", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + stele reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, sib, disp + stul reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "R_P", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + stele reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, riprel, disp + stul reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + stele reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, sib, disp + stul reg, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + stele reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, riprel, disp + stul reg, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XCHG", + "suffix": "R_R", + "rdip": "", + "eldlock": ''' + xor reg, reg, regm + xor regm, regm, reg + xor reg, reg, regm + ''', + "rellock": ''' + xor reg, reg, regm + xor regm, regm, reg + xor reg, reg, regm + ''' +} + +################################## +# End XRelease implementations +################################## \ No newline at end of file diff -r 878f2f30b12d src/arch/x86/isa/insts/general_purpose/semaphores.py --- a/src/arch/x86/isa/insts/general_purpose/semaphores.py Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/insts/general_purpose/semaphores.py Fri Jul 04 22:00:13 2014 -0500 @@ -165,6 +165,332 @@ 
"l": "l", "ul": "ul", "suffix": "LOCKED_P"} +elbCode = ''' +def macroop %(instr)s_ELB_%(suffix)s +{ + %(rdip)s + mfence + + # check if call after abort + rdval t1, "InstRegIndex(MISCREG_THABRT)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("acqlck"), flags=(nCEZF,) + + # start a transaction + rdval t4, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t4, t4, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + + mfence + + wrval "InstRegIndex(MISCREG_TRAX)", rax, dataSize=8 + wrval "InstRegIndex(MISCREG_TRCX)", rcx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDX)", rdx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBX)", rbx, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSP)", rsp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRBP)", rbp, dataSize=8 + wrval "InstRegIndex(MISCREG_TRSI)", rsi, dataSize=8 + wrval "InstRegIndex(MISCREG_TRDI)", rdi, dataSize=8 + + wrval "InstRegIndex(MISCREG_TR8)", r8, dataSize=8 + wrval "InstRegIndex(MISCREG_TR9)", r9, dataSize=8 + wrval "InstRegIndex(MISCREG_TR10)", r10, dataSize=8 + wrval "InstRegIndex(MISCREG_TR11)", r11, dataSize=8 + wrval "InstRegIndex(MISCREG_TR12)", r12, dataSize=8 + wrval "InstRegIndex(MISCREG_TR13)", r13, dataSize=8 + wrval "InstRegIndex(MISCREG_TR14)", r14, dataSize=8 + wrval "InstRegIndex(MISCREG_TR15)", r15, dataSize=8 + + rdip t3, dataSize=8 + limm t2, -5, dataSize=8 + + wrval "InstRegIndex(MISCREG_TRIP)", t3, dataSize=8 + wrval "InstRegIndex(MISCREG_TIMM)", t2, dataSize=8 + + xor t1, t1, t1, dataSize=8 + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_THLE)", t1, dataSize=8 + + xbegin + +end: + addi t4, t4, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t4, dataSize=8 + %(eldlock)s + br label("end2") + +# if call after abort, get lock +acqlck: + # increment unelided xacquire + rdval t1, "InstRegIndex(MISCREG_TNELXQ)", dataSize=8 + addi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TNELXQ)", t1, dataSize=8 + %(getlock)s + +end2: + fault "NoFault" +}; 
+''' + +################################## +# XAcquire implementations +################################## + +microcode += elbCode % { + "instr": "XADD_LOCK", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stelo t1, seg, sib, disp + stelb t2, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, sib, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stul t2, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "XADD_LOCK", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + mfence + ld t1, seg, riprel, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stelo t1, seg, riprel, disp + stelb t2, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "getlock": ''' + mfence + ldstl t1, seg, riprel, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stul t2, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += elbCode % { + "instr": "CMPXCHG", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + ld t1, seg, sib, disp + stelo t1, seg, sib, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + stelb t1, seg, sib, disp + mov rax, rax, t1, flags=(nCZF,) + ''', + "getlock": ''' + ldst t1, seg, sib, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + st t1, seg, sib, disp + mov rax, rax, t1, flags=(nCZF,) + ''' +} + +microcode += elbCode % { + "instr": "CMPXCHG", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + ld t1, seg, riprel, disp + stelo t1, seg, riprel, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + stelb t1, seg, riprel, disp + mov rax, rax, t1, flags=(nCZF,) + ''', + "getlock": ''' + ldst t1, seg, riprel, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + st t1, seg, riprel, disp + mov rax, rax, t1, flags=(nCZF,) + ''' +} + 
+microcode += elbCode % { + "instr": "CMPXCHG", + "suffix": "R_R", + "rdip": "", + "eldlock": ''' + sub t0, rax, reg, flags=(OF, SF, ZF, AF, PF, CF) + mov reg, reg, regm, flags=(CZF,) + mov rax, rax, reg, flags=(nCZF,) + ''', + "getlock": ''' + sub t0, rax, reg, flags=(OF, SF, ZF, AF, PF, CF) + mov reg, reg, regm, flags=(CZF,) + mov rax, rax, reg, flags=(nCZF,) + ''' +} + +################################## +# End XAcquire implementations +################################## + +eleCode = ''' +def macroop %(instr)s_ELE_%(suffix)s +{ + %(rdip)s + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("commit"), flags=(nCEZF,) + + # lock was not elided + %(rellock)s + + #decrement unelided xacquire + rdval t1, "InstRegIndex(MISCREG_TNELXQ)", dataSize=8 + subi t1, t1, 0x1, flags=(EZF,), dataSize=8 + br label("skip"), flags=(nCEZF,) + + #set abort to 0 if unelided xacquire is 0 + xor t2, t2, t2, dataSize=8 + wrval "InstRegIndex(MISCREG_THABRT)", t2, dataSize=8 + +skip: + wrval "InstRegIndex(MISCREG_TNELXQ)", t1, dataSize=8 + br label("end") + +commit: + %(eldlock)s + mfence + rdval t1, "InstRegIndex(MISCREG_TDEPTH)", dataSize=8 + subi t1, t1, 0x1, dataSize=8 + wrval "InstRegIndex(MISCREG_TDEPTH)", t1, dataSize=8 + addi t1, t1, 0x0, flags=(EZF,), dataSize=8 + br label("end"), flags=(nCEZF,) + xend + +end: + fault "NoFault" +}; +''' + +################################## +# XRelease implementations +################################## + +microcode += eleCode % { + "instr": "XADD_LOCK", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + mfence + ld t1, seg, sib, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stele t2, seg, sib, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, sib, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stul t2, seg, sib, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "XADD_LOCK", + "suffix": "P_R", + "rdip": "rdip t7", + 
"eldlock": ''' + mfence + ld t1, seg, riprel, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stele t2, seg, riprel, disp + mfence + mov reg, reg, t1 + ''', + "rellock": ''' + mfence + ldstl t1, seg, riprel, disp + add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) + stul t2, seg, riprel, disp + mfence + mov reg, reg, t1 + ''' +} + +microcode += eleCode % { + "instr": "CMPXCHG", + "suffix": "M_R", + "rdip": "", + "eldlock": ''' + ld t1, seg, sib, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + stele t1, seg, sib, disp + mov rax, rax, t1, flags=(nCZF,) + ''', + "rellock": ''' + ldst t1, seg, sib, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + st t1, seg, sib, disp + mov rax, rax, t1, flags=(nCZF,) + ''' +} + +microcode += eleCode % { + "instr": "CMPXCHG", + "suffix": "P_R", + "rdip": "rdip t7", + "eldlock": ''' + ld t1, seg, riprel, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + stele t1, seg, riprel, disp + mov rax, rax, t1, flags=(nCZF,) + ''', + "rellock": ''' + ldst t1, seg, riprel, disp + sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) + mov t1, t1, reg, flags=(CZF,) + st t1, seg, riprel, disp + mov rax, rax, t1, flags=(nCZF,) + ''' +} + +microcode += eleCode % { + "instr": "CMPXCHG", + "suffix": "R_R", + "rdip": "", + "eldlock": ''' + sub t0, rax, reg, flags=(OF, SF, ZF, AF, PF, CF) + mov reg, reg, regm, flags=(CZF,) + mov rax, rax, reg, flags=(nCZF,) + ''', + "rellock": ''' + sub t0, rax, reg, flags=(OF, SF, ZF, AF, PF, CF) + mov reg, reg, regm, flags=(CZF,) + mov rax, rax, reg, flags=(nCZF,) + ''' +} + +################################## +# End XRelease implementations +################################## + #let {{ # class XCHG(Inst): # "GenFault ${new UnimpInstFault}" diff -r 878f2f30b12d src/arch/x86/isa/microops/ldstop.isa --- a/src/arch/x86/isa/microops/ldstop.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/microops/ldstop.isa 
Fri Jul 04 22:00:13 2014 -0500 @@ -273,7 +273,8 @@ let {{ class LdStOp(X86Microop): def __init__(self, data, segment, addr, disp, - dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec): + dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec, + el_beg, el_end, el_orig): self.data = data [self.scale, self.index, self.base] = addr self.disp = disp @@ -289,6 +290,12 @@ self.instFlags += " | (1ULL << StaticInst::IsDataPrefetch)" if nonSpec: self.instFlags += " | (1ULL << StaticInst::IsNonSpeculative)" + if el_beg: + self.instFlags += " | (1ULL << StaticInst::IsElisionBegin)" + if el_end: + self.instFlags += " | (1ULL << StaticInst::IsElisionEnd)" + if el_orig: + self.instFlags += " | (1ULL << StaticInst::IsElisionOrig)" self.memFlags += " | (machInst.legacy.addr ? " + \ "(AddrSizeFlagBit << FlagShift) : 0)" @@ -309,7 +316,8 @@ class BigLdStOp(X86Microop): def __init__(self, data, segment, addr, disp, - dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec): + dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec, + el_beg=False, el_end=False, el_orig=False): self.data = data [self.scale, self.index, self.base] = addr self.disp = disp @@ -325,6 +333,12 @@ self.instFlags += " | (1ULL << StaticInst::IsDataPrefetch)" if nonSpec: self.instFlags += " | (1ULL << StaticInst::IsNonSpeculative)" + if el_beg: + self.instFlags += " | (1ULL << StaticInst::IsElisionBegin)" + if el_end: + self.instFlags += " | (1ULL << StaticInst::IsElisionEnd)" + if el_orig: + self.instFlags += " | (1ULL << StaticInst::IsElisionOrig)" self.memFlags += " | (machInst.legacy.addr ? 
" + \ "(AddrSizeFlagBit << FlagShift) : 0)" @@ -396,7 +410,7 @@ atCPL0=False, prefetch=False, nonSpec=False): super(LoadOp, self).__init__(data, segment, addr, disp, dataSize, addressSize, mem_flags, - atCPL0, prefetch, nonSpec) + atCPL0, prefetch, nonSpec, False, False, False) self.className = Name self.mnemonic = name @@ -427,7 +441,8 @@ } ''', big = False) - def defineMicroStoreOp(mnemonic, code, completeCode="", mem_flags="0"): + def defineMicroStoreOp(mnemonic, code, completeCode="", mem_flags="0", + el_beg=False, el_end=False, el_orig=False): global header_output global decoder_output global exec_output @@ -453,7 +468,7 @@ atCPL0=False, nonSpec=False): super(StoreOp, self).__init__(data, segment, addr, disp, dataSize, addressSize, mem_flags, atCPL0, False, - nonSpec) + nonSpec, el_beg, el_end, el_orig) self.className = Name self.mnemonic = name @@ -463,6 +478,13 @@ defineMicroStoreOp('Stul', 'Mem = pick(Data, 2, dataSize);', mem_flags="Request::LOCKED") + defineMicroStoreOp('StElB', 'Mem = pick(Data, 2, dataSize);', + mem_flags="Request::XELIDE", el_beg=True) + defineMicroStoreOp('StElE', 'Mem = pick(Data, 2, dataSize);', + mem_flags="Request::XELIDE", el_end=True) + defineMicroStoreOp('StElO', 'Mem = pick(Data, 2, dataSize);', + mem_flags="Request::XELIDE", el_orig=True) + defineMicroStoreOp('Stfp', code='Mem = FpData_uqw;') defineMicroStoreOp('Stfp87', code=''' @@ -495,7 +517,8 @@ def __init__(self, data, segment, addr, disp = 0, dataSize="env.dataSize", addressSize="env.addressSize"): super(LeaOp, self).__init__(data, segment, addr, disp, - dataSize, addressSize, "0", False, False, False) + dataSize, addressSize, "0", False, False, False, + False, False, False) self.className = "Lea" self.mnemonic = "lea" @@ -515,7 +538,7 @@ addressSize="env.addressSize"): super(TiaOp, self).__init__("InstRegIndex(NUM_INTREGS)", segment, addr, disp, dataSize, addressSize, "0", False, False, - False) + False, False, False, False) self.className = "Tia" self.mnemonic = "tia" 
@@ -527,7 +550,7 @@ addressSize="env.addressSize", atCPL0=False): super(CdaOp, self).__init__("InstRegIndex(NUM_INTREGS)", segment, addr, disp, dataSize, addressSize, "Request::NO_ACCESS", - atCPL0, False, False) + atCPL0, False, False, False, False, False) self.className = "Cda" self.mnemonic = "cda" diff -r 878f2f30b12d src/arch/x86/isa/microops/microops.isa --- a/src/arch/x86/isa/microops/microops.isa Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/isa/microops/microops.isa Fri Jul 04 22:00:13 2014 -0500 @@ -59,5 +59,8 @@ //Miscellaneous microop definitions ##include "specop.isa" +//Transaction microop definitions +##include "txnop.isa" + //Microops for printing out debug messages through M5 ##include "debug.isa" diff -r 878f2f30b12d src/arch/x86/isa/microops/txnop.isa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/arch/x86/isa/microops/txnop.isa Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,203 @@ +// Copyright (c) 2007-2008 The Hewlett-Packard Development Company +// All rights reserved. +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Copyright (c) 2008 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +////////////////////////////////////////////////////////////////////////// +// +// Txn Microop templates +// +////////////////////////////////////////////////////////////////////////// + + +// Txn templates + +def template MicroTxnExecute {{ + Fault %(class_name)s::execute(CPU_EXEC_CONTEXT *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Addr EA; + + %(evdec)s; + %(vardec)s; + %(code_exe)s; + + DPRINTF(X86, "%s : %s: The address is %#x\n", instMnem, mnemonic, EA); + fault = readMemAtomic(xc, traceData, EA, Mem, dataSize, memFlags); + + return NoFault; + } +}}; + +def template MicroTxnInitiateAcc {{ + Fault %(class_name)s::initiateAcc(CPU_EXEC_CONTEXT * xc, + Trace::InstRecord * traceData) const + { + Fault fault = NoFault; + Addr EA; + + %(evdec)s; + %(vardec)s; + + DPRINTF(X86, "%s : %s: The address is %#x\n", instMnem, mnemonic, EA); + fault = readMemAtomic(xc, traceData, EA, Mem, dataSize, memFlags); + + return NoFault; + } +}}; + +def template MicroTxnCompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + CPU_EXEC_CONTEXT * xc, + Trace::InstRecord * traceData) const + { + Fault fault = NoFault; + + %(vardec)s; + + Mem = getMem(pkt, dataSize, traceData); + + %(code_cmp)s; + return NoFault; + } +}}; + +def template MicroTxnOpDeclare {{ + class %(class_name)s : public X86ISA::X86MicroopBase + { + protected: + const Request::FlagsType memFlags; + + public: + %(class_name)s(ExtMachInst _machInst, + const char * instMnem, + uint64_t setFlags, + Request::FlagsType _memFlags); + + %(BasicExecDeclare)s + + %(InitiateAccDeclare)s + + %(CompleteAccDeclare)s + }; +}}; + +def template MicroTxnOpConstructor {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, const char * instMnem, uint64_t setFlags, + Request::FlagsType _memFlags) : + %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags, MemReadOp), + memFlags(_memFlags) + { + _numSrcRegs = 0; + _numDestRegs = 0; + _numFPDestRegs = 0; + 
_numIntDestRegs = 0; + _numCCDestRegs = 0; + flags[IsMemRef] = true; + flags[IsLoad] = true; + _destRegIdx[_numDestRegs++] = (InstRegIndex(MISCREG_TRET)).idx + Misc_Reg_Base; + flags[IsInteger] = true; + } +}}; + +let {{ + + # Make these empty strings so that concatenating onto + # them will always work. + header_output = "" + decoder_output = "" + exec_output = "" + + def defineMicroTxnOp(mnemonic, mem_flags="0"): + global header_output + global decoder_output + global exec_output + global microopClasses + Name = mnemonic + name = mnemonic.lower() + + # Build up the all register version of this micro op + iops = [InstObjParams(name, Name, 'X86ISA::X86MicroopBase', + {"evdec" : 'EA = 0', + "vardec" : ''' + uint8_t dataSize = 8; + uint64_t Mem = 0; + ''', + "code_exe": '', + "code_cmp": ''' + if(fault == NoFault) + { + xc->setMiscRegOperand(this, 0, Mem); + if (traceData) { traceData->setData(Mem); }; + } + '''} + )] + + for iop in iops: + header_output += MicroTxnOpDeclare.subst(iop) + decoder_output += MicroTxnOpConstructor.subst(iop) + exec_output += MicroTxnExecute.subst(iop) + exec_output += MicroTxnInitiateAcc.subst(iop) + exec_output += MicroTxnCompleteAcc.subst(iop) + + class TxnOp(X86Microop): + def __init__(self): + self.className = Name + self.mnemonic = name + self.memFlags = mem_flags + + def getAllocator(self, microFlags): + allocator = '''new %(class_name)s(machInst, macrocodeBlock, + %(flags)s, %(memFlags)s)''' % { + "class_name" : self.className, + "mnemonic" : self.mnemonic, + "flags" : self.microFlagsText(microFlags), + "memFlags" : self.memFlags} + return allocator + + microopClasses[name] = TxnOp + + defineMicroTxnOp('Xbegin', mem_flags="Request::XBEGIN") + + defineMicroTxnOp('Xabort', mem_flags="Request::XABORT") + + defineMicroTxnOp('Xend', mem_flags="Request::XEND") +}}; + diff -r 878f2f30b12d src/arch/x86/linux/syscalls.cc --- a/src/arch/x86/linux/syscalls.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/linux/syscalls.cc Fri Jul 04 
22:00:13 2014 -0500 @@ -241,7 +241,7 @@ /* 25 */ SyscallDesc("mremap", mremapFunc), /* 26 */ SyscallDesc("msync", unimplementedFunc), /* 27 */ SyscallDesc("mincore", unimplementedFunc), - /* 28 */ SyscallDesc("madvise", unimplementedFunc), + /* 28 */ SyscallDesc("madvise", ignoreFunc), /* 29 */ SyscallDesc("shmget", unimplementedFunc), /* 30 */ SyscallDesc("shmat", unimplementedFunc), /* 31 */ SyscallDesc("shmctl", unimplementedFunc), @@ -711,7 +711,7 @@ /* 216 */ SyscallDesc("setfsgid32", unimplementedFunc), /* 217 */ SyscallDesc("pivot_root", unimplementedFunc), /* 218 */ SyscallDesc("mincore", unimplementedFunc), - /* 219 */ SyscallDesc("madvise", unimplementedFunc), + /* 219 */ SyscallDesc("madvise", ignoreFunc), /* 220 */ SyscallDesc("madvise1", unimplementedFunc), /* 221 */ SyscallDesc("getdents64", unimplementedFunc), /* 222 */ SyscallDesc("fcntl64", unimplementedFunc), diff -r 878f2f30b12d src/arch/x86/regs/misc.hh --- a/src/arch/x86/regs/misc.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/arch/x86/regs/misc.hh Fri Jul 04 22:00:13 2014 -0500 @@ -396,6 +396,40 @@ // "Fake" MSRs for internally implemented devices MISCREG_PCI_CONFIG_ADDRESS, + // Saving integer registers for transactions + MISCREG_TDEPTH, + MISCREG_TRAX, + MISCREG_TRCX, + MISCREG_TRDX, + MISCREG_TRBX, + MISCREG_TRSP, + MISCREG_TRBP, + MISCREG_TRSI, + MISCREG_TRDI, + + MISCREG_TR8, + MISCREG_TR9, + MISCREG_TR10, + MISCREG_TR11, + MISCREG_TR12, + MISCREG_TR13, + MISCREG_TR14, + MISCREG_TR15, + + MISCREG_TRIP, + MISCREG_TIMM, + + // registers to help maintain state + // used to specify that there was an hle abort + MISCREG_THABRT, + // check if the outermost xbring was an xacquire + MISCREG_THLE, + // keep track of nested xacq depth + // used to stop elision after an abort + MISCREG_TNELXQ, + // implicit return register for txn instructions + MISCREG_TRET, + NUM_MISCREGS }; diff -r 878f2f30b12d src/cpu/StaticInstFlags.py --- a/src/cpu/StaticInstFlags.py Wed Jul 02 13:19:13 2014 -0400 +++ 
b/src/cpu/StaticInstFlags.py Fri Jul 04 22:00:13 2014 -0500 @@ -108,5 +108,10 @@ 'IsMicroBranch', # This microop branches within the microcode for # a macroop 'IsDspOp', - 'IsSquashAfter' # Squash all uncommitted state after executed + 'IsSquashAfter', # Squash all uncommitted state after executed + + 'IsElisionBegin', # Start eliding the write to a memory location + 'IsElisionEnd', # Stop eliding the write to a memory location + 'IsElisionOrig' # The original value of an elided memory location + ] diff -r 878f2f30b12d src/cpu/base_dyn_inst.hh --- a/src/cpu/base_dyn_inst.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/base_dyn_inst.hh Fri Jul 04 22:00:13 2014 -0500 @@ -553,6 +553,11 @@ bool isFirstMicroop() const { return staticInst->isFirstMicroop(); } bool isMicroBranch() const { return staticInst->isMicroBranch(); } + /* Adding Elision detection */ + bool isElisionBegin() const { return staticInst->isElisionBegin(); } + bool isElisionEnd() const { return staticInst->isElisionEnd(); } + bool isElisionOrig() const { return staticInst->isElisionOrig(); } + /** Temporarily sets this instruction as a serialize before instruction. */ void setSerializeBefore() { status.set(SerializeBefore); } @@ -897,6 +902,17 @@ req->taskId(cpu->taskId()); + if(req->isTxn()) { + // forcing effective address as valid without translating + // shouldn't be a problem since the address is not used + // downstream + effAddr = 0; + effSize = 0; + instFlags[EffAddrValid] = true; + fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); + return fault; + } + + // Only split the request if the ISA supports unaligned accesses. 
if (TheISA::HasUnalignedMemAcc) { splitRequest(req, sreqLow, sreqHigh); diff -r 878f2f30b12d src/cpu/o3/commit_impl.hh --- a/src/cpu/o3/commit_impl.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/commit_impl.hh Fri Jul 04 22:00:13 2014 -0500 @@ -998,6 +998,43 @@ if (interrupt != NoFault) handleInterrupt(); + /* + //////////////////////////////////// + // Handling implicit transaction abort interrupt + // - Used to separate detection and handling + // - Currently not being used since handling and + // detection takes place at the same time + // TODO: + // - Check if handling and detection can be separated + //////////////////////////////////// + + if (!FullSystem && + canHandleInterrupts && + cpu->instList.empty() && + (cpu->tcBase(0))->isTxnImplicitAbort()) + { + DPRINTF(Commit, "Implicit TxnAbort being handled."); + //handle implicit TxnAbort + using namespace TheISA; + ThreadContext *tc = cpu->tcBase(0); + Fault x = new X86TxnAbort(); + x->invoke(tc); + //thread->decoder.reset(); + tc->setTxnImplicitAbort(false); + } + else if(!FullSystem && + (cpu->tcBase(0))->isTxnImplicitAbort()) { + DPRINTF(Commit, "Implicit TxnAbort pending: instruction is %s" + "in flight, ROB is %sempty\n", + canHandleInterrupts ? "not " : "", + cpu->instList.empty() ? "" : "not " ); + } + + //////////////////////////////////// + // end implicit TxnAbort handling + //////////////////////////////////// +*/ + int commit_thread = getCommittingThread(); if (commit_thread == -1 || !rob->isHeadReady(commit_thread)) @@ -1183,6 +1220,17 @@ // Check if the instruction caused a fault. If so, trap. 
Fault inst_fault = head_inst->getFault(); + // if txn abort, we are going to ignore all faults (since they + // may have been caused due to aborted memory accesses and their + // effects will anyway be wiped out by abort) + ThreadContext *tc = cpu->tcBase(0); + if (tc->isTxnImplicitAbort() && inst_fault != NoFault) + { + DPRINTF(Commit, "Inst [sn:%lli] PC %s removing fault\n", + head_inst->seqNum, head_inst->pcState()); + inst_fault = NoFault; + } + // Stores mark themselves as completed. if (!head_inst->isStore() && inst_fault == NoFault) { head_inst->setCompleted(); diff -r 878f2f30b12d src/cpu/o3/fetch_impl.hh --- a/src/cpu/o3/fetch_impl.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/fetch_impl.hh Fri Jul 04 22:00:13 2014 -0500 @@ -378,6 +378,51 @@ DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid); assert(!cpu->switchedOut()); + ///////////////////////////// + // handle implicit abort in instruction cache access + ///////////////////////////// + + bool impAbort = false; + + if (pkt && (pkt->cmd == MemCmd::TxnAbortResp)) + { + impAbort = true; + ThreadContext *tc = cpu->tcBase(0); + + if (pkt->req->isXabort()) + { + // There should never be a XAbort request to ICache + panic("XAbort request in ICache! 
Not expected!\n"); + } + else + { + if (tc->isTxnImplicitAbort()) { + DPRINTF(Fetch, "Consequent Abort in ICache\n"); + } + else { + DPRINTF(Fetch, "Implicit Abort in ICache\n"); + // the first one to see the abort sets the implicitAbort + // flag and says that abort hasn't been called yet + tc->setTxnImplicitAbort(true); + tc->setTxnAbortCalled(false); + } + + if(!interruptPending && !tc->isTxnAbortCalled()) { + tc->setTxnAbortCalled(true); + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + else if(interruptPending && !tc->isTxnAbortCalled()) { + DPRINTF(Fetch, "Deferring abort invoke because of pending interrupt.\n"); + } + } + } + + ///////////////////////////// + // end handling implicit abort in instruction cache access + ///////////////////////////// + // Only change the status if it's still waiting on the icache access // to return. if (fetchStatus[tid] != IcacheWaitResponse || @@ -388,8 +433,10 @@ return; } - memcpy(fetchBuffer[tid], pkt->getPtr(), fetchBufferSize); - fetchBufferValid[tid] = true; + if(!impAbort) { + memcpy(fetchBuffer[tid], pkt->getPtr(), fetchBufferSize); + fetchBufferValid[tid] = true; + } // Wake up the CPU (if it went to sleep and was waiting on // this completion event). 
diff -r 878f2f30b12d src/cpu/o3/inst_queue_impl.hh --- a/src/cpu/o3/inst_queue_impl.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/inst_queue_impl.hh Fri Jul 04 22:00:13 2014 -0500 @@ -1151,8 +1151,9 @@ } if (!squashed_inst->isIssued() || - (squashed_inst->isMemRef() && - !squashed_inst->memOpDone())) { + ((squashed_inst->isMemRef() || squashed_inst->isMemBarrier() || + squashed_inst->isWriteBarrier()) && + !squashed_inst->memOpDone())) { DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %s squashed.\n", tid, squashed_inst->seqNum, squashed_inst->pcState()); @@ -1202,11 +1203,14 @@ // nonSpecInsts already when they are ready, and so we // cannot always expect to find them if (ns_inst_it == nonSpecInsts.end()) { + ThreadContext *tc = cpu->tcBase(0); + // loads that became ready but stalled on a + // blocked cache are already removed from + // nonSpecInsts, and have not faulted assert(squashed_inst->getFault() != NoFault || - squashed_inst->isMemRef()); + squashed_inst->isMemRef() || + tc->isTxnImplicitAbort()); } else { (*ns_inst_it).second = NULL; diff -r 878f2f30b12d src/cpu/o3/lsq_unit.hh --- a/src/cpu/o3/lsq_unit.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/lsq_unit.hh Fri Jul 04 22:00:13 2014 -0500 @@ -156,6 +156,9 @@ /** Writes back stores. */ void writebackStores(); + /** Checks for implicit abort in the data access response. */ + bool checkImplicitAbort(PacketPtr pkt, DynInstPtr &inst, const char* rw); + /** Completes the data access that has been returned from the * memory system. */ void completeDataAccess(PacketPtr pkt); @@ -274,6 +277,9 @@ /** Decrements the given load index (circular queue). */ inline void decrLdIdx(int &load_idx) const; + /** Checking if the instruction is an eliding store. */ + void checkStoreElision(int storeWBIdx); + public: /** Debugging function to dump instructions in the LSQ. */ void dumpInsts() const; @@ -405,6 +411,10 @@ /** The load queue. 
*/ std::vector loadQueue; + /** The elision buffers - modelled as CAM */ + std::map elsBuffOrig; + std::map elsBuffCurr; + /** The number of LQ entries, plus a sentinel entry (circular queue). * @todo: Consider having var that records the true number of LQ entries. */ @@ -660,6 +670,41 @@ return NoFault; } + // check in elision buffer before looking for store forwarding + // currently only does full match - probably what is required + Addr a = req->getVaddr(); + if(elsBuffCurr.find(a) != elsBuffCurr.end()) { + assert(!load_inst->memData); + load_inst->memData = new uint8_t[64]; + + // memset required? maybe not + memcpy(load_inst->memData, elsBuffCurr[a], req->getSize()); + + DPRINTF(LSQUnit, "Forwarding from elision buffer to load to " + "addr %#x, data %#x\n", req->getVaddr(), data); + + PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq); + data_pkt->dataStatic(load_inst->memData); + + WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // @todo: Need to make this a parameter. + cpu->schedule(wb, curTick()); + + // Don't need to do anything special for split loads. + if (TheISA::HasUnalignedMemAcc && sreqLow) { + delete sreqLow; + delete sreqHigh; + } + + ++lsqForwLoads; + return NoFault; + } + + // end elision forwarding + while (store_idx != -1) { // End once we've reached the top of the LSQ if (store_idx == storeWBIdx) { @@ -798,7 +843,17 @@ bool completedFirst = false; if (!lsq->cacheBlocked()) { MemCmd command = - req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq; + req->isLLSC() ? MemCmd::LoadLockedReq : + req->isXbegin() ? MemCmd::TxnStartReq : + req->isXabort() ? MemCmd::TxnAbortReq : + req->isXend() ? 
MemCmd::TxnCommitReq : MemCmd::ReadReq; + + // if abort clear the two elision buffers + if(req->isXabort()) { + elsBuffOrig.clear(); + elsBuffCurr.clear(); + } + PacketPtr data_pkt = new Packet(req, command); PacketPtr fst_data_pkt = NULL; PacketPtr snd_data_pkt = NULL; diff -r 878f2f30b12d src/cpu/o3/lsq_unit_impl.hh --- a/src/cpu/o3/lsq_unit_impl.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/o3/lsq_unit_impl.hh Fri Jul 04 22:00:13 2014 -0500 @@ -90,6 +90,73 @@ return "Store writeback"; } +//////////////////////////////////// +// Handling implicit transaction abort interrupt +//////////////////////////////////// + +template +bool +LSQUnit::checkImplicitAbort(PacketPtr pkt, DynInstPtr &inst, const char* rw) +{ + if (pkt->cmd == MemCmd::TxnAbortResp) { + ThreadContext *tc = cpu->tcBase(0); + + if (pkt->req->isXabort()) + { + DPRINTF(LSQUnit, "Clearing Implicit abort %s: sn[%lli]\n", + rw, inst->seqNum); + + // Explicit abort has been seen - set implicitAbort flag + // to false + // This has to be actually set during commit of the + // abort instruction. Think about how. + tc->setTxnImplicitAbort(false); + } + else + { + if (tc->isTxnImplicitAbort()) { + DPRINTF(LSQUnit, "Consequent Abort in Data %s: sn[%lli]\n", + rw, inst->seqNum); + } + else { + DPRINTF(LSQUnit, "Implicit Abort in Data %s: sn[%lli]\n", + rw, inst->seqNum); + + // the first one to see the abort sets the implicitAbort + // flag and says that abort hasn't been called yet + tc->setTxnImplicitAbort(true); + tc->setTxnAbortCalled(true); + + // BRILIANT! + // why not let fetch handle all implicit aborts? + // if() { + + // How should the implicit abort be handled? + // - as an interrupt? 
(no flush) - CURRENT + // - combination of misprediction and interrupt (flush) + + // tc->setTxnImplicitAbort(true); + // tc->setTxnAbortCalled(false); + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + // } + // else { + // DPRINTF(IEW, "Deferring abort invoke because of pending interrupt.\n"); + // } + } + } + + return true; + } + + return false; +} + +//////////////////////////////////// +// end implicit TxnAbort handling +//////////////////////////////////// + template void LSQUnit::completeDataAccess(PacketPtr pkt) @@ -122,6 +189,9 @@ } if (inst->isStore()) { + // check if there was an implicit abort + checkImplicitAbort(pkt, inst, "Write"); + completeStore(state->idx); } } @@ -774,6 +844,100 @@ template void +LSQUnit::checkStoreElision(int storeWBIdx) +{ + if(storeQueue[storeWBIdx].inst->isElisionOrig() || + storeQueue[storeWBIdx].inst->isElisionBegin() || + storeQueue[storeWBIdx].inst->isElisionEnd() + ) + { + //panic("To handle: Elision instructions.\n"); + ThreadContext *tc = cpu->tcBase(0); + + // on orig, add address and data to elsBuffOrig + if(storeQueue[storeWBIdx].inst->isElisionOrig()) { + Addr a = storeQueue[storeWBIdx].req->getVaddr(); + char *d = new char[16]; + memcpy(d, storeQueue[storeWBIdx].data, 16); + + if(elsBuffOrig.find(a) == elsBuffOrig.end()) { + DPRINTF(LSQUnit, "Adding to elsBuffOrig: " + "Addr: %#x, Data:'%s'\n", a, d); + elsBuffOrig[a] = d; + } + else { + // eliding already elided address - abort + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + } + + // on begin, add address and data to elsBuffCurr + // verify that address is in orig, else abort + if(storeQueue[storeWBIdx].inst->isElisionBegin()) { + Addr a = storeQueue[storeWBIdx].req->getVaddr(); + char *d = new char[16]; + memcpy(d, storeQueue[storeWBIdx].data, 16*sizeof(char)); + + if(elsBuffOrig.find(a) == elsBuffOrig.end()) { + DPRINTF(LSQUnit, "Addr: %#x not found in " + "elsBuffOrig. 
Aborting\n", a); + + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + else { + DPRINTF(LSQUnit, "Adding to elsBuffCurr: " + "Addr: %#x, Data:'%s'\n", a, d); + elsBuffCurr[a] = d; + } + } + + // on end, remove address from elisionBuffer(s) + // if not present, or data different from orig, abort + if(storeQueue[storeWBIdx].inst->isElisionEnd()) { + Addr a = storeQueue[storeWBIdx].req->getVaddr(); + + // checking on orig for address + if(elsBuffOrig.find(a) == elsBuffOrig.end()) { + DPRINTF(LSQUnit, "Addr: %#x not found in " + "elsBuffOrig. Aborting\n", a); + + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + else { + char *d1 = storeQueue[storeWBIdx].data; + char *d2 = elsBuffOrig[a]; + if(memcmp(d1, d2, 16*sizeof(char)) == 0) { + DPRINTF(LSQUnit, "Removing Addr: %#x from " + "elision buffers\n", a); + elsBuffCurr.erase(a); + elsBuffOrig.erase(a); + } + else { + DPRINTF(LSQUnit, "Addr: %#x Data: '%s' not found " + "in elsBuffOrig ('%s'). Aborting\n", a, d1, d2); + + using namespace TheISA; + Fault x = new X86TxnAbort(); + x->invoke(tc); + } + + } + } + + // set size to 0 so that the instruction doesnot write to + // cache and completes + storeQueue[storeWBIdx].size = 0; + } +} + +template +void LSQUnit::writebackStores() { // First writeback the second packet from any split store that didn't @@ -795,6 +959,8 @@ break; } + checkStoreElision(storeWBIdx); + // Store didn't write any data so no need to write it back to // memory. if (storeQueue[storeWBIdx].size == 0) { @@ -1139,8 +1305,12 @@ if (!inst->isExecuted()) { inst->setExecuted(); - // Complete access to copy data to proper place. - inst->completeAcc(pkt); + // check if there was an implicit abort and set instruction fault + bool isImplicitAbort = checkImplicitAbort(pkt, inst, "Read"); + if (!isImplicitAbort) { + // Complete access to copy data to proper place. 
+ inst->completeAcc(pkt); + } } // Need to insert instruction into queue to commit diff -r 878f2f30b12d src/cpu/static_inst.hh --- a/src/cpu/static_inst.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/static_inst.hh Fri Jul 04 22:00:13 2014 -0500 @@ -179,6 +179,10 @@ bool isMicroBranch() const { return flags[IsMicroBranch]; } //@} + bool isElisionBegin() const { return flags[IsElisionBegin]; } + bool isElisionEnd() const { return flags[IsElisionEnd]; } + bool isElisionOrig() const { return flags[IsElisionOrig]; } + void setLastMicroop() { flags[IsLastMicroop] = true; } void setDelayedCommit() { flags[IsDelayedCommit] = true; } void setFlag(Flags f) { flags[f] = true; } diff -r 878f2f30b12d src/cpu/thread_context.hh --- a/src/cpu/thread_context.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/cpu/thread_context.hh Fri Jul 04 22:00:13 2014 -0500 @@ -99,6 +99,9 @@ typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::CCReg CCReg; typedef TheISA::MiscReg MiscReg; + bool isAbort; + bool abortCalled; + public: enum Status @@ -297,6 +300,22 @@ virtual void setCCRegFlat(int idx, CCReg val) = 0; /** @} */ + /* Transaction functions */ + bool isTxnImplicitAbort() { + return isAbort; + } + + void setTxnImplicitAbort(bool aborted) { + isAbort = aborted; + } + + bool isTxnAbortCalled() { + return abortCalled; + } + + void setTxnAbortCalled(bool called) { + abortCalled = called; + } }; /** @@ -314,7 +333,7 @@ { public: ProxyThreadContext(TC *actual_tc) - { actualTC = actual_tc; } + { actualTC = actual_tc; this->setTxnImplicitAbort(false); } private: TC *actualTC; diff -r 878f2f30b12d src/mem/packet.hh --- a/src/mem/packet.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/packet.hh Fri Jul 04 22:00:13 2014 -0500 @@ -128,6 +128,13 @@ PrintReq, // Print state matching address FlushReq, //request for a cache flush InvalidationReq, // request for address to be invalidated from lsq + // Transaction commands + TxnStartReq, // request to start a transaction + TxnStartResp, + TxnCommitReq, 
// request to commit a transaction + TxnCommitResp, + TxnAbortReq, // request to abort a transaction (explicit) + TxnAbortResp, // response to transaction abort (implicit/explicit) NUM_MEM_CMDS }; @@ -616,6 +623,11 @@ flags.set(VALID_ADDR); _isSecure = req->isSecure(); } + + if (req->isTxn()) { + flags.set(VALID_ADDR); + } + if (req->hasSize()) { size = req->getSize(); flags.set(VALID_SIZE); @@ -732,6 +744,35 @@ clearSrc(); } + /** + * Take a request packet and modify it in place to be suitable for + * returning as a response to that request. The source field is + * turned into the destination, and subsequently cleared. Note + * that the latter is not necessary for atomic requests, but + * causes no harm as neither field is valid. + */ + void + makeTxnResponse(bool isImplicitAbort) + { + assert(needsResponse()); + assert(isRequest()); + origCmd = cmd; + + if(isImplicitAbort) { + cmd = MemCmd::TxnAbortResp; + } + else { + cmd = cmd.responseCommand(); + } + + // responses are never express, even if the snoop that + // triggered them was + flags.clear(EXPRESS_SNOOP); + + dest = src; + clearSrc(); + } + void makeAtomicResponse() { diff -r 878f2f30b12d src/mem/packet.cc --- a/src/mem/packet.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/packet.cc Fri Jul 04 22:00:13 2014 -0500 @@ -169,6 +169,18 @@ /* Invalidation Request */ { SET3(NeedsExclusive, IsInvalidate, IsRequest), InvalidCmd, "InvalidationReq" }, + /* TxnStartReq */ + { SET3(IsRead, IsRequest, NeedsResponse), TxnStartResp, "TxnStartReq" }, + /* TxnStartResp */ + { SET2(IsRead, IsResponse), InvalidCmd, "TxnStartResp" }, + /* TxnCommitReq */ + { SET3(IsRead, IsRequest, NeedsResponse), TxnCommitResp, "TxnCommitReq" }, + /* TxnCommitResp */ + { SET2(IsRead, IsResponse), InvalidCmd, "TxnCommitResp" }, + /* TxnAbortReq */ + { SET3(IsRead, IsRequest, NeedsResponse), TxnAbortResp, "TxnAbortReq" }, + /* TxnAbortResp */ + { SET2(IsRead, IsResponse), InvalidCmd, "TxnAbortResp" }, }; bool diff -r 878f2f30b12d 
src/mem/protocol/MESI_Three_Level-msg.sm --- a/src/mem/protocol/MESI_Three_Level-msg.sm Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/protocol/MESI_Three_Level-msg.sm Fri Jul 04 22:00:13 2014 -0500 @@ -35,6 +35,7 @@ GET_INSTR, desc="Get Instruction"; INV, desc="INValidate"; PUTX, desc="Replacement message"; + PUTX_COPY, desc="Data block to be copied in L1. L0 will still be in M state"; WB_ACK, desc="Writeback ack"; @@ -51,6 +52,10 @@ // shared block before it got the data. So the L0 cache can use the data // but not store it. STALE_DATA; + + // This is used by L0 to tell L1 that it cannot provide the latest + // value + NAK; } // Class for messages sent between the L0 and the L1 controllers. diff -r 878f2f30b12d src/mem/protocol/MESI_Txn_Three_Level-L0cache.sm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/mem/protocol/MESI_Txn_Three_Level-L0cache.sm Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,1088 @@ +/* + * Copyright (c) 2013 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +machine(L0Cache, "MESI Directory L0 Cache") + : Sequencer * sequencer, + CacheMemory * Icache, + CacheMemory * Dcache, + Cycles request_latency = 2, + Cycles response_latency = 2, + bool send_evictions, +{ + // Transaction specific variables + bool isTransaction, default="false"; + bool abortTransaction, default="false"; + int txnDepth, default="0"; + int abortCode, default="0"; + + // NODE L0 CACHE + // From this node's L0 cache to the network + MessageBuffer requestFromCache, network="To", physical_network="0", ordered="true"; + + // To this node's L0 cache FROM the network + MessageBuffer bufferToCache, network="From", physical_network="0", ordered="true"; + + // Message queue between this controller and the processor + MessageBuffer mandatoryQueue, ordered="false"; + + // STATES + state_declaration(State, desc="Cache states", default="L0Cache_State_I") { + // Base states + + // The cache entry has not been allocated. + NP, AccessPermission:Invalid, desc="Not present in either cache"; + + // The cache entry has been allocated, but is not in use. + I, AccessPermission:Invalid; + + // The cache entry is in shared mode. The processor can read this entry + // but it cannot write to it. + S, AccessPermission:Read_Only; + + // The cache entry is in exclusive mode. The processor can read this + // entry. It can write to this entry without informing the directory. + // On writing, the entry moves to M state. 
+ E, AccessPermission:Read_Only; + + // The processor has read and write permissions on this entry. + M, AccessPermission:Read_Write; + + // Transient States + + // The cache controller has requested that this entry be fetched in + // shared state so that the processor can read it. + IS, AccessPermission:Busy; + + // The cache controller has requested that this entry be fetched in + // modify state so that the processor can read/write it. + IM, AccessPermission:Busy; + + // The cache controller had read permission over the entry. But now the + // processor needs to write to it. So, the controller has requested for + // write permission. + SM, AccessPermission:Read_Only; + } + + // EVENTS + enumeration(Event, desc="Cache events") { + // L0 events + Load, desc="Load request from the home processor"; + Ifetch, desc="I-fetch request from the home processor"; + Store, desc="Store request from the home processor"; + + Inv, desc="Invalidate request from L2 bank"; + + // internal generated request + L0_Replacement, desc="L0 Replacement", format="!r"; + + // other requests + Fwd_GETX, desc="GETX from other processor"; + Fwd_GETS, desc="GETS from other processor"; + Fwd_GET_INSTR, desc="GET_INSTR from other processor"; + + // other requests - transaction + Fwd_GETS_Txn, desc="GETS from other processor during txn"; + Fwd_GET_INSTR_Txn, desc="GET_INSTR from other processor during txn"; + + // + Data, desc="Data for processor"; + Data_Exclusive, desc="Data for processor"; + Data_Stale, desc="Data for processor, but not for storage"; + + Ack, desc="Ack for processor"; + Ack_all, desc="Last ack for processor"; + + WB_Ack, desc="Ack for replacement"; + + // internal request for transactional pop of request + Txn_PopQ, desc="Fake signal to pop the mandatory queue"; + Txn_NAK, desc="Signal to send a NAK after aborting"; + Txn_InvAck, desc="Signal to send an Inv ack after aborting txn"; + + // Transaction events: check if necessary. 
o/w delete + Start_Txn, desc="Start transaction request from home processor"; + Abort_Txn, desc="Abort transaction request from home processor"; + End_Txn, desc="End transaction request from home processor"; + Abort_Ack, desc="Abort ack to processor"; + Commit_Ack, desc="Commit ack to processor"; + } + + // TYPES + + // CacheEntry + structure(Entry, desc="...", interface="AbstractCacheEntry" ) { + State CacheState, desc="cache state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, default="false", desc="data is dirty"; + + void setReadSet(), external="yes"; + void setWriteSet(), external="yes"; + bool getWriteSet(), external="yes"; + bool getReadSet(), external="yes"; + + // override invalidateEntry + void invalidateEntry(bool dummy) { + CacheState := State:I; + Dirty := false; + } + } + + // TBE fields + structure(TBE, desc="...") { + Address Addr, desc="Physical address for this TBE"; + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Buffer for the data block"; + bool Dirty, default="false", desc="data is dirty"; + int pendingAcks, default="0", desc="number of pending acks"; + } + + structure(TBETable, external="yes") { + TBE lookup(Address); + TBE getNullEntry(); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } + + TBETable TBEs, template="", constructor="m_number_of_TBEs"; + + GenericBloomFilter TxnBF, constructor="m_bloom_filter_id"; + + void set_cache_entry(AbstractCacheEntry a); + void unset_cache_entry(); + void set_tbe(TBE a); + void unset_tbe(); + void wakeUpBuffers(Address a); + void wakeUpAllBuffers(Address a); + void profileMsgDelay(int virtualNetworkType, Cycles c); + void clearBlockMap(); + + // inclusive cache returns L0 entries only + Entry getCacheEntry(Address addr), return_by_pointer="yes" { + Entry Dcache_entry := static_cast(Entry, "pointer", Dcache[addr]); + if(is_valid(Dcache_entry)) { + return Dcache_entry; + } + + Entry Icache_entry := static_cast(Entry, "pointer", 
Icache[addr]); + return Icache_entry; + } + + Entry getDCacheEntry(Address addr), return_by_pointer="yes" { + Entry Dcache_entry := static_cast(Entry, "pointer", Dcache[addr]); + return Dcache_entry; + } + + Entry getICacheEntry(Address addr), return_by_pointer="yes" { + Entry Icache_entry := static_cast(Entry, "pointer", Icache[addr]); + return Icache_entry; + } + + State getState(TBE tbe, Entry cache_entry, Address addr) { + assert((Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)) == false); + + if(is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:NP; + } + + void setState(TBE tbe, Entry cache_entry, Address addr, State state) { + assert((Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)) == false); + + // MUST CHANGE + if(is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + AccessPermission getAccessPermission(Address addr) { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + DPRINTF(RubySlicc, "%s\n", L0Cache_State_to_permission(tbe.TBEState)); + return L0Cache_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + DPRINTF(RubySlicc, "%s\n", L0Cache_State_to_permission(cache_entry.CacheState)); + return L0Cache_State_to_permission(cache_entry.CacheState); + } + + DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent); + return AccessPermission:NotPresent; + } + + DataBlock getDataBlock(Address addr), return_by_ref="yes" { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + return tbe.DataBlk; + } + + return getCacheEntry(addr).DataBlk; + } + + void setAccessPermission(Entry cache_entry, Address addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(L0Cache_State_to_permission(state)); + } + } + + Event mandatory_request_type_to_event(RubyRequestType type) { + if (type == RubyRequestType:LD) { + return Event:Load; + 
} else if (type == RubyRequestType:IFETCH) { + return Event:Ifetch; + } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) { + return Event:Store; + } else { + error("Invalid RubyRequestType"); + } + } + + int getPendingAcks(TBE tbe) { + return tbe.pendingAcks; + } + + out_port(requestNetwork_out, CoherenceMsg, requestFromCache); + + // Messages for this L0 cache from the L1 cache + in_port(messgeBuffer_in, CoherenceMsg, bufferToCache, rank = 1) { + if (messgeBuffer_in.isReady()) { + peek(messgeBuffer_in, CoherenceMsg, block_on="Addr") { + assert(in_msg.Destination == machineID); + + DPRINTF(RubySlicc, "DEBUG: Trying next request from L1Q: %s\n", + in_msg.Addr); + + Entry cache_entry := getCacheEntry(in_msg.Addr); + TBE tbe := TBEs[in_msg.Addr]; + + if(in_msg.Class == CoherenceClass:DATA_EXCLUSIVE) { + trigger(Event:Data_Exclusive, in_msg.Addr, cache_entry, tbe); + } else if(in_msg.Class == CoherenceClass:DATA) { + trigger(Event:Data, in_msg.Addr, cache_entry, tbe); + } else if(in_msg.Class == CoherenceClass:STALE_DATA) { + trigger(Event:Data_Stale, in_msg.Addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:ACK) { + trigger(Event:Ack, in_msg.Addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:WB_ACK) { + trigger(Event:WB_Ack, in_msg.Addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:INV) { + // if isTransaction and cache entry NULL + // and bloom filter match, then abort + if(isTransaction == true && + is_valid(cache_entry) == false && + TxnBF.isSet(in_msg.Addr) == true) + { + Entry nce := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE ntbe := TBEs.getNullEntry(); + trigger(Event:Txn_InvAck, in_msg.Addr, nce, ntbe); + } + else { + trigger(Event:Inv, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Class == CoherenceClass:GETX || + in_msg.Class == CoherenceClass:UPGRADE) { + // if isTransaction and cache entry NULL + // and bloom filter match, then abort + 
if(isTransaction == true && + is_valid(cache_entry) == false && + TxnBF.isSet(in_msg.Addr) == true) + { + Entry nce := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE ntbe := TBEs.getNullEntry(); + trigger(Event:Txn_NAK, in_msg.Addr, nce, ntbe); + } + else { + // upgrade transforms to GETX due to race + trigger(Event:Fwd_GETX, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Class == CoherenceClass:GETS) { + // if isTransaction and cache entry NULL + // and bloom filter match, then abort + if(isTransaction == true && + is_valid(cache_entry) == false && + TxnBF.isSet(in_msg.Addr) == true) + { + Entry nce := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE ntbe := TBEs.getNullEntry(); + trigger(Event:Txn_NAK, in_msg.Addr, nce, ntbe); + } + else if(cache_entry.getWriteSet() == true && + isTransaction == true) { + assert(cache_entry.CacheState == State:M); + trigger(Event:Fwd_GETS_Txn, in_msg.Addr, cache_entry, tbe); + } + else { + trigger(Event:Fwd_GETS, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Class == CoherenceClass:GET_INSTR) { + // if isTransaction and cache entry NULL + // and bloom filter match, then abort + if(isTransaction == true && + is_valid(cache_entry) == false && + TxnBF.isSet(in_msg.Addr) == true) + { + Entry nce := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE ntbe := TBEs.getNullEntry(); + trigger(Event:Txn_NAK, in_msg.Addr, nce, ntbe); + } + else if(cache_entry.getWriteSet() == true && + isTransaction == true) { + assert(cache_entry.CacheState == State:M); + trigger(Event:Fwd_GET_INSTR_Txn, in_msg.Addr, cache_entry, tbe); + } + else { + trigger(Event:Fwd_GET_INSTR, in_msg.Addr, cache_entry, tbe); + } + } else { + error("Invalid forwarded request type"); + } + } + } + else { + DPRINTF(RubySlicc, "DEBUG: L1Q not ready\n"); + } + } + + // Mandatory Queue betweens Node's CPU and it's L0 caches + in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) { + if 
(mandatoryQueue_in.isReady()) { + DPRINTF(RubySlicc, "DEBUG: Trying next request from MDQ\n"); + peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { + + if(in_msg.Type == RubyRequestType:Txn_Start) { + DPRINTF(RubySlicc, "DEBUG: Starting transaction: %s\n", + txnDepth); + isTransaction := true; + txnDepth := txnDepth + 1; + sequencer.txnCallback(in_msg.LineAddress, false, false, abortCode); + DPRINTF(RubySlicc, "DEBUG: Started transaction: %s\n", + txnDepth); + + // trigger call to deque the mandatoryQueue + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + // both cache_entry and tbe should be NULL here + assert(is_invalid(cache_entry) && is_invalid(tbe)); + trigger(Event:Txn_PopQ, in_msg.LineAddress, cache_entry, tbe); + //mandatoryQueue_in.dequeue(); + } + else if(in_msg.Type == RubyRequestType:Txn_Commit) { + if(abortTransaction) { + sequencer.txnCallback(in_msg.LineAddress, false, true, abortCode); + } + else { + DPRINTF(RubySlicc, "DEBUG: Committing transaction: %s\n", + txnDepth); + txnDepth := txnDepth - 1; + DPRINTF(RubySlicc, "DEBUG: Committed transaction: %s\n", + txnDepth); + if(txnDepth == 0) { + TxnBF.clear(); + Icache.commitTransaction(); + Dcache.commitTransaction(); + isTransaction := false; + abortTransaction := false; + txnDepth := 0; + DPRINTF(RubySlicc, "DEBUG: Committed full transaction: %s\n", + txnDepth); + } + + sequencer.txnCallback(in_msg.LineAddress, false, false, abortCode); + } + + // trigger call to deque the mandatoryQueue + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + // both cache_entry and tbe should be NULL here + assert(is_invalid(cache_entry) && is_invalid(tbe)); + trigger(Event:Txn_PopQ, in_msg.LineAddress, cache_entry, tbe); + //mandatoryQueue_in.dequeue(); + } + else if(in_msg.Type == RubyRequestType:Txn_Abort) { + DPRINTF(RubySlicc, "DEBUG: Aborting transaction: %s\n", + txnDepth); + 
TxnBF.clear(); + Icache.abortTransaction(); + Dcache.abortTransaction(); + clearBlockMap(); + isTransaction := false; + abortTransaction := false; + txnDepth := 0; + sequencer.txnCallback(in_msg.LineAddress, false, false, abortCode); + DPRINTF(RubySlicc, "DEBUG: Aborted transaction: %s\n", + txnDepth); + + // trigger call to deque the mandatoryQueue + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + // both cache_entry and tbe should be NULL here + assert(is_invalid(cache_entry) && is_invalid(tbe)); + trigger(Event:Txn_PopQ, in_msg.LineAddress, cache_entry, tbe); + //mandatoryQueue_in.dequeue(); + } + else if(abortTransaction) { + if ((in_msg.Type == RubyRequestType:ST) || + (in_msg.Type == RubyRequestType:ATOMIC) || + (in_msg.Type == RubyRequestType:RMW_Read) || + (in_msg.Type == RubyRequestType:RMW_Write) || + (in_msg.Type == RubyRequestType:Load_Linked) || + (in_msg.Type == RubyRequestType:Store_Conditional) || + (in_msg.Type == RubyRequestType:Locked_RMW_Read) || + (in_msg.Type == RubyRequestType:Locked_RMW_Write) || + (in_msg.Type == RubyRequestType:FLUSH)) { + sequencer.txnCallback(in_msg.LineAddress, true, true, abortCode); + } + else { + sequencer.txnCallback(in_msg.LineAddress, false, true, abortCode); + } + + // trigger call to deque the mandatoryQueue + Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry()); + TBE tbe := TBEs.getNullEntry(); + // both cache_entry and tbe should be NULL here + assert(is_invalid(cache_entry) && is_invalid(tbe)); + trigger(Event:Txn_PopQ, in_msg.LineAddress, cache_entry, tbe); + //mandatoryQueue_in.dequeue(); + } + else { + // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache + + if (in_msg.Type == RubyRequestType:IFETCH) { + // ** INSTRUCTION ACCESS *** + + Entry Icache_entry := getICacheEntry(in_msg.LineAddress); + if (is_valid(Icache_entry)) { + // The tag matches for the L0, so the L0 asks the L2 for it. 
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+ Icache_entry, TBEs[in_msg.LineAddress]);
+ } else {
+
+ // Check to see if it is in the OTHER L0
+ Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress);
+ if (is_valid(Dcache_entry)) {
+ // The block is in the wrong L0, put the request on the queue to the shared L2
+ trigger(Event:L0_Replacement, in_msg.LineAddress,
+ Dcache_entry, TBEs[in_msg.LineAddress]);
+ }
+
+ if (Icache.cacheAvail(in_msg.LineAddress)) {
+ // L0 doesn't have the line, but we have space for it
+ // in the L0 so let's see if the L2 has it
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+ Icache_entry, TBEs[in_msg.LineAddress]);
+ } else {
+ // No room in the L0, so we need to make room in the L0
+ trigger(Event:L0_Replacement, Icache.cacheProbe(in_msg.LineAddress),
+ getICacheEntry(Icache.cacheProbe(in_msg.LineAddress)),
+ TBEs[Icache.cacheProbe(in_msg.LineAddress)]);
+ }
+ }
+ } else {
+ // *** DATA ACCESS ***
+ Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress);
+ if (is_valid(Dcache_entry)) {
+ // The tag matches for the L0, so the L0 asks the L1 for it
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+ Dcache_entry, TBEs[in_msg.LineAddress]);
+ } else {
+ // Check to see if it is in the OTHER L0
+ Entry Icache_entry := getICacheEntry(in_msg.LineAddress);
+ if (is_valid(Icache_entry)) {
+ // The block is in the wrong L0, put the request on the queue to the private L1
+ trigger(Event:L0_Replacement, in_msg.LineAddress,
+ Icache_entry, TBEs[in_msg.LineAddress]);
+ }
+
+ if (Dcache.cacheAvail(in_msg.LineAddress)) {
+ // L0 doesn't have the line, but we have space for it
+ // in the L0 so let's see if the L1 has it
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+ Dcache_entry, TBEs[in_msg.LineAddress]);
+ } else {
+ // No room in the L0, so we need to make room in the L0
+ trigger(Event:L0_Replacement,
Dcache.cacheProbe(in_msg.LineAddress), + getDCacheEntry(Dcache.cacheProbe(in_msg.LineAddress)), + TBEs[Dcache.cacheProbe(in_msg.LineAddress)]); + } + } + } + } + } + } + else { + DPRINTF(RubySlicc, "DEBUG: Mandatory Queue not ready!\n"); + } + } + + //***************************************************** + // ACTIONS + //***************************************************** + + action(a_issueGETS, "a", desc="Issue GETS") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:GETS; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(b_issueGETX, "b", desc="Issue GETX") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:GETX; + out_msg.Sender := machineID; + DPRINTF(RubySlicc, "%s\n", machineID); + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(c_issueUPGRADE, "c", desc="Issue GETX") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, CoherenceMsg, request_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:UPGRADE; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + 
action(fn_abortsendNakToL1, "fnan", desc="abort and send NAK to the L2 cache saying that it has latest data") { + abortTransaction := true; + + // send NAK to L1 + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:NAK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(fn_abortsendInvAckToL1, "fnai", desc="abort and send Inv ACK to the L2 cache saying that it has latest data") { + abortTransaction := true; + + // send NAK to L1 + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:INV_ACK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(fn_sendNakToL1, "fn", desc="send NAK to the L2 cache saying that it has latest data") { + // send NAK to L1 + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:NAK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + + action(f_sendDataToL1, "f", desc="send data to the L2 cache") { + if(isTransaction == true && + cache_entry.getWriteSet() == true && + cache_entry.CacheState == State:M) + { + // send NAK to L1 + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:NAK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + else { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + 
assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:INV_DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + } + } + + action(fi_sendInvAck, "fi", desc="send data to the L2 cache") { + peek(messgeBuffer_in, CoherenceMsg) { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:INV_ACK; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { + if(isTransaction == true && + (cache_entry.CacheState == State:S) && + cache_entry.getReadSet() == true) + { + // Do not send eviction to cpu + // The cache entry is added to the Bloom Filter because it is in the read set of the transaction + } + else { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address); + sequencer.evictionCallback(address); + } + } + } + + action(g_issuePUTX, "g", desc="send data to the L2 cache") { + if(isTransaction == true && + cache_entry.getWriteSet() == true) + { } + else { + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:PUTX; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender:= machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + + if (cache_entry.Dirty) { + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } else { + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + } + + action(h_load_hit, "h", desc="If not prefetch, notify 
sequencer the load completed.") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + sequencer.readCallback(address, cache_entry.DataBlk); + } + + action(hh_store_hit, "\h", desc="If not prefetch, notify sequencer that store completed.") { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk); + sequencer.writeCallback(address, cache_entry.DataBlk); + cache_entry.Dirty := true; + } + + action(i_allocateTBE, "i", desc="Allocate TBE (number of invalidates=0)") { + check_allocate(TBEs); + assert(is_valid(cache_entry)); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.Dirty := cache_entry.Dirty; + tbe.DataBlk := cache_entry.DataBlk; + } + + action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") { + mandatoryQueue_in.dequeue(); + } + + action(l_popRequestQueue, "l", + desc="Pop incoming request queue and profile the delay within this virtual network") { + profileMsgDelay(2, messgeBuffer_in.dequeue()); + } + + action(o_popIncomingResponseQueue, "o", + desc="Pop Incoming Response queue and profile the delay within this virtual network") { + profileMsgDelay(1, messgeBuffer_in.dequeue()); + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(u_writeDataToCache, "u", desc="Write data to cache") { + peek(messgeBuffer_in, CoherenceMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(ff_deallocateCacheBlock, "\f", + desc="Deallocate L1 cache block. 
Sets the cache to not present, allowing a replacement in parallel with a fetch.") { + if (Dcache.isTagPresent(address)) { + Dcache.deallocate(address); + } else { + Icache.deallocate(address); + } + unset_cache_entry(); + } + + action(oo_allocateDCacheBlock, "\o", desc="Set L1 D-cache tag equal to tag of block B.") { + if (is_invalid(cache_entry)) { + set_cache_entry(Dcache.allocate(address, new Entry)); + } + } + + action(pp_allocateICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") { + if (is_invalid(cache_entry)) { + set_cache_entry(Icache.allocate(address, new Entry)); + } + } + + action(z_stallAndWaitMandatoryQueue, "\z", desc="recycle cpu request queue") { + DPRINTF(RubySlicc, "DEBUG: Stalling access: %s\n", address); + stall_and_wait(mandatoryQueue_in, address); + DPRINTF(RubySlicc, "DEBUG: Completing stall call: %s\n", address); + } + + action(kd_wakeUpDependents, "kd", desc="wake-up dependents") { + DPRINTF(RubySlicc, "DEBUG: Waking up all buffers stalled on: %s\n", address); + wakeUpAllBuffers(address); + DPRINTF(RubySlicc, "DEBUG: Completed wake up all buffers stalled on: %s\n", address); + } + + action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") { + ++Icache.demand_misses; + } + + action(uu_profileInstHit, "\uih", desc="Profile the demand miss") { + ++Icache.demand_hits; + } + + action(uu_profileDataMiss, "\ud", desc="Profile the demand miss") { + ++Dcache.demand_misses; + } + + action(uu_profileDataHit, "\udh", desc="Profile the demand miss") { + ++Dcache.demand_hits; + } + + action(rs_addToReadSet, "rs", desc="Add to read set") { + if (isTransaction) { + // set read set bit + cache_entry.setReadSet(); + } + } + + action(rw_addToWriteSet, "rw", desc="Add to write set") { + if (isTransaction) { + // ON DEMAND write-back (may be easier) + // if dirty and not in write set + // write back and retain M state + if((cache_entry.getWriteSet() == false) && (cache_entry.CacheState == State:M)){ + if(cache_entry.Dirty) { + // 
code copied from issuePUTX + enqueue(requestNetwork_out, CoherenceMsg, response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Class := CoherenceClass:PUTX_COPY; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender:= machineID; + out_msg.Destination := createMachineID(MachineType:L1Cache, version); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + } + } + + // set write set bit + cache_entry.setWriteSet(); + } + } + + action(abf_addToBloomFilter, "abf", desc="Add to Bloom Filter") { + if(isTransaction == true && + (cache_entry.CacheState == State:S) && + cache_entry.getReadSet() == true) + { + TxnBF.set(address); + } + } + + // action(stx_startTransaction, "stx", desc="Start a transaction") { + // } + + // action(ctx_commitTransaction, "ctx", desc="Commit a transaction") { + // } + + action(atx_abortTransaction, "atx", desc="Abort a transaction") { + if(isTransaction && (cache_entry.getWriteSet() || cache_entry.getReadSet())) { + DPRINTF(RubySlicc, "Implicit abort of a transaction: %s, %s, %s\n", + cache_entry.getReadSet(), cache_entry.getWriteSet(), + address); + abortTransaction := true; + } + } + + + + //***************************************************** + // TRANSITIONS + //***************************************************** + + // False Transition to just pop the mandatory queue + // making sure all mandatory queue ends with a trigger + // Currently in testing phase + transition(NP, Txn_PopQ) { + k_popMandatoryQueue; + } + + transition(NP, Txn_NAK) { + fn_abortsendNakToL1; + l_popRequestQueue; + } + + transition(NP, Txn_InvAck) { + fn_abortsendInvAckToL1; + l_popRequestQueue; + } + + // Transitions for Load/Store/Replacement/WriteBack from transient states + transition({IS, IM, SM}, {Load, Ifetch, Store, L0_Replacement}) { + z_stallAndWaitMandatoryQueue; + } + + // Transitions from Idle + transition({NP,I}, L0_Replacement) { + ff_deallocateCacheBlock; + } + + 
transition({NP,I}, Load, IS) { + oo_allocateDCacheBlock; + i_allocateTBE; + rs_addToReadSet; // TSX_Support + a_issueGETS; + uu_profileDataMiss; + k_popMandatoryQueue; + } + + transition({NP,I}, Ifetch, IS) { + pp_allocateICacheBlock; + i_allocateTBE; + rs_addToReadSet; // TSX_Support + a_issueGETS; + uu_profileInstMiss; + k_popMandatoryQueue; + } + + transition({NP,I}, Store, IM) { + oo_allocateDCacheBlock; + i_allocateTBE; + rw_addToWriteSet; // TSX_Support + b_issueGETX; + uu_profileDataMiss; + k_popMandatoryQueue; + } + + transition({NP, I}, Inv) { + fi_sendInvAck; + l_popRequestQueue; + } + + transition({IS, IM}, Inv) { + fi_sendInvAck; + atx_abortTransaction; // TSX_Support + l_popRequestQueue; + } + + transition(SM, Inv, IM) { + fi_sendInvAck; + atx_abortTransaction; // TSX_Support + l_popRequestQueue; + } + + // Transitions from Shared + transition({S,E,M}, Load) { + h_load_hit; + rs_addToReadSet; // TSX_Support + uu_profileDataHit; + k_popMandatoryQueue; + } + + transition({S,E,M}, Ifetch) { + h_load_hit; + rs_addToReadSet; // TSX_Support + uu_profileInstHit; + k_popMandatoryQueue; + } + + transition(S, Store, SM) { + i_allocateTBE; + rw_addToWriteSet; // TSX_Support + c_issueUPGRADE; + uu_profileDataMiss; + k_popMandatoryQueue; + } + + transition(S, L0_Replacement, I) { + // Do not abort. 
Just add entry in BloomFilter TxnBF + // atx_abortTransaction; // TSX_Support + abf_addToBloomFilter; // TSX_Support + forward_eviction_to_cpu; + ff_deallocateCacheBlock; + } + + transition(S, Inv, I) { + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + // Transitions from Exclusive + transition({E,M}, Store, M) { + rw_addToWriteSet; // TSX_Support + hh_store_hit; + uu_profileDataHit; + k_popMandatoryQueue; + } + + transition(E, L0_Replacement, I) { + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + g_issuePUTX; + ff_deallocateCacheBlock; + } + + transition(E, {Inv, Fwd_GETX}, I) { + // don't send data + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + transition(E, {Fwd_GETS, Fwd_GET_INSTR}, S) { + f_sendDataToL1; + l_popRequestQueue; + } + + // Transitions from Modified + transition(M, L0_Replacement, I) { + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + g_issuePUTX; + ff_deallocateCacheBlock; + } + + transition(M, {Inv, Fwd_GETX}, I) { + atx_abortTransaction; // TSX_Support + forward_eviction_to_cpu; + f_sendDataToL1; + ff_deallocateCacheBlock; + l_popRequestQueue; + } + + transition(M, {Fwd_GETS, Fwd_GET_INSTR}, S) { + //atx_abortTransaction; // TSX_Support + f_sendDataToL1; + l_popRequestQueue; + } + + // If transaction, abort transaction and go to I + transition(M, {Fwd_GETS_Txn, Fwd_GET_INSTR_Txn}, I) { + atx_abortTransaction; // TSX_Support + f_sendDataToL1; + l_popRequestQueue; + } + + transition(IS, Data, S) { + u_writeDataToCache; + h_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(IS, Data_Exclusive, E) { + u_writeDataToCache; + h_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(IS, Data_Stale, I) { + atx_abortTransaction; // TSX_Support + 
u_writeDataToCache; + h_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition({IM,SM}, Data_Exclusive, M) { + u_writeDataToCache; + hh_store_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition(I, {Fwd_GETS, Fwd_GET_INSTR, Fwd_GETX}) { + fn_sendNakToL1; + l_popRequestQueue; + } +} diff -r 878f2f30b12d src/mem/protocol/MESI_Txn_Three_Level-L1cache.sm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/mem/protocol/MESI_Txn_Three_Level-L1cache.sm Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,1093 @@ +/* + * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +machine(L1Cache, "MESI Directory L1 Cache CMP") + : CacheMemory * cache, + int l2_select_num_bits, + Cycles l1_request_latency = 4, + Cycles l1_response_latency = 4, + Cycles to_l2_latency = 1, +{ + // From this node's L1 cache TO the network + // a local L1 -> this L2 bank, currently ordered with directory forwarded requests + MessageBuffer requestFromCache, network="To", virtual_network="0", ordered="false", vnet_type="request"; + // a local L1 -> this L2 bank + MessageBuffer responseFromCache, network="To", virtual_network="1", ordered="false", vnet_type="response"; + MessageBuffer unblockFromCache, network="To", virtual_network="2", ordered="false", vnet_type="unblock"; + + + // To this node's L1 cache FROM the network + // a L2 bank -> this L1 + MessageBuffer requestToCache, network="From", virtual_network="0", ordered="false", vnet_type="request"; + // a L2 bank -> this L1 + MessageBuffer responseToCache, network="From", virtual_network="1", ordered="false", vnet_type="response"; + + // Message Buffers between the L1 and the L0 Cache + // From the L1 cache to the L0 cache + MessageBuffer bufferFromL1ToL0, network="To", physical_network="0", ordered="true"; + // From the L0 cache to the L1 cache + MessageBuffer bufferToL1FromL0, network="From", physical_network="0", ordered="true"; + + // STATES + state_declaration(State, desc="Cache states", default="L1Cache_State_I") { + // Base states + NP, AccessPermission:Invalid, 
desc="Not present in either cache"; + I, AccessPermission:Invalid, desc="a L1 cache entry Idle"; + S, AccessPermission:Read_Only, desc="a L1 cache entry Shared"; + SS, AccessPermission:Read_Only, desc="a L1 cache entry Shared"; + E, AccessPermission:Read_Only, desc="a L1 cache entry Exclusive"; + EE, AccessPermission:Read_Write, desc="a L1 cache entry Exclusive"; + M, AccessPermission:Maybe_Stale, desc="a L1 cache entry Modified", format="!b"; + MM, AccessPermission:Read_Write, desc="a L1 cache entry Modified", format="!b"; + + // Transient States + IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet"; + IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet"; + SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet"; + IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit"; + + M_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK"; + SINK_WB_ACK, AccessPermission:Busy, desc="This is to sink WB_Acks from L2"; + + // For all of the following states, invalidate + // message has been sent to L0 cache. The response + // from the L0 cache has not been seen yet. + S_IL0, AccessPermission:Busy; + E_IL0, AccessPermission:Busy; + M_IL0, AccessPermission:Busy; + MM_IL0, AccessPermission:Read_Write; + SM_IL0, AccessPermission:Busy; + } + + // EVENTS + enumeration(Event, desc="Cache events") { + // Requests from the L0 cache + Load, desc="Load request"; + Store, desc="Store request"; + WriteBack, desc="Writeback request"; + L0_DataCopy, desc="Data Block from L0. Should remain in M state."; + + // Responses from the L0 Cache + // L0 cache received the invalidation message + // and has sent the data. + L0_DataAck; + + // L0 cache received the invalidation message + // and has sent a NAK (because of txn abort) + // saying that the data in L1 is the latest value. 
+ L0_DataNak; + + Inv, desc="Invalidate request from L2 bank"; + + // internal generated request + // Invalidate the line in L0 due to own requirements + L0_Invalidate_Own; + // Invalidate the line in L0 due to some other cache's requirements + L0_Invalidate_Else; + // Invalidate the line in the cache due to some one else / space needs. + L1_Replacement; + + // other requests + Fwd_GETX, desc="GETX from other processor"; + Fwd_GETS, desc="GETS from other processor"; + + Data, desc="Data for processor"; + Data_Exclusive, desc="Data for processor"; + DataS_fromL1, desc="data for GETS request, need to unblock directory"; + Data_all_Acks, desc="Data for processor, all acks"; + + L0_Ack, desc="Ack for processor"; + Ack, desc="Ack for processor"; + Ack_all, desc="Last ack for processor"; + + WB_Ack, desc="Ack for replacement"; + } + + // TYPES + + // CacheEntry + structure(Entry, desc="...", interface="AbstractCacheEntry" ) { + State CacheState, desc="cache state"; + DataBlock DataBlk, desc="data for the block"; + bool Dirty, default="false", desc="data is dirty"; + } + + // TBE fields + structure(TBE, desc="...") { + Address Addr, desc="Physical address for this TBE"; + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Buffer for the data block"; + bool Dirty, default="false", desc="data is dirty"; + int pendingAcks, default="0", desc="number of pending acks"; + } + + structure(TBETable, external="yes") { + TBE lookup(Address); + void allocate(Address); + void deallocate(Address); + bool isPresent(Address); + } + + TBETable TBEs, template="", constructor="m_number_of_TBEs"; + + int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + + void set_cache_entry(AbstractCacheEntry a); + void unset_cache_entry(); + void set_tbe(TBE a); + void unset_tbe(); + void wakeUpBuffers(Address a); + void wakeUpAllBuffers(Address a); + void profileMsgDelay(int virtualNetworkType, Cycles c); + + // inclusive cache returns L1 entries only + Entry 
getCacheEntry(Address addr), return_by_pointer="yes" { + Entry cache_entry := static_cast(Entry, "pointer", cache[addr]); + return cache_entry; + } + + State getState(TBE tbe, Entry cache_entry, Address addr) { + if(is_valid(tbe)) { + return tbe.TBEState; + } else if (is_valid(cache_entry)) { + return cache_entry.CacheState; + } + return State:NP; + } + + void setState(TBE tbe, Entry cache_entry, Address addr, State state) { + // MUST CHANGE + if(is_valid(tbe)) { + tbe.TBEState := state; + } + + if (is_valid(cache_entry)) { + cache_entry.CacheState := state; + } + } + + AccessPermission getAccessPermission(Address addr) { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState)); + return L1Cache_State_to_permission(tbe.TBEState); + } + + Entry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState)); + return L1Cache_State_to_permission(cache_entry.CacheState); + } + + DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent); + return AccessPermission:NotPresent; + } + + DataBlock getDataBlock(Address addr), return_by_ref="yes" { + TBE tbe := TBEs[addr]; + if(is_valid(tbe)) { + return tbe.DataBlk; + } + + return getCacheEntry(addr).DataBlk; + } + + void setAccessPermission(Entry cache_entry, Address addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(L1Cache_State_to_permission(state)); + } + } + + Event mandatory_request_type_to_event(CoherenceClass type) { + if (type == CoherenceClass:GETS) { + return Event:Load; + } else if ((type == CoherenceClass:GETX) || + (type == CoherenceClass:UPGRADE)) { + return Event:Store; + } else if (type == CoherenceClass:PUTX) { + return Event:WriteBack; + } else { + error("Invalid RequestType"); + } + } + + int getPendingAcks(TBE tbe) { + return tbe.pendingAcks; + } + + bool inL0Cache(State state) { + if (state == State:S || state == State:E || state 
== State:M || + state == State:S_IL0 || state == State:E_IL0 || + state == State:M_IL0 || state == State:SM_IL0) { + return true; + } + + return false; + } + + out_port(requestNetwork_out, RequestMsg, requestFromCache); + out_port(responseNetwork_out, ResponseMsg, responseFromCache); + out_port(unblockNetwork_out, ResponseMsg, unblockFromCache); + out_port(bufferToL0_out, CoherenceMsg, bufferFromL1ToL0); + + // Response From the L2 Cache to this L1 cache + in_port(responseNetwork_in, ResponseMsg, responseToCache, rank = 3) { + if (responseNetwork_in.isReady()) { + peek(responseNetwork_in, ResponseMsg) { + assert(in_msg.Destination.isElement(machineID)); + + Entry cache_entry := getCacheEntry(in_msg.Addr); + TBE tbe := TBEs[in_msg.Addr]; + + if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { + trigger(Event:Data_Exclusive, in_msg.Addr, cache_entry, tbe); + } else if(in_msg.Type == CoherenceResponseType:DATA) { + if ((getState(tbe, cache_entry, in_msg.Addr) == State:IS || + getState(tbe, cache_entry, in_msg.Addr) == State:IS_I) && + machineIDToMachineType(in_msg.Sender) == MachineType:L1Cache) { + + trigger(Event:DataS_fromL1, in_msg.Addr, cache_entry, tbe); + + } else if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) { + trigger(Event:Data_all_Acks, in_msg.Addr, cache_entry, tbe); + } else { + trigger(Event:Data, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:ACK) { + if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) { + trigger(Event:Ack_all, in_msg.Addr, cache_entry, tbe); + } else { + trigger(Event:Ack, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceResponseType:WB_ACK) { + trigger(Event:WB_Ack, in_msg.Addr, cache_entry, tbe); + } else { + error("Invalid L1 response type"); + } + } + } + } + + // Request to this L1 cache from the shared L2 + in_port(requestNetwork_in, RequestMsg, requestToCache, rank = 2) { + if(requestNetwork_in.isReady()) { + peek(requestNetwork_in, RequestMsg) { + 
assert(in_msg.Destination.isElement(machineID)); + Entry cache_entry := getCacheEntry(in_msg.Addr); + TBE tbe := TBEs[in_msg.Addr]; + + if (in_msg.Type == CoherenceRequestType:INV) { + if (is_valid(cache_entry) && inL0Cache(cache_entry.CacheState)) { + trigger(Event:L0_Invalidate_Else, in_msg.Addr, + cache_entry, tbe); + } else { + trigger(Event:Inv, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:GETX || + in_msg.Type == CoherenceRequestType:UPGRADE) { + if (is_valid(cache_entry) && inL0Cache(cache_entry.CacheState)) { + trigger(Event:L0_Invalidate_Else, in_msg.Addr, + cache_entry, tbe); + } else { + trigger(Event:Fwd_GETX, in_msg.Addr, cache_entry, tbe); + } + } else if (in_msg.Type == CoherenceRequestType:GETS) { + if (is_valid(cache_entry) && inL0Cache(cache_entry.CacheState)) { + trigger(Event:L0_Invalidate_Else, in_msg.Addr, + cache_entry, tbe); + } else { + trigger(Event:Fwd_GETS, in_msg.Addr, cache_entry, tbe); + } + } else { + error("Invalid forwarded request type"); + } + } + } + } + + // Requests to this L1 cache from the L0 cache. 
+ in_port(messageBufferFromL0_in, CoherenceMsg, bufferToL1FromL0, rank = 0) { + if (messageBufferFromL0_in.isReady()) { + peek(messageBufferFromL0_in, CoherenceMsg) { + Entry cache_entry := getCacheEntry(in_msg.Addr); + TBE tbe := TBEs[in_msg.Addr]; + + if(in_msg.Class == CoherenceClass:INV_DATA) { + trigger(Event:L0_DataAck, in_msg.Addr, cache_entry, tbe); + } + else if (in_msg.Class == CoherenceClass:NAK) { + trigger(Event:L0_DataNak, in_msg.Addr, cache_entry, tbe); + } + else if (in_msg.Class == CoherenceClass:PUTX_COPY) { + trigger(Event:L0_DataCopy, in_msg.Addr, cache_entry, tbe); + } else if (in_msg.Class == CoherenceClass:INV_ACK) { + trigger(Event:L0_Ack, in_msg.Addr, cache_entry, tbe); + } else { + if (is_valid(cache_entry)) { + trigger(mandatory_request_type_to_event(in_msg.Class), + in_msg.Addr, cache_entry, tbe); + } else { + if (cache.cacheAvail(in_msg.Addr)) { + // L1 doesn't have the line, but we have space for it + // in the L1 let's see if the L2 has it + trigger(mandatory_request_type_to_event(in_msg.Class), + in_msg.Addr, cache_entry, tbe); + } else { + // No room in the L1, so we need to make room in the L1 + Entry victim_entry := + getCacheEntry(cache.cacheProbe(in_msg.Addr)); + TBE victim_tbe := TBEs[cache.cacheProbe(in_msg.Addr)]; + + if (is_valid(victim_entry) && inL0Cache(victim_entry.CacheState)) { + trigger(Event:L0_Invalidate_Own, + cache.cacheProbe(in_msg.Addr), + victim_entry, victim_tbe); + } else { + trigger(Event:L1_Replacement, + cache.cacheProbe(in_msg.Addr), + victim_entry, victim_tbe); + } + } + } + } + } + } + + // ACTIONS + action(a_issueGETS, "a", desc="Issue GETS") { + peek(messageBufferFromL0_in, CoherenceMsg) { + enqueue(requestNetwork_out, RequestMsg, l1_request_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceRequestType:GETS; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + 
DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(b_issueGETX, "b", desc="Issue GETX") { + peek(messageBufferFromL0_in, CoherenceMsg) { + enqueue(requestNetwork_out, RequestMsg, l1_request_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceRequestType:GETX; + out_msg.Requestor := machineID; + DPRINTF(RubySlicc, "%s\n", machineID); + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(c_issueUPGRADE, "c", desc="Issue GETX") { + peek(messageBufferFromL0_in, CoherenceMsg) { + enqueue(requestNetwork_out, RequestMsg, l1_request_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceRequestType:UPGRADE; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + DPRINTF(RubySlicc, "address: %s, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:Control; + out_msg.AccessMode := in_msg.AccessMode; + } + } + } + + action(d_sendDataToRequestor, "d", desc="send data to requestor") { + peek(requestNetwork_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of 
M downgrade") { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") { + peek(requestNetwork_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(tbe)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(tbe)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") { + peek(requestNetwork_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:ACK; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + } + + action(f_sendDataToL2, "f", desc="send data to 
the L2 cache") { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + } + + action(ft_sendDataToL2_fromTBE, "ft", desc="send data to the L2 cache") { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + assert(is_valid(tbe)); + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } + } + + action(fi_sendInvAck, "fi", desc="send data to the L2 cache") { + peek(requestNetwork_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, l1_response_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:ACK; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + out_msg.AckCount := 1; + } + } + } + + action(forward_eviction_to_L0, "\cc", desc="sends eviction information to the processor") { + enqueue(bufferToL0_out, CoherenceMsg, l1_request_latency) { + out_msg.Addr := address; + out_msg.Class := CoherenceClass:INV; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L0Cache, version); + out_msg.MessageSize := MessageSizeType:Control; + } + } + + action(g_issuePUTX, "g", desc="send data to the L2 cache") { + enqueue(requestNetwork_out, RequestMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + 
out_msg.Addr := address; + out_msg.Type := CoherenceRequestType:PUTX; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Requestor:= machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + if (cache_entry.Dirty) { + out_msg.MessageSize := MessageSizeType:Writeback_Data; + } else { + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(j_sendUnblock, "j", desc="send unblock to the L2 cache") { + enqueue(unblockNetwork_out, ResponseMsg, to_l2_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:UNBLOCK; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Response_Control; + DPRINTF(RubySlicc, "%s\n", address); + } + } + + action(jj_sendExclusiveUnblock, "\j", desc="send unblock to the L2 cache") { + enqueue(unblockNetwork_out, ResponseMsg, to_l2_latency) { + out_msg.Addr := address; + out_msg.Type := CoherenceResponseType:EXCLUSIVE_UNBLOCK; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, clusterID)); + out_msg.MessageSize := MessageSizeType:Response_Control; + DPRINTF(RubySlicc, "%s\n", address); + + } + } + + action(h_data_to_l0, "h", desc="If not prefetch, send data to the L0 cache.") { + enqueue(bufferToL0_out, CoherenceMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + + out_msg.Addr := address; + out_msg.Class := CoherenceClass:DATA; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L0Cache, version); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(h_stale_data_to_l0, "hs", desc="If not prefetch, 
send data to the L0 cache.") { + enqueue(bufferToL0_out, CoherenceMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + + out_msg.Addr := address; + out_msg.Class := CoherenceClass:STALE_DATA; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L0Cache, version); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + action(hh_xdata_to_l0, "\h", desc="If not prefetch, notify sequencer that store completed.") { + enqueue(bufferToL0_out, CoherenceMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + + out_msg.Addr := address; + out_msg.Class := CoherenceClass:DATA_EXCLUSIVE; + out_msg.Sender := machineID; + out_msg.Destination := createMachineID(MachineType:L0Cache, version); + + DPRINTF(RubySlicc, "DEBUG: address: %s, destination: %s\n", + address, out_msg.Destination); + + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.MessageSize := MessageSizeType:Response_Data; + + cache_entry.Dirty := true; + } + } + + action(i_allocateTBE, "i", desc="Allocate TBE (number of invalidates=0)") { + check_allocate(TBEs); + assert(is_valid(cache_entry)); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.Dirty := cache_entry.Dirty; + tbe.DataBlk := cache_entry.DataBlk; + } + + action(k_popL0RequestQueue, "k", desc="Pop mandatory queue.") { + messageBufferFromL0_in.dequeue(); + } + + action(l_popL2RequestQueue, "l", + desc="Pop incoming request queue and profile the delay within this virtual network") { + profileMsgDelay(2, requestNetwork_in.dequeue()); + } + + action(o_popL2ResponseQueue, "o", + desc="Pop Incoming Response queue and profile the delay within this virtual network") { + profileMsgDelay(1, responseNetwork_in.dequeue()); + } + + action(s_deallocateTBE, "s", desc="Deallocate TBE") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(u_writeDataFromL0Request, "ureql0", 
desc="Write data to cache") { + peek(messageBufferFromL0_in, CoherenceMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(u_writeDataFromL2Response, "uresl2", desc="Write data to cache") { + peek(responseNetwork_in, ResponseMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(u_writeDataFromL0Response, "uresl0", desc="Write data to cache") { + peek(messageBufferFromL0_in, CoherenceMsg) { + assert(is_valid(cache_entry)); + cache_entry.DataBlk := in_msg.DataBlk; + cache_entry.Dirty := in_msg.Dirty; + } + } + + action(q_updateAckCount, "q", desc="Update ack count") { + peek(responseNetwork_in, ResponseMsg) { + assert(is_valid(tbe)); + tbe.pendingAcks := tbe.pendingAcks - in_msg.AckCount; + APPEND_TRANSITION_COMMENT(in_msg.AckCount); + APPEND_TRANSITION_COMMENT(" p: "); + APPEND_TRANSITION_COMMENT(tbe.pendingAcks); + } + } + + action(ff_deallocateCacheBlock, "\f", + desc="Deallocate L1 cache block. 
Sets the cache to not present, allowing a replacement in parallel with a fetch.") { + if (cache.isTagPresent(address)) { + cache.deallocate(address); + } + unset_cache_entry(); + } + + action(oo_allocateCacheBlock, "\o", desc="Set cache tag equal to tag of block B.") { + if (is_invalid(cache_entry)) { + set_cache_entry(cache.allocate(address, new Entry)); + } + } + + action(z0_stallAndWaitL0Queue, "\z0", desc="recycle L0 request queue") { + stall_and_wait(messageBufferFromL0_in, address); + } + + action(z2_stallAndWaitL2Queue, "\z2", desc="recycle L2 request queue") { + stall_and_wait(requestNetwork_in, address); + } + + action(kd_wakeUpDependents, "kd", desc="wake-up dependents") { + wakeUpAllBuffers(address); + } + + action(uu_profileMiss, "\um", desc="Profile the demand miss") { + ++cache.demand_misses; + } + + action(uu_profileHit, "\uh", desc="Profile the demand hit") { + ++cache.demand_hits; + } + + + //***************************************************** + // TRANSITIONS + //***************************************************** + + // Transitions for Load/Store/Replacement/WriteBack from transient states + transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK, S_IL0, M_IL0, E_IL0, MM_IL0}, + {Load, Store, L1_Replacement}) { + z0_stallAndWaitL0Queue; + } + + // Transitions from Idle + transition({NP,I}, L1_Replacement) { + ff_deallocateCacheBlock; + } + + transition({NP,I}, Load, IS) { + oo_allocateCacheBlock; + i_allocateTBE; + a_issueGETS; + uu_profileMiss; + k_popL0RequestQueue; + } + + transition({NP,I}, Store, IM) { + oo_allocateCacheBlock; + i_allocateTBE; + b_issueGETX; + uu_profileMiss; + k_popL0RequestQueue; + } + + transition({NP, I}, Inv) { + fi_sendInvAck; + l_popL2RequestQueue; + } + + // Transitions from Shared + transition({S,SS}, Load, S) { + h_data_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + // if transaction has aborted, L0 could re-request data which is in E state in L1 + // Therefore support Load from L0 in both E and EE state. 
+ transition({EE,E}, Load, E) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + // if transaction has aborted, L0 could re-request data which is in M state in L1 + // Therefore support Load from L0 in both M and MM state. + transition({MM,M}, Load, M) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + transition({S,SS}, Store, SM) { + i_allocateTBE; + c_issueUPGRADE; + uu_profileMiss; + k_popL0RequestQueue; + } + + transition(SS, L1_Replacement, I) { + ff_deallocateCacheBlock; + } + + transition(S, {L0_Invalidate_Own, L0_Invalidate_Else}, S_IL0) { + forward_eviction_to_L0; + } + + transition(SS, Inv, I) { + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popL2RequestQueue; + } + + // Transitions from Exclusive + + // if transaction has aborted, L0 could re-request data which is in M state in L1 + // Therefore support Load from L0 in both M and MM state. + transition({E,M}, Store, M) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + transition({EE,MM}, Store, M) { + hh_xdata_to_l0; + uu_profileHit; + k_popL0RequestQueue; + } + + transition(EE, L1_Replacement, M_I) { + // silent E replacement?? 
+ i_allocateTBE; + g_issuePUTX; // send data, but hold in case forwarded request + ff_deallocateCacheBlock; + } + + transition(EE, Inv, I) { + // don't send data + fi_sendInvAck; + ff_deallocateCacheBlock; + l_popL2RequestQueue; + } + + transition(EE, Fwd_GETX, I) { + d_sendDataToRequestor; + ff_deallocateCacheBlock; + l_popL2RequestQueue; + } + + transition(EE, Fwd_GETS, SS) { + d_sendDataToRequestor; + d2_sendDataToL2; + l_popL2RequestQueue; + } + + transition(E, {L0_Invalidate_Own, L0_Invalidate_Else}, E_IL0) { + forward_eviction_to_L0; + } + + // Transitions from Modified + transition(MM, L1_Replacement, M_I) { + i_allocateTBE; + g_issuePUTX; // send data, but hold in case forwarded request + ff_deallocateCacheBlock; + } + + transition({M,E}, WriteBack, MM) { + u_writeDataFromL0Request; + k_popL0RequestQueue; + } + + transition({M,E}, L0_DataCopy, M) { + u_writeDataFromL0Request; + k_popL0RequestQueue; + } + + transition(M_I, WB_Ack, I) { + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition(MM, Inv, I) { + f_sendDataToL2; + ff_deallocateCacheBlock; + l_popL2RequestQueue; + } + + transition(M_I, Inv, SINK_WB_ACK) { + ft_sendDataToL2_fromTBE; + l_popL2RequestQueue; + } + + transition(MM, Fwd_GETX, I) { + d_sendDataToRequestor; + l_popL2RequestQueue; + } + + transition(MM, Fwd_GETS, SS) { + d_sendDataToRequestor; + d2_sendDataToL2; + l_popL2RequestQueue; + } + + transition(M, {L0_Invalidate_Own, L0_Invalidate_Else}, M_IL0) { + forward_eviction_to_L0; + } + + transition(M_I, Fwd_GETX, SINK_WB_ACK) { + dt_sendDataToRequestor_fromTBE; + l_popL2RequestQueue; + } + + transition(M_I, Fwd_GETS, SINK_WB_ACK) { + dt_sendDataToRequestor_fromTBE; + d2t_sendDataToL2_fromTBE; + l_popL2RequestQueue; + } + + // Transitions from IS + transition({IS,IS_I}, Inv, IS_I) { + fi_sendInvAck; + l_popL2RequestQueue; + } + + transition(IS, Data_all_Acks, S) { + u_writeDataFromL2Response; + h_data_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + 
kd_wakeUpDependents; + } + + transition(IS_I, Data_all_Acks, I) { + u_writeDataFromL2Response; + h_stale_data_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition(IS, DataS_fromL1, S) { + u_writeDataFromL2Response; + j_sendUnblock; + h_data_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition(IS_I, DataS_fromL1, I) { + u_writeDataFromL2Response; + j_sendUnblock; + h_stale_data_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + // directory is blocked when sending exclusive data + transition(IS, Data_Exclusive, E) { + u_writeDataFromL2Response; + hh_xdata_to_l0; + jj_sendExclusiveUnblock; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + // directory is blocked when sending exclusive data + transition(IS_I, Data_Exclusive, E) { + u_writeDataFromL2Response; + hh_xdata_to_l0; + jj_sendExclusiveUnblock; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + // Transitions from IM + transition({IM,SM}, Inv, IM) { + fi_sendInvAck; + l_popL2RequestQueue; + } + + transition(IM, Data, SM) { + u_writeDataFromL2Response; + q_updateAckCount; + o_popL2ResponseQueue; + } + + transition(IM, Data_all_Acks, M) { + u_writeDataFromL2Response; + hh_xdata_to_l0; + jj_sendExclusiveUnblock; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition({SM, IM}, Ack) { + q_updateAckCount; + o_popL2ResponseQueue; + } + + transition(SM, Ack_all, M) { + jj_sendExclusiveUnblock; + hh_xdata_to_l0; + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition(SM, L0_Invalidate_Else, SM_IL0) { + forward_eviction_to_L0; + } + + transition(SINK_WB_ACK, Inv){ + fi_sendInvAck; + l_popL2RequestQueue; + } + + transition(SINK_WB_ACK, WB_Ack, I){ + s_deallocateTBE; + o_popL2ResponseQueue; + kd_wakeUpDependents; + } + + transition({M_IL0, E_IL0}, WriteBack, MM_IL0) { + u_writeDataFromL0Request; + 
k_popL0RequestQueue; + kd_wakeUpDependents; + } + + // a transaction may have tried to modify a cache block + // in M state jut before it was evicted from L0 + // transition to M_IL0 since data is now modified + transition({M_IL0, E_IL0}, L0_DataCopy, M_IL0) { + u_writeDataFromL0Request; + k_popL0RequestQueue; + } + + transition({M_IL0, E_IL0}, L0_DataAck, MM) { + u_writeDataFromL0Response; + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + // transition on NAK from + transition({M_IL0, E_IL0}, L0_DataNak, MM) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition({M_IL0, MM_IL0}, L0_Ack, MM) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition(E_IL0, L0_Ack, EE) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition(S_IL0, L0_Ack, SS) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition(SM_IL0, L0_Ack, IM) { + k_popL0RequestQueue; + kd_wakeUpDependents; + } + + transition({S_IL0, M_IL0, E_IL0, SM_IL0, SM}, L0_Invalidate_Own) { + z0_stallAndWaitL0Queue; + } + + transition({S_IL0, M_IL0, E_IL0, SM_IL0}, L0_Invalidate_Else) { + z2_stallAndWaitL2Queue; + } + + transition({S_IL0, M_IL0, E_IL0, MM_IL0}, {Inv, Fwd_GETX, Fwd_GETS}) { + z2_stallAndWaitL2Queue; + } +} diff -r 878f2f30b12d src/mem/protocol/MESI_Txn_Three_Level.slicc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/mem/protocol/MESI_Txn_Three_Level.slicc Fri Jul 04 22:00:13 2014 -0500 @@ -0,0 +1,9 @@ +protocol "MESI_Three_Level"; +include "RubySlicc_interfaces.slicc"; +include "MESI_Two_Level-msg.sm"; +include "MESI_Three_Level-msg.sm"; +include "MESI_Txn_Three_Level-L0cache.sm"; +include "MESI_Txn_Three_Level-L1cache.sm"; +include "MESI_Two_Level-L2cache.sm"; +include "MESI_Two_Level-dir.sm"; +include "MESI_Two_Level-dma.sm"; diff -r 878f2f30b12d src/mem/protocol/RubySlicc_Exports.sm --- a/src/mem/protocol/RubySlicc_Exports.sm Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/protocol/RubySlicc_Exports.sm Fri Jul 04 22:00:13 2014 -0500 @@ -135,6 +135,9 
@@ COMMIT, desc="Commit version"; NULL, desc="Invalid request type"; FLUSH, desc="Flush request type"; + Txn_Start, desc="Start transaction request type"; + Txn_Commit, desc="Commit transaction request type"; + Txn_Abort, desc="Abort transaction request type"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { diff -r 878f2f30b12d src/mem/protocol/RubySlicc_Types.sm --- a/src/mem/protocol/RubySlicc_Types.sm Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/protocol/RubySlicc_Types.sm Fri Jul 04 22:00:13 2014 -0500 @@ -112,6 +112,9 @@ void recordRequestType(SequencerRequestType); bool checkResourceAvailable(CacheResourceType, Address); void invalidateSC(Address); + + // adding callback for abort transaction + void txnCallback(Address, bool, bool, int); } structure(RubyRequest, desc="...", interface="Message", external="yes") { @@ -153,6 +156,11 @@ void recordRequestType(CacheRequestType); bool checkResourceAvailable(CacheResourceType, Address); + // added functions for transactions + void commitTransaction(); + void abortTransaction(); + AbstractCacheEntry getNullEntry(); + Scalar demand_misses; Scalar demand_hits; } @@ -180,14 +188,14 @@ } structure (GenericBloomFilter, external = "yes") { - void clear(int); - void increment(Address, int); - void decrement(Address, int); - void set(Address, int); - void unset(Address, int); + void clear(); + void increment(Address); + void decrement(Address); + void set(Address); + void unset(Address); - bool isSet(Address, int); - int getCount(Address, int); + bool isSet(Address); + int getCount(Address); } structure (Prefetcher, external = "yes") { diff -r 878f2f30b12d src/mem/protocol/SConsopts --- a/src/mem/protocol/SConsopts Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/protocol/SConsopts Fri Jul 04 22:00:13 2014 -0500 @@ -35,6 +35,7 @@ all_protocols.extend([ 'MESI_Two_Level', 'MESI_Three_Level', + 'MESI_Txn_Three_Level', 'MI_example', 'MOESI_CMP_directory', 'MOESI_CMP_token', diff -r 878f2f30b12d 
src/mem/request.hh --- a/src/mem/request.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/request.hh Fri Jul 04 22:00:13 2014 -0500 @@ -150,6 +150,18 @@ /** The request is a page table walk */ static const FlagsType PT_WALK = 0x20000000; + /** The request is for Transaction Begin */ + static const FlagsType XBEGIN = 0x00020000; + + /** The request is for Transaction Abort */ + static const FlagsType XABORT = 0x00040000; + + /** The request is for Transaction End */ + static const FlagsType XEND = 0x40000000; + + /** The request is for Elided Memory ops */ + static const FlagsType XELIDE = 0x80000000; + /** These flags are *not* cleared when a Request object is reused (assigned a new address). */ static const FlagsType STICKY_FLAGS = INST_FETCH; @@ -631,6 +643,10 @@ bool isClearLL() const { return _flags.isSet(CLEAR_LL); } bool isSecure() const { return _flags.isSet(SECURE); } bool isPTWalk() const { return _flags.isSet(PT_WALK); } + bool isXbegin() const { return _flags.isSet(XBEGIN); } + bool isXabort() const { return _flags.isSet(XABORT); } + bool isXend() const { return _flags.isSet(XEND); } + bool isTxn() const { return (_flags.isSet(XBEGIN) || _flags.isSet(XABORT) || _flags.isSet(XEND)); } }; #endif // __MEM_REQUEST_HH__ diff -r 878f2f30b12d src/mem/ruby/slicc_interface/AbstractCacheEntry.hh --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh Fri Jul 04 22:00:13 2014 -0500 @@ -53,6 +53,16 @@ Address m_Address; // Address of this block, required by CacheMemory int m_locked; // Holds info whether the address is locked, // required for implementing LL/SC + + bool readSet; // set if the cache entry is part of read set of transaction + bool writeSet; // set if the cache entry is part of write set of transaction + + void setReadSet(); + void setWriteSet(); + bool getReadSet(); + bool getWriteSet(); + + virtual void invalidateEntry(const bool& param_dummy); }; inline 
std::ostream& diff -r 878f2f30b12d src/mem/ruby/slicc_interface/AbstractCacheEntry.cc --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc Fri Jul 04 22:00:13 2014 -0500 @@ -33,6 +33,8 @@ m_Permission = AccessPermission_NotPresent; m_Address.setAddress(0); m_locked = -1; + readSet = false; + writeSet = false; } AbstractCacheEntry::~AbstractCacheEntry() @@ -48,3 +50,32 @@ m_locked = -1; } } + +void +AbstractCacheEntry::setReadSet() +{ + readSet = true; +} + +void +AbstractCacheEntry::setWriteSet() +{ + writeSet = true; +} + +bool +AbstractCacheEntry::getReadSet() +{ + return readSet; +} + +bool +AbstractCacheEntry::getWriteSet() +{ + return writeSet; +} + +void +AbstractCacheEntry::invalidateEntry(const bool& param_dummy) +{ +} diff -r 878f2f30b12d src/mem/ruby/slicc_interface/AbstractController.hh --- a/src/mem/ruby/slicc_interface/AbstractController.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/slicc_interface/AbstractController.hh Fri Jul 04 22:00:13 2014 -0500 @@ -128,6 +128,7 @@ void wakeUpBuffers(Address addr); void wakeUpAllBuffers(Address addr); void wakeUpAllBuffers(); + void clearBlockMap(); protected: NodeID m_version; @@ -148,6 +149,7 @@ int m_transitions_per_cycle; unsigned int m_buffer_size; Cycles m_recycle_latency; + std::string m_bloom_filter_id; //! Map from physical network number to the Message Buffer. 
std::map peerQueueMap; diff -r 878f2f30b12d src/mem/ruby/slicc_interface/AbstractController.cc --- a/src/mem/ruby/slicc_interface/AbstractController.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/slicc_interface/AbstractController.cc Fri Jul 04 22:00:13 2014 -0500 @@ -40,6 +40,7 @@ m_buffer_size = p->buffer_size; m_recycle_latency = p->recycle_latency; m_number_of_TBEs = p->number_of_TBEs; + m_bloom_filter_id = "H3_1024_2_Regular"; m_is_blocking = false; if (m_version == 0) { @@ -194,3 +195,10 @@ m_is_blocking = false; } } + +void +AbstractController::clearBlockMap() +{ + m_block_map.clear(); + m_is_blocking = false; +} diff -r 878f2f30b12d src/mem/ruby/system/CacheMemory.hh --- a/src/mem/ruby/system/CacheMemory.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/CacheMemory.hh Fri Jul 04 22:00:13 2014 -0500 @@ -110,6 +110,11 @@ bool checkResourceAvailable(CacheResourceType res, Address addr); void recordRequestType(CacheRequestType requestType); + // Transaction functions + void abortTransaction(); + void commitTransaction(); + AbstractCacheEntry* getNullEntry(); + public: Stats::Scalar m_demand_hits; Stats::Scalar m_demand_misses; diff -r 878f2f30b12d src/mem/ruby/system/CacheMemory.cc --- a/src/mem/ruby/system/CacheMemory.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/CacheMemory.cc Fri Jul 04 22:00:13 2014 -0500 @@ -563,3 +563,62 @@ return true; } } + +// Transaction functions +void +CacheMemory::abortTransaction() +{ + // std::vector > m_cache; + for (std::vector >::iterator i = m_cache.begin(); + i != m_cache.end(); + ++i) + { + std::vector temp = *i; + + for (std::vector::iterator j = temp.begin(); + j != temp.end(); + ++j) + { + AbstractCacheEntry *ace = *j; + + if(ace != NULL) { + if(ace->writeSet) { + // set state as invalid + ace->invalidateEntry(true); + } + + ace->writeSet = false; + ace->readSet = false; + } + } + } +} + +void +CacheMemory::commitTransaction() +{ + // std::vector > m_cache; + for (std::vector >::iterator i = 
m_cache.begin(); + i != m_cache.end(); + ++i) + { + std::vector temp = *i; + + for (std::vector::iterator j = temp.begin(); + j != temp.end(); + ++j) + { + AbstractCacheEntry *ace = *j; + if(ace != NULL) { + ace->writeSet = false; + ace->readSet = false; + } + } + } +} + +AbstractCacheEntry* +CacheMemory::getNullEntry() +{ + return NULL; +} \ No newline at end of file diff -r 878f2f30b12d src/mem/ruby/system/RubyPort.hh --- a/src/mem/ruby/system/RubyPort.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/RubyPort.hh Fri Jul 04 22:00:13 2014 -0500 @@ -82,6 +82,7 @@ MemSlavePort(const std::string &_name, RubyPort *_port, RubySystem*_system, bool _access_phys_mem, PortID id); void hitCallback(PacketPtr pkt); + void txnCallback(PacketPtr pkt, bool isImplicitAbort); void evictionCallback(const Address& address); protected: @@ -165,6 +166,7 @@ protected: void ruby_hit_callback(PacketPtr pkt); + void ruby_txn_callback(PacketPtr pkt, bool isImplicitAbort); void testDrainComplete(); void ruby_eviction_callback(const Address& address); diff -r 878f2f30b12d src/mem/ruby/system/RubyPort.cc --- a/src/mem/ruby/system/RubyPort.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/RubyPort.cc Fri Jul 04 22:00:13 2014 -0500 @@ -225,7 +225,7 @@ // Check for pio requests and directly send them to the dedicated // pio port. 
- if (!isPhysMemAddress(pkt->getAddr())) { + if (!isPhysMemAddress(pkt->getAddr()) && !pkt->req->isTxn()) { assert(ruby_port->memMasterPort.isConnected()); DPRINTF(RubyPort, "Request address %#x assumed to be a pio address\n", pkt->getAddr()); @@ -243,8 +243,10 @@ // Save the port id to be used later to route the response pkt->setSrc(id); - assert(Address(pkt->getAddr()).getOffset() + pkt->getSize() <= - RubySystem::getBlockSizeBytes()); + if(!pkt->req->isTxn()) { + assert(Address(pkt->getAddr()).getOffset() + pkt->getSize() <= + RubySystem::getBlockSizeBytes()); + } // Submit the ruby request RequestStatus requestStatus = ruby_port->makeRequest(pkt); @@ -563,3 +565,72 @@ r.pioSlavePort.sendRangeChange(); } } + +void +RubyPort::ruby_txn_callback(PacketPtr pkt, bool isImplicitAbort) +{ + DPRINTF(RubyPort, "Txn callback for %s\n", pkt->cmdString()); + + // The packet has not yet been turned into a response + assert(pkt->isRequest()); + + // As it has not yet been turned around, the source field tells us + // which port it came from. + assert(pkt->getSrc() < slave_ports.size()); + + slave_ports[pkt->getSrc()]->txnCallback(pkt, isImplicitAbort); + + // + // If we had to stall the MemSlavePorts, wake them up because the sequencer + // likely has free resources now. + // + if (!retryList.empty()) { + // + // Record the current list of ports to retry on a temporary list before + // calling sendRetry on those ports. sendRetry will cause an + // immediate retry, which may result in the ports being put back on the + // list. Therefore we want to clear the retryList before calling + // sendRetry. + // + std::vector curRetryList(retryList); + + retryList.clear(); + + for (auto i = curRetryList.begin(); i != curRetryList.end(); ++i) { + DPRINTF(RubyPort, + "Sequencer may now be free. 
SendRetry to port %s\n", + (*i)->name()); + (*i)->sendRetry(); + } + } + + testDrainComplete(); +} + +void +RubyPort::MemSlavePort::txnCallback(PacketPtr pkt, bool isImplicitAbort) +{ + bool needsResponse = pkt->needsResponse(); + + DPRINTF(RubyPort, "Txn callback: isImplicitAbort=%d, xbegin=%d, xabort=%d, xend=%d, needsResponse = %d\n", + isImplicitAbort, pkt->req->isXbegin(), pkt->req->isXabort(), pkt->req->isXend(), needsResponse); + + if (needsResponse) { + pkt->makeTxnResponse(isImplicitAbort); + } + + // turn packet around to go back to requester if response expected + if (needsResponse) { + DPRINTF(RubyPort, "Sending packet back over port\n"); + // send next cycle + schedTimingResp(pkt, curTick() + g_system_ptr->clockPeriod()); + } else { + // ADDING DOUBLE DELETE DETECTION // + bool b = pkt->req->isTxn(); + if(b) + DPRINTF(RubyPort, "Deleting packet with pointer address7: %p\n", pkt); + // ENDING DOUBLE DELETE DETECTION // + delete pkt; + } + DPRINTF(RubyPort, "Txn callback done!\n"); +} \ No newline at end of file diff -r 878f2f30b12d src/mem/ruby/system/Sequencer.hh --- a/src/mem/ruby/system/Sequencer.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/Sequencer.hh Fri Jul 04 22:00:13 2014 -0500 @@ -84,6 +84,10 @@ const Cycles forwardRequestTime = Cycles(0), const Cycles firstResponseTime = Cycles(0)); + // callback to abort a transaction + void txnCallback(const Address& address, bool isWrite, + bool isImplicitAbort, int abortCode); + RequestStatus makeRequest(PacketPtr pkt); bool empty() const; int outstandingCount() const { return m_outstanding_count; } diff -r 878f2f30b12d src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/Sequencer.cc Fri Jul 04 22:00:13 2014 -0500 @@ -629,7 +629,20 @@ if (pkt->isRead()) { if (pkt->req->isInstFetch()) { primary_type = secondary_type = RubyRequestType_IFETCH; - } else { + } + else if(pkt->req->isTxn()) { + if(pkt->req->isXbegin()) { 
+ primary_type = secondary_type = RubyRequestType_Txn_Start; + } + else if(pkt->req->isXabort()) { + primary_type = secondary_type = RubyRequestType_Txn_Abort; + } + else if(pkt->req->isXend()) { + primary_type = secondary_type = RubyRequestType_Txn_Commit; + } + + } + else { #if THE_ISA == X86_ISA uint32_t flags = pkt->req->getFlags(); bool storeCheck = flags & @@ -756,6 +769,69 @@ } void +Sequencer::txnCallback(const Address& address, bool isWrite, + bool isImplicitAbort, int abortCode) +{ + assert(address == line_address(address)); + SequencerRequest* request; + + DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n", + curTick(), m_version, "Seq", + "TxnCallBackDone", "", "", + address, 0); + + if(isWrite) { + assert(m_writeRequestTable.count(line_address(address))); + + RequestTable::iterator i = m_writeRequestTable.find(address); + assert(i != m_writeRequestTable.end()); + request = i->second; + + m_writeRequestTable.erase(i); + markRemoved(); + + // unblocking if any of the previous requests lead to blocking + if (request->m_type == RubyRequestType_Locked_RMW_Write) { + m_controller->unblock(address); + } + + assert((request->m_type == RubyRequestType_ST) || + (request->m_type == RubyRequestType_ATOMIC) || + (request->m_type == RubyRequestType_RMW_Read) || + (request->m_type == RubyRequestType_RMW_Write) || + (request->m_type == RubyRequestType_Load_Linked) || + (request->m_type == RubyRequestType_Store_Conditional) || + (request->m_type == RubyRequestType_Locked_RMW_Read) || + (request->m_type == RubyRequestType_Locked_RMW_Write) || + (request->m_type == RubyRequestType_FLUSH)); + } + else { + assert(m_readRequestTable.count(line_address(address))); + + RequestTable::iterator i = m_readRequestTable.find(address); + assert(i != m_readRequestTable.end()); + request = i->second; + + m_readRequestTable.erase(i); + markRemoved(); + + assert((request->m_type == RubyRequestType_LD) || + (request->m_type == RubyRequestType_IFETCH) || + 
(request->m_type == RubyRequestType_Txn_Start) || + (request->m_type == RubyRequestType_Txn_Commit) || + (request->m_type == RubyRequestType_Txn_Abort)); + } + + PacketPtr pkt = request->pkt; + + // copy abort code to pkt's data + uint64_t abc = abortCode; // changing to 8 bytes + memcpy(pkt->getPtr(), &abc, pkt->getSize()); + + ruby_txn_callback(pkt, isImplicitAbort); +} + +void Sequencer::regStats() { m_store_waiting_on_load diff -r 878f2f30b12d src/mem/ruby/system/TBETable.hh --- a/src/mem/ruby/system/TBETable.hh Wed Jul 02 13:19:13 2014 -0400 +++ b/src/mem/ruby/system/TBETable.hh Fri Jul 04 22:00:13 2014 -0500 @@ -54,6 +54,9 @@ ENTRY* lookup(const Address& address); + // addition for transaction + ENTRY* getNullEntry(); + // Print cache contents void print(std::ostream& out) const; @@ -114,6 +117,13 @@ return NULL; } +// addition for transactions +template +inline ENTRY* +TBETable::getNullEntry() +{ + return NULL; +} template inline void