diff -r ff116dd4f3e0 -r c399153c3c5a src/arch/arm/isa/formats/m5ops.isa --- a/src/arch/arm/isa/formats/m5ops.isa Thu May 19 20:37:31 2016 +0100 +++ b/src/arch/arm/isa/formats/m5ops.isa Fri May 20 12:33:15 2016 +0100 @@ -68,6 +68,13 @@ case 0x54: return new M5panic(machInst); case 0x5a: return new M5workbegin(machInst); case 0x5b: return new M5workend(machInst); + + // The following 5 ops are for McVerSi (sim/mcversi.hh) + case 0x62: return new M5marktestmemrange(machInst); + case 0x63: return new M5maketestthread(machInst); + case 0x64: return new M5verifyresetconflict(machInst); + case 0x65: return new M5verifyresetall(machInst); + case 0x66: return new M5barrierasync(machInst); } } ''' diff -r ff116dd4f3e0 -r c399153c3c5a src/cpu/o3/lsq_unit_impl.hh --- a/src/cpu/o3/lsq_unit_impl.hh Thu May 19 20:37:31 2016 +0100 +++ b/src/cpu/o3/lsq_unit_impl.hh Fri May 20 12:33:15 2016 +0100 @@ -59,6 +59,7 @@ #include "debug/O3PipeView.hh" #include "mem/packet.hh" #include "mem/request.hh" +#include "sim/mcversi.hh" template LSQUnit::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, @@ -125,6 +126,11 @@ } if (inst->isStore()) { + // If McVerSi workload is running, pass observed value (what did we + // overwrite) to checker; otherwise this call does nothing. 
+ McVerSi::updateObs(inst->instAddr(), inst->microPC(), inst->effAddr, + pkt->getPtr(), inst->effSize); + completeStore(state->idx); } } diff -r ff116dd4f3e0 -r c399153c3c5a src/cpu/o3/dyn_inst_impl.hh --- a/src/cpu/o3/dyn_inst_impl.hh Thu May 19 20:37:31 2016 +0100 +++ b/src/cpu/o3/dyn_inst_impl.hh Fri May 20 12:33:15 2016 +0100 @@ -46,6 +46,7 @@ #include "base/cp_annotate.hh" #include "cpu/o3/dyn_inst.hh" #include "sim/full_system.hh" +#include "sim/mcversi.hh" #include "debug/O3PipeView.hh" template @@ -173,6 +174,27 @@ template Fault +BaseO3DynInst::writeMem(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res) +{ + if (this->isStore() && McVerSi::isValidInstPtr(this->instAddr())) { + // To check memory consistency efficiently, we want to know what a + // write overwrites (to tell which write happened before at this + // address). Once the write completes, the overwritten value is passed + // to the memory consistency model checker used in McVerSi. + // + // Prerequisite for using MEM_SWAP is, that it should not be treated + // any differently than a regular write, with the only difference being + // that the data being overwritten is returned with the response. + // AFAIK, this holds for classic and Ruby. + flags |= Request::MEM_SWAP; + } + + return BaseDynInst::writeMem(data, size, addr, flags, res); +} + +template +Fault BaseO3DynInst::completeAcc(PacketPtr pkt) { // @todo: Pretty convoluted way to avoid squashing from happening diff -r ff116dd4f3e0 -r c399153c3c5a src/cpu/o3/dyn_inst.hh --- a/src/cpu/o3/dyn_inst.hh Thu May 19 20:37:31 2016 +0100 +++ b/src/cpu/o3/dyn_inst.hh Fri May 20 12:33:15 2016 +0100 @@ -101,6 +101,10 @@ /** Initiates the access. Only valid for memory operations. */ Fault initiateAcc(); + /** Override of BaseDynInst::writeMem. */ + Fault writeMem(uint8_t *data, unsigned size, Addr addr, unsigned flags, + uint64_t *res); + /** Completes the access. Only valid for memory operations. 
*/ Fault completeAcc(PacketPtr pkt); diff -r ff116dd4f3e0 -r c399153c3c5a src/cpu/o3/commit_impl.hh --- a/src/cpu/o3/commit_impl.hh Thu May 19 20:37:31 2016 +0100 +++ b/src/cpu/o3/commit_impl.hh Fri May 20 12:33:15 2016 +0100 @@ -67,6 +67,7 @@ #include "params/DerivO3CPU.hh" #include "sim/faults.hh" #include "sim/full_system.hh" +#include "sim/mcversi.hh" using namespace std; @@ -1264,6 +1265,16 @@ } DPRINTF(Commit, "Committing instruction with [sn:%lli] PC %s\n", head_inst->seqNum, head_inst->pcState()); + + if (head_inst->memData && head_inst->isLoad()) { + // In case we are running the McVerSi workload, pass the observed value + // to the checker. This does nothing if the McVerSi workload is not + // running. + McVerSi::updateObs(head_inst->instAddr(), head_inst->microPC(), + head_inst->effAddr, head_inst->memData, + head_inst->effSize); + } + if (head_inst->traceData) { head_inst->traceData->setFetchSeq(head_inst->seqNum); head_inst->traceData->setCPSeq(thread[tid]->numOp); diff -r ff116dd4f3e0 -r c399153c3c5a src/arch/arm/isa/insts/m5ops.isa --- a/src/arch/arm/isa/insts/m5ops.isa Thu May 19 20:37:31 2016 +0100 +++ b/src/arch/arm/isa/insts/m5ops.isa Fri May 20 12:33:15 2016 +0100 @@ -564,4 +564,148 @@ header_output += BasicDeclare.subst(m5workendIop) decoder_output += BasicConstructor.subst(m5workendIop) exec_output += PredOpExecute.subst(m5workendIop) + + + #===============================================================# + # The following are for McVerSi (see sim/mcversi.hh) # + + m5marktestmemrangeCode = ''' + int n = 2; + uint64_t stride = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 4; + Addr addr_mask = getArgument(xc->tcBase(), n, 4, false); + PseudoInst::markTestMemRange(xc->tcBase(), R0, R1, stride, addr_mask); + ''' + + m5marktestmemrangeCode64 = ''' + int n = 2; + uint64_t stride = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 3; + Addr addr_mask = getArgument(xc->tcBase(), n, sizeof(Addr), false); + 
PseudoInst::markTestMemRange(xc->tcBase(), X0, X1, stride, addr_mask); + ''' + + m5marktestmemrangeIop = InstObjParams("m5marktestmemrange", "M5marktestmemrange", "PredOp", + { "code": m5marktestmemrangeCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5marktestmemrangeIop) + decoder_output += BasicConstructor.subst(m5marktestmemrangeIop) + exec_output += PredOpExecute.subst(m5marktestmemrangeIop) + + m5marktestmemrangeIop = InstObjParams("m5marktestmemrange", "M5marktestmemrange64", "PredOp", + { "code": m5marktestmemrangeCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5marktestmemrangeIop) + decoder_output += BasicConstructor.subst(m5marktestmemrangeIop) + exec_output += PredOpExecute.subst(m5marktestmemrangeIop) + + m5maketestthreadCode = ''' + uint64_t val = PseudoInst::makeTestThread(xc->tcBase(), R0, join32to64(R3, R2)); + R0 = bits(val, 31, 0); + R1 = bits(val, 63, 32); + ''' + + m5maketestthreadCode64 = ''' + X0 = PseudoInst::makeTestThread(xc->tcBase(), X0, X1); + ''' + + m5maketestthreadIop = InstObjParams("m5maketestthread", "M5maketestthread", "PredOp", + { "code": m5maketestthreadCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5maketestthreadIop) + decoder_output += BasicConstructor.subst(m5maketestthreadIop) + exec_output += PredOpExecute.subst(m5maketestthreadIop) + + m5maketestthreadIop = InstObjParams("m5maketestthread", "M5maketestthread64", "PredOp", + { "code": m5maketestthreadCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5maketestthreadIop) + decoder_output += BasicConstructor.subst(m5maketestthreadIop) + exec_output += PredOpExecute.subst(m5maketestthreadIop) + + m5verifyresetconflictCode = ''' + uint64_t val = PseudoInst::verifyResetConflict(xc->tcBase(), R0, join32to64(R3, R2)); + R0 = bits(val, 31, 0); + R1 = 
bits(val, 63, 32); + ''' + + m5verifyresetconflictCode64 = ''' + X0 = PseudoInst::verifyResetConflict(xc->tcBase(), X0, X1); + ''' + + m5verifyresetconflictIop = InstObjParams("m5verifyresetconflict", "M5verifyresetconflict", "PredOp", + { "code": m5verifyresetconflictCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5verifyresetconflictIop) + decoder_output += BasicConstructor.subst(m5verifyresetconflictIop) + exec_output += PredOpExecute.subst(m5verifyresetconflictIop) + + m5verifyresetconflictIop = InstObjParams("m5verifyresetconflict", "M5verifyresetconflict64", "PredOp", + { "code": m5verifyresetconflictCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5verifyresetconflictIop) + decoder_output += BasicConstructor.subst(m5verifyresetconflictIop) + exec_output += PredOpExecute.subst(m5verifyresetconflictIop) + + m5verifyresetallCode = ''' + uint64_t val = PseudoInst::verifyResetAll(xc->tcBase(), R0, join32to64(R3, R2)); + R0 = bits(val, 31, 0); + R1 = bits(val, 63, 32); + ''' + + m5verifyresetallCode64 = ''' + X0 = PseudoInst::verifyResetAll(xc->tcBase(), X0, X1); + ''' + + m5verifyresetallIop = InstObjParams("m5verifyresetall", "M5verifyresetall", "PredOp", + { "code": m5verifyresetallCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5verifyresetallIop) + decoder_output += BasicConstructor.subst(m5verifyresetallIop) + exec_output += PredOpExecute.subst(m5verifyresetallIop) + + m5verifyresetallIop = InstObjParams("m5verifyresetall", "M5verifyresetall64", "PredOp", + { "code": m5verifyresetallCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5verifyresetallIop) + decoder_output += BasicConstructor.subst(m5verifyresetallIop) + exec_output += PredOpExecute.subst(m5verifyresetallIop) + + m5barrierasyncCode = ''' + uint64_t val = 
PseudoInst::barrierAsync(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); + R0 = bits(val, 31, 0); + R1 = bits(val, 63, 32); + ''' + + m5barrierasyncCode64 = ''' + X0 = PseudoInst::barrierAsync(xc->tcBase(), X0, X1); + ''' + + m5barrierasyncIop = InstObjParams("m5barrierasync", "M5barrierasync", "PredOp", + { "code": m5barrierasyncCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5barrierasyncIop) + decoder_output += BasicConstructor.subst(m5barrierasyncIop) + exec_output += PredOpExecute.subst(m5barrierasyncIop) + + m5barrierasyncIop = InstObjParams("m5barrierasync", "M5barrierasync64", "PredOp", + { "code": m5barrierasyncCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5barrierasyncIop) + decoder_output += BasicConstructor.subst(m5barrierasyncIop) + exec_output += PredOpExecute.subst(m5barrierasyncIop) + + # End of McVerSi related ops # + #===============================================================# + }}; diff -r ff116dd4f3e0 -r c399153c3c5a src/sim/pseudo_inst.hh --- a/src/sim/pseudo_inst.hh Thu May 19 20:37:31 2016 +0100 +++ b/src/sim/pseudo_inst.hh Fri May 20 12:33:15 2016 +0100 @@ -88,6 +88,13 @@ void switchcpu(ThreadContext *tc); void workbegin(ThreadContext *tc, uint64_t workid, uint64_t threadid); void workend(ThreadContext *tc, uint64_t workid, uint64_t threadid); +void markTestMemRange(ThreadContext *tc, Addr vaddr_start, Addr vaddr_end, + uint64_t stride, Addr addr_mask); +uint64_t makeTestThread(ThreadContext *tc, Addr code_vaddr, uint64_t len); +uint64_t verifyResetConflict(ThreadContext *tc, Addr used_addrs, uint64_t len); +uint64_t verifyResetAll(ThreadContext *tc, Addr used_addrs, uint64_t len); +uint64_t barrierAsync(ThreadContext *tc, uint64_t num_threads, + uint64_t with_quiesce); } // namespace PseudoInst diff -r ff116dd4f3e0 -r c399153c3c5a src/sim/mcversi.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/sim/mcversi.cc Fri May 20 12:33:15 2016 +0100 @@ -0,0 +1,713 @@ +/* + * Copyright (c) 2014-2016 Marco Elver + * Copyright (c) 2016 The University of Edinburgh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Marco Elver + */ + +#include +#include +#include +#include +#include + +#include "base/misc.hh" +#include "base/output.hh" +#include "base/trace.hh" +#include "base/types.hh" +#include "config/the_isa.hh" +#include "debug/McVerSi.hh" +#include "sim/mcversi.hh" +#include "sim/sim_exit.hh" + +namespace mc2lib { +namespace types { + +// Specialize types; this must only be done once in the entire codebase. +template +struct Types; +template <> +struct Types +{ + typedef uint16_t Poi; + typedef uint16_t Pid; + typedef ::Addr Addr; + + typedef Addr InstPtr; + +// See comment on TEST_SIZE below. +#if THE_ISA == ARM_ISA + typedef uint8_t WriteID; +#else // if THE_ISA == X86_ISA + typedef uint16_t WriteID; +#endif +}; + +} // namespace types +} // namespace mc2lib + +#include "mc2lib/codegen/compiler.hpp" +#include "mc2lib/codegen/rit.hpp" +#include "mc2lib/mcversi.hpp" +#include "mc2lib/memconsistency/cats.hpp" +#include "mc2lib/simplega.hpp" + +#if THE_ISA == ARM_ISA +#include "mc2lib/codegen/ops/armv7.hpp" +#else // if THE_ISA == X86_ISA +// Fallback option for unsupported ISAs; it goes without saying that using an +// unsupported ISA with this will not work. 
+#include "mc2lib/codegen/ops/x86_64.hpp" +#endif + +namespace { + +namespace mc = mc2lib::memconsistency; +namespace codegen = mc2lib::codegen; + +typedef std::mt19937_64 URNG; + +#if THE_ISA == ARM_ISA +typedef mc::cats::Arch_ARMv7 Arch; +typedef codegen::armv7::RandomFactory RandomFactory; +typedef codegen::armv7::MemOperation MemOperation; +typedef codegen::armv7::Return ReturnOp; +typedef codegen::Compiler + Compiler; +#else // if THE_ISA == X86_ISA +typedef mc::cats::Arch_TSO Arch; +typedef codegen::strong::RandomFactory RandomFactory; +typedef codegen::strong::MemOperation MemOperation; +typedef codegen::strong::Return ReturnOp; +typedef codegen::Compiler Compiler; +#endif + +typedef codegen::RandInstTest RIT; +typedef mc2lib::simplega::GenePool GenePool; + +void +writeDotGraph(const mc::cats::Architecture& arch, + const mc::cats::ExecWitness& ew, const mc::EventRel::Path& path, + const std::string& name) +{ + auto os = simout.create(name + ".dot"); + auto stream = os->stream(); + + *stream << "digraph " << name << " {\n"; + *stream << "init [style=invisible, label=\"\", width=0, height=0];\n"; + + const mc::Event* prev = nullptr; + for (const auto& e : path) { + const auto& from_write = + e.AnyType(arch.EventTypeRead()) ? ew.rf : ew.co; + // Use raw(), to keep graph limited (including transitive edges + // would blow up the graph). + for (const auto& w : from_write.Raw()) { + if (w.second.Contains(e)) { + *stream << "\"" << static_cast(w.first) + << "\" -> \"" << static_cast(e) + << "\";\n"; + } + } + + *stream << "\"" << (prev ? 
static_cast(*prev) : "init") + << "\" -> \"" << static_cast(e) << "\";\n"; + + prev = &e; + } + + *stream << "}\n"; + simout.close(os); +} + +// Writes relation rel in DOT format to the output file "<name>.dot" created +// through simout: one edge e1 -> e2 for every e1 in rel.Domain() and every e2 +// in rel.Reachable(e1). The arch and ew parameters are not used by this +// overload. +void +writeDotGraph(const mc::cats::Architecture& arch, + const mc::cats::ExecWitness& ew, const mc::EventRel& rel, + const std::string& name) +{ + auto os = simout.create(name + ".dot"); + auto stream = os->stream(); + + *stream << "digraph " << name << " {\n"; + + const auto domain = rel.Domain(); + for (const auto& e1 : domain.Get()) { + const auto writes = rel.Reachable(e1); + for (const auto& e2 : writes.Get()) { + *stream << "\"" << static_cast(e1) << "\" -> \"" + << static_cast(e2) << "\";\n"; + } + } + + *stream << "}\n"; + simout.close(os); +} + +// This class implements the interface to mc2lib, and maintains all relevant +// state. The exposed interface (in mcversi.hh), does not rely on mc2lib, but +// rather just makes calls to McVerSi_State. This separation helps with keeping +// the overheads introduced by mc2lib to a minimum. +class McVerSi_State +{ + public: + static constexpr double MCVERSI_P_BFA = 0.05; + static constexpr double MCVERSI_P_USEL = 0.2; + static constexpr float GP_MUTATION_PROB = 0.005f; + static constexpr size_t GP_MATES = 2; + static constexpr size_t GP_POPULATION_SIZE = 100; // 1 for pseudo-random + static constexpr size_t GP_TOURNAMENT_SIZE = 2; + static constexpr size_t GP_SELECTION_SIZE = GP_TOURNAMENT_SIZE * GP_MATES; + + // With WriteID size of only 1 byte, and a bias of slightly fewer writes + // than other operations, 500 operations is a reasonable limit without + // incurring frequent WriteID exhaustion. Increase WriteID size to increase + // test-size (e.g. in McVerSi paper WriteID size used was 2 and test size + // of 1000; prerequisite is that the code generation backend supports the + // selected WriteID size). 
+#if THE_ISA == ARM_ISA + static constexpr size_t TEST_SIZE = 500; +#else // if THE_ISA == X86_ISA + static constexpr size_t TEST_SIZE = 1000; +#endif + + void setRanges(mc2lib::types::Pid min_pid, mc2lib::types::Pid max_pid, + mc2lib::types::Addr min_vaddr, + mc2lib::types::Addr max_vaddr, size_t stride, + mc2lib::types::Addr addr_mask) + { + DPRINTF(McVerSi, + "McVerSi::setRanges(%d, %d, 0x%x, 0x%x, 0x%x, 0x%x)\n", + min_pid, max_pid, min_vaddr, max_vaddr, stride, addr_mask); + + randomFactory.Reset(min_pid, max_pid, min_vaddr, max_vaddr, stride); + + if (addr_mask) { + compiler.evts()->set_addr_mask(addr_mask); + } + + updateCoverage(); + initGenePool(); + } + + uint64_t emitThread(mc2lib::types::Pid pid, mc2lib::types::InstPtr base, + void* code, size_t len) + { + DPRINTF(McVerSi, "McVerSi::emitThread(%d, 0x%x, ...)\n", pid, base); + + size_t emit_len = compiler.Emit(pid, base, code, len); + emit_len += compiler.Emit(base + emit_len, &returnOp, + static_cast(code) + emit_len, + len - emit_len, nullptr, nullptr); + + if (compiler.evts()->Exhausted()) { + warn("McVerSi_State: exhausted!\n"); + } + + // events accumulates all events that compiler emits; assume that all + // threads are emitted before the first updateObs is called. + nonInitEventCount = execWitness.events.size(); + +#ifndef NDEBUG + // Write out generated code for this thread. 
+ auto os = simout.create(csprintf("mcversi_code-%d.bin", pid)); + os->stream()->write((char*)code, emit_len); + simout.close(os); +#endif + + return emit_len; + } + + void updateObs(mc2lib::types::InstPtr ip, int part, + mc2lib::types::Addr vaddr, + const mc2lib::types::WriteID* from_id, size_t size) + { + if (isValidAddr(vaddr) && isValidInstPtr(ip)) { + DPRINTF(McVerSi, "McVerSi::updateObs(0x%x, %d, 0x%x, 0x%x, %d)\n", + ip, part, vaddr, *from_id, size); + + compiler.UpdateObs(ip, part, vaddr, from_id, size); + } + } + + bool verify() + { + DPRINTF(McVerSi, "McVerSi::verify()\n"); + + if (outstanding() > 0) { + panic("Outstanding memory operations (%d) in flight!\n", + outstanding()); + return false; + } + + // Temporarily set required properties; outstanding() relies on + // properties unset. + execWitness.po.set_props(mc::EventRel::kTransitiveClosure); + execWitness.co.set_props(mc::EventRel::kTransitiveClosure); + + // Instantiate memoization proxy and checker. + mc::cats::ArchProxy arch_proxy(&arch); + arch_proxy.Memoize(execWitness); + auto checker = arch_proxy.MakeChecker(&execWitness); + + mc::EventRel::Path cyclic; + + // Verify! 
+ try { + checker->valid_exec(&cyclic); + } catch (const mc::Error& e) { + // Violation found: drop the temporarily set properties on co + // before dumping the relations and the offending cycle. + execWitness.co.clear_props(); + + writeDotGraph(arch, execWitness, execWitness.rf, "mcversi_rf"); + writeDotGraph(arch, execWitness, execWitness.co, "mcversi_co"); + writeDotGraph(arch, execWitness, cyclic, "mcversi_cycle"); + + warn("Memory consistency violation: %s!\n", e.what()); + exitSimLoop("MCM bug found!", curTick() + 1); + } + + execWitness.po.clear_props(); + execWitness.co.clear_props(); + + // NOTE(review): true is returned even if a violation was caught above; + // the failure is only signalled via warn() and the scheduled + // exitSimLoop(). Confirm callers do not rely on the return value. + return true; + } + + // Finishes the current test: merges rf and co into observations, collects + // the used addresses, evaluates the test, advances to the next test and + // restarts the iteration counter at 1. Asserts that no operations are + // outstanding. + std::vector resetAll(size_t block_size_bytes) + { + assert(outstanding() == 0); + ++resetCount; + + DPRINTF(McVerSi, "%llu: McVerSi::resetAll(): reset_count = %d, " + "iteration = %d, sys_coverage = %.2f%%\n", + curTick(), getResetCount(), getIteration(), + getSystemCoverage() * 100.0f); + + observations |= execWitness.rf; + observations |= execWitness.co; + + auto result = getUsedAddrs(block_size_bytes); + updateCoverage(); + evalCurrentTest(); + nextTest(); + + iteration = 1; + + return result; + } + + // Prepares for another iteration of the same test: merges rf and co into + // observations, clears both relations and increments the iteration + // counter. Returns the addresses used so far. + std::vector resetConflict(size_t block_size_bytes) + { + DPRINTF(McVerSi, "McVerSi::resetConflict()\n"); + + observations |= execWitness.rf; + observations |= execWitness.co; + + auto result = getUsedAddrs(block_size_bytes); + execWitness.rf.Clear(); + execWitness.co.Clear(); + + ++iteration; + + return result; + } + + // True if ip maps to an operation known to the compiler, other than the + // return operation appended to each thread. + bool isValidInstPtr(mc2lib::types::InstPtr ip) const + { + const auto op_ptr = compiler.IpToOp(ip); + return op_ptr != nullptr && op_ptr != &returnOp; + } + + // True if vaddr lies within [min_addr(), max_addr()] of the random + // factory (both bounds inclusive). + bool isValidAddr(mc2lib::types::Addr vaddr) const + { + return randomFactory.min_addr() <= vaddr && + randomFactory.max_addr() >= vaddr; + } + + // Number of emitted events not yet recorded in rf or co. + // NOTE(review): unsigned subtraction; the result wraps around if + // rf.size() + co.size() ever exceeds nonInitEventCount. + size_t outstanding() const + { + return nonInitEventCount - + (execWitness.rf.size() + execWitness.co.size()); + } + + size_t getResetCount() const { return resetCount; } + + size_t getIteration() const { return iteration; } + + RandomFactory* getRandomFactory() { return &randomFactory; } + + Compiler* getCompiler() { return &compiler; } + + // Get singleton 
instance. + static McVerSi_State* get() + { + if (!instance) { + instance.reset(new McVerSi_State); + } + + return instance.get(); + } + + private: + // Only supports singleton usage. + McVerSi_State() + : urng(1234) + , randomFactory(0, 0, 0, 0) + , genePool(nullptr) + , compiler(std::unique_ptr( + new codegen::EvtStateCats(&execWitness, &arch))) + , nonInitEventCount(0) + , resetCount(0) + , iteration(1) + , crossoverMutate(MCVERSI_P_USEL, MCVERSI_P_BFA) + { + } + + // Disallow copy and assignment. + McVerSi_State(const McVerSi_State& rhs) = delete; + McVerSi_State& operator=(McVerSi_State& rhs) = delete; + + void updateCoverage() + { + // FIXME: Implement coverage computation here and update current + // test's individual coverage (to be returned by getTestCoverage). + // Although using without coverage is also useful, as results are + // likely better than random tests---especially as there is a + // correlation between the racyness of a test and the corner cases + // that are triggered---however without coverage we're effectively + // "flying blind", and very likely some corner cases won't be + // explored or will take longer to find. + // + // For example: in the McVerSi paper, the coherence protocol's + // covered state transitions (by a single test) are used as coverage; + // each test's reported coverage changes over time to focus on + // uncovered transitions in order to avoid local maxima. + } + + float getTestCoverage() + { + // FIXME: return current test's individual coverage. + return 0.1f; + } + + float getSystemCoverage() + { + // FIXME: Get system's total coverage. 
+ return 0.1f; + } + + void setThreads(RIT* rit) + { + nonInitEventCount = 0; + + // resets execWitness and arch + compiler.Reset(rit->threads()); + observations.Clear(); + } + + void initGenePool() + { + GenePool::Population initial_population; + + for (size_t i = 0; i < GP_POPULATION_SIZE; ++i) { + initial_population.emplace_back(urng, &randomFactory, TEST_SIZE); + } + + genePool.reset(new GenePool(initial_population, GP_MUTATION_PROB)); + + if (GP_POPULATION_SIZE == 1) { // Pseudo-random + curSelection = genePool->SelectAll(); + } else { + curSelection = genePool->SelectUniform(urng, GP_SELECTION_SIZE); + } + + curTest = curSelection.begin(); + + assert(curTest != curSelection.end()); + setThreads(*curTest); + } + + void evalCurrentTest() + { + // NDt -- average non-determinism of test. + float non_det_test = static_cast(observations.size()) / + static_cast(nonInitEventCount); + + // Compute fitaddrs. + assert((*curTest)->fitaddrsptr()->Empty()); + const auto obs_inv = observations.Inverse(); + for (const auto& obs : obs_inv.Raw()) { + if (obs.second.size() > // NDe + static_cast(non_det_test + 0.5f)) { + (*curTest)->fitaddrsptr()->Insert(obs.first.addr); + } + } + + (*curTest)->set_fitness(getTestCoverage()); + + DPRINTF(McVerSi, "%llu: McVerSi::evalCurrentTest(): " + "NDt = %.4f, fitness = %.4f\n", + curTick(), non_det_test, (*curTest)->Fitness()); + } + + void nextTest() + { + assert(genePool != nullptr); + + for (;;) { + curTest++; + + if (curTest == curSelection.end()) { + DPRINTF(McVerSi, "McVerSi::nextTest(): steps = %d, " + "avg_fitness = %.4f\n", + genePool->steps(), averageFitness()); + + if (GP_POPULATION_SIZE == 1) { + // Pseudo-random + assert(genePool->GetPtr()->size() == 1); + genePool->GetPtr()->clear(); + + // Make new test + genePool->GetPtr()->emplace_back(urng, &randomFactory, + TEST_SIZE); + curSelection = genePool->SelectAll(); + assert(curSelection.size() == 1); + } else { + GenePool::Selection mate_selection; + + // + // Implements 
Steady-State GA, with Tournament Selection, + // the oldest genome being replaced by the child. + // + + // Select tournament winners. Each tournament covers the + // GP_TOURNAMENT_SIZE consecutive entries of curSelection + // starting at index i, so the contenders of a tournament + // are at i + j (not j, which would always re-use the + // entries of the first tournament). + for (size_t i = 0; i < curSelection.size(); + i += GP_TOURNAMENT_SIZE) { + auto winner = curSelection[i]; + + for (size_t j = 1; j < GP_TOURNAMENT_SIZE && + i + j < curSelection.size(); ++j) { + if (*(curSelection[i + j]) < *winner) { + winner = curSelection[i + j]; + } + } + + mate_selection.push_back(winner); + } + + assert(mate_selection.size() == GP_MATES); + + // Delete oldest + mate_selection.push_back(&genePool->GetPtr()->front()); + + genePool->Step(urng, crossoverMutate, mate_selection, + GP_MATES, GP_MATES); + + assert(genePool->population_size() == GP_POPULATION_SIZE); + curSelection = + genePool->SelectUniform(urng, GP_SELECTION_SIZE); + } + + curTest = curSelection.begin(); + } + + if ((*curTest)->Fitness() > 0.0f || + !(*curTest)->fitaddrs().Empty()) { + // Do not re-evaluate. + continue; + } + + break; + } + + assert((*curTest)->fitaddrs().Empty()); + assert(curTest != curSelection.end()); + assert((*curTest)->Get().size() == TEST_SIZE); + + setThreads(*curTest); + } + + // Mean fitness over all genomes in the gene pool with positive fitness; + // returns 0 if there is no such genome (avoids a division by zero). + float averageFitness() const + { + float sum = 0.0f; + + size_t count = 0; + for (const auto& genome : genePool->Get()) { + if (genome.Fitness() > 0.0f) { + sum += genome.Fitness(); + ++count; + } + } + + return count == 0 ? 0.0f : sum / static_cast(count); + } + + // Copy addresses used by current test, so that the guest-workload does not + // have to iterate through entire allocated memory (significant performance + // improvements if we allocate several MB of memory to be used by tests). + std::vector getUsedAddrs(size_t block_size_bytes) + { + std::unordered_set used; + + for (const auto& obs : observations.Raw()) { + auto addr = + obs.first.addr - (obs.first.addr % block_size_bytes); + + // Events store only the masked out address for the set of + // possible synonyms. Use the lowest address as base. 
+ Addr base = + randomFactory.min_addr() & (~compiler.evts()->addr_mask()); + + used.insert(base + addr); + } + + std::vector result(used.begin(), used.end()); + // end marker + result.push_back(0); + + return result; + } + + private: + ReturnOp returnOp; + + URNG urng; + RandomFactory randomFactory; + + // Gene-pool initialized lazily (on first use). + std::unique_ptr genePool; + + GenePool::Selection curSelection; + GenePool::Selection::iterator curTest; + + mc::cats::ExecWitness execWitness; + + Arch arch; + Compiler compiler; + + // Observations across entire test-run. + mc::EventRel observations; + + // The number of events without the initial events being generated on the + // fly. + size_t nonInitEventCount; + + // The number of tests run. + size_t resetCount; + + // Current iteration. + size_t iteration; + + mc2lib::mcversi::CrossoverMutate crossoverMutate; + + // Singleton instance. + static std::unique_ptr instance; +}; + +constexpr double McVerSi_State::MCVERSI_P_BFA; +constexpr double McVerSi_State::MCVERSI_P_USEL; +constexpr float McVerSi_State::GP_MUTATION_PROB; +constexpr size_t McVerSi_State::GP_MATES; +constexpr size_t McVerSi_State::GP_POPULATION_SIZE; +constexpr size_t McVerSi_State::GP_TOURNAMENT_SIZE; +constexpr size_t McVerSi_State::GP_SELECTION_SIZE; +constexpr size_t McVerSi_State::TEST_SIZE; +std::unique_ptr McVerSi_State::instance; + +} // namespace + +namespace McVerSi { + +void +setRanges(unsigned min_pid, unsigned max_pid, Addr min_vaddr, Addr max_vaddr, + size_t stride, Addr addr_mask) +{ + McVerSi_State::get()->setRanges(min_pid, max_pid, min_vaddr, max_vaddr, + stride, addr_mask); +} + +uint64_t +emitThread(unsigned pid, Addr base, void* code, size_t len) +{ + return McVerSi_State::get()->emitThread(pid, base, code, len); +} + +bool +verify() +{ + return McVerSi_State::get()->verify(); +} + +std::vector +resetAll(size_t block_size_bytes) +{ + return McVerSi_State::get()->resetAll(block_size_bytes); +} + +std::vector 
+resetConflict(size_t block_size_bytes) +{ + return McVerSi_State::get()->resetConflict(block_size_bytes); +} + +void +forEachAddrRangeOnce(std::function fun) +{ + // RandomFactory is oblivious to addr-mask, but since this function + // should only be used to zero out test memory (see pseudo_inst.cc), we + // do not want to iterate through virtual address synonyms. + Addr max_max_masked = 0; + McVerSi_State::get()->getRandomFactory()->for_each_AddrRange( + [&fun, &max_max_masked](Addr min, Addr max) { + auto max_masked = + (max & McVerSi_State::get()->getCompiler()->evts()->addr_mask()); + if (max_max_masked < max_masked) { + max_max_masked = max_masked; + fun(min, max); + } + }); +} + +bool +isValidInstPtr(Addr ip) +{ + return McVerSi_State::get()->isValidInstPtr(ip); +} + +bool +isValidAddr(Addr vaddr) +{ + return McVerSi_State::get()->isValidAddr(vaddr); +} + +void +updateObs(Addr ip, int part, Addr vaddr, const void* from_id, size_t size) +{ + McVerSi_State::get()->updateObs( + ip, part, vaddr, static_cast(from_id), + size); +} + +} // namespace McVerSi diff -r ff116dd4f3e0 -r c399153c3c5a src/sim/mcversi.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/sim/mcversi.hh Fri May 20 12:33:15 2016 +0100 @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2014-2016 Marco Elver + * Copyright (c) 2016 The University of Edinburgh + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Marco Elver + */ + +#ifndef __SIM_MCVERSI_HH__ +#define __SIM_MCVERSI_HH__ + +#include +#include + +#include "base/types.hh" + +/** + * This implements the simulator-specific aspects of the McVerSi framework, + * which relies on ext/mc2lib (simulator-independent portion, which provides + * memory consistency models and checker, code generator, etc.). 
+ * + * This interface is primarily to be used by the pseudo instructions, that are + * used by the guest-workload to drive the test-generation, execution and + * verification loop. + * + * For more details see the paper (available from + * http://ac.marcoelver.com/research/mcversi): + * + * Marco Elver and Vijay Nagarajan. "McVerSi: A Test Generation Framework for + * Fast Memory Consistency Verification in Simulation." In IEEE International + * Symposium on High Performance Computer Architecture (HPCA). Barcelona, + * Spain, March 2016. + */ +namespace McVerSi { + +/** + * Configures the test generator. + * + * @param min_pid Minimum processor/thread ID. + * @param max_pid Maximum processor/thread ID. + * @param min_vaddr Minimum virtual address. + * @param max_vaddr Maximum virtual address. + * @param stride Address stride handed to the test generator (presumably the + * spacing between generated test addresses; TODO confirm against mc2lib). + * @param addr_mask Address mask; used to let test generator and checker know + * which addresses (synonyms) map to same physical address. + */ +void setRanges(unsigned min_pid, unsigned max_pid, Addr min_vaddr, + Addr max_vaddr, size_t stride, Addr addr_mask); + +/** + * Emit code for thread. + * + * @param pid The processor/thread ID for which to emit code. + * @param base The code offset. + * @param[out] code Where code generator should write the code. + * @param len Maximum length of to be emitted code. + * @return Bytes written to code. + */ +uint64_t emitThread(unsigned pid, Addr base, void* code, size_t len); + +/** + * Verify execution after test iteration. + * + * @return Valid execution or not. + */ +bool verify(); + +/** + * Reset all relations (static orders and conflict orders of checker), set up + * next test and provide information of which addresses were used by the test + * to speed up resetting (in case of guest side operation needed for reset, + * e.g. cache flush instructions). + * + * @param block_size_bytes The cache block size of the system. + * @return all used addresses of the test modulo block_size_bytes. 
+ */
+std::vector<Addr> resetAll(size_t block_size_bytes);
+
+/**
+ * Reset only conflict order relations (but keep static relations), as the same
+ * test is to be run again, and provide information of which addresses were
+ * used by the test.
+ *
+ * @param block_size_bytes The cache block size of the system.
+ * @return all used addresses of the test modulo block_size_bytes.
+ */
+std::vector<Addr> resetConflict(size_t block_size_bytes);
+
+/**
+ * Iterates over all address ranges used in the test, visiting virtual address
+ * synonyms only once. Used for host-assisted resetting of test memory.
+ *
+ * @param fun Function to be called with the minimum and maximum address of
+ *            each address range.
+ */
+void forEachAddrRangeOnce(std::function<void(Addr, Addr)> fun);
+
+/**
+ * Check if instruction belongs to a test.
+ *
+ * @param ip The instruction pointer of the instruction.
+ * @return True if instruction is part of current test, false otherwise.
+ */
+bool isValidInstPtr(Addr ip);
+
+/**
+ * Check if virtual memory address belongs to a test.
+ *
+ * @param vaddr The virtual memory address.
+ * @return True if address is part of current test, false otherwise.
+ */
+bool isValidAddr(Addr vaddr);
+
+/**
+ * Update observation.
+ *
+ * @param ip The instruction pointer for this observation.
+ * @param part The part of the instruction (microcode counter), e.g. for RMWs.
+ * @param vaddr The virtual (effective) address of the observed access.
+ * @param from_id Pointer to observed bytes.
+ * @param size Size of observed bytes.
+ */ +void updateObs(Addr ip, int part, Addr vaddr, const void* from_id, size_t size); + +} // namespace McVerSi + +#endif // __SIM_MCVERSI_HH__ diff -r ff116dd4f3e0 -r c399153c3c5a src/sim/SConscript --- a/src/sim/SConscript Thu May 19 20:37:31 2016 +0100 +++ b/src/sim/SConscript Fri May 20 12:33:15 2016 +0100 @@ -69,6 +69,7 @@ Source('linear_solver.cc') Source('system.cc') Source('dvfs_handler.cc') +Source('mcversi.cc') if env['TARGET_ISA'] != 'null': SimObject('InstTracer.py') @@ -90,6 +91,7 @@ DebugFlag('IPR') DebugFlag('Interrupt') DebugFlag('Loader') +DebugFlag('McVerSi') DebugFlag('PseudoInst') DebugFlag('Stack') DebugFlag('SyscallBase') diff -r ff116dd4f3e0 -r c399153c3c5a util/m5/m5ops.h --- a/util/m5/m5ops.h Thu May 19 20:37:31 2016 +0100 +++ b/util/m5/m5ops.h Fri May 20 12:33:15 2016 +0100 @@ -78,6 +78,12 @@ #define syscall_func 0x60 // Reserved for user #define pagefault_func 0x61 // Reserved for user +#define mark_test_mem_range 0x62 +#define make_test_thread 0x63 +#define verify_reset_conflict 0x64 +#define verify_reset_all 0x65 +#define barrier_async 0x66 + // These operations are for critical path annotation #define annotate_func 0x55 #define an_bsm 0x1 @@ -121,7 +127,12 @@ M5OP(m5_addsymbol, addsymbol_func, 0); \ M5OP(m5_panic, panic_func, 0); \ M5OP(m5_work_begin, work_begin_func, 0); \ - M5OP(m5_work_end, work_end_func, 0); + M5OP(m5_work_end, work_end_func, 0); \ + M5OP(m5_mark_test_mem_range, mark_test_mem_range, 0); \ + M5OP(m5_make_test_thread, make_test_thread, 0); \ + M5OP(m5_verify_reset_conflict, verify_reset_conflict, 0); \ + M5OP(m5_verify_reset_all, verify_reset_all, 0); \ + M5OP(m5_barrier_async, barrier_async, 0); #define FOREACH_M5_ANNOTATION \ M5_ANNOTATION(m5a_bsm, an_bsm); \ diff -r ff116dd4f3e0 -r c399153c3c5a util/m5/m5op.h --- a/util/m5/m5op.h Thu May 19 20:37:31 2016 +0100 +++ b/util/m5/m5op.h Fri May 20 12:33:15 2016 +0100 @@ -62,6 +62,13 @@ void m5_work_begin(uint64_t workid, uint64_t threadid); void m5_work_end(uint64_t 
workid, uint64_t threadid); +// McVerSi +void m5_mark_test_mem_range(void *start, void *end, uint64_t stride, void *addr_mask); +uint64_t m5_make_test_thread(void *code, uint64_t len); +uint64_t m5_verify_reset_conflict(void **used_addrs, uint64_t len); +uint64_t m5_verify_reset_all(void **used_addrs, uint64_t len); +uint64_t m5_barrier_async(uint64_t num_threads, uint64_t with_quiesce); + // These operations are for critical path annotation void m5a_bsm(char *sm, const void *id, int flags); void m5a_esm(char *sm); diff -r ff116dd4f3e0 -r c399153c3c5a util/m5/m5op_x86.S --- a/util/m5/m5op_x86.S Thu May 19 20:37:31 2016 +0100 +++ b/util/m5/m5op_x86.S Fri May 20 12:33:15 2016 +0100 @@ -83,3 +83,11 @@ TWO_BYTE_OP(m5_panic, panic_func) TWO_BYTE_OP(m5_work_begin, work_begin_func) TWO_BYTE_OP(m5_work_end, work_end_func) + +#ifdef M5OP_ADDR +TWO_BYTE_OP(m5_mark_test_mem_range, mark_test_mem_range) +TWO_BYTE_OP(m5_make_test_thread, make_test_thread) +TWO_BYTE_OP(m5_verify_reset_conflict, verify_reset_conflict) +TWO_BYTE_OP(m5_verify_reset_all, verify_reset_all) +TWO_BYTE_OP(m5_barrier_async, barrier_async) +#endif diff -r ff116dd4f3e0 -r c399153c3c5a src/sim/pseudo_inst.cc --- a/src/sim/pseudo_inst.cc Thu May 19 20:37:31 2016 +0100 +++ b/src/sim/pseudo_inst.cc Fri May 20 12:33:15 2016 +0100 @@ -67,6 +67,7 @@ #include "params/BaseCPU.hh" #include "sim/full_system.hh" #include "sim/initparam_keys.hh" +#include "sim/mcversi.hh" #include "sim/process.hh" #include "sim/pseudo_inst.hh" #include "sim/serialize.hh" @@ -211,6 +212,23 @@ m5PageFault(tc); break; + /* McVerSi */ + case 0x62: + markTestMemRange(tc, args[0], args[1], args[2], args[3]); + break; + + case 0x63: + return makeTestThread(tc, args[0], args[1]); + + case 0x64: + return verifyResetConflict(tc, args[0], args[1]); + + case 0x65: + return verifyResetAll(tc, args[0], args[1]); + + case 0x66: + return barrierAsync(tc, args[0], args[1]); + default: warn("Unhandled m5 op: 0x%x\n", func); break; @@ -765,4 +783,212 @@ } 
}

+// Host-assisted 0'ing of test memory for McVerSi.
+//
+// Writes zeros over every address range reported by
+// McVerSi::forEachAddrRangeOnce(), using the given thread context for the
+// copy into guest memory.
+static void
+zeroTestMem(ThreadContext *tc)
+{
+    McVerSi::forEachAddrRangeOnce([tc](Addr min_addr, Addr max_addr) {
+        // Both bounds are inclusive, hence the +1.
+        const size_t test_mem_size = max_addr - min_addr + 1;
+        // Value-initialised host buffer; released automatically.
+        std::vector<char> zeros(test_mem_size, 0);
+        CopyIn(tc, min_addr, zeros.data(), test_mem_size);
+    });
+}
+
+// Pseudo instruction 0x62: configure the McVerSi test generator with the
+// guest's test memory range and zero that memory.
+//
+// The processor/thread ID range is [0, numContexts() - 1] of the system the
+// calling thread context belongs to.
+//
+// NOTE(review): unlike makeTestThread() there is no FullSystem guard here,
+// although zeroTestMem() also goes through CopyIn() -- confirm this is
+// intentional.
+void
+markTestMemRange(ThreadContext *tc, Addr vaddr_start, Addr vaddr_end,
+                 uint64_t stride, Addr addr_mask)
+{
+    DPRINTF(PseudoInst,
+            "PseudoInst::markTestMemRange(0x%x, 0x%x, 0x%x, 0x%x)\n",
+            vaddr_start, vaddr_end, stride, addr_mask);
+
+    assert(tc->getSystemPtr()->numContexts() != 0);
+    McVerSi::setRanges(0, tc->getSystemPtr()->numContexts() - 1,
+                       vaddr_start, vaddr_end, stride, addr_mask);
+
+    zeroTestMem(tc);
+}
+
+// Pseudo instruction 0x63: let McVerSi emit the test code for the calling CPU
+// and copy it into guest memory at code_vaddr.
+//
+// @param code_vaddr Guest virtual address of the code buffer.
+// @param len        Size of the guest code buffer in bytes.
+// @return Number of bytes of code written (0 outside full-system mode).
+uint64_t
+makeTestThread(ThreadContext *tc, Addr code_vaddr, uint64_t len)
+{
+    DPRINTF(PseudoInst, "PseudoInst::makeTestThread(0x%x, 0x%x)\n",
+            code_vaddr, len);
+    if (!FullSystem) {
+        panicFsOnlyPseudoInst("makeTestThread");
+        return 0;
+    }
+
+    auto cpu_id = tc->getCpuPtr()->cpuId();
+    // Host-side staging buffer; released automatically on every exit path.
+    std::vector<char> buf(len);
+    uint64_t result = McVerSi::emitThread(cpu_id, code_vaddr, buf.data(), len);
+    // Never copy more than the staging buffer (and the guest buffer) holds.
+    panic_if(result > len,
+             "McVerSi::emitThread reported more bytes than requested!");
+    CopyIn(tc, code_vaddr, buf.data(), result);
+
+    return result;
+}
+
+// Pseudo instruction 0x64: verify the last test iteration and, if it was
+// valid, reset only the conflict orders so the same test can run again.
+//
+// On success the addresses returned by McVerSi::resetConflict() are copied
+// to the guest buffer at used_addrs and the test memory is zeroed.
+//
+// @param used_addrs Guest virtual address of the buffer for used addresses.
+// @param len        Size of that buffer in bytes.
+// @return 1 if verification passed, 0 otherwise (nothing is reset then).
+uint64_t
+verifyResetConflict(ThreadContext *tc, Addr used_addrs, uint64_t len)
+{
+    DPRINTF(PseudoInst, "PseudoInst::verifyResetConflict(0x%x, 0x%x)\n",
+            used_addrs, len);
+
+    if (!McVerSi::verify())
+        return 0;
+
+    auto used = McVerSi::resetConflict(tc->getSystemPtr()->cacheLineSize());
+    size_t used_bytes = used.size() * sizeof(Addr);
+    panic_if(used_bytes > len, "Guest did not allocate enough memory!");
+    CopyIn(tc, used_addrs, used.data(), used_bytes);
+
+    zeroTestMem(tc);
+
+    return 1;
+}
+
+uint64_t
+verifyResetAll(ThreadContext *tc, Addr used_addrs, uint64_t len)
+{
+    DPRINTF(PseudoInst, "PseudoInst::verifyResetAll(0x%x, 0x%x)\n",
+            used_addrs, len);
+
+    if (!McVerSi::verify())
+        return 0;
+
+    auto used =
McVerSi::resetAll(tc->getSystemPtr()->cacheLineSize());
+    size_t used_bytes = used.size() * sizeof(Addr);
+    panic_if(used_bytes > len, "Guest did not allocate enough memory!");
+    CopyIn(tc, used_addrs, used.data(), used_bytes);
+
+    zeroTestMem(tc);
+
+    return 1;
+}
+
+//
+// This is a non-blocking barrier implementation (host-assisted barrier), which
+// is supposed to be used by guest threads to synchronize with very high
+// precision and much faster compared to using pthread's barrier (or similar).
+//
+// It provides support for quiescing a CPU that is waiting, but appears to no
+// longer work properly (very fragile, with possible deadlocks) at present.
+//
+// The main user of this is the McVerSi guest-workload.
+//
+// Protocol, as implemented below (all state is function-local static, so
+// there is a single barrier instance):
+//  - Each arriving thread decrements count_notarrived. All but the final
+//    arriver set their per-CPU flag; the final arriver records itself in
+//    last_cpu_id and wakes all other CPUs.
+//  - Once everybody has arrived, a flagged thread clears its flag on its next
+//    call, increments count_departed and returns 0. When all but the last
+//    arriver have departed, the last arriver is woken.
+//  - The last arriver returns 0 only after all others have departed; it also
+//    resets the barrier for the next round.
+//
+// @param num_threads  Number of threads taking part in the barrier.
+// @param with_quiesce Non-zero to quiesce the calling CPU for
+//                     3 * numContexts() cycles whenever it has to wait.
+// @return 0 once the caller has passed the barrier, 1 if it has to call
+//         again (the guest presumably polls until 0 is returned).
+//
+uint64_t
+barrierAsync(ThreadContext *tc, uint64_t num_threads, uint64_t with_quiesce)
+{
+    static std::vector<bool> flags;
+    static uint64_t num_threads_last = 0;
+    static uint64_t count_notarrived = 0;
+    static uint64_t count_departed = 0;
+    static int last_cpu_id = -1;
+
+    auto num_contexts = tc->getSystemPtr()->numContexts();
+    auto cpu_id = tc->getCpuPtr()->cpuId();
+
+    DPRINTF(PseudoInst, "PseudoInst::barrierAsync(%d, %d): cpu_id = %d, "
+            "notarrived = %d, departed = %d, last_cpu_id = %d\n",
+            num_threads, with_quiesce, cpu_id, count_notarrived,
+            count_departed, last_cpu_id);
+
+    if (flags.size() == 0 || num_threads_last != num_threads) {
+        // First time barrierAsync called or changed number of threads.
+
+        assert(num_contexts != 0);
+        assert(num_threads > 0 && num_threads <= num_contexts);
+        assert(count_notarrived == num_threads_last);
+        assert(count_departed == 0);
+        assert(last_cpu_id == -1);
+
+        num_threads_last = num_threads;
+
+        // Initialize flags: one cleared entry per thread context.
+        flags.assign(num_contexts, false);
+
+        count_notarrived = num_threads;
+
+        assert(flags.size() == num_contexts);
+    }
+
+    assert(count_notarrived <= num_threads);
+    assert(count_departed <= num_threads - 1);
+    // flags is indexed by CPU id below; catch out-of-range (or negative) ids.
+    assert(static_cast<size_t>(cpu_id) < flags.size());
+
+    // Quiesce policy.
+    with_quiesce = with_quiesce
+        ? (num_contexts * 3)
+        : 0 // count_notarrived
+        ;
+
+    if (flags[cpu_id]) {
+        if (count_notarrived == 0) {
+            // If all threads arrive at barrier, unset thread's flag and signal
+            // barrier has been left (count_departed).
+            flags[cpu_id] = false;
+            ++count_departed;
+
+            if (count_departed == num_threads - 1) {
+                // All but last have departed, wake up last.
+                wakeCPU(tc, last_cpu_id);
+            }
+
+            return 0;
+        }
+
+        if (with_quiesce) {
+            quiesceCycles(tc, with_quiesce);
+        }
+
+        return 1;
+    } else if (count_notarrived == 0) {
+        if (last_cpu_id == cpu_id) {
+            if (count_departed == num_threads - 1) {
+                // All threads except the last one to have arrived at barrier
+                // have left barrier -> reset.
+                count_notarrived = num_threads;
+                count_departed = 0;
+                last_cpu_id = -1;
+
+                return 0;
+            }
+
+            // Last thread must keep waiting.
+        } else {
+            // Can reach if a thread left and re-entered barrier.
+        }
+
+        if (with_quiesce) {
+            quiesceCycles(tc, with_quiesce);
+        }
+
+        return 1;
+    }
+
+    if (--count_notarrived != 0) {
+        // Last thread to arrive at barrier does not set flag.
+        flags[cpu_id] = true;
+    } else {
+        // Wake all other CPUs in case they went to sleep.
+        for (int i = 0; i < num_contexts; ++i) {
+            if (i != cpu_id) {
+                wakeCPU(tc, i);
+            }
+        }
+
+        last_cpu_id = cpu_id;
+    }
+
+    if (with_quiesce) {
+        quiesceCycles(tc, with_quiesce);
+    }
+
+    return 1;
+}
+
 } // namespace PseudoInst
diff -r ff116dd4f3e0 -r c399153c3c5a src/arch/arm/isa/formats/aarch64.isa
--- a/src/arch/arm/isa/formats/aarch64.isa Thu May 19 20:37:31 2016 +0100
+++ b/src/arch/arm/isa/formats/aarch64.isa Fri May 20 12:33:15 2016 +0100
@@ -2027,6 +2027,14 @@
             case 0x54: return new M5panic(machInst);
             case 0x5a: return new M5workbegin64(machInst);
             case 0x5b: return new M5workend64(machInst);
+
+            // The following 5 ops are for McVerSi (sim/mcversi.hh)
+            case 0x62: return new M5marktestmemrange64(machInst);
+            case 0x63: return new M5maketestthread64(machInst);
+            case 0x64: return new M5verifyresetconflict64(machInst);
+            case 0x65: return new M5verifyresetall64(machInst);
+            case 0x66: return new M5barrierasync64(machInst);
+
             default: return new Unknown64(machInst);
         }
     }