# Node ID cc93e6609fc3170ccded6612d24ff18478152b82 # Parent 5dd0809b96627bf398900c797d0dc2f128e1201e diff --git a/configs/common/Options.py b/configs/common/Options.py --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -77,11 +77,8 @@ parser.add_option("--cpu-clock", action="store", type="string", default='2GHz', help="Clock for blocks running at CPU speed") - parser.add_option("--smt", action="store_true", default=False, - help = """ - Only used if multiple programs are specified. If true, - then the number of threads per cpu is same as the - number of programs.""") + parser.add_option("-t", "--num-threads", type="int", default=1, + help = """Number of SMT hardware threads""") # Memory Options parser.add_option("--list-mem-types", diff --git a/configs/example/se.py b/configs/example/se.py --- a/configs/example/se.py +++ b/configs/example/se.py @@ -88,8 +88,7 @@ if options.options != "": pargs = options.options.split(';') - idx = 0 - for wrkld in workloads: + for (idx, wrkld) in enumerate(workloads): process = LiveProcess() process.executable = wrkld process.cwd = os.getcwd() @@ -111,13 +110,8 @@ process.errout = errouts[idx] multiprocesses.append(process) - idx += 1 - if options.smt: - assert(options.cpu_type == "detailed") - return multiprocesses, idx - else: - return multiprocesses, 1 + return multiprocesses parser = optparse.OptionParser() @@ -134,12 +128,13 @@ sys.exit(1) multiprocesses = [] -numThreads = 1 +numThreads = options.num_threads +np = options.num_cpus if options.bench: apps = options.bench.split("-") - if len(apps) != options.num_cpus: - print "number of benchmarks not equal to set num_cpus!" + if len(apps) != numThreads * np: + print "number of benchmarks not equal to hardware execution contexts!" sys.exit(1) for app in apps: @@ -159,20 +154,17 @@ buildEnv['TARGET_ISA'], app) sys.exit(1) elif options.cmd: - multiprocesses, numThreads = get_processes(options) + multiprocesses = get_processes(options) + if numThreads * np < len(multiprocesses): + print "number of threads is less than the number of binaries!" + sys.exit(1) else: print >> sys.stderr, "No workload specified. Exiting!\n" sys.exit(1) - (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) CPUClass.numThreads = numThreads -# Check -- do not allow SMT with multiple CPUs -if options.smt and options.num_cpus > 1: - fatal("You cannot use SMT with multiple CPUs!") - -np = options.num_cpus system = System(cpu = [CPUClass(cpu_id=i) for i in xrange(np)], mem_mode = test_mem_mode, mem_ranges = [AddrRange(options.mem_size)], @@ -222,12 +214,14 @@ fatal("SimPoint generation not supported with more than one CPUs") for i in xrange(np): - if options.smt: - system.cpu[i].workload = multiprocesses - elif len(multiprocesses) == 1: - system.cpu[i].workload = multiprocesses[0] + if i * numThreads < len(multiprocesses): + start = i * numThreads + system.cpu[i].workload = multiprocesses[start:start + numThreads] else: - system.cpu[i].workload = multiprocesses[i] + # Even though the number of execution contexts exceeds the number + # of processes a workload still has to be assigned to a cpu + # model. For this reason we will assign the last specified process + system.cpu[i].workload = multiprocesses[-1] if options.fastmem: system.cpu[i].fastmem = True diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -214,11 +214,14 @@ checker = NULL; } - if (!FullSystem) { - thread.resize(numThreads); - tids.resize(numThreads); + if (FullSystem && numThreads != 1) { + warn("O3 does not support SMT in FS mode; resetting numThreads to 1\n"); + numThreads = 1; } + thread.resize(numThreads); + tids.resize(numThreads); + // The stages also need their CPU pointer setup. However this // must be done at the upper level CPU because they have pointers // to the upper level CPU, and not this FullO3CPU. @@ -253,17 +256,12 @@ rename.setIEWStage(&iew); rename.setCommitStage(&commit); - ThreadID active_threads; - if (FullSystem) { - active_threads = 1; - } else { - active_threads = params->workload.size(); + ThreadID active_threads = numThreads; - if (active_threads > Impl::MaxThreads) { - panic("Workload Size too large. Increase the 'MaxThreads' " - "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) " - "or edit your workload size."); - } + if (active_threads > Impl::MaxThreads) { + panic("Workload Size too large. Increase the 'MaxThreads' " + "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) " + "or edit your workload size."); } //Make Sure That this a Valid Architeture @@ -351,13 +349,12 @@ //usedTids[tid] = true; //threadMap[tid] = tid; } else { - //Allocate Empty thread so M5 can use later - //when scheduling threads to CPU - Process* dummy_proc = NULL; - + // Allocate Empty thread so it can be used later + // when scheduling threads to CPU, with the last + // specified workload assigned as a process this->thread[tid] = new typename FullO3CPU::Thread( (typename Impl::O3CPU *)(this), - tid, dummy_proc); + tid, params->workload.back()); //usedTids[tid] = false; } } diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -568,8 +568,8 @@ // Make sure the instruction is valid assert(new_inst); - DPRINTF(IQ, "Adding instruction [sn:%lli] PC %s to the IQ.\n", - new_inst->seqNum, new_inst->pcState()); + DPRINTF(IQ, "Adding instruction [sn:%i] PC %s thread %d to the IQ.\n", + new_inst->seqNum, new_inst->pcState(), new_inst->threadNumber); assert(freeEntries != 0);