diff -r 6a1f2d99bf79 -r 857838bcc36e configs/common/CacheConfig.py --- a/configs/common/CacheConfig.py Thu Feb 19 08:02:37 2015 +0000 +++ b/configs/common/CacheConfig.py Thu Feb 19 08:02:47 2015 +0000 @@ -65,14 +65,12 @@ if options.l2cache: # Provide a clock for the L2 and the L1-to-L2 bus here as they # are not connected using addTwoLevelCacheHierarchy. Use the - # same clock as the CPUs, and set the L1-to-L2 bus width to 32 - # bytes (256 bits). + # same clock as the CPUs. system.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain, size=options.l2_size, assoc=options.l2_assoc) - system.tol2bus = CoherentXBar(clk_domain = system.cpu_clk_domain, - width = 32) + system.tol2bus = L2XBar(clk_domain = system.cpu_clk_domain) system.l2.cpu_side = system.tol2bus.master system.l2.mem_side = system.membus.slave diff -r 6a1f2d99bf79 -r 857838bcc36e configs/common/FSConfig.py --- a/configs/common/FSConfig.py Thu Feb 19 08:02:37 2015 +0000 +++ b/configs/common/FSConfig.py Thu Feb 19 08:02:47 2015 +0000 @@ -50,7 +50,7 @@ def childImage(self, ci): self.image.child.image_file = ci -class MemBus(CoherentXBar): +class MemBus(SystemXBar): badaddr_responder = BadAddr() default = Self.badaddr_responder.pio @@ -78,7 +78,7 @@ self.tsunami = BaseTsunami() # Create the io bus to connect all device ports - self.iobus = NoncoherentXBar() + self.iobus = IOXBar() self.tsunami.attachIO(self.iobus) self.tsunami.ide.pio = self.iobus.master @@ -143,7 +143,7 @@ # generic system mdesc = SysConfig() self.readfile = mdesc.script() - self.iobus = NoncoherentXBar() + self.iobus = IOXBar() self.membus = MemBus() self.bridge = Bridge(delay='50ns') self.t1000 = T1000() @@ -205,7 +205,7 @@ mdesc = SysConfig() self.readfile = mdesc.script() - self.iobus = NoncoherentXBar() + self.iobus = IOXBar() self.membus = MemBus() self.membus.badaddr_responder.warn_access = "warn" self.bridge = Bridge(delay='50ns') @@ -311,7 +311,7 @@ # generic system mdesc = SysConfig() self.readfile = mdesc.script() - self.iobus = 
NoncoherentXBar() + self.iobus = IOXBar() self.membus = MemBus() self.bridge = Bridge(delay='50ns') self.mem_ranges = [AddrRange('1GB')] @@ -358,7 +358,7 @@ x86_sys.membus = MemBus() # North Bridge - x86_sys.iobus = NoncoherentXBar() + x86_sys.iobus = IOXBar() x86_sys.bridge = Bridge(delay='50ns') x86_sys.bridge.master = x86_sys.iobus.slave x86_sys.bridge.slave = x86_sys.membus.master @@ -394,7 +394,7 @@ def connectX86RubySystem(x86_sys): # North Bridge - x86_sys.iobus = NoncoherentXBar() + x86_sys.iobus = IOXBar() # add the ide to the list of dma devices that later need to attach to # dma controllers diff -r 6a1f2d99bf79 -r 857838bcc36e configs/dram/sweep.py --- a/configs/dram/sweep.py Thu Feb 19 08:02:37 2015 +0000 +++ b/configs/dram/sweep.py Thu Feb 19 08:02:47 2015 +0000 @@ -84,7 +84,7 @@ # start with the system itself, using a multi-layer 1.5 GHz # crossbar, delivering 64 bytes / 5 cycles (one header cycle) # which amounts to 19.2 GByte/s per layer and thus per port -system = System(membus = NoncoherentXBar(width = 16)) +system = System(membus = IOXBar(width = 16)) system.clk_domain = SrcClockDomain(clock = '1.5GHz', voltage_domain = VoltageDomain(voltage = '1V')) diff -r 6a1f2d99bf79 -r 857838bcc36e configs/example/memcheck.py --- a/configs/example/memcheck.py Thu Feb 19 08:02:37 2015 +0000 +++ b/configs/example/memcheck.py Thu Feb 19 08:02:47 2015 +0000 @@ -243,7 +243,7 @@ if level != 0: # Create a crossbar and add it to the subsystem, note that # we do this even with a single element on this level - xbar = CoherentXBar(width = 32) + xbar = L2XBar(width = 32) subsys.xbar = xbar if next_cache: xbar.master = next_cache.cpu_side @@ -269,7 +269,7 @@ if ntesters > 1: # Create a crossbar and add it to the subsystem - xbar = CoherentXBar(width = 32) + xbar = L2XBar(width = 32) subsys.xbar = xbar xbar.master = next_cache.cpu_side for tester, checker in zip(testers, checkers): diff -r 6a1f2d99bf79 -r 857838bcc36e configs/example/memtest.py --- 
a/configs/example/memtest.py Thu Feb 19 08:02:37 2015 +0000 +++ b/configs/example/memtest.py Thu Feb 19 08:02:47 2015 +0000 @@ -233,7 +233,7 @@ if level != 0: # Create a crossbar and add it to the subsystem, note that # we do this even with a single element on this level - xbar = CoherentXBar(width = 32) + xbar = L2XBar() subsys.xbar = xbar if next_cache: xbar.master = next_cache.cpu_side @@ -258,7 +258,7 @@ if ntesters > 1: # Create a crossbar and add it to the subsystem - xbar = CoherentXBar(width = 32) + xbar = L2XBar() subsys.xbar = xbar xbar.master = next_cache.cpu_side for tester in testers: diff -r 6a1f2d99bf79 -r 857838bcc36e configs/example/ruby_mem_test.py --- a/configs/example/ruby_mem_test.py Thu Feb 19 08:02:37 2015 +0000 +++ b/configs/example/ruby_mem_test.py Thu Feb 19 08:02:47 2015 +0000 @@ -106,7 +106,7 @@ system = System(cpu = cpus, funcmem = SimpleMemory(in_addr_map = False), - funcbus = NoncoherentXBar(), + funcbus = IOXBar(), clk_domain = SrcClockDomain(clock = options.sys_clock), mem_ranges = [AddrRange(options.mem_size)]) diff -r 6a1f2d99bf79 -r 857838bcc36e configs/example/se.py --- a/configs/example/se.py Thu Feb 19 08:02:37 2015 +0000 +++ b/configs/example/se.py Thu Feb 19 08:02:47 2015 +0000 @@ -265,7 +265,7 @@ system.cpu[i].dtb.walker.port = ruby_port.slave else: MemClass = Simulation.setMemClass(options) - system.membus = CoherentXBar() + system.membus = SystemXBar() system.system_port = system.membus.slave CacheConfig.config_cache(options, system) MemConfig.config_mem(options, system) diff -r 6a1f2d99bf79 -r 857838bcc36e configs/ruby/Ruby.py --- a/configs/ruby/Ruby.py Thu Feb 19 08:02:37 2015 +0000 +++ b/configs/ruby/Ruby.py Thu Feb 19 08:02:47 2015 +0000 @@ -116,7 +116,7 @@ crossbar = None if len(system.mem_ranges) > 1: - crossbar = NoncoherentXBar() + crossbar = IOXBar() crossbars.append(crossbar) dir_cntrl.memory = crossbar.slave diff -r 6a1f2d99bf79 -r 857838bcc36e configs/splash2/cluster.py --- a/configs/splash2/cluster.py Thu Feb 
19 08:02:37 2015 +0000 +++ b/configs/splash2/cluster.py Thu Feb 19 08:02:47 2015 +0000 @@ -171,7 +171,7 @@ for j in xrange(options.numclusters): clusters[j].id = j for cluster in clusters: - cluster.clusterbus = CoherentXBar(clock=busFrequency) + cluster.clusterbus = L2XBar(clock=busFrequency) all_l1buses += [cluster.clusterbus] cluster.cpus = [TimingSimpleCPU(cpu_id = i + cluster.id, clock=options.frequency) @@ -184,7 +184,7 @@ for j in xrange(options.numclusters): clusters[j].id = j for cluster in clusters: - cluster.clusterbus = CoherentXBar(clock=busFrequency) + cluster.clusterbus = L2XBar(clock=busFrequency) all_l1buses += [cluster.clusterbus] cluster.cpus = [DerivO3CPU(cpu_id = i + cluster.id, clock=options.frequency) @@ -197,7 +197,7 @@ for j in xrange(options.numclusters): clusters[j].id = j for cluster in clusters: - cluster.clusterbus = CoherentXBar(clock=busFrequency) + cluster.clusterbus = L2XBar(clock=busFrequency) all_l1buses += [cluster.clusterbus] cluster.cpus = [AtomicSimpleCPU(cpu_id = i + cluster.id, clock=options.frequency) @@ -211,10 +211,10 @@ # ---------------------- system = System(cpu = all_cpus, l1_ = all_l1s, l1bus_ = all_l1buses, physmem = SimpleMemory(), - membus = CoherentXBar(clock = busFrequency)) + membus = SystemXBar(clock = busFrequency)) system.clock = '1GHz' -system.toL2bus = CoherentXBar(clock = busFrequency) +system.toL2bus = L2XBar(clock = busFrequency) system.l2 = L2(size = options.l2size, assoc = 8) # ---------------------- diff -r 6a1f2d99bf79 -r 857838bcc36e configs/splash2/run.py --- a/configs/splash2/run.py Thu Feb 19 08:02:37 2015 +0000 +++ b/configs/splash2/run.py Thu Feb 19 08:02:47 2015 +0000 @@ -196,10 +196,10 @@ # Create a system, and add system wide objects # ---------------------- system = System(cpu = cpus, physmem = SimpleMemory(), - membus = CoherentXBar(clock = busFrequency)) + membus = SystemXBar(clock = busFrequency)) system.clock = '1GHz' -system.toL2bus = CoherentXBar(clock = busFrequency) 
+system.toL2bus = L2XBar(clock = busFrequency) system.l2 = L2(size = options.l2size, assoc = 8) # ---------------------- diff -r 6a1f2d99bf79 -r 857838bcc36e src/cpu/BaseCPU.py --- a/src/cpu/BaseCPU.py Thu Feb 19 08:02:37 2015 +0000 +++ b/src/cpu/BaseCPU.py Thu Feb 19 08:02:47 2015 +0000 @@ -47,7 +47,7 @@ from m5.params import * from m5.proxy import * -from XBar import CoherentXBar +from XBar import L2XBar from InstTracer import InstTracer from CPUTracers import ExeTracer from MemObject import MemObject @@ -285,10 +285,7 @@ def addTwoLevelCacheHierarchy(self, ic, dc, l2c, iwc = None, dwc = None): self.addPrivateSplitL1Caches(ic, dc, iwc, dwc) - # Set a width of 32 bytes (256-bits), which is four times that - # of the default bus. The clock of the CPU is inherited by - # default. - self.toL2Bus = CoherentXBar(width = 32) + self.toL2Bus = L2XBar() self.connectCachedPorts(self.toL2Bus) self.l2cache = l2c self.toL2Bus.master = self.l2cache.cpu_side diff -r 6a1f2d99bf79 -r 857838bcc36e src/mem/XBar.py --- a/src/mem/XBar.py Thu Feb 19 08:02:37 2015 +0000 +++ b/src/mem/XBar.py Thu Feb 19 08:02:47 2015 +0000 @@ -66,12 +66,12 @@ # is the latency involved once a decision is made to forward the # request. The response latency, is similar to the forward # latency, but for responses rather than requests. 
- frontend_latency = Param.Cycles(3, "Frontend latency") - forward_latency = Param.Cycles(4, "Forward latency") - response_latency = Param.Cycles(2, "Response latency") + frontend_latency = Param.Cycles("Frontend latency") + forward_latency = Param.Cycles("Forward latency") + response_latency = Param.Cycles("Response latency") # Width governing the throughput of the crossbar - width = Param.Unsigned(8, "Datapath width per port (bytes)") + width = Param.Unsigned("Datapath width per port (bytes)") # The default port can be left unconnected, or be used to connect # a default slave port @@ -95,7 +95,7 @@ # The coherent crossbar additionally has snoop responses that are # forwarded after a specific latency. - snoop_response_latency = Param.Cycles(4, "Snoop response latency") + snoop_response_latency = Param.Cycles("Snoop response latency") # An optional snoop filter snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter") @@ -111,3 +111,44 @@ lookup_latency = Param.Cycles(1, "Lookup latency") system = Param.System(Parent.any, "System that the crossbar belongs to.") + +# We use a coherent crossbar to connect multiple masters to the L2 +# caches. Normally this crossbar would be part of the cache itself. +class L2XBar(CoherentXBar): + # 256-bit crossbar by default + width = 32 + + # Assume that most of this is covered by the cache latencies, with + # no more than a single pipeline stage for any packet. + frontend_latency = 1 + forward_latency = 0 + response_latency = 1 + snoop_response_latency = 1 + +# One of the key coherent crossbar instances is the system +# interconnect, tying together the CPU clusters, GPUs, and any I/O +# coherent masters, and DRAM controllers. +class SystemXBar(CoherentXBar): + # 128-bit crossbar by default + width = 16 + + # A handful of pipeline stages for each portion of the latency + # contributions.
+ frontend_latency = 3 + forward_latency = 4 + response_latency = 2 + snoop_response_latency = 4 + +# In addition to the system interconnect, we typically also have one +# or more on-chip I/O crossbars. Note that at some point we might want +# to also define an off-chip I/O crossbar such as PCIe. +class IOXBar(NoncoherentXBar): + # 128-bit crossbar by default + width = 16 + + # Assume a simpler datapath than a coherent crossbar, incurring + # fewer pipeline stages for decision making and forwarding of + # requests. + frontend_latency = 2 + forward_latency = 1 + response_latency = 2 diff -r 6a1f2d99bf79 -r 857838bcc36e tests/configs/base_config.py --- a/tests/configs/base_config.py Thu Feb 19 08:02:37 2015 +0000 +++ b/tests/configs/base_config.py Thu Feb 19 08:02:47 2015 +0000 @@ -104,7 +104,7 @@ Returns: A bus that CPUs should use to connect to the shared cache. """ - system.toL2Bus = CoherentXBar(clk_domain=system.cpu_clk_domain) + system.toL2Bus = L2XBar(clk_domain=system.cpu_clk_domain) system.l2c = L2Cache(clk_domain=system.cpu_clk_domain, size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master @@ -186,7 +186,7 @@ def create_system(self): system = System(physmem = self.mem_class(), - membus = CoherentXBar(), + membus = SystemXBar(), mem_mode = self.mem_mode) system.system_port = system.membus.slave system.physmem.port = system.membus.master diff -r 6a1f2d99bf79 -r 857838bcc36e tests/configs/memtest-filter.py --- a/tests/configs/memtest-filter.py Thu Feb 19 08:02:37 2015 +0000 +++ b/tests/configs/memtest-filter.py Thu Feb 19 08:02:47 2015 +0000 @@ -38,7 +38,7 @@ # system simulated system = System(cpu = cpus, physmem = SimpleMemory(), - membus = CoherentXBar(width=16, snoop_filter = SnoopFilter())) + membus = SystemXBar(width=16, snoop_filter = SnoopFilter())) # Dummy voltage domain for all our clock domains system.voltage_domain = VoltageDomain() system.clk_domain = SrcClockDomain(clock = '1GHz', @@ -49,8 +49,8 @@ system.cpu_clk_domain =
SrcClockDomain(clock = '2GHz', voltage_domain = system.voltage_domain) -system.toL2Bus = CoherentXBar(clk_domain = system.cpu_clk_domain, width=16, - snoop_filter = SnoopFilter()) +system.toL2Bus = L2XBar(clk_domain = system.cpu_clk_domain, + snoop_filter = SnoopFilter()) system.l2c = L2Cache(clk_domain = system.cpu_clk_domain, size='64kB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master diff -r 6a1f2d99bf79 -r 857838bcc36e tests/configs/memtest.py --- a/tests/configs/memtest.py Thu Feb 19 08:02:37 2015 +0000 +++ b/tests/configs/memtest.py Thu Feb 19 08:02:47 2015 +0000 @@ -38,7 +38,7 @@ # system simulated system = System(cpu = cpus, physmem = SimpleMemory(), - membus = CoherentXBar(width=16)) + membus = SystemXBar()) # Dummy voltage domain for all our clock domains system.voltage_domain = VoltageDomain() system.clk_domain = SrcClockDomain(clock = '1GHz', @@ -49,7 +49,7 @@ system.cpu_clk_domain = SrcClockDomain(clock = '2GHz', voltage_domain = system.voltage_domain) -system.toL2Bus = CoherentXBar(clk_domain = system.cpu_clk_domain, width=16) +system.toL2Bus = L2XBar(clk_domain = system.cpu_clk_domain) system.l2c = L2Cache(clk_domain = system.cpu_clk_domain, size='64kB', assoc=8) system.l2c.cpu_side = system.toL2Bus.master diff -r 6a1f2d99bf79 -r 857838bcc36e tests/configs/o3-timing-mp-ruby.py --- a/tests/configs/o3-timing-mp-ruby.py Thu Feb 19 08:02:37 2015 +0000 +++ b/tests/configs/o3-timing-mp-ruby.py Thu Feb 19 08:02:47 2015 +0000 @@ -38,7 +38,7 @@ ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores) # system simulated -system = System(cpu = cpus, physmem = ruby_memory, membus = CoherentXBar(), +system = System(cpu = cpus, physmem = ruby_memory, membus = SystemXBar(), mem_mode = "timing", clk_domain = SrcClockDomain(clock = '1GHz')) diff -r 6a1f2d99bf79 -r 857838bcc36e tests/configs/o3-timing-ruby.py --- a/tests/configs/o3-timing-ruby.py Thu Feb 19 08:02:37 2015 +0000 +++ b/tests/configs/o3-timing-ruby.py Thu Feb 19 08:02:47 2015 
+0000 @@ -39,7 +39,7 @@ system = System(cpu = cpu, physmem = ruby_memory, - membus = CoherentXBar(), + membus = SystemXBar(), mem_mode = "timing", clk_domain = SrcClockDomain(clock = '1GHz')) diff -r 6a1f2d99bf79 -r 857838bcc36e tests/configs/simple-atomic-mp-ruby.py --- a/tests/configs/simple-atomic-mp-ruby.py Thu Feb 19 08:02:37 2015 +0000 +++ b/tests/configs/simple-atomic-mp-ruby.py Thu Feb 19 08:02:47 2015 +0000 @@ -38,7 +38,7 @@ ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores) # system simulated -system = System(cpu = cpus, physmem = ruby_memory, membus = CoherentXBar(), +system = System(cpu = cpus, physmem = ruby_memory, membus = SystemXBar(), clk_domain = SrcClockDomain(clock = '1GHz')) # Create a seperate clock domain for components that should run at diff -r 6a1f2d99bf79 -r 857838bcc36e tests/configs/tgen-dram-ctrl.py --- a/tests/configs/tgen-dram-ctrl.py Thu Feb 19 08:02:37 2015 +0000 +++ b/tests/configs/tgen-dram-ctrl.py Thu Feb 19 08:02:47 2015 +0000 @@ -49,7 +49,7 @@ # system simulated system = System(cpu = cpu, physmem = DDR3_1600_x64(), - membus = NoncoherentXBar(width = 16), + membus = IOXBar(width = 16), clk_domain = SrcClockDomain(clock = '1GHz', voltage_domain = VoltageDomain())) diff -r 6a1f2d99bf79 -r 857838bcc36e tests/configs/tgen-simple-mem.py --- a/tests/configs/tgen-simple-mem.py Thu Feb 19 08:02:37 2015 +0000 +++ b/tests/configs/tgen-simple-mem.py Thu Feb 19 08:02:47 2015 +0000 @@ -49,7 +49,7 @@ # system simulated system = System(cpu = cpu, physmem = SimpleMemory(), - membus = NoncoherentXBar(width = 16), + membus = IOXBar(width = 16), clk_domain = SrcClockDomain(clock = '1GHz', voltage_domain = VoltageDomain()))