diff --git a/configs/common/HMC.py b/configs/common/HMC.py --- a/configs/common/HMC.py +++ b/configs/common/HMC.py @@ -37,6 +37,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Authors: Erfan Azarkhish +# Abdul Mutaal Ahmad # A Simplified model of a complete HMC device. Based on: # [1] http://www.hybridmemorycube.org/specification-download/ @@ -48,6 +49,10 @@ # (G. Kim et. al) # [5] Near Data Processing, Are we there yet? (M. Gokhale) # http://www.cs.utah.edu/wondp/gokhale.pdf +# [6] openHMC - A Configurable Open-Source Hybrid Memory Cube Controller +# (J. Schmidt) +# [7] Hybrid Memory Cube performance characterization on data-centric +# workloads (M. Gokhale) # # This script builds a complete HMC device composed of vault controllers, # serial links, the main internal crossbar, and an external hmc controller. @@ -60,30 +65,68 @@ # This component is simply an instance of the NoncoherentXBar class, and its # parameters are tuned to [2]. # -# - SERIAL LINKS: +# - SERIAL LINKS CONTROLLER: # SerialLink is a simple variation of the Bridge class, with the ability to -# account for the latency of packet serialization. We assume that the -# serializer component at the transmitter side does not need to receive the -# whole packet to start the serialization. But the deserializer waits for -# the complete packet to check its integrity first. +# account for the latency of packet serialization and controller latency. We +# assume that the serializer component at the transmitter side does not need +# to receive the whole packet to start the serialization. But the +# deserializer waits for the complete packet to check its integrity first. +# # * Bandwidth of the serial links is not modeled in the SerialLink component -# itself. Instead bandwidth/port of the HMCController has been adjusted to -# reflect the bandwidth delivered by 1 serial link. +# itself. 
# -# - HMC CONTROLLER: -# Contains a large buffer (modeled with Bridge) to hide the access latency -# of the memory cube. Plus it simply forwards the packets to the serial -# links in a round-robin fashion to balance load among them. +# * Latency of serial link controller is composed of SerDes latency + link +# controller latency +# # * It is inferred from the standard [1] and the literature [3] that serial # links share the same address range and packets can travel over any of # them so a load distribution mechanism is required among them. +# +# ----------------------------------------- +# | Host/HMC Controller | +# | ---------------------- | +# | | Link Aggregator | opt | +# | ---------------------- | +# | ---------------------- | +# | | Serial Link + Ser | * 4 | +# | ---------------------- | +# |--------------------------------------- +# ----------------------------------------- +# | Device +# | ---------------------- | +# | | Xbar | * 4 | +# | ---------------------- | +# | ---------------------- | +# | | Vault Controller | * 16 | +# | ---------------------- | +# | ---------------------- | +# | | Memory | | +# | ---------------------- | +# |---------------------------------------| +# +# In this version we have presented 3 different HMC architectures along +# with their corresponding test scripts. +# +# ARCH-0: It has 4 crossbars in HMC memory. All the crossbars are connected +# to each other, providing complete memory range. This architecture also covers +# the added latency for sending a request to non-local vault (bridge between +# crossbars). All the 4 serial links can access the complete memory. So each +# link can be connected to separate processor. +# +# ARCH-1: It has 4 crossbars inside the HMC. Crossbars are not connected. +# Through each crossbar only local vaults can be accessed. But to support +# this architecture we need a crossbar between serial links and processor. +# +# ARCH-2: This is a hybrid architecture. It has 4 crossbars inside the HMC. 
+# 2 Crossbars are connected to only local vaults. From other 2 crossbar, a +# request can be forwarded to any other vault. import optparse import m5 from m5.objects import * -# A single Hybrid Memory Cube (HMC) +# A modified single Hybrid Memory Cube (HMC) class HMCSystem(SubSystem): #*****************************CROSSBAR PARAMETERS************************* # Flit size of the main interconnect [1] @@ -107,131 +150,305 @@ # FIFOs at the input and output of the inteconnect) xbar_response_latency = Param.Cycles(2, "Response latency of the XBar") - #*****************************SERIAL LINK PARAMETERS********************** - # Number of serial links [1] - num_serial_links = Param.Unsigned(4, "Number of serial links") + # number of cross which connects 16 Vaults to serial link[7] + number_mem_crossbar = Param.Unsigned(4, "Number of crossbar in HMC" + ) + + #*****************************SERIAL LINK PARAMETERS*********************** + # Number of serial links controllers [1] + num_links_controllers = Param.Unsigned(4, "Number of serial links") # Number of packets (not flits) to store at the request side of the serial # link. This number should be adjusted to achive required bandwidth - link_buffer_size_req = Param.Unsigned(16, "Number of packets to buffer " + link_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer " "at the request side of the serial link") # Number of packets (not flits) to store at the response side of the serial # link. 
This number should be adjusted to achive required bandwidth - link_buffer_size_rsp = Param.Unsigned(16, "Number of packets to buffer " + link_buffer_size_rsp = Param.Unsigned(10, "Number of packets to buffer " "at the response side of the serial link") # Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus # the PCB trace latency (3ns Estimated based on [5]) link_latency = Param.Latency('4.6ns', "Latency of the serial links") - # Header overhead of the serial links: Header size is 128bits in HMC [1], - # and we have 16 lanes, so the overhead is 8 cycles - link_overhead = Param.Cycles(8, "The number of cycles required to" - " transmit the packet header over the serial link") - - # Clock frequency of the serial links [1] + # Clock frequency of the each serial link(SerDes) [1] link_frequency = Param.Frequency('10GHz', "Clock Frequency of the serial" "links") + # Clock frequency of serial link Controller[6] + # clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] / + # data_path_width * 10^6 + # clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz + link_controller_frequency = Param.Frequency('625MHz', + "Clock Frequency of the link controller") + + # Latency of the serial link controller to process the packets[1][6] + # (ClockDomain = 625 Mhz ) + # used here for calculations only + link_ctrl_latency = Param.Cycles(4, "The number of cycles required for the" + "controller to process the packet") + + # total_ctrl_latency = link_ctrl_latency + link_latency + # total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns + total_ctrl_latency = Param.Latency('11ns', "The latency experienced by" + "every packet regardless of size of packet") + # Number of parallel lanes in each serial link [1] - num_lanes_per_link = Param.Unsigned(16, "Number of lanes per each link") + num_lanes_per_link = Param.Unsigned( 16, "Number of lanes per each link") # Number of serial links [1] num_serial_links = Param.Unsigned(4, "Number of serial links") - #*****************************HMC 
CONTROLLER PARAMETERS******************* - # Number of packets (not flits) to store at the HMC controller. This - # number should be high enough to be able to hide the high latency of HMC - ctrl_buffer_size_req = Param.Unsigned(256, "Number of packets to buffer " - "at the HMC controller (request side)") + # speed of each lane of serial link - SerDes serial interface 10 Gb/s + serial_link_speed = Param.UInt64(10, "speed of each lane of" + "serial link") - # Number of packets (not flits) to store at the response side of the HMC - # controller. - ctrl_buffer_size_rsp = Param.Unsigned(256, "Number of packets to buffer " - "at the HMC controller (response side)") + # width of each lane of serial link - SerDes parallel interface + width_lane = Param.Unsigned(16, "Width of each lane of serial link") - # Latency of the HMC controller to process the packets - # (ClockDomain = Host clock domain) - ctrl_latency = Param.Cycles(4, "The number of cycles required for the " - " controller to process the packet") - - # Wiring latency from the SoC crossbar to the HMC controller - ctrl_static_latency = Param.Latency('500ps', "Static latency of the HMC" - "controller") - - #*****************************PERFORMANCE MONITORING********************** + #*****************************PERFORMANCE MONITORING************************ # The main monitor behind the HMC Controller - enable_global_monitor = Param.Bool(True, "The main monitor behind the " + enable_global_monitor = Param.Bool(False, "The main monitor behind the " "HMC Controller") # The link performance monitors - enable_link_monitor = Param.Bool(True, "The link monitors") + enable_link_monitor = Param.Bool(False, "The link monitors" ) -# Create an HMC device and attach it to the current system -def config_hmc(options, system): + # link aggregator enable - put a cross between buffers & links + enable_link_aggr = Param.Bool(False, "The crossbar between port and " + "Link Controller") - system.hmc = HMCSystem() + enable_buff_div = 
Param.Bool(True, "Memory Range of Buffer is" + "divided between total range") - system.buffer = Bridge(ranges=system.mem_ranges, - req_size=system.hmc.ctrl_buffer_size_req, - resp_size=system.hmc.ctrl_buffer_size_rsp, - delay=system.hmc.ctrl_static_latency) +# configure host system with Serial Links +def config_host_hmc(options, system): + + system.hmc_host=HMCSystem() + try: - system.hmc.enable_global_monitor = options.enable_global_monitor + system.hmc_host.enable_global_monitor = options.enable_global_monitor except: pass; try: - system.hmc.enable_link_monitor = options.enable_link_monitor + system.hmc_host.enable_link_monitor = options.enable_link_monitor except: pass; - system.membus.master = system.buffer.slave + # Serial link Controller with 16 SerDes links at 10 Gbps + if options.arch ==1 : + system.hmc_host.seriallink = [SerialLink(ranges= options.ser_ranges[i], + req_size=system.hmc_host.link_buffer_size_req, + resp_size=system.hmc_host.link_buffer_size_rsp, + num_lanes=system.hmc_host.num_lanes_per_link, + link_speed=system.hmc_host.serial_link_speed, + delay=system.hmc_host.total_ctrl_latency) + for i in xrange(system.hmc_host.num_serial_links)] + if options.arch == 2: + system.hmc_host.seriallink = [SerialLink(ranges= options.ser_ranges[i], + req_size=system.hmc_host.link_buffer_size_req, + resp_size=system.hmc_host.link_buffer_size_rsp, + num_lanes=system.hmc_host.num_lanes_per_link, + link_speed=system.hmc_host.serial_link_speed, + delay=system.hmc_host.total_ctrl_latency) + for i in xrange(system.hmc_host.num_serial_links)] + if options.arch == 0 : + system.hmc_host.seriallink =[ SerialLink(ranges = system.mem_ranges, + req_size=system.hmc_host.link_buffer_size_req, + resp_size=system.hmc_host.link_buffer_size_rsp, + num_lanes=system.hmc_host.num_lanes_per_link, + link_speed=system.hmc_host.serial_link_speed, + delay=system.hmc_host.total_ctrl_latency) + for i in xrange(system.hmc_host.num_serial_links)] - # The HMC controller (Clock domain is the 
same as the host) - system.hmccontroller = HMCController(width=(system.hmc.num_lanes_per_link. - value * system.hmc.num_serial_links/8), - frontend_latency=system.hmc.ctrl_latency, - forward_latency=system.hmc.link_overhead, - response_latency=system.hmc.link_overhead) + # enable global monitor + if system.hmc_host.enable_global_monitor: + system.hmc_host.lmonitor = [ CommMonitor() + for i in xrange(system.hmc_host.num_serial_links)] - system.hmccontroller.clk_domain = SrcClockDomain(clock=system.hmc. - link_frequency, voltage_domain = VoltageDomain(voltage = '1V')) + # set the clock frequency for serial link + for i in xrange(system.hmc_host.num_serial_links): + system.hmc_host.seriallink[i].clk_domain = SrcClockDomain(clock=system. + hmc_host.link_controller_frequency, voltage_domain = + VoltageDomain(voltage = '1V')) - # Serial Links - system.hmc.seriallink =[ SerialLink(ranges = system.mem_ranges, - req_size=system.hmc.link_buffer_size_req, - resp_size=system.hmc.link_buffer_size_rsp, - num_lanes=system.hmc.num_lanes_per_link, - delay=system.hmc.link_latency) - for i in xrange(system.hmc.num_serial_links)] + # Connect membus/traffic gen to Serial Link Controller for differrent HMC + # architectures + if options.arch==1: + for i in xrange(system.hmc_host.num_links_controllers): + if system.hmc_host.enable_global_monitor: + system.membus.master = system.hmc_host.lmonitor[i].slave + system.hmc_host.lmonitor[i].master = system.hmc_host.seriallink[i].slave + else: + system.membus.master = system.hmc_host.seriallink[i].slave + if options.arch==2: + if system.hmc_host.enable_global_monitor: + system.membus.master = system.hmc_host.lmonitor[0].slave + system.hmc_host.lmonitor[0].master = system.hmc_host.seriallink[0].slave - if system.hmc.enable_link_monitor: - system.hmc.lmonitor = [ CommMonitor() - for i in xrange(system.hmc.num_serial_links)] + system.membus.master = system.hmc_host.lmonitor[1].slave + system.hmc_host.lmonitor[1].master = 
system.hmc_host.seriallink[1].slave - # The HMC Crossbar located in its logic-base (LoB) - system.hmc.xbar = NoncoherentXBar(width = system.hmc.xbar_width, - frontend_latency=system.hmc.xbar_frontend_latency, - forward_latency=system.hmc.xbar_forward_latency, - response_latency=system.hmc.xbar_response_latency ) - system.hmc.xbar.clk_domain = SrcClockDomain(clock = - system.hmc.xbar_frequency, voltage_domain = - VoltageDomain(voltage = '1V')) + system.tgen[2].port = system.hmc_host.lmonitor[2].slave + system.hmc_host.lmonitor[2].master = system.hmc_host.seriallink[2].slave - if system.hmc.enable_global_monitor: - system.gmonitor = CommMonitor() - system.buffer.master = system.gmonitor.slave - system.gmonitor.master = system.hmccontroller.slave - else: - system.hmccontroller.slave = system.buffer.master + system.tgen[3].port = system.hmc_host.lmonitor[3].slave + system.hmc_host.lmonitor[3].master = system.hmc_host.seriallink[3].slave + else: + system.membus.master = system.hmc_host.seriallink[0].slave + system.membus.master = system.hmc_host.seriallink[1].slave + system.tgen[2].port = system.hmc_host.seriallink[2].slave + system.tgen[3].port = system.hmc_host.seriallink[3].slave + if options.arch==0 : + for i in xrange(system.hmc_host.num_links_controllers): + if system.hmc_host.enable_global_monitor: + system.tgen[i].port = system.hmc_host.lmonitor[i].slave + system.hmc_host.lmonitor[i].master = system.hmc_host.seriallink[i].slave + else: + system.tgen[i].port = system.hmc_host.seriallink[i].slave - for i in xrange(system.hmc.num_serial_links): - system.hmccontroller.master = system.hmc.seriallink[i].slave - system.hmc.seriallink[i].clk_domain = system.hmccontroller.clk_domain; - if system.hmc.enable_link_monitor: - system.hmc.seriallink[i].master = system.hmc.lmonitor[i].slave - system.hmc.lmonitor[i].master = system.hmc.xbar.slave - else: - system.hmc.seriallink[i].master = system.hmc.xbar.slave + return system + +# Create an HMC device and attach it to the 
current system +def config_hmc(options, system, hmc_host): + + # Create HMC device + system.hmc_dev=HMCSystem() + + # Global monitor + try: + system.hmc_dev.enable_global_monitor = options.enable_global_monitor + except: + pass; + + try: + system.hmc_dev.enable_link_monitor = options.enable_link_monitor + except: + pass; + + + if system.hmc_dev.enable_link_monitor: + system.hmc_dev.lmonitor = [ CommMonitor() + for i in xrange(system.hmc_dev.num_links_controllers)] + + # 4 HMC Crossbars located in its logic-base (LoB) + system.hmc_dev.xbar = [ NoncoherentXBar(width = system.hmc_dev.xbar_width, + frontend_latency=system.hmc_dev.xbar_frontend_latency, + forward_latency=system.hmc_dev.xbar_forward_latency, + response_latency=system.hmc_dev.xbar_response_latency ) + for i in xrange(system.hmc_host.number_mem_crossbar)] + + for i in xrange(system.hmc_dev.number_mem_crossbar): + system.hmc_dev.xbar[i].clk_domain = SrcClockDomain( + clock=system.hmc_dev.xbar_frequency,voltage_domain = + VoltageDomain(voltage = '1V')) + + # Attach serial link to crossbar/s + if options.num_cross ==4 : + for i in xrange(system.hmc_dev.num_serial_links): + if system.hmc_dev.enable_link_monitor: + system.hmc_host.seriallink[i].master = system.hmc_dev.lmonitor[i].slave + system.hmc_dev.lmonitor[i].master = system.hmc_dev.xbar[i].slave + else: + system.hmc_host.seriallink[i].master = system.hmc_dev.xbar[i].slave + else : + for i in xrange(system.hmc_dev.num_serial_links): + system.hmc_host.seriallink[i].master = system.hmc_dev.xbar[0].slave + + # necesarry to add system_port to one fo the xbar + if options.arch ==0 : + system.system_port = system.hmc_dev.xbar[3].slave + + # Connecting xbar with each other for request arriving at the wrong xbar, + # then it will be forward to correct xbar. 
Bridge is used to connect xbars + if options.arch == 0: + + system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12], + req_size=12, resp_size=12) + system.hmc_dev.xbar[3].master = system.hmc_dev.buffer32.slave + system.hmc_dev.buffer32.master = system.hmc_dev.xbar[2].slave + + system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8], + req_size=12, resp_size=12) + system.hmc_dev.xbar[3].master = system.hmc_dev.buffer31.slave + system.hmc_dev.buffer31.master = system.hmc_dev.xbar[1].slave + + system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4], + req_size=12, resp_size=12) + system.hmc_dev.xbar[3].master = system.hmc_dev.buffer30.slave + system.hmc_dev.buffer30.master = system.hmc_dev.xbar[0].slave + + system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16], + req_size=12, resp_size=12) + system.hmc_dev.xbar[2].master = system.hmc_dev.buffer23.slave + system.hmc_dev.buffer23.master = system.hmc_dev.xbar[3].slave + + system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8], + req_size=12, resp_size=12) + system.hmc_dev.xbar[2].master = system.hmc_dev.buffer21.slave + system.hmc_dev.buffer21.master = system.hmc_dev.xbar[1].slave + + system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4], + req_size=12, resp_size=12) + system.hmc_dev.xbar[2].master = system.hmc_dev.buffer20.slave + system.hmc_dev.buffer20.master = system.hmc_dev.xbar[0].slave + + system.hmc_dev.buffer13 = Bridge(ranges=system.mem_ranges[12:16], + req_size=12, resp_size=12) + system.hmc_dev.xbar[1].master = system.hmc_dev.buffer13.slave + system.hmc_dev.buffer13.master = system.hmc_dev.xbar[3].slave + + system.hmc_dev.buffer12 = Bridge(ranges=system.mem_ranges[8:12], + req_size=12, resp_size=12) + system.hmc_dev.xbar[1].master = system.hmc_dev.buffer12.slave + system.hmc_dev.buffer12.master = system.hmc_dev.xbar[2].slave + + system.hmc_dev.buffer10 = Bridge(ranges=system.mem_ranges[0:4], + req_size=12, resp_size=12) + system.hmc_dev.xbar[1].master = 
system.hmc_dev.buffer10.slave + system.hmc_dev.buffer10.master = system.hmc_dev.xbar[0].slave + + system.hmc_dev.buffer01 = Bridge(ranges=system.mem_ranges[4:8], + req_size=12, resp_size=12) + system.hmc_dev.xbar[0].master = system.hmc_dev.buffer01.slave + system.hmc_dev.buffer01.master = system.hmc_dev.xbar[1].slave + + system.hmc_dev.buffer02 = Bridge(ranges=system.mem_ranges[8:12], + req_size=12, resp_size=12) + system.hmc_dev.xbar[0].master = system.hmc_dev.buffer02.slave + system.hmc_dev.buffer02.master = system.hmc_dev.xbar[2].slave + + system.hmc_dev.buffer03 = Bridge(ranges=system.mem_ranges[12:16], + req_size=12, resp_size=12) + system.hmc_dev.xbar[0].master = system.hmc_dev.buffer03.slave + system.hmc_dev.buffer03.master = system.hmc_dev.xbar[3].slave + + if options.arch == 2: + + system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4]) + system.hmc_dev.xbar[3].master = system.hmc_dev.buffer30.slave + system.hmc_dev.buffer30.master = system.hmc_dev.xbar[0].slave + + system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8]) + system.hmc_dev.xbar[3].master = system.hmc_dev.buffer31.slave + system.hmc_dev.buffer31.master = system.hmc_dev.xbar[1].slave + + system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12]) + system.hmc_dev.xbar[3].master = system.hmc_dev.buffer32.slave + system.hmc_dev.buffer32.master = system.hmc_dev.xbar[2].slave + + + system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4]) + system.hmc_dev.xbar[2].master = system.hmc_dev.buffer20.slave + system.hmc_dev.buffer20.master = system.hmc_dev.xbar[0].slave + + system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8]) + system.hmc_dev.xbar[2].master = system.hmc_dev.buffer21.slave + system.hmc_dev.buffer21.master = system.hmc_dev.xbar[1].slave + + system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16]) + system.hmc_dev.xbar[2].master = system.hmc_dev.buffer23.slave + system.hmc_dev.buffer23.master = system.hmc_dev.xbar[3].slave + diff --git 
a/configs/common/MemConfig.py b/configs/common/MemConfig.py --- a/configs/common/MemConfig.py +++ b/configs/common/MemConfig.py @@ -153,9 +153,10 @@ """ if ( options.mem_type == "HMC_2500_x32"): - HMC.config_hmc(options, system) - subsystem = system.hmc - xbar = system.hmc.xbar + HMChost = HMC.config_host_hmc(options, system) + HMC.config_hmc(options, system, HMChost.hmc_host) + subsystem = system.hmc_dev + xbar = system.hmc_dev.xbar else: subsystem = system xbar = system.membus @@ -222,4 +223,7 @@ # Connect the controllers to the membus for i in xrange(len(subsystem.mem_ctrls)): - subsystem.mem_ctrls[i].port = xbar.master + if (options.mem_type == "HMC_2500_x32"): + subsystem.mem_ctrls[i].port = xbar[i/4].master + else: + subsystem.mem_ctrls[i].port = xbar.master diff --git a/src/mem/SerialLink.py b/src/mem/SerialLink.py --- a/src/mem/SerialLink.py +++ b/src/mem/SerialLink.py @@ -61,3 +61,5 @@ # link belongs to and the number of lanes: num_lanes = Param.Unsigned(1, "Number of parallel lanes inside the serial" "link. (aka. lane width)") + link_speed = Param.UInt64(1, "Speed of each parallel lane inside the" + "serial link. (aka. lane speed)") diff --git a/src/mem/serial_link.hh b/src/mem/serial_link.hh --- a/src/mem/serial_link.hh +++ b/src/mem/serial_link.hh @@ -312,6 +312,9 @@ /** Number of parallel lanes in this serial link */ unsigned num_lanes; + /** Speed of each link in this serial link */ + uint64_t link_speed; + public: virtual BaseMasterPort& getMasterPort(const std::string& if_name, diff --git a/src/mem/serial_link.cc b/src/mem/serial_link.cc --- a/src/mem/serial_link.cc +++ b/src/mem/serial_link.cc @@ -87,7 +87,9 @@ ticksToCycles(p->delay), p->resp_size, p->ranges), masterPort(p->name + ".master", *this, slavePort, ticksToCycles(p->delay), p->req_size), - num_lanes(p->num_lanes) + num_lanes(p->num_lanes), + link_speed(p->link_speed) + { } @@ -153,8 +155,9 @@ // have to wait to receive the whole packet. 
So we only account for the // deserialization latency. Cycles cycles = delay; - cycles += Cycles(divCeil(pkt->getSize() * 8, serial_link.num_lanes)); - Tick t = serial_link.clockEdge(cycles); + cycles += Cycles(divCeil(pkt->getSize() * 8, serial_link.num_lanes + * serial_link.link_speed)); + Tick t = serial_link.clockEdge(cycles); //@todo: If the processor sends two uncached requests towards HMC and the // second one is smaller than the first one. It may happen that the second @@ -211,11 +214,16 @@ // standard). But the deserializer waits for the complete packet // to check its integrity first. So everytime a packet crosses a // serial link, we should account for its deserialization latency - // only. + // only. Size of packet is increased by 16 Bytes to account header + // + tail overhead Cycles cycles = delay; + DPRINTF(SerialLink, "size of this packet %d\n", pkt->getSize()); + DPRINTF(SerialLink, "Initial delay for each packet: %d\n", delay); cycles += Cycles(divCeil(pkt->getSize() * 8, - serial_link.num_lanes)); + serial_link.num_lanes * serial_link.link_speed)); + DPRINTF(SerialLink, "Delay after serializations: %d\n", cycles); Tick t = serial_link.clockEdge(cycles); + DPRINTF(SerialLink, "Clock Tick now: %d\n", t); //@todo: If the processor sends two uncached requests towards HMC // and the second one is smaller than the first one. It may happen @@ -301,7 +309,7 @@ // Make sure bandwidth limitation is met Cycles cycles = Cycles(divCeil(pkt->getSize() * 8, - serial_link.num_lanes)); + serial_link.num_lanes * serial_link.link_speed)); Tick t = serial_link.clockEdge(cycles); serial_link.schedule(sendEvent, std::max(next_req.tick, t)); } @@ -346,7 +354,7 @@ // Make sure bandwidth limitation is met Cycles cycles = Cycles(divCeil(pkt->getSize() * 8, - serial_link.num_lanes)); + serial_link.num_lanes * serial_link.link_speed)); Tick t = serial_link.clockEdge(cycles); serial_link.schedule(sendEvent, std::max(next_resp.tick, t)); }