diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -176,7 +176,6 @@ if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) { - return true; } @@ -386,7 +385,7 @@ } bool locMemBusRdy = false; bool locMemIssueRdy = false; - if (type == I_SHARED) { + if (type == I_SHARED || type == I_FLAT) { for (int j=0; j < computeUnit->numLocMemUnits; ++j) { if (computeUnit->vrfToLocalMemPipeBus[j].prerdy()) locMemBusRdy = true; @@ -635,7 +634,6 @@ DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit->cu_id, simdId, wfSlotId, ii->disassemble()); - return 1; } # Node ID 5b712931394b0f5598fa47ba9894ae0a5c671268 # Parent 1ea74d06b65b494a652221072fe75503dca05803 diff --git a/src/gpu-compute/code_enums.hh b/src/gpu-compute/code_enums.hh --- a/src/gpu-compute/code_enums.hh +++ b/src/gpu-compute/code_enums.hh @@ -84,6 +84,7 @@ ||(a)==Enums::OT_PRIVATE_ATOMIC \ ||(a)==Enums::OT_SPILL_ATOMIC \ ||(a)==Enums::OT_READONLY_ATOMIC \ + ||(a)==Enums::OT_BOTH_MEMFENCE \ ||(a)==Enums::OT_FLAT_ATOMIC) #define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \ @@ -93,8 +94,7 @@ ||(a)==Enums::OT_BOTH_MEMFENCE) #define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \ - ||(a)==Enums::OT_SHARED_MEMFENCE \ - ||(a)==Enums::OT_BOTH_MEMFENCE) + ||(a)==Enums::OT_SHARED_MEMFENCE) #define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC) diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -599,8 +599,8 @@ ComputeUnit::init() { // Initialize CU Bus models - glbMemToVrfBus.init(&shader->tick_cnt, 1); - locMemToVrfBus.init(&shader->tick_cnt, 1); + glbMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1)); + locMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1)); nextGlbMemBus = 0; nextLocMemBus = 0; fatal_if(numGlbMemUnits > 1, @@ -608,7 +608,7 @@ vrfToGlobalMemPipeBus.resize(numGlbMemUnits); for (int j = 0; j < numGlbMemUnits; ++j) { vrfToGlobalMemPipeBus[j] = WaitClass(); - vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, 1); + vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1)); } fatal_if(numLocMemUnits > 1, @@ -616,7 +616,7 @@ vrfToLocalMemPipeBus.resize(numLocMemUnits); for (int j = 0; j < numLocMemUnits; ++j) { vrfToLocalMemPipeBus[j] = WaitClass(); - vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, 1); + vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1)); } vectorRegsReserved.resize(numSIMDs, 0); scalarRegsReserved.resize(numSIMDs, 0); @@ -625,12 +625,12 @@ for (int i = 0; i < numSIMDs + numLocMemUnits + numGlbMemUnits; ++i) { wfWait[i] = WaitClass(); - wfWait[i].init(&shader->tick_cnt, 1); + wfWait[i].init(&shader->tick_cnt, shader->ticks(1)); } for (int i = 0; i < numSIMDs; ++i) { aluPipe[i] = WaitClass(); - aluPipe[i].init(&shader->tick_cnt, 1); + aluPipe[i].init(&shader->tick_cnt, shader->ticks(1)); } // Setup space for call args