diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh
--- a/src/cpu/kvm/base.hh
+++ b/src/cpu/kvm/base.hh
@@ -44,6 +44,7 @@
 #include <csignal>
 #include <memory>
+#include <stack>
 
 #include "base/statistics.hh"
 #include "cpu/kvm/perfevent.hh"
@@ -94,8 +95,6 @@
     void switchOut() override;
     void takeOverFrom(BaseCPU *cpu) override;
 
-    void verifyMemoryMode() const override;
-
     MasterPort &getDataPort() override { return dataPort; }
     MasterPort &getInstPort() override { return instPort; }
@@ -110,6 +109,12 @@
     Counter totalInsts() const override;
     Counter totalOps() const override;
 
+    /**
+     * Callback from KVMCpuPort to transition the CPU out of
+     * RunningMMIOPending when all timing requests have completed.
+     */
+    void finishMMIOPending();
+
     /** Dump the internal state to the terminal. */
     virtual void dump() const;
@@ -152,6 +157,7 @@
      *     Running;
      *     RunningService;
      *     RunningServiceCompletion;
+     *     RunningMMIOPending;
      *
      *     Idle -> Idle;
      *     Idle -> Running [label="activateContext()", URL="\ref activateContext"];
@@ -160,7 +166,9 @@
      *     Running -> Idle [label="suspendContext()", URL="\ref suspendContext"];
      *     Running -> Idle [label="drain()", URL="\ref drain"];
      *     Idle -> Running [label="drainResume()", URL="\ref drainResume"];
-     *     RunningService -> RunningServiceCompletion [label="handleKvmExit()", URL="\ref handleKvmExit"];
+     *     RunningService -> RunningServiceCompletion [label="doMMIOAccess()", URL="\ref doMMIOAccess"];
+     *     RunningService -> RunningMMIOPending [label="doMMIOAccess()", URL="\ref doMMIOAccess"];
+     *     RunningMMIOPending -> RunningServiceCompletion [label="finishMMIOPending()", URL="\ref finishMMIOPending"];
      *     RunningServiceCompletion -> Running [label="tick()", URL="\ref tick"];
      *     RunningServiceCompletion -> RunningService [label="tick()", URL="\ref tick"];
      * }
@@ -187,15 +195,25 @@
      *
      * The virtual machine has exited and requires service, tick()
      * will call handleKvmExit() on the next cycle. The next state
-     * after running service is determined in handleKvmExit() and
-     * depends on what kind of service the guest requested:
+     * after running service is determined in handleKvmExit() or more
+     * specific handlers and depends on what kind of service the guest
+     * requested:
      *
      * <ul>
-     *   <li>IO/MMIO: RunningServiceCompletion
+     *   <li>IO/MMIO (Atomic): RunningServiceCompletion
+     *   <li>IO/MMIO (Timing): RunningMMIOPending
      *   <li>Halt: Idle
      *   <li>Others: Running
      * </ul>
      */
     RunningService,
 
+    /** Timing MMIO request in flight or stalled.
+     *
+     * The VM has requested IO/MMIO and we are in timing mode. A timing
+     * request is either stalled (and will be retried with recvReqRetry())
+     * or it is in flight. After the timing request is complete, the CPU
+     * will transition to the RunningServiceCompletion state.
+     */
+    RunningMMIOPending,
+
     /** Service completion in progress.
      *
      * The VM has requested service that requires KVM to be
@@ -543,28 +561,39 @@
 
     /**
-     * KVM memory port. Uses the default MasterPort behavior, but
-     * panics on timing accesses.
+     * KVM memory port. Uses the default MasterPort behavior and provides
+     * an interface for KVM to transparently submit atomic or timing
+     * requests.
      */
     class KVMCpuPort : public MasterPort
     {
       public:
         KVMCpuPort(const std::string &_name, BaseKvmCPU *_cpu)
-            : MasterPort(_name, _cpu)
+            : MasterPort(_name, _cpu), cpu(_cpu), activeMMIOReqs(0)
         { }
 
+        /**
+         * Interface to send atomic or timing IO requests. Assumes that
+         * the pkt and corresponding req have been dynamically allocated
+         * and deletes them both if the system is in atomic mode.
+         */
+        Tick submitIO(PacketPtr pkt);
+
+        /** Returns the next valid state after one or more IO accesses. */
+        Status nextIOState() const;
+
       protected:
-        bool recvTimingResp(PacketPtr pkt)
-        {
-            panic("The KVM CPU doesn't expect recvTimingResp!\n");
-            return true;
-        }
+        /** KVM cpu pointer for the finishMMIOPending() callback */
+        BaseKvmCPU *cpu;
 
-        void recvReqRetry()
-        {
-            panic("The KVM CPU doesn't expect recvReqRetry!\n");
-        }
+        /** Pending MMIO packets */
+        std::stack<PacketPtr> pendingMMIOPkts;
+
+        /** Number of MMIO requests in flight */
+        int activeMMIOReqs;
+
+        bool recvTimingResp(PacketPtr pkt) override;
+
+        void recvReqRetry() override;
     };
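The port interface declared above encodes the central rule of this patch: the CPU may only re-enter KVM once no MMIO request is stalled or in flight, and nextIOState() picks the successor state accordingly. As a reviewer aid, the rule distills into the following standalone sketch; StateModel and the simplified PacketPtr/Status types are illustrative stand-ins, not gem5 classes:

    #include <cassert>
    #include <stack>

    using PacketPtr = int *;  // stand-in for gem5's Packet pointer
    enum Status { RunningServiceCompletion, RunningMMIOPending };

    struct StateModel {
        std::stack<PacketPtr> pendingMMIOPkts;  // stalled, waiting for retry
        int activeMMIOReqs = 0;                 // accepted, response pending

        // Same rule as KVMCpuPort::nextIOState(): stay in
        // RunningMMIOPending while any request is stalled or in flight.
        Status nextIOState() const {
            return (activeMMIOReqs || !pendingMMIOPkts.empty())
                ? RunningMMIOPending : RunningServiceCompletion;
        }
    };

    int main() {
        StateModel port;
        assert(port.nextIOState() == RunningServiceCompletion);

        port.activeMMIOReqs++;              // a timing request was accepted
        assert(port.nextIOState() == RunningMMIOPending);

        port.activeMMIOReqs--;              // its response arrived
        assert(port.nextIOState() == RunningServiceCompletion);
        return 0;
    }

In atomic mode submitIO() completes the access synchronously and never touches either counter, so nextIOState() always yields RunningServiceCompletion and the pre-patch behavior is unchanged.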
# Node ID 7e03a1d08a414818c3454db3afcb6c5cc099999f
# Parent  d1e2ec14ab0c761a839ee25c5c96000390430e1e
diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc
--- a/src/cpu/kvm/base.cc
+++ b/src/cpu/kvm/base.cc
@@ -170,6 +170,77 @@
     schedule(startupEvent, curTick());
 }
 
+BaseKvmCPU::Status
+BaseKvmCPU::KVMCpuPort::nextIOState() const
+{
+    return (activeMMIOReqs || pendingMMIOPkts.size())
+        ? RunningMMIOPending : RunningServiceCompletion;
+}
+
+Tick
+BaseKvmCPU::KVMCpuPort::submitIO(PacketPtr pkt)
+{
+    if (cpu->system->isAtomicMode()) {
+        Tick delay = sendAtomic(pkt);
+        delete pkt->req;
+        delete pkt;
+        return delay;
+    } else {
+        if (sendTimingReq(pkt)) {
+            activeMMIOReqs++;
+        } else {
+            pendingMMIOPkts.push(pkt);
+        }
+        // Return value is irrelevant for timing-mode accesses.
+        return 0;
+    }
+}
+
+bool
+BaseKvmCPU::KVMCpuPort::recvTimingResp(PacketPtr pkt)
+{
+    DPRINTF(KvmIO, "KVM: Finished timing request\n");
+
+    delete pkt->req;
+    delete pkt;
+    activeMMIOReqs--;
+
+    // If there are any pending packets, try to resubmit them now.
+    if (pendingMMIOPkts.size())
+        recvReqRetry();
+
+    // We can switch back into KVM when all pending and in-flight MMIO
+    // operations have completed.
+    if (!(activeMMIOReqs || pendingMMIOPkts.size())) {
+        DPRINTF(KvmIO, "KVM: Finished all outstanding timing requests\n");
+        cpu->finishMMIOPending();
+    }
+    return true;
+}
+
+void
+BaseKvmCPU::KVMCpuPort::recvReqRetry()
+{
+    DPRINTF(KvmIO, "KVM: Retry for timing request\n");
+
+    assert(pendingMMIOPkts.size());
+
+    while (pendingMMIOPkts.size() && sendTimingReq(pendingMMIOPkts.top())) {
+        pendingMMIOPkts.pop();
+        activeMMIOReqs++;
+    }
+}
+
+void
+BaseKvmCPU::finishMMIOPending()
+{
+    assert(_status == RunningMMIOPending);
+    assert(!tickEvent.scheduled());
+
+    _status = RunningServiceCompletion;
+    schedule(tickEvent, nextCycle());
+}
+
 void
 BaseKvmCPU::startupThread()
 {
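The retry logic above is the subtle part of the new port: recvReqRetry() resubmits stalled packets until the peer refuses one again, and only an accepted packet is popped and promoted to in flight. A minimal standalone model of that loop, where sendTimingReq() is a hypothetical stand-in that accepts a fixed number of requests rather than the real MasterPort method:

    #include <cassert>
    #include <stack>

    using PacketPtr = int *;  // stand-in for gem5's Packet pointer

    struct RetryModel {
        std::stack<PacketPtr> pendingMMIOPkts;
        int activeMMIOReqs = 0;
        int acceptBudget = 0;               // sends the peer accepts now

        bool sendTimingReq(PacketPtr) { return acceptBudget-- > 0; }

        // Same loop as KVMCpuPort::recvReqRetry(): resubmit stalled
        // packets until one is refused again; only accepted packets are
        // popped and counted as in flight.
        void recvReqRetry() {
            assert(!pendingMMIOPkts.empty());
            while (!pendingMMIOPkts.empty() &&
                   sendTimingReq(pendingMMIOPkts.top())) {
                pendingMMIOPkts.pop();
                activeMMIOReqs++;
            }
        }
    };

    int main() {
        RetryModel m;
        int a, b;
        m.pendingMMIOPkts.push(&a);
        m.pendingMMIOPkts.push(&b);

        m.acceptBudget = 1;                 // the peer accepts only one
        m.recvReqRetry();
        assert(m.activeMMIOReqs == 1);
        assert(m.pendingMMIOPkts.size() == 1);  // refused packet retained
        return 0;
    }

Because a refused packet is left on top of pendingMMIOPkts, the next recvReqRetry() resumes exactly where this one stopped.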
@@ -329,6 +400,12 @@
                 "requesting drain.\n");
         return DrainState::Draining;
 
+      case RunningMMIOPending:
+        // We need to drain since there are in-flight timing accesses
+        DPRINTF(Drain, "KVM CPU is waiting for timing accesses to "
+                "complete, requesting drain.\n");
+        return DrainState::Draining;
+
       case RunningService:
         // We need to drain since the CPU is waiting for service (e.g., MMIOs)
         DPRINTF(Drain, "KVM CPU is waiting for service, requesting drain.\n");
@@ -351,7 +428,6 @@
         return;
 
     DPRINTF(Kvm, "drainResume\n");
-    verifyMemoryMode();
 
     // The tick event is de-scheduled as a part of the draining
     // process. Re-schedule it if the thread context is active.
@@ -423,15 +499,6 @@
 }
 
 void
-BaseKvmCPU::verifyMemoryMode() const
-{
-    if (!(system->isAtomicMode() && system->bypassCaches())) {
-        fatal("The KVM-based CPUs requires the memory system to be in the "
-              "'atomic_noncaching' mode.\n");
-    }
-}
-
-void
 BaseKvmCPU::wakeup(ThreadID tid)
 {
     DPRINTF(Kvm, "wakeup()\n");
@@ -536,7 +603,7 @@
 BaseKvmCPU::tick()
 {
     Tick delay(0);
-    assert(_status != Idle);
+    assert(_status != Idle && _status != RunningMMIOPending);
 
     switch (_status) {
       case RunningService:
@@ -620,7 +687,7 @@
     }
 
     // Schedule a new tick if we are still running
-    if (_status != Idle)
+    if (_status != Idle && _status != RunningMMIOPending)
         schedule(tickEvent, clockEdge(ticksToCycles(delay)));
 }
 
@@ -629,8 +696,9 @@
 {
     // By default, the only thing we need to drain is a pending IO
     // operation which assumes that we are in the
-    // RunningServiceCompletion state.
-    assert(_status == RunningServiceCompletion);
+    // RunningServiceCompletion or RunningMMIOPending state.
+    assert(_status == RunningServiceCompletion ||
+           _status == RunningMMIOPending);
 
     // Deliver the data from the pending IO operation and immediately
     // exit.
@@ -922,7 +990,6 @@
         return handleKvmExitException();
 
       case KVM_EXIT_IO:
-        _status = RunningServiceCompletion;
         ++numIO;
         return handleKvmExitIO();
@@ -942,7 +1009,6 @@
         return 0;
 
      case KVM_EXIT_MMIO:
-        _status = RunningServiceCompletion;
         /* Service memory mapped IO requests */
         DPRINTF(KvmIO, "KVM: Handling MMIO (w: %u, addr: 0x%x, len: %u)\n",
                 _kvmRun->mmio.is_write,
@@ -1025,40 +1091,46 @@
 {
     ThreadContext *tc(thread->getTC());
     syncThreadContext();
+    Tick delay(0);
 
-    Request mmio_req(paddr, size, Request::UNCACHEABLE, dataMasterId());
-    mmio_req.setContext(tc->contextId());
+    RequestPtr mmio_req = new Request(paddr, size, Request::UNCACHEABLE,
+                                      dataMasterId());
+    mmio_req->setContext(tc->contextId());
 
     // Some architectures do need to massage physical addresses a bit
     // before they are inserted into the memory system. This enables
     // APIC accesses on x86 and m5ops where supported through a MMIO
     // interface.
     BaseTLB::Mode tlb_mode(write ?
                            BaseTLB::Write : BaseTLB::Read);
-    Fault fault(tc->getDTBPtr()->finalizePhysical(&mmio_req, tc, tlb_mode));
+    Fault fault(tc->getDTBPtr()->finalizePhysical(mmio_req, tc, tlb_mode));
     if (fault != NoFault)
         warn("Finalization of MMIO address failed: %s\n", fault->name());
 
     const MemCmd cmd(write ? MemCmd::WriteReq : MemCmd::ReadReq);
-    Packet pkt(&mmio_req, cmd);
-    pkt.dataStatic(data);
+    PacketPtr pkt = new Packet(mmio_req, cmd);
+    pkt->dataStatic(data);
 
-    if (mmio_req.isMmappedIpr()) {
+    if (mmio_req->isMmappedIpr()) {
         // We currently assume that there is no need to migrate to a
         // different event queue when doing IPRs. Currently, IPRs are
         // only used for m5ops, so it should be a valid assumption.
         const Cycles ipr_delay(write ?
-                             TheISA::handleIprWrite(tc, &pkt) :
-                             TheISA::handleIprRead(tc, &pkt));
+                             TheISA::handleIprWrite(tc, pkt) :
+                             TheISA::handleIprRead(tc, pkt));
         threadContextDirty = true;
-        return clockPeriod() * ipr_delay;
+        delete pkt->req;
+        delete pkt;
+        delay = clockPeriod() * ipr_delay;
     } else {
         // Temporarily lock and migrate to the event queue of the
         // VM. This queue is assumed to "own" all devices we need to
         // access if running in multi-core mode.
         EventQueue::ScopedMigration migrate(vm.eventQueue());
 
-        return dataPort.sendAtomic(&pkt);
+        delay = dataPort.submitIO(pkt);
     }
+
+    _status = dataPort.nextIOState();
+    return delay;
 }
 
 void
diff --git a/src/cpu/kvm/x86_cpu.cc b/src/cpu/kvm/x86_cpu.cc
--- a/src/cpu/kvm/x86_cpu.cc
+++ b/src/cpu/kvm/x86_cpu.cc
@@ -1344,24 +1344,24 @@
         pAddr = X86ISA::x86IOAddress(port);
     }
 
-    Request io_req(pAddr, kvm_run.io.size, Request::UNCACHEABLE,
-                   dataMasterId());
-    io_req.setContext(tc->contextId());
-
     const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq);
     // Temporarily lock and migrate to the event queue of the
     // VM. This queue is assumed to "own" all devices we need to
     // access if running in multi-core mode.
     EventQueue::ScopedMigration migrate(vm.eventQueue());
 
     for (int i = 0; i < count; ++i) {
-        Packet pkt(&io_req, cmd);
+        RequestPtr io_req = new Request(pAddr, kvm_run.io.size,
+                                        Request::UNCACHEABLE, dataMasterId());
+        io_req->setContext(tc->contextId());
 
-        pkt.dataStatic(guestData);
-        delay += dataPort.sendAtomic(&pkt);
+        PacketPtr pkt = new Packet(io_req, cmd);
+
+        pkt->dataStatic(guestData);
+        delay += dataPort.submitIO(pkt);
 
         guestData += kvm_run.io.size;
     }
-
+
+    _status = dataPort.nextIOState();
     return delay;
 }
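A note on the allocation changes in doMMIOAccess() and handleKvmExitIO(): once accesses can be timing, a packet handed to submitIO() may outlive the caller's stack frame, so each Request/Packet pair is now heap-allocated and freed by whichever side completes it (submitIO() itself in atomic mode, recvTimingResp() in timing mode). This is also why the x86 IO loop now builds a fresh Request per iteration instead of reusing one stack-allocated request for every packet. A compact sketch of that ownership rule, using simplified stand-in types rather than the gem5 classes:

    #include <cassert>
    #include <vector>

    struct Packet { int data; };

    struct OwnershipModel {
        bool atomicMode;
        std::vector<Packet *> inFlight;    // timing responses arrive later

        long long submitIO(Packet *pkt) {
            if (atomicMode) {
                long long delay = 1000;    // placeholder latency
                delete pkt;                // completed: free immediately
                return delay;
            }
            inFlight.push_back(pkt);       // must outlive caller's frame
            return 0;
        }

        void deliverResponses() {          // stands in for recvTimingResp()
            for (Packet *pkt : inFlight)
                delete pkt;
            inFlight.clear();
        }
    };

    int main() {
        OwnershipModel atomic{true, {}};
        atomic.submitIO(new Packet{1});    // freed inside submitIO()

        OwnershipModel timing{false, {}};
        timing.submitIO(new Packet{2});    // the port now owns the packet
        assert(timing.inFlight.size() == 1);
        timing.deliverResponses();         // freed when the response arrives
        return 0;
    }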