diff --git a/src/mem/ruby/network/simple/SimpleNetwork.hh b/src/mem/ruby/network/simple/SimpleNetwork.hh
--- a/src/mem/ruby/network/simple/SimpleNetwork.hh
+++ b/src/mem/ruby/network/simple/SimpleNetwork.hh
@@ -57,6 +57,7 @@
     int getEndpointBandwidth() { return m_endpoint_bandwidth; }
     bool getAdaptiveRouting() {return m_adaptive_routing; }
 
+    void regStats();
     void printStats(std::ostream& out) const;
     void clearStats();
     void printConfig(std::ostream& out) const;
diff --git a/src/mem/ruby/network/simple/SimpleNetwork.cc b/src/mem/ruby/network/simple/SimpleNetwork.cc
--- a/src/mem/ruby/network/simple/SimpleNetwork.cc
+++ b/src/mem/ruby/network/simple/SimpleNetwork.cc
@@ -249,6 +249,20 @@
 }
 
 void
+SimpleNetwork::regStats()
+{
+  // Ask every throttle of every switch to register its statistics.
+  for (int sw = 0; sw < m_switch_ptr_vector.size(); ++sw) {
+    const std::vector<Throttle*>& links =
+      *m_switch_ptr_vector[sw]->getThrottles();
+    std::vector<Throttle*>::const_iterator link = links.begin();
+    for (; link != links.end(); ++link) {
+      (*link)->regStats();
+    }
+  }
+}
+
+void
 SimpleNetwork::printStats(ostream& out) const
 {
     out << endl;
diff --git a/src/mem/ruby/network/simple/Throttle.hh b/src/mem/ruby/network/simple/Throttle.hh
--- a/src/mem/ruby/network/simple/Throttle.hh
+++ b/src/mem/ruby/network/simple/Throttle.hh
@@ -42,6 +42,7 @@
 #include <string>
 #include <vector>
 
+#include "base/statistics.hh"
 #include "mem/ruby/common/Consumer.hh"
 #include "mem/ruby/common/Global.hh"
 #include "mem/ruby/network/Network.hh"
@@ -66,6 +67,7 @@
         const std::vector<MessageBuffer*>& out_vec);
     void wakeup();
 
+    void regStats();
     void printStats(std::ostream& out) const;
     void clearStats();
     void printConfig(std::ostream& out) const;
@@ -81,7 +83,15 @@
     const std::vector<std::vector<int> >&
     getCounters() const
     {
-        return m_message_counters;
+      std::vector<std::vector<int> >* vec = new std::vector<std::vector<int> >;
+      vec->resize(m_message_counters.size());
+      for (int i = 0; i<m_message_counters.size(); i++) {
+        (*vec)[i].resize(m_message_counters[i].size());
+        for (int j = 0; j<m_message_counters[i].size(); j++) {
+          (*vec)[i][j] = m_message_counters[i][j]->value();
+        }
+      }
+      return *vec;
     }
 
     void clear();
@@ -100,7 +110,7 @@
 
     std::vector<MessageBuffer*> m_in;
     std::vector<MessageBuffer*> m_out;
-    std::vector<std::vector<int> > m_message_counters;
+    std::vector<std::vector<Stats::Scalar*> > m_message_counters;
     int m_vnets;
     std::vector<int> m_units_remaining;
     int m_sID;
@@ -112,7 +122,8 @@
 
     // For tracking utilization
     Time m_ruby_start;
-    double m_links_utilized;
+    Stats::Scalar m_links_utilized;
+    Stats::Formula m_links_utilization;
 };
 
 inline std::ostream&
diff --git a/src/mem/ruby/network/simple/Throttle.cc b/src/mem/ruby/network/simple/Throttle.cc
--- a/src/mem/ruby/network/simple/Throttle.cc
+++ b/src/mem/ruby/network/simple/Throttle.cc
@@ -28,6 +28,7 @@
 
 #include <cassert>
 
+#include "sim/stats.hh"
 #include "base/cprintf.hh"
 #include "debug/RubyNetwork.hh"
 #include "mem/protocol/Protocol.hh"
@@ -100,7 +101,7 @@
     for (int i = 0; i < MessageSizeType_NUM; i++) {
         m_message_counters[i].resize(in_vec.size());
         for (int j = 0; j<m_message_counters[i].size(); j++) {
-            m_message_counters[i][j] = 0;
+           m_message_counters[i][j] = new Stats::Scalar;
         }
     }
 }
@@ -174,7 +175,7 @@
                 m_in[vnet]->pop();
 
                 // Count the message
-                m_message_counters[net_msg_ptr->getMessageSize()][vnet]++;
+                (*(m_message_counters[net_msg_ptr->getMessageSize()][vnet]))++;
 
                 DPRINTF(RubyNetwork, "%s\n", *m_out[vnet]);
             }
@@ -220,6 +221,40 @@
 }
 
 void
+Throttle::regStats()
+{
+    // Shared stat-name prefix: "ruby.switch<m_sID>.link<m_node>".
+    stringstream prefix;
+    prefix << "ruby.switch" << NodeIDToString(m_sID)
+           << ".link" << NodeIDToString(m_node);
+
+    // Formula stat: 100 * m_links_utilized / (simTicks / event queue clock).
+    m_links_utilization = 100.0 * (m_links_utilized /
+                                   (simTicks/
+                                    Stats::constant(g_eventQueue_ptr->getClock())));
+    m_links_utilization
+      .name(prefix.str() + ".utilization")
+      .desc("The utilization ratio for this link")
+      ;
+
+    // One counter per (message size type, virtual network) pair.
+    for (int i = 0; i < MessageSizeType_NUM; i++) {
+        for (int j = 0; j<m_message_counters[i].size(); j++) {
+          stringstream name;
+          name << prefix.str() << ".vnet" << j << "."
+               << (MessageSizeType) i << ".message_count";
+          m_message_counters[i][j]
+            ->name(name.str())
+            .desc("Message Count")
+            ;
+          (*(m_message_counters[i][j])) = 0;
+        }
+    }
+}
+
+
+
+void
 Throttle::printStats(ostream& out) const
 {
     out << "utilized_percent: " << getUtilization() << endl;
@@ -229,11 +264,10 @@
 Throttle::clearStats()
 {
     m_ruby_start = g_eventQueue_ptr->getTime();
-    m_links_utilized = 0.0;
 
     for (int i = 0; i < m_message_counters.size(); i++) {
         for (int j = 0; j < m_message_counters[i].size(); j++) {
-            m_message_counters[i][j] = 0;
+          (*(m_message_counters[i][j])) = 0;
         }
     }
 }
@@ -246,8 +280,7 @@
 double
 Throttle::getUtilization() const
 {
-    return 100.0 * double(m_links_utilized) /
-        double(g_eventQueue_ptr->getTime()-m_ruby_start);
+    return m_links_utilization.total();  // value of the formula defined in regStats()
 }
 
 void
diff --git a/src/mem/ruby/profiler/AddressProfiler.hh b/src/mem/ruby/profiler/AddressProfiler.hh
--- a/src/mem/ruby/profiler/AddressProfiler.hh
+++ b/src/mem/ruby/profiler/AddressProfiler.hh
@@ -32,6 +32,7 @@
 #include <iostream>
 
 #include "base/hashmap.hh"
+#include "base/statistics.hh"
 #include "mem/protocol/AccessType.hh"
 #include "mem/protocol/RubyRequest.hh"
 #include "mem/ruby/common/Address.hh"
@@ -53,6 +54,7 @@
 
     void printStats(std::ostream& out) const;
     void clearStats();
+    void registerStats();
 
     void addTraceSample(Address data_addr, Address pc_addr,
                         RubyRequestType type, RubyAccessMode access_mode,
@@ -80,11 +82,11 @@
     AddressMap m_macroBlockAccessTrace;
     AddressMap m_programCounterAccessTrace;
     AddressMap m_retryProfileMap;
-    Histogram m_retryProfileHisto;
-    Histogram m_retryProfileHistoWrite;
-    Histogram m_retryProfileHistoRead;
-    Histogram m_getx_sharing_histogram;
-    Histogram m_gets_sharing_histogram;
+    Stats::Histogram m_retryProfileHisto;
+    Stats::Histogram m_retryProfileHistoWrite;
+    Stats::Histogram m_retryProfileHistoRead;
+    Stats::Histogram m_getx_sharing_histogram;
+    Stats::Histogram m_gets_sharing_histogram;
 
     //added by SS
     bool m_hot_lines;
diff --git a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc
--- a/src/mem/ruby/profiler/AddressProfiler.cc
+++ b/src/mem/ruby/profiler/AddressProfiler.cc
@@ -146,7 +146,6 @@
 AddressProfiler::AddressProfiler(int num_of_sequencers)
 {
     m_num_of_sequencers = num_of_sequencers;
-    clearStats();
 }
 
 AddressProfiler::~AddressProfiler()
@@ -175,8 +174,8 @@
 
         out << endl;
         out << "sharing_misses: " << m_sharing_miss_counter << endl;
-        out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl;
-        out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl;
+        //        out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl;
+        //        out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl;
 
         out << endl;
         out << "Hot Data Blocks" << endl;
@@ -213,12 +212,12 @@
         out << "Retry Profile" << endl;
         out << "-------------" << endl;
         out << endl;
-        out << "retry_histogram_absolute: " << m_retryProfileHisto << endl;
-        out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl;
-        out << "retry_histogram_read: " << m_retryProfileHistoRead << endl;
+        //        out << "retry_histogram_absolute: " << m_retryProfileHisto << endl;
+        //        out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl;
+        //        out << "retry_histogram_read: " << m_retryProfileHistoRead << endl;
 
         out << "retry_histogram_percent: ";
-        m_retryProfileHisto.printPercent(out);
+        //        m_retryProfileHisto.printPercent(out);
         out << endl;
 
         printSorted(out, m_num_of_sequencers, m_retryProfileMap,
@@ -228,6 +227,41 @@
 }
 
 void
+AddressProfiler::registerStats()
+{
+  // Every histogram is initialized with the same number of buckets.
+  const int num_buckets = 50;
+
+  m_retryProfileHisto
+    .init(num_buckets)
+    .name("ruby.retry_histogram_absolute")
+    .desc("Retry count of all profiled accesses")
+    ;
+  m_retryProfileHistoRead
+    .init(num_buckets)
+    .name("ruby.retry_histogram_read")
+    .desc("Retry count of profiled read accesses")
+    ;
+  m_retryProfileHistoWrite
+    .init(num_buckets)
+    .name("ruby.retry_histogram_write")
+    .desc("Retry count of profiled write (non-read) accesses")
+    ;
+  m_getx_sharing_histogram
+    .init(num_buckets)
+    .name("ruby.getx_sharing_histogram")
+    .desc("Number of indirections per profiled GETX")
+    ;
+  m_gets_sharing_histogram
+    .init(num_buckets)
+    .name("ruby.gets_sharing_histogram")
+    .desc("Number of indirections per profiled GETS")
+    ;
+  // Clear the address maps and reset the freshly initialized histograms.
+  clearStats();
+}
+
+void
 AddressProfiler::clearStats()
 {
     // Clear the maps
@@ -236,11 +270,11 @@
     m_macroBlockAccessTrace.clear();
     m_programCounterAccessTrace.clear();
     m_retryProfileMap.clear();
-    m_retryProfileHisto.clear();
-    m_retryProfileHistoRead.clear();
-    m_retryProfileHistoWrite.clear();
-    m_getx_sharing_histogram.clear();
-    m_gets_sharing_histogram.clear();
+    m_retryProfileHisto.reset();
+    m_retryProfileHistoRead.reset();
+    m_retryProfileHistoWrite.reset();
+    m_getx_sharing_histogram.reset();
+    m_gets_sharing_histogram.reset();
 }
 
 void
@@ -254,7 +288,7 @@
     indirection_set.remove(requestor);
     int num_indirections = indirection_set.count();
 
-    m_getx_sharing_histogram.add(num_indirections);
+    m_getx_sharing_histogram.sample(num_indirections);
     bool indirection_miss = (num_indirections > 0);
 
     addTraceSample(datablock, PC, RubyRequestType_ST, RubyAccessMode(0),
@@ -271,7 +305,7 @@
     indirection_set.remove(requestor);
     int num_indirections = indirection_set.count();
 
-    m_gets_sharing_histogram.add(num_indirections);
+    m_gets_sharing_histogram.sample(num_indirections);
     bool indirection_miss = (num_indirections > 0);
 
     addTraceSample(datablock, PC, RubyRequestType_LD, RubyAccessMode(0),
@@ -319,11 +353,11 @@
 AddressProfiler::profileRetry(const Address& data_addr, AccessType type,
                               int count)
 {
-    m_retryProfileHisto.add(count);
+    m_retryProfileHisto.sample(count);
     if (type == AccessType_Read) {
-        m_retryProfileHistoRead.add(count);
+        m_retryProfileHistoRead.sample(count);
     } else {
-        m_retryProfileHistoWrite.add(count);
+        m_retryProfileHistoWrite.sample(count);
     }
     if (count > 1) {
         lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count);
diff --git a/src/mem/ruby/profiler/CacheProfiler.hh b/src/mem/ruby/profiler/CacheProfiler.hh
--- a/src/mem/ruby/profiler/CacheProfiler.hh
+++ b/src/mem/ruby/profiler/CacheProfiler.hh
@@ -33,6 +33,7 @@
 #include <string>
 #include <vector>
 
+#include "base/statistics.hh"
 #include "mem/protocol/GenericRequestType.hh"
 #include "mem/protocol/PrefetchBit.hh"
 #include "mem/protocol/RubyAccessMode.hh"
@@ -47,6 +48,7 @@
     CacheProfiler(const std::string& description);
     ~CacheProfiler();
 
+    void regStats();
     void printStats(std::ostream& out) const;
     void clearStats();
 
@@ -67,12 +69,12 @@
     void addStatSample(RubyAccessMode type, PrefetchBit pfBit);
 
     std::string m_description;
-    int64 m_misses;
-    int64 m_demand_misses;
-    int64 m_prefetches;
-    int64 m_sw_prefetches;
-    int64 m_hw_prefetches;
-    int64 m_accessModeTypeHistogram[RubyAccessMode_NUM];
+    Stats::Scalar m_misses;
+    Stats::Scalar m_demand_misses;
+    Stats::Scalar m_prefetches;
+    Stats::Scalar m_sw_prefetches;
+    Stats::Scalar m_hw_prefetches;
+    Stats::Scalar m_accessModeTypeCount[RubyAccessMode_NUM];
 
     std::vector<int> m_cacheRequestType;
     std::vector<int> m_genericRequestType;
diff --git a/src/mem/ruby/profiler/CacheProfiler.cc b/src/mem/ruby/profiler/CacheProfiler.cc
--- a/src/mem/ruby/profiler/CacheProfiler.cc
+++ b/src/mem/ruby/profiler/CacheProfiler.cc
@@ -26,6 +26,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <sstream>
 #include "mem/ruby/profiler/CacheProfiler.hh"
 #include "mem/ruby/profiler/Profiler.hh"
 #include "mem/ruby/system/System.hh"
@@ -45,16 +46,50 @@
 }
 
 void
+CacheProfiler::regStats()
+{
+  // Every stat of this cache is named "ruby.<description>.<stat>".
+  const std::string prefix = "ruby." + m_description;
+  m_misses
+    .name(prefix + ".total_misses")
+    .desc("Total number of cache misses")
+    ;
+  m_demand_misses
+    .name(prefix + ".demand_misses")
+    .desc("Number of demand cache misses")
+    ;
+  m_prefetches
+    .name(prefix + ".prefetches")
+    .desc("Number of cache prefetch requests")
+    ;
+  m_sw_prefetches
+    .name(prefix + ".sw_prefetches")
+    .desc("Number of cache software prefetch requests")
+    ;
+  m_hw_prefetches
+    .name(prefix + ".hw_prefetches")
+    .desc("Number of cache hardware prefetch requests")
+    ;
+  for (int mode = 0; mode < RubyAccessMode_NUM; ++mode) {
+    const std::string mode_name = RubyAccessMode_to_string((RubyAccessMode) mode);
+    m_accessModeTypeCount[mode]
+      .name(prefix + "." + mode_name + ".count")
+      .desc("The number of cache accesses that were of type " + mode_name)
+      ;
+  }
+}
+
+void
 CacheProfiler::printStats(ostream& out) const
 {
     out << "Cache Stats: " << m_description << endl;
     string description = "  " + m_description;
 
-    out << description << "_total_misses: " << m_misses << endl;
-    out << description << "_total_demand_misses: " << m_demand_misses << endl;
-    out << description << "_total_prefetches: " << m_prefetches << endl;
-    out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl;
-    out << description << "_total_hw_prefetches: " << m_hw_prefetches << endl;
+    out << description << "_total_misses: " << m_misses.value() << endl;
+    out << description << "_total_demand_misses: " << m_demand_misses.value() << endl;
+    out << description << "_total_prefetches: " << m_prefetches.value() << endl;
+    out << description << "_total_sw_prefetches: " << m_sw_prefetches.value() << endl;
+    out << description << "_total_hw_prefetches: " << m_hw_prefetches.value() << endl;
     out << endl;
 
     int requests = 0;
@@ -67,7 +102,7 @@
         requests += m_genericRequestType[i];
     }
 
-    assert(m_misses == requests);
+    assert(m_misses.value() == requests);
 
     if (requests > 0) {
         for (int i = 0; i < int(RubyRequestType_NUM); i++) {
@@ -95,11 +130,11 @@
         out << endl;
 
         for (int i = 0; i < RubyAccessMode_NUM; i++){
-            if (m_accessModeTypeHistogram[i] > 0) {
+            if (m_accessModeTypeCount[i].value() > 0) {
                 out << description << "_access_mode_type_"
                     << (RubyAccessMode) i << ":   "
-                    << m_accessModeTypeHistogram[i] << "    "
-                    << 100.0 * m_accessModeTypeHistogram[i] / requests
+                    << m_accessModeTypeCount[i].value() << "    "
+                    << 100.0 * m_accessModeTypeCount[i].value() / requests
                     << "%" << endl;
             }
         }
@@ -123,7 +158,7 @@
     m_sw_prefetches = 0;
     m_hw_prefetches = 0;
     for (int i = 0; i < RubyAccessMode_NUM; i++) {
-        m_accessModeTypeHistogram[i] = 0;
+        m_accessModeTypeCount[i] = 0;
     }
 }
 
@@ -151,7 +186,7 @@
 {
     m_misses++;
 
-    m_accessModeTypeHistogram[accessType]++;
+    m_accessModeTypeCount[accessType]++;
     if (pfBit == PrefetchBit_No) {
         m_demand_misses++;
     } else if (pfBit == PrefetchBit_Yes) {
diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.hh b/src/mem/ruby/profiler/MemCntrlProfiler.hh
--- a/src/mem/ruby/profiler/MemCntrlProfiler.hh
+++ b/src/mem/ruby/profiler/MemCntrlProfiler.hh
@@ -33,6 +33,7 @@
 #include <string>
 #include <vector>
 
+#include "base/statistics.hh"
 #include "mem/ruby/common/Global.hh"
 
 class MemCntrlProfiler
@@ -42,6 +43,7 @@
                      int ranks_per_dimm, int dimms_per_channel);
     ~MemCntrlProfiler();
 
+    void regStats();
     void printStats(std::ostream& out) const;
     void clearStats();
 
@@ -69,22 +71,25 @@
     MemCntrlProfiler& operator=(const MemCntrlProfiler& obj);
 
     std::string m_description;
-    uint64 m_memReq;
-    uint64 m_memBankBusy;
-    uint64 m_memBusBusy;
-    uint64 m_memTfawBusy;
-    uint64 m_memReadWriteBusy;
-    uint64 m_memDataBusBusy;
-    uint64 m_memRefresh;
-    uint64 m_memRead;
-    uint64 m_memWrite;
-    uint64 m_memWaitCycles;
-    uint64 m_memInputQ;
-    uint64 m_memBankQ;
-    uint64 m_memArbWait;
-    uint64 m_memRandBusy;
-    uint64 m_memNotOld;
-    std::vector<uint64> m_memBankCount;
+    Stats::Scalar m_memReq;
+    Stats::Scalar m_memBankBusy;
+    Stats::Scalar m_memBusBusy;
+    Stats::Scalar m_memTfawBusy;
+    Stats::Scalar m_memReadWriteBusy;
+    Stats::Scalar m_memDataBusBusy;
+    Stats::Scalar m_memRefresh;
+    Stats::Scalar m_memRead;
+    Stats::Scalar m_memWrite;
+    Stats::Scalar m_memWaitCycles;
+    Stats::Scalar m_memInputQ;
+    Stats::Scalar m_memBankQ;
+    Stats::Scalar m_memArbWait;
+    Stats::Scalar m_memRandBusy;
+    Stats::Scalar m_memNotOld;
+    Stats::Vector m_memBankCount;
+    Stats::Formula m_memTotalStalls;
+    Stats::Formula m_memStallsPerRequest;
+
     int m_banks_per_rank;
     int m_ranks_per_dimm;
     int m_dimms_per_channel;
diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.cc b/src/mem/ruby/profiler/MemCntrlProfiler.cc
--- a/src/mem/ruby/profiler/MemCntrlProfiler.cc
+++ b/src/mem/ruby/profiler/MemCntrlProfiler.cc
@@ -38,9 +38,6 @@
     m_ranks_per_dimm = ranks_per_dimm;
     m_dimms_per_channel = dimms_per_channel;
 
-    int totalBanks = banks_per_rank * ranks_per_dimm * dimms_per_channel;
-    m_memBankCount.resize(totalBanks);
-
     clearStats();
 }
 
@@ -49,9 +46,96 @@
 }
 
 void
+MemCntrlProfiler::regStats()
+{
+  // Every stat of this controller is named "ruby.<description>.<stat>".
+  const string this_name = "ruby." + m_description;
+
+  m_memReq
+    .name(this_name + ".requests")
+    .desc("The number of requests made to this controller")
+    ;
+  m_memBankBusy
+    .name(this_name + ".bankBusy")
+    .desc("The number of cycles this controller stalled because a bank was busy")
+    ;
+  m_memBusBusy
+    .name(this_name + ".busBusy")
+    .desc("The number of cycles this controller stalled because a bus was busy")
+    ;
+  m_memTfawBusy
+    .name(this_name + ".tfawBusy")
+    .desc("The number of cycles this controller stalled because a tfaw was busy")
+    ;
+  m_memReadWriteBusy
+    .name(this_name + ".readWriteBusy")
+    .desc("The number of cycles this controller stalled because of a read-write turnaround")
+    ;
+  m_memDataBusBusy
+    .name(this_name + ".dataBusBusy")
+    .desc("The number of cycles this controller stalled because of a read-read turnaround")
+    ;
+  m_memRefresh
+    .name(this_name + ".refreshes")
+    .desc("The number of refreshes performed")
+    ;
+  m_memRead
+    .name(this_name + ".reads")
+    .desc("The number of reads requested")
+    ;
+  m_memWrite
+    .name(this_name + ".writes")
+    .desc("The number of writes requested")
+    ;
+  m_memWaitCycles
+    .name(this_name + ".waitCycles")
+    .desc("The number of cycles spent waiting. Represents the aggregate of *Busy stats.")
+    ;
+  m_memInputQ
+    .name(this_name + ".inputQDelays")
+    .desc("The number of cycles delayed on an input queue.")
+    ;
+  m_memBankQ
+    .name(this_name + ".bankQDelays")
+    .desc("The number of cycles delayed on a bank queue.")
+    ;
+  m_memArbWait
+    .name(this_name + ".arbWait")
+    .desc("The number of stalls for arbitration.")
+    ;
+  m_memRandBusy
+    .name(this_name + ".randBusy")
+    .desc("The number of stalls for random busy.")
+    ;
+  m_memNotOld
+    .name(this_name + ".notOld")
+    .desc("The number of stalls for anti-starvation.")
+    ;
+
+  int totalBanks = m_banks_per_rank * m_ranks_per_dimm * m_dimms_per_channel;
+  m_memBankCount
+    .init(totalBanks)
+    .name(this_name + ".bankAccessCount")
+    .desc("The number of times a bank is accessed")
+    ;
+
+  // Derived stats: the stall total and its average per request.
+  m_memTotalStalls = m_memInputQ + m_memBankQ + m_memWaitCycles;
+  m_memTotalStalls
+    .name(this_name + ".totalStalls")
+    .desc("The total number of stalls")
+    ;
+  m_memStallsPerRequest = (m_memInputQ + m_memBankQ + m_memWaitCycles) * 1.0 / m_memReq;
+  m_memStallsPerRequest
+    .name(this_name + ".stallsPerRequest")
+    .desc("The average number of stalls per request")
+    ;
+}
+
+void
 MemCntrlProfiler::printStats(ostream& out) const
 {
-    if (!m_memReq && !m_memRefresh) {
+   if (!m_memReq.value() && !m_memRefresh.value()) {
         out << "Memory Controller: " << m_description
             << " no stats recorded." << endl
             << endl
@@ -60,22 +144,22 @@
     }
 
     // if there's a memory controller at all
-    uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles;
-    double stallsPerReq = total_stalls * 1.0 / m_memReq;
+   uint64 total_stalls = m_memInputQ.value() + m_memBankQ.value() + m_memWaitCycles.value();
+   double stallsPerReq = total_stalls * 1.0 / m_memReq.value();
     out << "Memory controller: " << m_description << ":" << endl;
 
     // does not include refreshes
-    out << "  memory_total_requests: " << m_memReq << endl;
-    out << "  memory_reads: " << m_memRead << endl;
-    out << "  memory_writes: " << m_memWrite << endl;
-    out << "  memory_refreshes: " << m_memRefresh << endl;
+    out << "  memory_total_requests: " << m_memReq.value() << endl;
+    out << "  memory_reads: " << m_memRead.value() << endl;
+    out << "  memory_writes: " << m_memWrite.value() << endl;
+    out << "  memory_refreshes: " << m_memRefresh.value() << endl;
     out << "  memory_total_request_delays: " << total_stalls << endl;
     out << "  memory_delays_per_request: " << stallsPerReq << endl;
-    out << "  memory_delays_in_input_queue: " << m_memInputQ << endl;
+    out << "  memory_delays_in_input_queue: " << m_memInputQ.value() << endl;
     out << "  memory_delays_behind_head_of_bank_queue: "
-        << m_memBankQ << endl;
+        << m_memBankQ.value() << endl;
     out << "  memory_delays_stalled_at_head_of_bank_queue: "
-        << m_memWaitCycles << endl;
+        << m_memWaitCycles.value() << endl;
 
     // Note: The following "memory stalls" entries are a breakdown of
     // the cycles which already showed up in m_memWaitCycles.  The
@@ -86,20 +170,20 @@
     // heads-of-queues into batches to avoid starvation, a request in
     // a newer batch didn't try to arbitrate yet because there are
     // older requests waiting.
-    out << "  memory_stalls_for_bank_busy: " << m_memBankBusy << endl;
-    out << "  memory_stalls_for_random_busy: " << m_memRandBusy << endl;
-    out << "  memory_stalls_for_anti_starvation: " << m_memNotOld << endl;
-    out << "  memory_stalls_for_arbitration: " << m_memArbWait << endl;
-    out << "  memory_stalls_for_bus: " << m_memBusBusy << endl;
-    out << "  memory_stalls_for_tfaw: " << m_memTfawBusy << endl;
+    out << "  memory_stalls_for_bank_busy: " << m_memBankBusy.value() << endl;
+    out << "  memory_stalls_for_random_busy: " << m_memRandBusy.value() << endl;
+    out << "  memory_stalls_for_anti_starvation: " << m_memNotOld.value() << endl;
+    out << "  memory_stalls_for_arbitration: " << m_memArbWait.value() << endl;
+    out << "  memory_stalls_for_bus: " << m_memBusBusy.value() << endl;
+    out << "  memory_stalls_for_tfaw: " << m_memTfawBusy.value() << endl;
     out << "  memory_stalls_for_read_write_turnaround: "
-        << m_memReadWriteBusy << endl;
+        << m_memReadWriteBusy.value() << endl;
     out << "  memory_stalls_for_read_read_turnaround: "
-        << m_memDataBusBusy << endl;
+        << m_memDataBusBusy.value() << endl;
     out << "  accesses_per_bank: ";
 
     for (int bank = 0; bank < m_memBankCount.size(); bank++) {
-        out << m_memBankCount[bank] << "  ";
+      //      out << m_memBankCount[bank].value() << "  ";
     }
     out << endl;
     out << endl;
@@ -125,7 +209,7 @@
     m_memNotOld = 0;
 
     for (int bank = 0; bank < m_memBankCount.size(); bank++) {
-        m_memBankCount[bank] = 0;
+      m_memBankCount[bank] = 0;
     }
 }
 
diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh
--- a/src/mem/ruby/profiler/Profiler.hh
+++ b/src/mem/ruby/profiler/Profiler.hh
@@ -51,6 +51,7 @@
 #include <vector>
 
 #include "base/hashmap.hh"
+#include "base/statistics.hh"
 #include "mem/protocol/AccessType.hh"
 #include "mem/protocol/GenericMachineType.hh"
 #include "mem/protocol/GenericRequestType.hh"
@@ -78,6 +79,8 @@
     Profiler(const Params *);
     ~Profiler();
 
+    void registerStats();
+
     void wakeup();
 
     void setPeriodicStatsFile(const std::string& filename);
@@ -96,9 +99,6 @@
     void addAddressTraceSample(const RubyRequest& msg, NodeID id);
 
     void profileRequest(const std::string& requestStr);
-    void profileSharing(const Address& addr, AccessType type,
-                        NodeID requestor, const Set& sharers,
-                        const Set& owner);
 
     void profileMulticastRetry(const Address& addr, int count);
 
@@ -109,26 +109,19 @@
     void
     profileOutstandingRequest(int outstanding)
     {
-        m_outstanding_requests.add(outstanding);
+        m_outstanding_requests.sample(outstanding);  // add one sample to the histogram
     }
 
     void
     profileOutstandingPersistentRequest(int outstanding)
     {
-        m_outstanding_persistent_requests.add(outstanding);
-    }
-
-    void
-    profileAverageLatencyEstimate(int latency)
-    {
-        m_average_latency_estimate.add(latency);
+        m_outstanding_persistent_requests.sample(outstanding);  // add one sample to the histogram
     }
 
     void recordPrediction(bool wasGood, bool wasPredicted);
 
     void startTransaction(int cpu);
     void endTransaction(int cpu);
-    void profilePFWait(Time waitTime);
 
     void controllerBusy(MachineID machID);
     void bankBusy();
@@ -148,12 +141,6 @@
                         Time forwardRequestTime,
                         Time firstResponseTime,
                         Time completionTime);
-    
-    void swPrefetchLatency(Time t, 
-                           RubyRequestType type,
-                           const GenericMachineType respondingMach);
-
-    void sequencerRequests(int num) { m_sequencer_requests.add(num); }
 
     void profileMsgDelay(int virtualNetwork, int delayCycles);
 
@@ -190,51 +177,24 @@
     Time m_ruby_start;
     time_t m_real_time_start_time;
 
-    std::vector<std::vector<integer_t> > m_busyControllerCount;
-    integer_t m_busyBankCount;
-    Histogram m_multicast_retry_histogram;
+    Stats::Histogram m_wCCIssueToInitialRequestHistogram;
+    Stats::Histogram m_wCCInitialRequestToForwardRequestHistogram;
+    Stats::Histogram m_wCCForwardRequestToFirstResponseHistogram;
+    Stats::Histogram m_wCCFirstResponseToCompleteHistogram;
+    Stats::Scalar m_wCCIncompleteTimes;
 
-    Histogram m_filter_action_histogram;
-    Histogram m_tbeProfile;
+    Stats::Histogram m_dirIssueToInitialRequestHistogram;
+    Stats::Histogram m_dirInitialRequestToForwardRequestHistogram;
+    Stats::Histogram m_dirForwardRequestToFirstResponseHistogram;
+    Stats::Histogram m_dirFirstResponseToCompleteHistogram;
+    Stats::Scalar m_dirIncompleteTimes;
 
-    Histogram m_sequencer_requests;
-    Histogram m_read_sharing_histogram;
-    Histogram m_write_sharing_histogram;
-    Histogram m_all_sharing_histogram;
-    int64 m_cache_to_cache;
-    int64 m_memory_to_cache;
+    Stats::Histogram m_delayedCyclesHistogram;
+    Stats::Histogram m_delayedCyclesNonPFHistogram;
+    std::vector<Stats::Histogram*> m_delayedCyclesVCHistograms;
 
-    Histogram m_prefetchWaitHistogram;
-
-    std::vector<Histogram> m_missLatencyHistograms;
-    std::vector<Histogram> m_machLatencyHistograms;
-    std::vector< std::vector<Histogram> > m_missMachLatencyHistograms;
-    Histogram m_wCCIssueToInitialRequestHistogram;
-    Histogram m_wCCInitialRequestToForwardRequestHistogram;
-    Histogram m_wCCForwardRequestToFirstResponseHistogram;
-    Histogram m_wCCFirstResponseToCompleteHistogram;
-    int64 m_wCCIncompleteTimes;
-    Histogram m_dirIssueToInitialRequestHistogram;
-    Histogram m_dirInitialRequestToForwardRequestHistogram;
-    Histogram m_dirForwardRequestToFirstResponseHistogram;
-    Histogram m_dirFirstResponseToCompleteHistogram;
-    int64 m_dirIncompleteTimes;
-
-    Histogram m_allMissLatencyHistogram;
-
-    Histogram m_allSWPrefetchLatencyHistogram;
-    Histogram m_SWPrefetchL2MissLatencyHistogram;
-    std::vector<Histogram> m_SWPrefetchLatencyHistograms;
-    std::vector<Histogram> m_SWPrefetchMachLatencyHistograms;
-
-    Histogram m_delayedCyclesHistogram;
-    Histogram m_delayedCyclesNonPFHistogram;
-    std::vector<Histogram> m_delayedCyclesVCHistograms;
-
-    Histogram m_outstanding_requests;
-    Histogram m_outstanding_persistent_requests;
-
-    Histogram m_average_latency_estimate;
+    Stats::Histogram m_outstanding_requests;
+    Stats::Histogram m_outstanding_persistent_requests;
 
     m5::hash_set<Address> m_watch_address_set;
     // counts all initiated cache request including PUTs
diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc
--- a/src/mem/ruby/profiler/Profiler.cc
+++ b/src/mem/ruby/profiler/Profiler.cc
@@ -125,8 +125,6 @@
             << process_memory_resident() / process_memory_total() << endl;
     }
 
-    out << "miss_latency: " << m_allMissLatencyHistogram << endl;
-
     out << endl;
 
     if (m_all_instructions) {
@@ -242,47 +240,18 @@
     out << endl;
 
     if (!short_stats) {
-        out << "Busy Controller Counts:" << endl;
-        for (int i = 0; i < MachineType_NUM; i++) {
-            int size = MachineType_base_count((MachineType)i);
-            for (int j = 0; j < size; j++) {
-                MachineID machID;
-                machID.type = (MachineType)i;
-                machID.num = j;
-                out << machID << ":" << m_busyControllerCount[i][j] << "  ";
-                if ((j + 1) % 8 == 0) {
-                    out << endl;
-                }
-            }
-            out << endl;
-        }
+        /*
+        out << "sequencer_requests_outstanding: "
+            << m_sequencer_requests.value() << endl;
         out << endl;
-
-        out << "Busy Bank Count:" << m_busyBankCount << endl;
-        out << endl;
-
-        out << "sequencer_requests_outstanding: "
-            << m_sequencer_requests << endl;
-        out << endl;
+        */
     }
 
     if (!short_stats) {
         out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
         out << "----------------------------------------" << endl;
-        out << "miss_latency: " << m_allMissLatencyHistogram << endl;
-        for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
-            if (m_missLatencyHistograms[i].size() > 0) {
-                out << "miss_latency_" << RubyRequestType(i) << ": "
-                    << m_missLatencyHistograms[i] << endl;
-            }
-        }
-        for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
-            if (m_machLatencyHistograms[i].size() > 0) {
-                out << "miss_latency_" << GenericMachineType(i) << ": "
-                    << m_machLatencyHistograms[i] << endl;
-            }
-        }
 
+        /*
         out << "miss_latency_wCC_issue_to_initial_request: " 
             << m_wCCIssueToInitialRequestHistogram << endl;
         out << "miss_latency_wCC_initial_forward_request: " 
@@ -291,7 +260,9 @@
             << m_wCCForwardRequestToFirstResponseHistogram << endl;
         out << "miss_latency_wCC_first_response_to_completion: " 
             << m_wCCFirstResponseToCompleteHistogram << endl;
-        out << "imcomplete_wCC_Times: " << m_wCCIncompleteTimes << endl;
+        */
+        out << "imcomplete_wCC_Times: " << m_wCCIncompleteTimes.value() << endl;
+        /*
         out << "miss_latency_dir_issue_to_initial_request: " 
             << m_dirIssueToInitialRequestHistogram << endl;
         out << "miss_latency_dir_initial_forward_request: " 
@@ -300,8 +271,10 @@
             << m_dirForwardRequestToFirstResponseHistogram << endl;
         out << "miss_latency_dir_first_response_to_completion: " 
             << m_dirFirstResponseToCompleteHistogram << endl;
-        out << "imcomplete_dir_Times: " << m_dirIncompleteTimes << endl;
+        */
+        out << "imcomplete_dir_Times: " << m_dirIncompleteTimes.value() << endl;
 
+        /*
         for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
             for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
                 if (m_missMachLatencyHistograms[i][j].size() > 0) {
@@ -311,63 +284,15 @@
                 }
             }
         }
-
-        out << endl;
-
-        out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
-        out << "------------------------------------" << endl;
-        out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
-        for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
-            if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
-                out << "prefetch_latency_" << RubyRequestType(i) << ": "
-                    << m_SWPrefetchLatencyHistograms[i] << endl;
-            }
-        }
-        for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
-            if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
-                out << "prefetch_latency_" << GenericMachineType(i) << ": "
-                    << m_SWPrefetchMachLatencyHistograms[i] << endl;
-            }
-        }
-        out << "prefetch_latency_L2Miss:"
-            << m_SWPrefetchL2MissLatencyHistogram << endl;
-
-        if (m_all_sharing_histogram.size() > 0) {
-            out << "all_sharing: " << m_all_sharing_histogram << endl;
-            out << "read_sharing: " << m_read_sharing_histogram << endl;
-            out << "write_sharing: " << m_write_sharing_histogram << endl;
-
-            out << "all_sharing_percent: ";
-            m_all_sharing_histogram.printPercent(out);
-            out << endl;
-
-            out << "read_sharing_percent: ";
-            m_read_sharing_histogram.printPercent(out);
-            out << endl;
-
-            out << "write_sharing_percent: ";
-            m_write_sharing_histogram.printPercent(out);
-            out << endl;
-
-            int64 total_miss = m_cache_to_cache +  m_memory_to_cache;
-            out << "all_misses: " << total_miss << endl;
-            out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
-            out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
-            out << "cache_to_cache_percent: "
-                << 100.0 * (double(m_cache_to_cache) / double(total_miss))
-                << endl;
-            out << "memory_to_cache_percent: "
-                << 100.0 * (double(m_memory_to_cache) / double(total_miss))
-                << endl;
-            out << endl;
-        }
-
+        */
+        /*
         if (m_outstanding_requests.size() > 0) {
             out << "outstanding_requests: ";
             m_outstanding_requests.printPercent(out);
             out << endl;
             out << endl;
         }
+        */
     }
 
     if (!short_stats) {
@@ -393,8 +318,6 @@
         }
         out << endl;
 
-        out << "filter_action: " << m_filter_action_histogram << endl;
-
         if (!m_all_instructions) {
             m_address_profiler_ptr->printStats(out);
         }
@@ -404,6 +327,7 @@
         }
 
         out << endl;
+        /*
         out << "Message Delayed Cycles" << endl;
         out << "----------------------" << endl;
         out << "Total_delay_cycles: " <<   m_delayedCyclesHistogram << endl;
@@ -413,6 +337,7 @@
             out << "  virtual_network_" << i << "_delay_cycles: "
                 << m_delayedCyclesVCHistograms[i] << endl;
         }
+        */
 
         printResourceUsage(out);
     }
@@ -441,6 +366,98 @@
 }
 
 void
+Profiler::registerStats()
+{
+    // Register the profiler's gem5 statistics. Each histogram's init()
+    // argument and name are kept as-is; the "??" descriptions are
+    // placeholders that still need real text.
+
+    // Latency breakdown histograms sampled by missLatencyWcc(), plus the
+    // counter it bumps when its timestamp ordering check fails.
+    m_wCCIssueToInitialRequestHistogram
+      .init(200)
+      .name("ruby.wCCIssueToInitialRequestHistogram")
+      .desc("??");
+    m_wCCInitialRequestToForwardRequestHistogram
+      .init(200)
+      .name("ruby.wCCInitialRequestToForwardRequestHistogram")
+      .desc("??");
+    m_wCCForwardRequestToFirstResponseHistogram
+      .init(200)
+      .name("ruby.wCCForwardRequestToFirstResponseHistogram")
+      .desc("??");
+    m_wCCFirstResponseToCompleteHistogram
+      .init(200)
+      .name("ruby.wCCFirstResponseToCompleteHistogram")
+      .desc("??");
+    m_wCCIncompleteTimes
+      .name("ruby.wCCIncompleteTimes")
+      .desc("??");
+
+    // The m_dir* counterparts of the stats above.
+    m_dirIssueToInitialRequestHistogram
+      .init(200)
+      .name("ruby.dirIssueToInitialRequestHistogram")
+      .desc("??");
+    m_dirInitialRequestToForwardRequestHistogram
+      .init(200)
+      .name("ruby.dirInitialRequestToForwardRequestHistogram")
+      .desc("??");
+    m_dirForwardRequestToFirstResponseHistogram
+      .init(200)
+      .name("ruby.dirForwardRequestToFirstResponseHistogram")
+      .desc("??");
+    m_dirFirstResponseToCompleteHistogram
+      .init(200)
+      .name("ruby.dirFirstResponseToCompleteHistogram")
+      .desc("??");
+    m_dirIncompleteTimes
+      .name("ruby.dirIncompleteTimes")
+      .desc("??");
+
+    // Message delay histograms sampled by profileMsgDelay().
+    m_delayedCyclesHistogram
+      .init(50)
+      .name("ruby.delayedCyclesHistogram")
+      .desc("??");
+    m_delayedCyclesNonPFHistogram
+      .init(50)
+      .name("ruby.delayedCyclesNonPFHistogram")
+      .desc("??");
+
+    // One heap-allocated histogram per virtual network; clearStats()
+    // resets them through the stored pointers.
+    int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks();
+    m_delayedCyclesVCHistograms.resize(size);
+    for (int i = 0; i < size; i++) {
+        char name[100];
+        // Bounded write: never lets the formatted name overrun the buffer.
+        snprintf(name, sizeof(name), "ruby.vc%02i.delayedCyclesHistogram", i);
+        m_delayedCyclesVCHistograms[i] = new Stats::Histogram;
+        m_delayedCyclesVCHistograms[i]
+          ->init(50)
+          .name(name)
+          .desc("??");
+    }
+
+    m_outstanding_requests
+      .init(50)
+      .name("ruby.outstandingRequests")
+      .desc("??");
+    m_outstanding_persistent_requests
+      .init(50)
+      .name("ruby.outstandingPersistentRequests")
+      .desc("??");
+
+    // m_address_profiler_ptr registers its own stats; m_inst_profiler_ptr
+    // is only asked to when m_all_instructions is set.
+    m_address_profiler_ptr->registerStats();
+    if (m_all_instructions) {
+      m_inst_profiler_ptr->registerStats();
+    }
+}
+
+void
 Profiler::clearStats()
 {
     m_ruby_start = g_eventQueue_ptr->getTime();
@@ -454,76 +471,32 @@
         }
     }
 
-    m_busyControllerCount.resize(MachineType_NUM); // all machines
-    for (int i = 0; i < MachineType_NUM; i++) {
-        int size = MachineType_base_count((MachineType)i);
-        m_busyControllerCount[i].resize(size);
-        for (int j = 0; j < size; j++) {
-            m_busyControllerCount[i][j] = 0;
-        }
-    }
-    m_busyBankCount = 0;
-
-    m_delayedCyclesHistogram.clear();
-    m_delayedCyclesNonPFHistogram.clear();
+    m_delayedCyclesHistogram.reset();
+    m_delayedCyclesNonPFHistogram.reset();
     int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks();
-    m_delayedCyclesVCHistograms.resize(size);
     for (int i = 0; i < size; i++) {
-        m_delayedCyclesVCHistograms[i].clear();
+        m_delayedCyclesVCHistograms[i]->reset();
     }
 
-    m_missLatencyHistograms.resize(RubyRequestType_NUM);
-    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
-        m_missLatencyHistograms[i].clear(200);
-    }
-    m_machLatencyHistograms.resize(GenericMachineType_NUM+1);
-    for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
-        m_machLatencyHistograms[i].clear(200);
-    }
-    m_missMachLatencyHistograms.resize(RubyRequestType_NUM);
-    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
-        m_missMachLatencyHistograms[i].resize(GenericMachineType_NUM+1);
-        for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
-            m_missMachLatencyHistograms[i][j].clear(200);
-        }
-    }
-    m_allMissLatencyHistogram.clear(200);
-    m_wCCIssueToInitialRequestHistogram.clear(200);
-    m_wCCInitialRequestToForwardRequestHistogram.clear(200);
-    m_wCCForwardRequestToFirstResponseHistogram.clear(200);
-    m_wCCFirstResponseToCompleteHistogram.clear(200);
+    m_wCCIssueToInitialRequestHistogram.reset();
+    m_wCCInitialRequestToForwardRequestHistogram.reset();
+    m_wCCForwardRequestToFirstResponseHistogram.reset();
+    m_wCCFirstResponseToCompleteHistogram.reset();
     m_wCCIncompleteTimes = 0;
-    m_dirIssueToInitialRequestHistogram.clear(200);
-    m_dirInitialRequestToForwardRequestHistogram.clear(200);
-    m_dirForwardRequestToFirstResponseHistogram.clear(200);
-    m_dirFirstResponseToCompleteHistogram.clear(200);
+    m_dirIssueToInitialRequestHistogram.reset();
+    m_dirInitialRequestToForwardRequestHistogram.reset();
+    m_dirForwardRequestToFirstResponseHistogram.reset();
+    m_dirFirstResponseToCompleteHistogram.reset();
     m_dirIncompleteTimes = 0;
 
-    m_SWPrefetchLatencyHistograms.resize(RubyRequestType_NUM);
-    for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
-        m_SWPrefetchLatencyHistograms[i].clear(200);
-    }
-    m_SWPrefetchMachLatencyHistograms.resize(GenericMachineType_NUM+1);
-    for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
-        m_SWPrefetchMachLatencyHistograms[i].clear(200);
-    }
-    m_allSWPrefetchLatencyHistogram.clear(200);
-
-    m_sequencer_requests.clear();
-    m_read_sharing_histogram.clear();
-    m_write_sharing_histogram.clear();
-    m_all_sharing_histogram.clear();
-    m_cache_to_cache = 0;
-    m_memory_to_cache = 0;
-
     // clear HashMaps
     m_requestProfileMap.clear();
 
     // count requests profiled
     m_requests = 0;
 
-    m_outstanding_requests.clear();
-    m_outstanding_persistent_requests.clear();
+    m_outstanding_requests.reset();
+    m_outstanding_persistent_requests.reset();
 
     // Flush the prefetches through the system - used so that there
     // are no outstanding requests after stats are cleared
@@ -550,39 +523,13 @@
 }
 
 void
-Profiler::profileSharing(const Address& addr, AccessType type,
-                         NodeID requestor, const Set& sharers,
-                         const Set& owner)
-{
-    Set set_contacted(owner);
-    if (type == AccessType_Write) {
-        set_contacted.addSet(sharers);
-    }
-    set_contacted.remove(requestor);
-    int number_contacted = set_contacted.count();
-
-    if (type == AccessType_Write) {
-        m_write_sharing_histogram.add(number_contacted);
-    } else {
-        m_read_sharing_histogram.add(number_contacted);
-    }
-    m_all_sharing_histogram.add(number_contacted);
-
-    if (number_contacted == 0) {
-        m_memory_to_cache++;
-    } else {
-        m_cache_to_cache++;
-    }
-}
-
-void
 Profiler::profileMsgDelay(int virtualNetwork, int delayCycles)
 {
     assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
-    m_delayedCyclesHistogram.add(delayCycles);
-    m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
+    m_delayedCyclesHistogram.sample(delayCycles); // every virtual network
+    m_delayedCyclesVCHistograms[virtualNetwork]->sample(delayCycles); // this network only
     if (virtualNetwork != 0) {
-        m_delayedCyclesNonPFHistogram.add(delayCycles);
+        m_delayedCyclesNonPFHistogram.sample(delayCycles); // skips network 0
     }
 }
 
@@ -598,36 +545,6 @@
 }
 
 void
-Profiler::controllerBusy(MachineID machID)
-{
-    m_busyControllerCount[(int)machID.type][(int)machID.num]++;
-}
-
-void
-Profiler::profilePFWait(Time waitTime)
-{
-    m_prefetchWaitHistogram.add(waitTime);
-}
-
-void
-Profiler::bankBusy()
-{
-    m_busyBankCount++;
-}
-
-// non-zero cycle demand request
-void
-Profiler::missLatency(Time cycles, 
-                      RubyRequestType type,
-                      const GenericMachineType respondingMach)
-{
-    m_allMissLatencyHistogram.add(cycles);
-    m_missLatencyHistograms[type].add(cycles);
-    m_machLatencyHistograms[respondingMach].add(cycles);
-    m_missMachLatencyHistograms[type][respondingMach].add(cycles);
-}
-
-void
 Profiler::missLatencyWcc(Time issuedTime,
                          Time initialRequestTime,
                          Time forwardRequestTime,
@@ -638,16 +555,16 @@
         (initialRequestTime <= forwardRequestTime) &&
         (forwardRequestTime <= firstResponseTime) &&
         (firstResponseTime <= completionTime)) {
-        m_wCCIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
+        m_wCCIssueToInitialRequestHistogram.sample(initialRequestTime - issuedTime);
         
-        m_wCCInitialRequestToForwardRequestHistogram.add(forwardRequestTime - 
-                                                         initialRequestTime);
+        m_wCCInitialRequestToForwardRequestHistogram.sample(forwardRequestTime -
+                                                            initialRequestTime);
         
-        m_wCCForwardRequestToFirstResponseHistogram.add(firstResponseTime - 
-                                                        forwardRequestTime);
+        m_wCCForwardRequestToFirstResponseHistogram.sample(firstResponseTime -
+                                                           forwardRequestTime);
         
-        m_wCCFirstResponseToCompleteHistogram.add(completionTime - 
-                                                  firstResponseTime);
+        m_wCCFirstResponseToCompleteHistogram.sample(completionTime -
+                                                     firstResponseTime);
     } else {
         m_wCCIncompleteTimes++;
     }
@@ -664,36 +581,21 @@
         (initialRequestTime <= forwardRequestTime) &&
         (forwardRequestTime <= firstResponseTime) &&
         (firstResponseTime <= completionTime)) {
-        m_dirIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
+        m_dirIssueToInitialRequestHistogram.sample(initialRequestTime - issuedTime);
         
-        m_dirInitialRequestToForwardRequestHistogram.add(forwardRequestTime - 
+        m_dirInitialRequestToForwardRequestHistogram.sample(forwardRequestTime -
                                                          initialRequestTime);
         
-        m_dirForwardRequestToFirstResponseHistogram.add(firstResponseTime - 
+        m_dirForwardRequestToFirstResponseHistogram.sample(firstResponseTime -
                                                         forwardRequestTime);
         
-        m_dirFirstResponseToCompleteHistogram.add(completionTime - 
+        m_dirFirstResponseToCompleteHistogram.sample(completionTime -
                                                   firstResponseTime);
     } else {
         m_dirIncompleteTimes++;
     }
 }
 
-// non-zero cycle prefetch request
-void
-Profiler::swPrefetchLatency(Time cycles, 
-                            RubyRequestType type,
-                            const GenericMachineType respondingMach)
-{
-    m_allSWPrefetchLatencyHistogram.add(cycles);
-    m_SWPrefetchLatencyHistograms[type].add(cycles);
-    m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles);
-    if (respondingMach == GenericMachineType_Directory ||
-        respondingMach == GenericMachineType_NUM) {
-        m_SWPrefetchL2MissLatencyHistogram.add(cycles);
-    }
-}
-
 // Helper function
 static double
 process_memory_total()
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh
--- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh
+++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.hh
@@ -50,8 +50,6 @@
                      GenericRequestType request_type);
 void profile_outstanding_persistent_request(int outstanding);
 void profile_outstanding_request(int outstanding);
-void profile_sharing(const Address& addr, AccessType type, NodeID requestor,
-                     const Set& sharers, const Set& owner);
 void profile_request(const std::string& L1CacheStateStr,
                      const std::string& L2CacheStateStr,
                      const std::string& directoryStateStr,
@@ -61,7 +59,6 @@
 void profile_token_retry(const Address& addr, AccessType type, int count);
 void profile_filter_action(int action);
 void profile_persistent_prediction(const Address& addr, AccessType type);
-void profile_average_latency_estimate(int latency);
 void profileMsgDelay(int virtualNetwork, int delayCycles);
 
 void profile_multicast_retry(const Address& addr, int count);
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc
--- a/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc
+++ b/src/mem/ruby/slicc_interface/RubySlicc_Profiler_interface.cc
@@ -64,20 +64,6 @@
 }
 
 void
-profile_average_latency_estimate(int latency)
-{
-    g_system_ptr->getProfiler()->profileAverageLatencyEstimate(latency);
-}
-
-void
-profile_sharing(const Address& addr, AccessType type, NodeID requestor,
-                const Set& sharers, const Set& owner)
-{
-    g_system_ptr->getProfiler()->
-        profileSharing(addr, type, requestor, sharers, owner);
-}
-
-void
 profileMsgDelay(int virtualNetwork, int delayCycles)
 {
     g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles);
diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh
--- a/src/mem/ruby/system/CacheMemory.hh
+++ b/src/mem/ruby/system/CacheMemory.hh
@@ -129,6 +129,7 @@
     void print(std::ostream& out) const;
     void printData(std::ostream& out) const;
 
+    void regStats() { m_profiler_ptr->regStats(); } // forwards to the profiler
     void clearStats() const;
     void printStats(std::ostream& out) const;
 
diff --git a/src/mem/ruby/system/MemoryControl.hh b/src/mem/ruby/system/MemoryControl.hh
--- a/src/mem/ruby/system/MemoryControl.hh
+++ b/src/mem/ruby/system/MemoryControl.hh
@@ -84,6 +84,7 @@
 
     void printConfig(std::ostream& out);
     void print(std::ostream& out) const;
+    void regStats() { m_profiler_ptr->regStats(); } // forwards to the profiler
     void clearStats() const;
     void printStats(std::ostream& out) const;
 
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -32,6 +32,7 @@
 #include <iostream>
 
 #include "base/hashmap.hh"
+#include "base/statistics.hh"
 #include "mem/protocol/GenericMachineType.hh"
 #include "mem/protocol/PrefetchBit.hh"
 #include "mem/protocol/RubyAccessMode.hh"
@@ -105,6 +106,7 @@
     bool empty() const;
 
     void print(std::ostream& out) const;
+    void regStats();
     void printStats(std::ostream& out) const;
     void checkCoherence(const Address& address);
 
@@ -147,10 +149,10 @@
     int m_outstanding_count;
     bool m_deadlock_check_scheduled;
 
-    int m_store_waiting_on_load_cycles;
-    int m_store_waiting_on_store_cycles;
-    int m_load_waiting_on_store_cycles;
-    int m_load_waiting_on_load_cycles;
+    Stats::Scalar m_store_waiting_on_load_cycles;
+    Stats::Scalar m_store_waiting_on_store_cycles;
+    Stats::Scalar m_load_waiting_on_store_cycles;
+    Stats::Scalar m_load_waiting_on_load_cycles;
 
     bool m_usingNetworkTester;
 
@@ -166,6 +168,13 @@
     };
 
     SequencerWakeupEvent deadlockCheckEvent;
+
+    // Statistics
+    Stats::Histogram m_requests_hist;
+    std::vector< Stats::Histogram* > m_miss_latency_by_request_type_hist;
+    std::vector< Stats::Histogram* > m_miss_latency_by_mach_type_hist;
+    std::vector< std::vector< Stats::Histogram* > > m_miss_latency_by_request_and_mach_type_hist;
+    Stats::Histogram m_miss_latency_hist;
 };
 
 inline std::ostream&
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -138,17 +138,76 @@
 }
 
 void
+Sequencer::regStats()
+{
+  // Every stat registered here is named "ruby.sequencer" + m_version + suffix.
+  stringstream prefix_stream;
+  prefix_stream << "ruby.sequencer" << m_version;
+  const std::string prefix = prefix_stream.str();
+
+  m_requests_hist
+    .init(50)
+    .name(prefix + ".outstandingRequests")
+    .desc("Histogram of the number of outstanding requests per cycle");
+
+  // One miss latency histogram per request type.
+  m_miss_latency_by_request_type_hist.resize(RubyRequestType_NUM);
+  for (int req = 0; req < m_miss_latency_by_request_type_hist.size(); req++) {
+    Stats::Histogram* hist = new Stats::Histogram;
+    hist->init(200)
+      .name(prefix + "." + RubyRequestType_to_string((RubyRequestType)req) + ".reqMissLatency")
+      .desc("Histogram of miss latencies for this request type");
+    m_miss_latency_by_request_type_hist[req] = hist;
+  }
+  // NOTE(review): the Profiler histograms these replace were sized
+  // GenericMachineType_NUM+1; confirm mach never equals GenericMachineType_NUM.
+  m_miss_latency_by_mach_type_hist.resize(GenericMachineType_NUM);
+  for (int mach = 0; mach < m_miss_latency_by_mach_type_hist.size(); mach++) {
+    Stats::Histogram* hist = new Stats::Histogram;
+    hist->init(200)
+      .name(prefix + "." + GenericMachineType_to_string((GenericMachineType)mach) + ".machMissLatency")
+      .desc("Histogram of miss latencies for this machine type");
+    m_miss_latency_by_mach_type_hist[mach] = hist;
+  }
+  // Histograms indexed [request type][machine type].
+  m_miss_latency_by_request_and_mach_type_hist.resize(RubyRequestType_NUM);
+  for (int req = 0; req < m_miss_latency_by_request_and_mach_type_hist.size(); req++) {
+    std::vector<Stats::Histogram*>& row = m_miss_latency_by_request_and_mach_type_hist[req];
+    row.resize(GenericMachineType_NUM);
+    for (int mach = 0; mach < row.size(); mach++) {
+      row[mach] = new Stats::Histogram;
+      row[mach]
+        ->init(200)
+        .name(prefix +
+              "." + GenericMachineType_to_string((GenericMachineType)mach) +
+              "." + RubyRequestType_to_string((RubyRequestType)req) +
+              ".missLatency")
+        .desc("Histogram of miss latencies");
+    }
+  }
+  m_miss_latency_hist
+    .init(200)
+    .name(prefix + ".missLatency")
+    .desc("Histogram of miss latencies");
+
+  // Scalar counters; see each desc() for what is counted.
+  m_store_waiting_on_load_cycles
+    .name(prefix + ".store_waiting_on_load_cycles")
+    .desc("Number of cycles a store spent waiting on a load");
+  m_store_waiting_on_store_cycles
+    .name(prefix + ".store_waiting_on_store_cycles")
+    .desc("Number of cycles a store spent waiting on a store");
+  m_load_waiting_on_load_cycles
+    .name(prefix + ".load_waiting_on_load_cycles")
+    .desc("Number of cycles a load spent waiting on a load");
+  m_load_waiting_on_store_cycles
+    .name(prefix + ".load_waiting_on_store_cycles")
+    .desc("Number of cycles a load spent waiting on a store");
+}
+
+void
 Sequencer::printStats(ostream & out) const
 {
-    out << "Sequencer: " << m_name << endl
-        << "  store_waiting_on_load_cycles: "
-        << m_store_waiting_on_load_cycles << endl
-        << "  store_waiting_on_store_cycles: "
-        << m_store_waiting_on_store_cycles << endl
-        << "  load_waiting_on_load_cycles: "
-        << m_load_waiting_on_load_cycles << endl
-        << "  load_waiting_on_store_cycles: "
-        << m_load_waiting_on_store_cycles << endl;
 }
 
 void
@@ -267,7 +326,7 @@
         m_outstanding_count++;
     }
 
-    g_system_ptr->getProfiler()->sequencerRequests(m_outstanding_count);
+    m_requests_hist.sample(m_outstanding_count);
 
     total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size();
     assert(m_outstanding_count == total_outstanding);
@@ -482,7 +541,11 @@
 
     // Profile the miss latency for all non-zero demand misses
     if (miss_latency != 0) {
-        g_system_ptr->getProfiler()->missLatency(miss_latency, type, mach);
+
+      m_miss_latency_by_request_type_hist[type]->sample(miss_latency);
+      m_miss_latency_by_mach_type_hist[mach]->sample(miss_latency);
+      m_miss_latency_by_request_and_mach_type_hist[type][mach]->sample(miss_latency);
+      m_miss_latency_hist.sample(miss_latency);
 
         if (mach == GenericMachineType_L1Cache_wCC) {
             g_system_ptr->getProfiler()->missLatencyWcc(issued_time,
diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc
--- a/src/mem/ruby/system/System.cc
+++ b/src/mem/ruby/system/System.cc
@@ -97,6 +97,7 @@
 void
 RubySystem::init()
 {
+    m_profiler_ptr->registerStats(); // allocates the stats clearStats() resets
     m_profiler_ptr->clearStats();
 }
 
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -222,6 +222,7 @@
 #include <sstream>
 #include <string>
 
+#include "base/statistics.hh"
 #include "mem/protocol/${ident}_ProfileDumper.hh"
 #include "mem/protocol/${ident}_Profiler.hh"
 #include "mem/protocol/TransitionResult.hh"
@@ -252,6 +253,7 @@
     $c_ident(const Params *p);
     static int getNumControllers();
     void init();
+    void regStats();
     MessageBuffer* getMandatoryQueue() const;
     const int & getVersion() const;
     const std::string toString() const;
@@ -334,6 +336,10 @@
 ${ident}_Profiler m_profiler;
 static int m_num_controllers;
 
+// Statistics
+
+Stats::Scalar m_busy_count;
+
 // Internal functions
 ''')
 
@@ -678,6 +684,20 @@
             mq_ident = "NULL"
 
         code('''
+void
+$c_ident::regStats()
+{
+    // Register the transition profiler's stats, then this controller's own.
+    m_profiler.regStats();
+
+    std::stringstream stat_name;
+    stat_name << "ruby.${c_ident}" << m_version << ".busy_count";
+    m_busy_count
+      .name(stat_name.str())
+      .desc("The number of cycles this controller was busy")
+      ;
+}
+
 int
 $c_ident::getNumControllers()
 {
@@ -845,6 +865,7 @@
 
         code('''
     m_profiler.clearStats();
+    m_busy_count = 0;
 }
 ''')
 
@@ -990,7 +1011,7 @@
         assert(counter <= m_transitions_per_cycle);
         if (counter == m_transitions_per_cycle) {
             // Count how often we are fully utilized
-            g_system_ptr->getProfiler()->controllerBusy(m_machineID);
+            m_busy_count++;
 
             // Wakeup in another cycle and try again
             g_eventQueue_ptr->scheduleEvent(this, 1);
@@ -1360,6 +1381,7 @@
 #include <cassert>
 #include <iostream>
 
+#include "base/statistics.hh"
 #include "mem/protocol/${ident}_Event.hh"
 #include "mem/protocol/${ident}_State.hh"
 #include "mem/ruby/common/Global.hh"
@@ -1375,10 +1397,11 @@
     bool isPossible(${ident}_State state, ${ident}_Event event);
     uint64 getTransitionCount(${ident}_State state, ${ident}_Event event);
     void clearStats();
+    void regStats();
 
   private:
-    int m_counters[${ident}_State_NUM][${ident}_Event_NUM];
-    int m_event_counters[${ident}_Event_NUM];
+    Stats::Scalar *m_counters[${ident}_State_NUM][${ident}_Event_NUM];
+    Stats::Scalar m_event_counters[${ident}_Event_NUM];
     bool m_possible[${ident}_State_NUM][${ident}_Event_NUM];
     int m_version;
 };
@@ -1397,6 +1420,7 @@
 
 #include <cassert>
 
+#include <sstream>
 #include "mem/protocol/${ident}_Profiler.hh"
 
 ${ident}_Profiler::${ident}_Profiler()
@@ -1404,7 +1428,6 @@
     for (int state = 0; state < ${ident}_State_NUM; state++) {
         for (int event = 0; event < ${ident}_Event_NUM; event++) {
             m_possible[state][event] = false;
-            m_counters[state][event] = 0;
         }
     }
     for (int event = 0; event < ${ident}_Event_NUM; event++) {
@@ -1419,11 +1442,39 @@
 }
 
 void
+${ident}_Profiler::regStats()
+{
+    // Per-event totals, named ruby.(ident)_Controller(version).(event).
+    for (int event = 0; event < ${ident}_Event_NUM; event++) {
+      std::stringstream name;
+      name << "ruby.${ident}_Controller" << m_version << "." << ${ident}_Event_to_string((${ident}_Event)event);
+      m_event_counters[event].name(name.str()).desc("Number of events that occurred");
+    }
+
+    // Per-transition counters are only allocated for transitions marked possible.
+    for (int state = 0; state < ${ident}_State_NUM; state++) {
+       for (int event = 0; event < ${ident}_Event_NUM; event++) {
+            // The constructor leaves this array unset; a null entry keeps a stray access off a wild pointer.
+            m_counters[state][event] = 0;
+            if (!m_possible[state][event])
+                continue;
+            std::stringstream name;
+            name << "ruby.${ident}_Controller" << m_version << ".";
+            name << ${ident}_State_to_string((${ident}_State) state) << "_";
+            name << ${ident}_Event_to_string((${ident}_Event) event);
+            m_counters[state][event] = new Stats::Scalar;
+            m_counters[state][event]->name(name.str()).desc("Number of transitions that occurred");
+        }
+    }
+}
+
+void
 ${ident}_Profiler::clearStats()
 {
     for (int state = 0; state < ${ident}_State_NUM; state++) {
         for (int event = 0; event < ${ident}_Event_NUM; event++) {
-            m_counters[state][event] = 0;
+            if (m_possible[state][event])
+               (*(m_counters[state][event])) = 0;
         }
     }
 
@@ -1435,7 +1486,7 @@
 ${ident}_Profiler::countTransition(${ident}_State state, ${ident}_Event event)
 {
     assert(m_possible[state][event]);
-    m_counters[state][event]++;
+    (*(m_counters[state][event]))++;
     m_event_counters[event]++;
 }
 void
@@ -1448,7 +1499,7 @@
 uint64
 ${ident}_Profiler::getEventCount(${ident}_Event event)
 {
-    return m_event_counters[event];
+    return m_event_counters[event].value();
 }
 
 bool
@@ -1461,7 +1512,9 @@
 ${ident}_Profiler::getTransitionCount(${ident}_State state,
                                       ${ident}_Event event)
 {
-    return m_counters[state][event];
+    // Counters exist only for possible transitions (see regStats());
+    // report 0 for the rest instead of dereferencing an unset pointer.
+    return m_possible[state][event] ? m_counters[state][event]->value() : 0;
 }
 
 ''')