diff -r f91706cade05 -r a625622cd8a1 src/python/m5/stats/__init__.py
--- a/src/python/m5/stats/__init__.py	Tue Jan 29 10:39:43 2013 +0000
+++ b/src/python/m5/stats/__init__.py	Tue Jan 29 10:40:02 2013 +0000
@@ -175,6 +175,21 @@
 
     return d
 
+## @brief Convert a sparse histogram from the C++ based map to a python object
+# @param data The data from C++.
+# @return A python object (sparse_hist) which stores the data.
+def convert_sparse_hist(data):
+    sparse_hist = attrdict()
+
+    # The sampled data is stored as a C++ map. Convert it to a python dict.
+    sparse_hist.storage = {}
+    for key in data.cmap.keys():
+        sparse_hist.storage[key] = data.cmap[key]
+
+    sparse_hist.samples = data.samples
+
+    return sparse_hist
+
 ## @brief Convert a stat to a python stat.
 # @param stat The input statistic.
 def convert_value(stat):
@@ -189,6 +204,8 @@
         return [ convert_dist(d) for d in stat.data ]
     if stat_type == 'Vector2dInfo':
         return [ v for v in stat.cvec ]
+    if stat_type == 'SparseHistInfo':
+        return convert_sparse_hist(stat.data)
 
     return None
 
@@ -232,6 +249,7 @@
         'VectorDistInfo' : info.VectorDist,
         'Vector2dInfo' : info.Vector2d,
         'FormulaInfo' : info.Formula,
+        'SparseHistInfo' : info.SparseHist,
         }
 
     ids = {}
diff -r f91706cade05 -r a625622cd8a1 src/python/m5/stats/display.py
--- a/src/python/m5/stats/display.py	Tue Jan 29 10:39:43 2013 +0000
+++ b/src/python/m5/stats/display.py	Tue Jan 29 10:40:02 2013 +0000
@@ -359,6 +359,27 @@
         p.value = total
         p.display(out)
 
+## @brief Display a sparse histogram.
+# Begin by printing the number of samples. This is followed by printing the
+# distribution itself.
+class SparseHistDisplay(DisplayBase):
+    ## @brief Print the data.
+    # @param out The output to which to print the data.
+    def display(self, out):
+        v = self.value
+
+        p = Display(self)
+
+        p.name = "%s::samples" % self.name
+        p.value = v.samples
+        p.display(out)
+
+        # Sort the dictionary by key to ensure that it is in the right order.
+        for k, val in sorted(v.storage.iteritems()):
+            p.name = "%s::%d" % (self.name, k)
+            p.value = val
+            p.display(out)
+
 # register_display is used to link the display function to the type of
 # statistic.
 register_display = registry()
@@ -409,6 +430,14 @@
     p = DistDisplay(stat)
     p.display(out)
 
+@register_display(SparseHist)
+## @brief Function to display a sparse histogram.
+# @param stat The stat to output.
+# @param out The output to which the stats should be printed.
+def statistic_display(stat, out):
+    p = SparseHistDisplay(stat)
+    p.display(out)
+
 @register_display(VectorDist)
 ## @brief Function to display a vector distribution.
 # @param stat The stat to output.
diff -r f91706cade05 -r a625622cd8a1 src/python/m5/stats/info.py
--- a/src/python/m5/stats/info.py	Tue Jan 29 10:39:43 2013 +0000
+++ b/src/python/m5/stats/info.py	Tue Jan 29 10:40:02 2013 +0000
@@ -990,6 +990,96 @@
         self.value /= other
         return self
 
+## @brief Class to store a Sparse Histogram
+class SparseHist(Statistic):
+    ## @brief Clear the data stored
+    def clear(self):
+        self.samples = 0
+        self.storage = {}
+
+    ## @brief Copy the sparse histogram. This determines the type of object,
+    # calls its constructor and copies the number of samples and the stored
+    # data into the new object (as a new dict, so the copy is independent).
+    # @return A copy of the sparse histogram.
+    def copy(self):
+        cls = type(self)
+        dup = cls()
+        dup.samples = self.samples
+        dup.storage = dict(self.storage)
+        return dup
+
+    ## @brief Determine if it is possible to compare this histogram to another
+    # This is accomplished by checking that they are both of the same type.
+    # @param other The object we are comparing to ourself.
+    # @return True if both histograms are of the same type, False otherwise.
+    def comparable(self, other):
+        return type(self) is type(other)
+
+    ## @brief Check that we are non-zero by determining if any samples have been
+    # stored.
+    # @return True if we have samples, False otherwise.
+    def __nonzero__(self):
+        return bool(self.samples)
+
+    ## @brief Determine if this histogram is equal to another histogram.
+    # Begin by checking if the two histograms are comparable, then check that
+    # the number of samples and the stored data are equal.
+    # @return True if they are equal, False otherwise.
+    def __eq__(self, other):
+        if not self.comparable(other):
+            return False
+
+        return (self.samples == other.samples and
+                self.storage == other.storage)
+
+    ## @brief Add another sparse histogram to this one, in place.
+    # Check that they are comparable, and then add the samples and the
+    # per-value counts, value by value.
+    # @param other The other sparse histogram.
+    def __iadd__(self, other):
+        assert self.comparable(other)
+
+        self.samples += other.samples
+
+        # Add the two dicts
+        # Get all elements in other.storage, see if they exist in self.storage
+        # and add them, storing them in self.storage.
+        for i in other.storage:
+            self.storage[i] = self.storage.get(i, 0) + other.storage[i]
+
+        return self
+
+    ## @brief Subtract another histogram from this one (in place). Check
+    # that they are comparable, then subtract the samples and the counts.
+    # @param other The other sparse histogram.
+    def __sub__(self, other):
+        assert self.comparable(other)
+
+        self.samples -= other.samples
+
+        # NOTE(review): this mutates and returns self, so "a - b" changes a;
+        # confirm that callers only ever use it as "a -= b".
+        for i in other.storage:
+            self.storage[i] = self.storage.get(i, 0) - other.storage[i]
+
+        return self
+
+    ## @brief Divide the histogram by a scalar, in place. A zero (falsy)
+    # divisor leaves the histogram untouched; otherwise the samples and every
+    # stored count are divided by it.
+    # @param other The scalar divisor.
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        self.samples /= other
+
+        for i in self.storage:
+            self.storage[i] /= other
+
+        return self
+
+
 __all__ = [ 'unproxy', 'scalar', 'vector', 'value', 'values', 'total', 'len',
             'Scalar', 'Vector', 'Formula', 'Deviation', 'Distribution',
-            'Histogram', 'Dist', 'VectorDist', 'Vector2d' ]
+            'Histogram', 'Dist', 'VectorDist', 'Vector2d', 'Statistic',
+            'SparseHist' ]
diff -r f91706cade05 -r a625622cd8a1 src/python/m5/stats/sql.py
--- a/src/python/m5/stats/sql.py	Tue Jan 29 10:39:43 2013 +0000
+++ b/src/python/m5/stats/sql.py	Tue Jan 29 10:40:02 2013 +0000
@@ -119,6 +119,15 @@
         Column('overflow', Float),
     )
 
+    # Stores sparse histograms.
+    sparse_hist_table = Table('sparseHistValue', metadata,
+        Column('id', Integer),
+        Column('dump', Integer),
+        Column('samples', Integer),
+        Column('elements', Binary),
+        Column('count', Binary),
+    )
+
     metadata.create_all()
 
 
@@ -254,6 +263,35 @@
                 self.samples, self.min, self.max, self.bucket, self.vector,
                 self.min_val, self.max_val, self.underflow, self.overflow)
 
+# @brief Class used to insert sparse histograms into the database.
+class SparseHistValueClass(Base):
+    __tablename__ = 'sparseHistValue'
+
+    id = Column(Integer, primary_key = True)
+    dump = Column(Integer)
+    samples = Column(Float)
+    elements = Column(Binary)
+    count = Column(Binary)
+
+    def __init__(self, id, dump, samples, storage):
+        self.id = id
+        self.dump = dump
+        self.samples = samples
+
+        import array
+
+        hist_elements = storage.keys()
+        element_array = array.array('f', hist_elements)
+        self.elements = element_array.tostring()
+
+        hist_count = storage.values()
+        count_array = array.array('i', hist_count)
+        self.count = count_array.tostring()
+
+    def __repr__(self):
+        return "<SparseHistValue('%d','%d','%s','%s','%s')>" % (self.id,
+                self.dump, self.samples, self.elements, self.count)
+
 
 # @brief Add the information about a stat.
 # @param name The name of the stat.
@@ -308,5 +346,11 @@
 
         session.add(temp)
 
+    elif stat.type == "SparseHistInfo":
+        data = stat.value
+        temp = SparseHistValueClass(id = stat.id, dump = dumpCount,
+                samples = data.samples, storage = data.storage)
+        session.add(temp)
+
     else:
-        panic("Unable to output stat %s. Unsupported stat type!", name)
+        panic("Unable to output stat %s. Unsupported stat type!", stat.name)
diff -r f91706cade05 -r a625622cd8a1 src/python/swig/stats.i
--- a/src/python/swig/stats.i	Tue Jan 29 10:39:43 2013 +0000
+++ b/src/python/swig/stats.i	Tue Jan 29 10:40:02 2013 +0000
@@ -126,11 +126,14 @@
 
 %include "base/stats/info.hh"
 
+%include std_map.i
+
 namespace std {
 %template(list_info) list<Stats::Info *>;
 %template(vector_double) vector<double>;
 %template(vector_string) vector<string>;
 %template(vector_DistData) vector<Stats::DistData>;
+%template(map_double_int) map<double, int>;
 }
 
 namespace Stats {
diff -r f91706cade05 -r a625622cd8a1 util/stats/extractSQLStats.py
--- a/util/stats/extractSQLStats.py	Tue Jan 29 10:39:43 2013 +0000
+++ b/util/stats/extractSQLStats.py	Tue Jan 29 10:40:02 2013 +0000
@@ -123,6 +123,9 @@
 
     # Stores distributions.
     distValueTable = Table('distValue', metadata, autoload=True)
+
+    # Stores sparse histograms.
+    sparseHistValueTable = Table('sparseHistValue', metadata, autoload=True)
 except:
     print "The input DB file is not a valid gem5 stats dump! Exiting..."
     sys.exit(1)
@@ -211,6 +214,48 @@
                 else:
                     print result.name + ":", ",".join(str(v) for v in temp)
 
+        elif item.type =="SparseHistInfo":
+            # For stat called item.name, get values and keys for dump
+            # options.dump. Need to join the statsTable and the
+            # sparseHistValueTable based on the id of the stat, then filter by
+            # name and index of the dump.
+            for result in (session.query(statsTable.join(sparseHistValueTable,
+                    statsTable.c.id==sparseHistValueTable.c.id))
+                    .filter(statsTable.c.id == sparseHistValueTable.c.id)
+                    .filter(sparseHistValueTable.c.dump == options.dump)
+                    .filter(statsTable.c.name == item.name)
+                    .all()):
+
+                # The values and keys are stored in binary, and therefore need
+                # to be extracted with the typecodes used when they were stored.
+                import array
+
+                elements_array = array.array('f')
+                elements_array.fromstring(result.elements)
+
+                count_array = array.array('i')
+                count_array.fromstring(result.count)
+
+                if len(elements_array) != len(count_array):
+                    print "Unable to retrieve the sparse histogram from " + \
+                            "the database. Exiting..."
+                    sys.exit(1)
+
+                data = {}
+                for element, count in zip(elements_array, count_array):
+                    data[element] = count
+
+                # If the user has specified an output file, write to that.
+                # Otherwise, write to the screen.
+                if options.output:
+                    outputFile.write(result.name + ":\n")
+                    for k, val in sorted(data.iteritems()):
+                        outputFile.write(str(k) + ':' + str(val) + ',')
+                else:
+                    print result.name + ":"
+                    for k, val in sorted(data.iteritems()):
+                        print str(k) + ':' + str(val) + ','
+
         else:
             print "Unhandled type: " + item.type
             sys.exit(1)