Blame benchtests/scripts/compare_bench.py

Packit Service 3ab402
#!/usr/bin/python3
Packit 6c4009
# Copyright (C) 2015-2018 Free Software Foundation, Inc.
Packit 6c4009
# This file is part of the GNU C Library.
Packit 6c4009
#
Packit 6c4009
# The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
# modify it under the terms of the GNU Lesser General Public
Packit 6c4009
# License as published by the Free Software Foundation; either
Packit 6c4009
# version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
#
Packit 6c4009
# The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
# but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
# Lesser General Public License for more details.
Packit 6c4009
#
Packit 6c4009
# You should have received a copy of the GNU Lesser General Public
Packit 6c4009
# License along with the GNU C Library; if not, see
Packit 6c4009
# <http://www.gnu.org/licenses/>.
Packit 6c4009
"""Compare two benchmark results
Packit 6c4009
Packit 6c4009
Given two benchmark result files and a threshold, this script compares the
Packit 6c4009
benchmark results and flags differences in performance beyond a given
Packit 6c4009
threshold.
Packit 6c4009
"""
Packit 6c4009
import sys
Packit 6c4009
import os
Packit 6c4009
import pylab
Packit 6c4009
import import_bench as bench
Packit 6c4009
import argparse
Packit 6c4009
Packit 6c4009
def do_compare(func, var, tl1, tl2, par, threshold):
    """Compare one of the aggregate measurements

    Helper function to compare one of the aggregate measurements of a function
    variant.  The difference is expressed as a percentage of the first
    measurement; when it exceeds the threshold a line is printed, prefixed
    with '+++' if the second value is lower than the first and '---'
    otherwise.

    Args:
        func: Function name
        var: Function variant name
        tl1: The first timings list
        tl2: The second timings list
        par: The aggregate to measure
        threshold: The threshold for differences, beyond which the script should
        print a warning.
    """
    before = tl1[par]
    after = tl2[par]

    if before == 0:
        # A percentage relative to a zero baseline is undefined.  Treat an
        # unchanged zero as "no difference" and anything else as an
        # unbounded change so that it is always reported instead of
        # crashing with ZeroDivisionError.
        d = 0.0 if after == 0 else float('inf')
    else:
        d = abs(after - before) * 100 / before

    if d > threshold:
        ind = '+++' if before > after else '---'
        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
                (ind, func, var, par, d, before, after))
Packit 6c4009
Packit 6c4009
Packit 6c4009
def compare_runs(pts1, pts2, threshold):
    """Compare two benchmark runs

    For every function variant in the first run, compare the 'min' and
    'mean' aggregates against the second run and then, when both runs carry
    detailed timings, compare those pairwise.  Differences beyond the
    threshold are printed.

    Args:
        pts1: Timing data from first machine
        pts2: Timing data from second machine
        threshold: The percentage difference beyond which a change is
        reported.
    """

    # XXX We assume that the two benchmarks have identical functions and
    # variants.  We cannot compare two benchmarks that may have different
    # functions or variants.  Maybe that is something for the future.
    for func in pts1['functions'].keys():
        for var in pts1['functions'][func].keys():
            tl1 = pts1['functions'][func][var]
            tl2 = pts2['functions'][func][var]

            # Compare the consolidated numbers
            # do_compare(func, var, tl1, tl2, 'max', threshold)
            do_compare(func, var, tl1, tl2, 'min', threshold)
            do_compare(func, var, tl1, tl2, 'mean', threshold)

            # Skip over to the next variant or function if there is no detailed
            # timing info for the function variant.  This must not return:
            # later variants and functions still need to be compared.
            if 'timings' not in tl1.keys() or 'timings' not in tl2.keys():
                continue

            # If two lists do not have the same length then it is likely that
            # the performance characteristics of the function have changed.
            # XXX: It is also likely that there was some measurement that
            # strayed outside the usual range.  Such outliers should not
            # happen on an idle machine with identical hardware and
            # configuration, but ideal environments are hard to come by.
            if len(tl1['timings']) != len(tl2['timings']):
                print('* %s(%s): Timing characteristics changed' %
                        (func, var))
                print('\tBefore: [%s]' %
                        ', '.join([str(x) for x in tl1['timings']]))
                print('\tAfter: [%s]' %
                        ', '.join([str(x) for x in tl2['timings']]))
                continue

            # Report the numbers whose differences cross the threshold we
            # have set.
            for t1, t2 in zip(tl1['timings'], tl2['timings']):
                if t1 == 0:
                    # No percentage can be taken of a zero baseline; an
                    # unchanged zero is no difference, anything else is
                    # always reported.
                    d = 0.0 if t2 == 0 else float('inf')
                else:
                    d = abs(t2 - t1) * 100 / t1

                if d > threshold:
                    if t2 > t1:
                        ind = '-'
                    else:
                        ind = '+'

                    print("%s %s(%s): (%.2lf%%) from %g to %g" %
                            (ind, func, var, d, t1, t2))
Packit 6c4009
Packit 6c4009
Packit 6c4009
def plot_graphs(bench1, bench2):
    """Plot graphs for functions

    Make scatter plots for the functions and their variants.  Each plot is
    written to '<func>-<var>.png' (or '<func>.png' when the variant name is
    empty) with the first run in red and the second in green.  A variant is
    skipped when either run has no detailed timings for it, or the timings
    are empty.

    Args:
        bench1: Set of points from the first machine
        bench2: Set of points from the second machine.
    """
    for func in bench1['functions'].keys():
        for var in bench1['functions'][func].keys():
            # No point trying to print a graph if there are no detailed
            # timings.  Both runs are checked: a variant lacking timings in
            # only the second run would otherwise raise KeyError, and an
            # empty list would divide by zero when sizing the markers.
            series = [run['functions'][func][var].get('timings')
                      for run in (bench1, bench2)]
            if any(t is None or len(t) == 0 for t in series):
                print('Skipping graph for %s(%s)' % (func, var))
                continue

            pylab.clf()
            pylab.ylabel('Time (cycles)')

            # First set of points in red, second set in green.
            for timings, color in zip(series, ('r', 'g')):
                length = len(timings)
                X = [float(x) for x in range(length)]
                # The marker size shrinks as the number of points grows.
                lines = pylab.scatter(X, timings, 1.5 + 100 / length)
                pylab.setp(lines, 'color', color)

            if var:
                filename = "%s-%s.png" % (func, var)
            else:
                filename = "%s.png" % func
            print('Writing out %s' % filename)
            pylab.savefig(filename)
Packit 6c4009
Packit 6c4009
def main(bench1, bench2, schema, threshold):
    """Parse two benchmark outputs, plot them and compare them

    Args:
        bench1: Path to the first benchmark output
        bench2: Path to the second benchmark output
        schema: Path to the JSON schema passed to the parser
        threshold: The threshold passed on to compare_runs
    """
    # Parse the first file, then the second, against the same schema.
    first, second = (bench.parse_bench(path, schema)
                     for path in (bench1, bench2))

    # Graphs are drawn before the timings are compressed.
    plot_graphs(first, second)

    for run in (first, second):
        bench.compress_timings(run)

    compare_runs(first, second, threshold)
Packit 6c4009
Packit 6c4009
Packit 6c4009
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Take two benchmark and compare their timings.')

    # Required parameters
    parser.add_argument('bench1', help='First bench to compare')
    parser.add_argument('bench2', help='Second bench to compare')

    # Optional parameters
    parser.add_argument('--schema',
                        default=os.path.join(os.path.dirname(os.path.realpath(__file__)),'benchout.schema.json'),
                        help='JSON file to validate source/dest files (default: %(default)s)')
    # The threshold is compared numerically against percentage differences,
    # so it must be parsed as a float; without an explicit type a value given
    # on the command line would arrive as a string and the comparison would
    # raise TypeError.
    parser.add_argument('--threshold', default=10.0, type=float,
                        help='Only print those with equal or higher threshold (default: %(default)s)')

    args = parser.parse_args()

    main(args.bench1, args.bench2, args.schema, args.threshold)