Blame benchtests/scripts/import_bench.py

Packit Service 1860ba
#!/usr/bin/python3
Packit 6c4009
# Copyright (C) 2015-2018 Free Software Foundation, Inc.
Packit 6c4009
# This file is part of the GNU C Library.
Packit 6c4009
#
Packit 6c4009
# The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
# modify it under the terms of the GNU Lesser General Public
Packit 6c4009
# License as published by the Free Software Foundation; either
Packit 6c4009
# version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
#
Packit 6c4009
# The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
# but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
# Lesser General Public License for more details.
Packit 6c4009
#
Packit 6c4009
# You should have received a copy of the GNU Lesser General Public
Packit 6c4009
# License along with the GNU C Library; if not, see
Packit 6c4009
# <http://www.gnu.org/licenses/>.
Packit 6c4009
"""Functions to import benchmark data and process it"""
Packit 6c4009
Packit 6c4009
import json
Packit 6c4009
try:
Packit 6c4009
    import jsonschema as validator
Packit 6c4009
except ImportError:
Packit 6c4009
    print('Could not find jsonschema module.')
Packit 6c4009
    raise
Packit 6c4009
Packit 6c4009
Packit 6c4009
def mean(lst):
    """Return the arithmetic mean of a list of numbers

    This is a deliberately tiny hand-written helper; the original authors
    note that the numpy average function performed poorly for this use.

    Args:
        lst: The list of numbers to average.  It must not be empty, since
             the sum is divided by the number of elements.
    Return:
        The sum of the members divided by how many members there are.
    """
    total = sum(lst)
    count = len(lst)
    return total / count
Packit 6c4009
Packit 6c4009
Packit 6c4009
def split_list(bench, func, var):
    """ Split the list into a smaller set of more distinct points

    Group together points such that the difference between the smallest
    point and the mean is less than 1/3rd of the mean.  This means that
    the mean is at most 1.5x the smallest member of that group.

    mean - xmin < mean / 3
    i.e. 2 * mean / 3 < xmin
    i.e. mean < 3 * xmin / 2

    For an evenly distributed group, the largest member will be less than
    twice the smallest member of the group.
    Derivation:

    An evenly distributed series would be xmin, xmin + d, xmin + 2d...

    mean = (2 * n * xmin + n * (n - 1) * d) / (2 * n)
    and max element is xmin + (n - 1) * d

    Now, mean < 3 * xmin / 2

    3 * xmin > 2 * mean
    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
    n * xmin > n * (n - 1) * d
    xmin > (n - 1) * d
    2 * xmin > xmin + (n-1) * d
    2 * xmin > xmax

    Hence, proved.

    Similarly, it is trivial to prove that for a similar aggregation by using
    the maximum element, the maximum element in the group must be at most 4/3
    times the mean.

    The implementation below uses the maximum-based form: working from the
    top of the list downwards, it takes the longest tail whose mean is more
    than 0.75 times the largest remaining element, replaces that tail by its
    mean and repeats on what is left.  The last element of the list is used
    as the largest one, so the timings are expected to be sorted in ascending
    order.

    The timings list stored in bench is replaced by the list of group means,
    in ascending group order.  Nothing is returned.

    Args:
        bench: The benchmark object
        func: The function name
        var: The function variant name
    """
    means = []
    lst = bench['functions'][func][var]['timings']
    while lst:
        last = len(lst) - 1
        threshold = 0.75 * lst[last]
        for i in range(last + 1):
            tail = lst[i:]
            # Same arithmetic as the module's mean() helper.
            avg = sum(tail) / len(tail)
            # A single positive element always satisfies the threshold test.
            # For a largest element that is zero or negative it never does,
            # so close the group at the last index unconditionally;
            # otherwise the list would never shrink and the outer loop
            # would not terminate.
            if avg > threshold or i == last:
                means.append(avg)
                lst = lst[:i]
                break
    # Groups were collected from the largest values downwards.
    means.reverse()
    bench['functions'][func][var]['timings'] = means
Packit 6c4009
Packit 6c4009
Packit 6c4009
def do_for_all_timings(bench, callback):
    """Invoke a callback for every function variant that has timings

    Walks bench['functions'], and for each function walks its variants.
    Variants without a 'timings' key are skipped; for all others the
    callback is called as callback(bench, func, variant).

    Args:
        bench: The benchmark object
        callback: The callback function, taking the benchmark object, the
                  function name and the variant name, in that order.
    """
    functions = bench['functions']
    for func in functions:
        variants = functions[func]
        for variant in variants:
            if 'timings' in variants[variant].keys():
                callback(bench, func, variant)
Packit 6c4009
Packit 6c4009
Packit 6c4009
def compress_timings(points):
    """Replace clusters of close timing values by their mean

    Applies split_list to every function variant in the benchmark object
    that has a timings list; see split_list for how the clusters are
    chosen.  The benchmark object is modified in place.

    Args:
        points: The set of points.
    """
    do_for_all_timings(bench=points, callback=split_list)
Packit 6c4009
Packit 6c4009
Packit 6c4009
def parse_bench(filename, schema_filename):
    """Load a benchmark output file and check it against a schema

    The schema file is read first, then the benchmark file; both are parsed
    as JSON.  The benchmark data is passed to the validator together with
    the schema, and afterwards every timings list found in it is sorted in
    place in ascending order.

    Args:
        filename: Name of the benchmark output file.
        schema_filename: Name of the JSON schema file used for validation.
    Return:
        The bench dictionary.
    """
    def sort_timings(data, func, var):
        # list.sort() orders the timings in place, smallest first.
        data['functions'][func][var]['timings'].sort()

    with open(schema_filename, 'r') as schemafile:
        schema = json.load(schemafile)

    with open(filename, 'r') as benchfile:
        bench = json.load(benchfile)

    validator.validate(bench, schema)
    do_for_all_timings(bench, sort_timings)
    return bench