Blame dnf/match_counter.py

Packit 6f3914
# match_counter.py
Packit 6f3914
# Implements class MatchCounter.
Packit 6f3914
#
Packit 6f3914
# Copyright (C) 2012-2016 Red Hat, Inc.
Packit 6f3914
#
Packit 6f3914
# This copyrighted material is made available to anyone wishing to use,
Packit 6f3914
# modify, copy, or redistribute it subject to the terms and conditions of
Packit 6f3914
# the GNU General Public License v.2, or (at your option) any later version.
Packit 6f3914
# This program is distributed in the hope that it will be useful, but WITHOUT
Packit 6f3914
# ANY WARRANTY expressed or implied, including the implied warranties of
Packit 6f3914
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
Packit 6f3914
# Public License for more details.  You should have received a copy of the
Packit 6f3914
# GNU General Public License along with this program; if not, write to the
Packit 6f3914
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
Packit 6f3914
# 02110-1301, USA.  Any Red Hat trademarks that are incorporated in the
Packit 6f3914
# source code or documentation are not subject to the GNU General Public
Packit 6f3914
# License and may only be used or replicated with the express permission of
Packit 6f3914
# Red Hat, Inc.
Packit 6f3914
#
Packit 6f3914
Packit 6f3914
from __future__ import absolute_import
Packit 6f3914
from __future__ import print_function
Packit 6f3914
from __future__ import unicode_literals
Packit 6f3914
from functools import reduce
Packit 6f3914
Packit 6f3914
WEIGHTS = {
Packit 6f3914
    'name'		: 7,
Packit 6f3914
    'summary'		: 4,
Packit 6f3914
    'description'	: 2,
Packit 6f3914
    'url'		: 1,
Packit 6f3914
    }
Packit 6f3914
Packit 6f3914
Packit 6f3914
def _canonize_string_set(sset, length):
Packit 6f3914
    """ Ordered sset with empty strings prepended. """
Packit 6f3914
    current = len(sset)
Packit 6f3914
    l = [''] * (length - current) + sorted(sset)
Packit 6f3914
    return l
Packit 6f3914
Packit 6f3914
Packit 6f3914
class MatchCounter(dict):
Packit 6f3914
    """Map packages to which of their attributes matched in a search against
Packit 6f3914
    what values.
Packit 6f3914
Packit 6f3914
    The mapping is: ``package -> [(key, needle), ... ]``.
Packit 6f3914
Packit 6f3914
    """
Packit 6f3914
Packit 6f3914
    @staticmethod
Packit 6f3914
    def _eval_weights(pkg, matches):
Packit 6f3914
        # how much is each match worth and return their sum:
Packit 6f3914
        def weight(match):
Packit 6f3914
            key = match[0]
Packit 6f3914
            needle = match[1]
Packit 6f3914
            haystack = getattr(pkg, key)
Packit 6f3914
            coef = 2 if haystack == needle else 1
Packit 6f3914
            return coef * WEIGHTS[key]
Packit 6f3914
Packit 6f3914
        return sum(map(weight, matches))
Packit 6f3914
Packit 6f3914
    @staticmethod
Packit 6f3914
    def _eval_distance(pkg, matches):
Packit 6f3914
        dist = 0
Packit 6f3914
        for (key, needle) in matches:
Packit 6f3914
            haystack = getattr(pkg, key)
Packit 6f3914
            dist += len(haystack) - len(needle)
Packit 6f3914
        return dist
Packit 6f3914
Packit 6f3914
    def _key_func(self):
Packit 6f3914
        """Get the key function used for sorting matches.
Packit 6f3914
Packit 6f3914
        It is not enough to only look at the matches and order them by the sum
Packit 6f3914
        of their weighted hits. In case this number is the same we have to
Packit 6f3914
        ensure that the same matched needles are next to each other in the
Packit 6f3914
        result.
Packit 6f3914
Packit 6f3914
        Returned function is:
Packit 6f3914
        pkg -> (weights_sum, canonized_needles_set, -distance)
Packit 6f3914
Packit 6f3914
        """
Packit 6f3914
        max_length = self._max_needles()
Packit 6f3914
        def get_key(pkg):
Packit 6f3914
            return (self._eval_weights(pkg, self[pkg]),
Packit 6f3914
                    _canonize_string_set(self.matched_needles(pkg), max_length),
Packit 6f3914
                    -self._eval_distance(pkg, self[pkg]))
Packit 6f3914
        return get_key
Packit 6f3914
Packit 6f3914
    def _max_needles(self):
Packit 6f3914
        """Return the max count of needles of all packages."""
Packit 6f3914
        if self:
Packit 6f3914
            return max(len(self.matched_needles(pkg)) for pkg in self)
Packit 6f3914
        return 0
Packit 6f3914
Packit 6f3914
    def add(self, pkg, key, needle):
Packit 6f3914
        self.setdefault(pkg, []).append((key, needle))
Packit 6f3914
Packit 6f3914
    def dump(self):
Packit 6f3914
        for pkg in self:
Packit 6f3914
            print('%s\t%s' % (pkg, self[pkg]))
Packit 6f3914
Packit 6f3914
    def matched_haystacks(self, pkg):
Packit 6f3914
        return set(getattr(pkg, m[0]) for m in self[pkg])
Packit 6f3914
Packit 6f3914
    def matched_keys(self, pkg):
Packit 6f3914
        # return keys in the same order they appear in the list
Packit 6f3914
        result = []
Packit 6f3914
        for i in self[pkg]:
Packit 6f3914
            if i[0] in result:
Packit 6f3914
                continue
Packit 6f3914
            result.append(i[0])
Packit 6f3914
        return result
Packit 6f3914
Packit 6f3914
    def matched_needles(self, pkg):
Packit 6f3914
        return set(m[1] for m in self[pkg])
Packit 6f3914
Packit 6f3914
    def sorted(self, reverse=False, limit_to=None):
Packit 6f3914
        keys = limit_to if limit_to else self.keys()
Packit 6f3914
        return sorted(keys, key=self._key_func(), reverse=reverse)
Packit 6f3914
Packit 6f3914
    def total(self):
Packit 6f3914
        return reduce(lambda total, pkg: total + len(self[pkg]), self, 0)