# match_counter.py
# Implements class MatchCounter.
#
# Copyright (C) 2012-2016 Red Hat, Inc.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# the GNU General Public License v.2, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY expressed or implied, including the implied warranties of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details. You should have received a copy of the
# GNU General Public License along with this program; if not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the
# source code or documentation are not subject to the GNU General Public
# License and may only be used or replicated with the express permission of
# Red Hat, Inc.
#
|
|
Packit Service |
21c75c |
|
|
Packit Service |
21c75c |
from __future__ import absolute_import
|
|
Packit Service |
21c75c |
from __future__ import print_function
|
|
Packit Service |
21c75c |
from __future__ import unicode_literals
|
|
Packit Service |
21c75c |
from functools import reduce
|
|
Packit Service |
21c75c |
|
|
Packit Service |
21c75c |
# Relative worth of a single match, keyed by the package attribute in which
# the needle was found.  Looked up by MatchCounter._eval_weights().
WEIGHTS = {
    'name': 7,
    'summary': 4,
    'description': 2,
    'url': 1,
}
|
|
Packit Service |
21c75c |
|
|
Packit Service |
21c75c |
|
|
Packit Service |
21c75c |
def _canonize_string_set(sset, length):
|
|
Packit Service |
21c75c |
""" Ordered sset with empty strings prepended. """
|
|
Packit Service |
21c75c |
current = len(sset)
|
|
Packit Service |
21c75c |
l = [''] * (length - current) + sorted(sset)
|
|
Packit Service |
21c75c |
return l
|
|
Packit Service |
21c75c |
|
|
Packit Service |
21c75c |
|
|
Packit Service |
21c75c |
class MatchCounter(dict):
    """Map packages to which of their attributes matched in a search against
    what values.

    The mapping is: ``package -> [(key, needle), ... ]``, where ``key`` is the
    name of the package attribute that matched and ``needle`` is the searched
    value.

    """

    @staticmethod
    def _eval_weights(pkg, matches):
        """Return the summed weight of all *matches* recorded for *pkg*.

        Every ``(key, needle)`` match is worth ``WEIGHTS[key]``.  A match on
        ``name`` whose needle equals the package name exactly is worth twice
        that.
        """
        def weight(match):
            key = match[0]
            needle = match[1]
            # The attribute is read for every match, whatever the key is.
            haystack = getattr(pkg, key)
            if key == "name" and haystack == needle:
                # if package matches exactly by name, increase weight
                return 2 * WEIGHTS[key]
            return WEIGHTS[key]

        return sum(weight(match) for match in matches)

    def _key_func(self):
        """Get the key function used for sorting matches.

        It is not enough to only look at the matches and order them by the sum
        of their weighted hits. In case this number is the same the packages
        are ordered by their name, which makes the result deterministic.

        Returned function is:
            pkg -> (-weights_sum, pkg.name)

        """
        def get_key(pkg):
            # use negative value to make sure packages with the highest
            # weight come first in an ascending sort
            negated_weight = -self._eval_weights(pkg, self[pkg])
            # then order packages alphabetically
            return (negated_weight, pkg.name)

        return get_key

    def _max_needles(self):
        """Return the max count of needles of all packages.

        Needles are counted per package without duplicates.  An empty counter
        yields 0.
        """
        if self:
            return max(len(self.matched_needles(pkg)) for pkg in self)
        return 0

    def add(self, pkg, key, needle):
        """Record that attribute *key* of *pkg* matched *needle*."""
        self.setdefault(pkg, []).append((key, needle))

    def dump(self):
        """Print every package and its list of matches, one package per line."""
        for pkg in self:
            print('%s\t%s' % (pkg, self[pkg]))

    def matched_haystacks(self, pkg):
        """Return the set of attribute values of *pkg* that were matched."""
        return {getattr(pkg, match[0]) for match in self[pkg]}

    def matched_keys(self, pkg):
        """Return the matched attribute names of *pkg* without duplicates.

        The keys are returned in the same order they first appear in the list
        of matches.
        """
        result = []
        for match in self[pkg]:
            key = match[0]
            if key not in result:
                result.append(key)
        return result

    def matched_needles(self, pkg):
        """Return the set of needles that matched any attribute of *pkg*."""
        return {match[1] for match in self[pkg]}

    def sorted(self, reverse=False, limit_to=None):
        """Return the packages as a list ordered by the key of _key_func().

        The highest summed weight comes first; packages with equal weight are
        ordered by name.

        :param reverse: accepted but not used by this method; the order is the
            same whatever value is passed.
            NOTE(review): kept as is on purpose, callers may pass it and rely
            on the current order -- confirm before wiring it in.
        :param limit_to: packages to sort instead of all packages of the
            counter.  A falsy value (``None``, an empty collection) means all
            packages of the counter.

        """
        keys = limit_to if limit_to else self.keys()
        return sorted(keys, key=self._key_func())

    def total(self):
        """Return the number of recorded matches summed over all packages."""
        return sum(len(matches) for matches in self.values())
|