A stroll among Debian packages

Can Debtags be used to show a list of packages similar to a given one? Yes it can:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/python
#
# Copyright (C) 2007  Enrico Zini <enrico@debian.org>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.

# From python-debian
from debian_bundle import debtags
# From python-apt
import apt
import re
aptCache = apt.Cache()

# Package to find similar packages for
pkg = 'debtags'

# Read the debtags database.  The predicate handed to read() is false for
# tags matching special::* or *::TODO (presumably read() drops those tags --
# confirm against the python-debian documentation).
db = debtags.DB()
tagFilter = re.compile(r"^special::.+$|^.+::TODO$")
tagfile = open('/var/lib/debtags/package-tags', "r")
try:
    db.read(tagfile, lambda x: not tagFilter.match(x))
finally:
    # Close the file explicitly instead of leaving it to the garbage
    # collector
    tagfile.close()

# Get the tags of the package
tagset = db.tagsOfPackage(pkg)

# Get the list of packages that have some tag in common with pkg.
# The loop variable is deliberately not called pkg, so that pkg keeps
# naming the package we started from.
nextpkgs = set()
for candidate, tags in db.iterPackagesTags():
    if tagset & tags:
        nextpkgs.add(candidate)

# Score a package by counting how many of its tags also appear in tagset,
# the tag set of the package we started from
def pkgscore(pkg):
    return sum(1 for tag in db.tagsOfPackage(pkg) if tag in tagset)

# Rank the candidates from the highest score to the lowest, keep the best 20
# and print each of them with its position and the first line of its
# description
ranked = sorted(nextpkgs, key=pkgscore, reverse=True)
display = ranked[:20]
for num, pkg in enumerate(display):
    summary = aptCache[pkg].rawDescription.split("\n")[0]
    print("%2d) %s - %s" % (num + 1, pkg, summary))

Now, imagine an application that allows you to move from package to package using the list of similar packages. You are using this application and moving from package to package.

Can the list of packages you previously visited be used when computing similar packages, to make the selection even smarter?

Have a look:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/python
#
# Copyright (C) 2007  Enrico Zini <enrico@debian.org>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.

# From python-debian
from debian_bundle import debtags
# From python-apt
import apt
import re
aptCache = apt.Cache()

# Location of the Debtags package-tags database.  This script defines no
# command line options, so the path is spelled out here.
tagdb = '/var/lib/debtags/package-tags'

# Read the Debtags database.  The predicate handed to read() is false for
# tags matching special::* or *::TODO (presumably read() drops those tags --
# confirm against the python-debian documentation).
db = debtags.DB()
tagFilter = re.compile(r"^special::.+$|^.+::TODO$")
tagfile = open(tagdb, "r")
try:
    db.read(tagfile, lambda x: not tagFilter.match(x))
finally:
    # Close the file explicitly instead of leaving it to the garbage
    # collector
    tagfile.close()

# Maximum number of previous packages to remember
maxlen = 3
# Initial package selection; the most recently visited package comes first
trail = [ 'debtags' ]

# Score a package by the sum of the weights of its tags.  tagscores is looked
# up when the function is called, so it always sees the weights computed by
# the current iteration of the loop below; tags that are not in tagscores
# contribute nothing.
def pkgscore(pkg):
    score = 0.0
    for tag in db.tagsOfPackage(pkg):
        score += tagscores.get(tag, 0.0)
    return score

# Loop until the user chooses to quit.
#
# Every iteration scores the packages against the tags of the packages in
# trail (most recently visited first), shows the 20 best matches, then asks
# the user for the number of the next package to visit, or 'q' to quit.
done = False
while not done:
    # Compute a package weight according to how old it is in the trail:
    # 1.0 for the most recent package, decreasing linearly with age.  An
    # entry at index maxlen, if present, weighs 0.
    # float() forces a true division: with Python 2 integer division
    # idx/maxlen is 0 for every idx < maxlen, which gave all those
    # packages the same weight.
    pkgweight = {}
    for idx, pkg in enumerate(trail):
        pkgweight[pkg] = 1.0 - float(idx) / maxlen

    # For every tag, sum the weights of the packages in trail that have the
    # tag
    tagscores = {}
    for pkg in trail:
        for tag in db.tagsOfPackage(pkg):
            tagscores[tag] = tagscores.get(tag, 0.0) + pkgweight[pkg]

    # Divide every tag score by the number of packages in the trail,
    # obtaining a 'tag weight'.  A package can be later scored by summing
    # the weight of all its tags.
    trailsize = float(len(trail))
    for tag in tagscores:
        tagscores[tag] = tagscores[tag] / trailsize

    # Find the merged tagset of the packages in trail
    trailtags = set(tagscores)

    # Get the list of packages whose tagsets intersect the trail tagset
    nextpkgs = set()
    for pkg, tags in db.iterPackagesTags():
        if trailtags & tags:
            nextpkgs.add(pkg)

    # Show the first 20 packages in reverse score order
    display = sorted(nextpkgs, key=pkgscore, reverse=True)[:20]
    for num, pkg in enumerate(display):
        aptpkg = aptCache[pkg]
        desc = aptpkg.rawDescription.split("\n")[0]
        print("%2d) %s - %s" % (num + 1, pkg, desc))

    # Ask the user to choose a new package; keep asking until the answer
    # is usable
    while True:
        try:
            ans = raw_input("> ").strip()
        except EOFError:
            # End of input (for example Ctrl-D): quit instead of dying
            # with a traceback
            done = True
            break
        # startswith() is also safe on an empty answer, where ans[0]
        # would raise IndexError
        if ans.startswith('q'):
            done = True
            break
        elif ans.isdigit():
            num = int(ans) - 1
            if num < 0:
                # An answer of 0 would otherwise become index -1 and
                # silently pick the last package shown
                print("The number is too low")
            elif num < len(display):
                # TODO: on a different kind of interface, display the full
                # description of pkg
                chosen = display[num]
                # Drop earlier visits of the chosen package: pkgweight is
                # keyed by package name, so a duplicate would be given the
                # weight of its oldest occurrence and have its tags counted
                # more than once
                older = [p for p in trail if p != chosen]
                trail = [chosen] + older[:maxlen]
                break
            else:
                print("The number is too high")

Who wants to make a GUI out of this?