I prepared a new toy to play with at Debconf and uploaded it to unstable:

Package: libept-dev
Description: High-level library for managing Debian package information
 The library defines a very minimal framework in which many sources of data
 about Debian packages can be implemented and queried together.
 .
 The library includes four data sources:
 .
  * APT: access the APT database
  * Debtags: access the Debtags tag information
  * Popcon: access Popcon package scores
  * TextSearch: fast Xapian-based full text search on package description
 .
 This is the development library.

Package: ept-cache
Description: Commandline tool to search the package archive
 ept-cache is a simple commandline interface to the functions of libept.
 .
 It can currently search and display data from four sources:
 .
  * The APT database
  * The Debtags tag information
  * Popcon package scores
  * A fast Xapian-based full text index on package descriptions

Yes, this finally brings lots of very cool data sources about packages together.

Try this one:

# Check if all data providers are active and give instructions on how
# to activate those that aren't
ept-cache info

# Follow the instructions to activate everything

# Show all GUI image editors, sorted by popularity, in reverse order
ept-cache search image editor -t gui -s p-

If you have the Xapian data provider enabled, the results of a search are given in relevance order, most relevant first. Searches are also done with proper stemming, so if you look for image editor it will also find image editing, although it will score image editor higher.

It's also quite lovely to work with it in C++. I'll improvise a few examples here:

Print name and short description of every package

#include <ept/apt/apt.h>
#include <ept/apt/packagerecord.h>

// Bring the standard library and libept APT names into scope, so the
// examples can write cout, Apt and PackageRecord unqualified
using namespace std;
using namespace ept::apt;

/**
 * Print the name and short description of every package record.
 *
 * Walks all the records provided by the Apt data source, parses each one
 * with a PackageRecord and writes "<name> - <short description>" to
 * standard output, one record per line.
 */
void playWithApt()
{
    // Apt data source
    Apt apt;

    // Parser of package records; the same instance is reused for every record
    PackageRecord rec;

    // Iterate all package records
    for (Apt::record_iterator i = apt.recordBegin();
        i != apt.recordEnd(); ++i)
    {
        // Parse the current record so that its fields can be queried
        rec.scan(*i);
        cout << rec.package() << " - " << rec.shortDescription() << endl;
    }
}

Show all image editors

#include <ept/debtags/debtags.h>
#include <set>

using namespace ept::debtags;

/**
 * Show all image editors.
 *
 * Asks the Debtags data source for the packages having the tags
 * works-with::image:raster, use::editing and role::program, then prints
 * "<name> - <short description>" for each of them on standard output.
 *
 * Returns without printing anything if Debtags has no data available.
 */
void playWithDebtags()
{
    // Apt data source
    Apt apt;
    // Parser of package records
    PackageRecord rec;
    // Debtags data source
    Debtags debtags;

    // Nothing can be queried if the tag data is not available
    if (!debtags.hasData())
        return;

    // Build the set of tags used to select the packages
    set<Tag> tags;
    tags.insert(debtags.vocabulary().tagByName("works-with::image:raster"));
    tags.insert(debtags.vocabulary().tagByName("use::editing"));
    tags.insert(debtags.vocabulary().tagByName("role::program"));

    // Debtags gives back package names: fetch each package record from
    // Apt to get to its description
    set<string> results = debtags.getItemsHavingTags(tags);
    for (set<string>::const_iterator i = results.begin();
        i != results.end(); ++i)
    {
        rec.scan(apt.rawRecord(*i));
        cout << rec.package() << " - " << rec.shortDescription() << endl;
    }
}

Print all package names, sorted by popularity

#include <ept/popcon/popcon.h>
#include <algorithm>

using namespace ept::popcon;

// STL comparator
struct PopconCompare
{
    Popcon& popcon;
    bool operator<(const std::string& pkg1, const std::string& pkg2) const
    {
        return popcon[pkg1] < popocon[pkg2];
    }
};

void playWithPopcon()
{
    // Apt data source
    Apt apt;
    // Popcon data source
    Popcon popcon;
    vector<string> sorted;

    if (!popcon.hasData())
        return;

    // Get all package names in the vector
    copy(apt.begin(), apt.end(), back_inserter(sorted));

    // Sort it by popularity
    sort(sorted.begin(), sorted.end(), PopconCompare(popcon));

    // Print it out
    for (vector<string>::const_iterator i = sorted.begin();
        i != sorted.end(); ++i)
        cout << *i << endl;
}

Search for image viewer, but we don't want to view kernel images

#include <xapian.h>

using namespace ept::textsearch;

/**
 * Search for image viewers, leaving out what is about the linux kernel.
 *
 * Builds one OR query for "image viewer" and one for "linux kernel",
 * combines them with AND_NOT and prints the top 20 matches on standard
 * output as "<package name> (<relevance>%)".
 */
void playWithXapian()
{
    // Text search data source, giving access to the Xapian database
    TextSearch textsearch;

    Xapian::Enquire enq(textsearch.db());
    // This will tokenise the search query into terms, stem them
    // and OR them together in a query.  Xapian will score higher
    // those results in which more ORed terms match, which is what
    // we want.
    Xapian::Query want = textsearch.makeOrQuery("image viewer");
    Xapian::Query dontWant = textsearch.makeOrQuery("linux kernel");

    // Keep what matches the first query but not the second
    enq.set_query(Xapian::Query(Xapian::Query::OP_AND_NOT, want, dontWant));

    // Print the top 20 results, with their relevance percentage
    Xapian::MSet matches = enq.get_mset(0, 20);
    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i)
    {
        // The get_data() of a document is the package name
        cout << i.get_document().get_data() << " ("
             << i.get_percent() << "%)" << endl;
    }
}

debian debtags eng pdo tips

2009-06-06 00:57:39+02:00