Blame doc/index.py

Packit 21b7a2
#!/usr/bin/python -u
Packit 21b7a2
#
Packit 21b7a2
# imports the API description and fills up a database with
Packit 21b7a2
# name relevance to modules, functions or web pages
Packit 21b7a2
#
Packit 21b7a2
# Operation needed:
Packit 21b7a2
# =================
Packit 21b7a2
#
Packit 21b7a2
# install mysqld, the python wrappers for mysql and libxml2, start mysqld
Packit 21b7a2
# Change the root passwd of mysql:
Packit 21b7a2
#    mysqladmin -u root password new_password
Packit 21b7a2
# Create the new database xmlsoft
Packit 21b7a2
#    mysqladmin -p create xmlsoft
Packit 21b7a2
# Create a database user 'veillard' and give him password access
Packit 21b7a2
# change veillard and abcde with the right user name and passwd
Packit 21b7a2
#    mysql -p
Packit 21b7a2
#    password:
Packit 21b7a2
#    mysql> GRANT ALL PRIVILEGES ON xmlsoft TO veillard@localhost
Packit 21b7a2
#           IDENTIFIED BY 'abcde' WITH GRANT OPTION;
Packit 21b7a2
#
Packit 21b7a2
# As the user check the access:
Packit 21b7a2
#    mysql -p xmlsoft
Packit 21b7a2
#    Enter password:
Packit 21b7a2
#    Welcome to the MySQL monitor....
Packit 21b7a2
#    mysql> use xmlsoft
Packit 21b7a2
#    Database changed
Packit 21b7a2
#    mysql> quit
Packit 21b7a2
#    Bye
Packit 21b7a2
#
Packit 21b7a2
# Then run the script in the doc subdir, it will create the symbols and
Packit 21b7a2
# word tables and populate them with information extracted from
Packit 21b7a2
# the libxml2-api.xml API description, and make them accessible read-only
Packit 21b7a2
# by nobody@localhost, the user Apache is expected to run as
Packit 21b7a2
#
Packit 21b7a2
# On the Apache configuration, make sure you have php support enabled
Packit 21b7a2
#
Packit 21b7a2
Packit 21b7a2
import MySQLdb
Packit 21b7a2
import libxml2
Packit 21b7a2
import sys
Packit 21b7a2
import string
Packit 21b7a2
import os
Packit 21b7a2
Packit 21b7a2
#
Packit 21b7a2
# We are not interested in parsing errors here
Packit 21b7a2
#
Packit 21b7a2
def callback(ctx, str):
    """Error handler that ignores whatever it is given.

    ctx: context value passed along with the handler (unused).
    str: error message text (unused).

    Always returns None.
    """
    return
Packit 21b7a2
# Route libxml2 error reports to the no-op callback so they are discarded.
libxml2.registerErrorHandler(callback, None)
Packit 21b7a2
Packit 21b7a2
#
Packit 21b7a2
# The dictionary of tables required and the SQL command needed
Packit 21b7a2
# to create them
Packit 21b7a2
#
Packit 21b7a2
# Maps each required table name to the CREATE TABLE statement that builds it.
TABLES={
  "symbols" : """CREATE TABLE symbols (
           name varchar(255) BINARY NOT NULL,
	   module varchar(255) BINARY NOT NULL,
           type varchar(25) NOT NULL,
	   descr varchar(255),
	   UNIQUE KEY name (name),
	   KEY module (module))""",
  "words" : """CREATE TABLE words (
           name varchar(50) BINARY NOT NULL,
	   symbol varchar(255) BINARY NOT NULL,
           relevance int,
	   KEY name (name),
	   KEY symbol (symbol),
	   UNIQUE KEY ID (name, symbol))""",
  "wordsHTML" : """CREATE TABLE wordsHTML (
           name varchar(50) BINARY NOT NULL,
	   resource varchar(255) BINARY NOT NULL,
	   section varchar(255),
	   id varchar(50),
           relevance int,
	   KEY name (name),
	   KEY resource (resource),
	   UNIQUE KEY ref (name, resource))""",
  "wordsArchive" : """CREATE TABLE wordsArchive (
           name varchar(50) BINARY NOT NULL,
	   ID int(11) NOT NULL,
           relevance int,
	   KEY name (name),
	   UNIQUE KEY ref (name, ID))""",
  "pages" : """CREATE TABLE pages (
           resource varchar(255) BINARY NOT NULL,
	   title varchar(255) BINARY NOT NULL,
	   UNIQUE KEY name (resource))""",
  "archives" : """CREATE TABLE archives (
           ID int(11) NOT NULL auto_increment,
           resource varchar(255) BINARY NOT NULL,
	   title varchar(255) BINARY NOT NULL,
	   UNIQUE KEY id (ID,resource(255)),
	   INDEX (ID),
	   INDEX (resource))""",
  "Queries" : """CREATE TABLE Queries (
           ID int(11) NOT NULL auto_increment,
	   Value varchar(50) NOT NULL,
	   Count int(11) NOT NULL,
	   UNIQUE KEY id (ID,Value(35)),
	   INDEX (ID))""",
  "AllQueries" : """CREATE TABLE AllQueries (
           ID int(11) NOT NULL auto_increment,
	   Value varchar(50) NOT NULL,
	   Count int(11) NOT NULL,
	   UNIQUE KEY id (ID,Value(35)),
	   INDEX (ID))""",
}
Packit 21b7a2
Packit 21b7a2
#
Packit 21b7a2
# The XML API description file to parse
Packit 21b7a2
#
Packit 21b7a2
# Name of the XML API description file.
API="libxml2-api.xml"
# Shared MySQL connection; stays None until openMySQL() assigns it.
DB=None
Packit 21b7a2
Packit 21b7a2
#########################################################################
Packit 21b7a2
#									#
Packit 21b7a2
#                  MySQL database interfaces				#
Packit 21b7a2
#									#
Packit 21b7a2
#########################################################################
Packit 21b7a2
def createTable(db, name):
    """Drop and re-create one of the tables described in TABLES.

    db: open database connection providing cursor().
    name: key of the table in TABLES.

    Returns the result of executing the CREATE statement, or -1 when db or
    name is None, name has no definition in TABLES, or the creation fails.
    """
    if db is None or name is None:
        return -1
    # Validate the name before dropping anything, so that a name without a
    # definition cannot destroy an existing table; it also keeps arbitrary
    # text out of the DROP statement.
    if name not in TABLES:
        print("Failed to create table %s" % (name))
        return -1
    c = db.cursor()

    ret = c.execute("DROP TABLE IF EXISTS %s" % (name))
    if ret == 1:
        print("Removed table %s" % (name))
    print("Creating table %s" % (name))
    try:
        ret = c.execute(TABLES[name])
    except Exception:
        print("Failed to create table %s" % (name))
        return -1
    return ret
Packit 21b7a2
Packit 21b7a2
def checkTables(db, verbose = 1):
    """Make sure every table listed in TABLES exists and can be counted.

    Missing tables are created with createTable(). A table whose row count
    cannot be read is passed to "repair table" and counted again. Finally
    read access (and write access to Queries) is granted to
    nobody@localhost.

    db: open database connection providing cursor().
    verbose: when true, print the table and record counts.

    Returns -1 when db is None, 0 otherwise.
    """
    if db is None:
        return -1
    c = db.cursor()
    nbtables = c.execute("show tables")
    if verbose:
        print("Found %d tables" % (nbtables))
    existing = [row[0] for row in c.fetchall()]

    for table in TABLES:
        if table not in existing:
            print("table %s missing" % (table))
            createTable(db, table)
        try:
            c.execute("SELECT count(*) from %s" % table)
            row = c.fetchone()
            if verbose:
                print("Table %s contains %d records" % (table, row[0]))
        except Exception:
            print("Troubles with table %s : repairing" % (table))
            ret = c.execute("repair table %s" % table)
            print("repairing returned %d" % (ret))
            c.execute("SELECT count(*) from %s" % table)
            row = c.fetchone()
            print("Table %s contains %d records" % (table, row[0]))
    if verbose:
        print("checkTables finished")

    # make sure apache can access the tables read-only
    try:
        c.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost")
        c.execute("GRANT INSERT,SELECT,UPDATE  ON xmlsoft.Queries TO nobody@localhost")
    except Exception:
        # Best effort: a failed GRANT does not stop the indexing run.
        pass
    return 0
Packit 21b7a2
    
Packit 21b7a2
def openMySQL(db="xmlsoft", passwd=None, verbose = 1):
    """Connect to MySQL, store the connection in DB and check the tables.

    db: name of the database to open.
    passwd: password; when None it is read from the MySQL_PASS environment
        variable, and the script exits with status 1 if that is unset.
    verbose: passed through to checkTables().

    Returns the result of checkTables(), or -1 if no connection was made.
    """
    global DB

    if passwd is None:
        passwd = os.environ.get("MySQL_PASS")
        if passwd is None:
            print("No password available, set environment MySQL_PASS")
            sys.exit(1)

    DB = MySQLdb.connect(passwd=passwd, db=db)
    if DB is None:
        return -1
    return checkTables(DB, verbose)
Packit 21b7a2
Packit 21b7a2
def updateWord(name, symbol, relevance):
    """Store the relevance of a word for a symbol in the words table.

    Opens the database through openMySQL() if no connection exists yet.

    Returns the result of the executed statement, or -1 when there is no
    connection, name or symbol is None, or both statements fail.
    """
    global DB

    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or symbol is None:
        return -1

    c = DB.cursor()
    try:
        # Values are passed as parameters so the driver does the quoting.
        ret = c.execute(
            "INSERT INTO words (name, symbol, relevance) VALUES (%s, %s, %s)",
            (name, symbol, relevance))
    except Exception:
        # (name, symbol) is a unique key of the table, so a failed insert
        # is retried as an update of the existing row.
        try:
            ret = c.execute(
                "UPDATE words SET relevance = %s WHERE name = %s AND symbol = %s",
                (relevance, name, symbol))
        except Exception:
            print("Update word (%s, %s, %s) failed command" % (name, symbol, relevance))
            print("UPDATE words SET relevance = %d where name = '%s' and symbol = '%s'" % (relevance, name, symbol))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit 21b7a2
Packit 21b7a2
def updateSymbol(name, module, type, desc):
    """Record a symbol in the symbols table and index its own name.

    The name is first registered as a word for itself with relevance 50.
    The description is reduced to the text before the first '.', with
    apostrophes replaced by spaces, and cut to 99 characters.

    Returns the result of the executed statement, or -1 when there is no
    connection, name, module or type is None, or both statements fail.
    """
    global DB

    updateWord(name, name, 50)
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or module is None or type is None:
        return -1

    try:
        # Apostrophes are still blanked so stored descriptions keep the
        # same form even though values are now passed as parameters.
        desc = desc.replace("'", " ").split(".")[0][0:99]
    except Exception:
        desc = ""

    c = DB.cursor()
    try:
        ret = c.execute(
            "INSERT INTO symbols (name, module, type, descr) VALUES (%s, %s, %s, %s)",
            (name, module, type, desc))
    except Exception:
        # name is a unique key, so a failed insert is retried as an update.
        try:
            ret = c.execute(
                "UPDATE symbols SET module=%s, type=%s, descr=%s WHERE name=%s",
                (module, type, desc, name))
        except Exception:
            print("Update symbol (%s, %s, %s) failed command" % (name, module, type))
            print("""UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'""" % (module, type, desc, name))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit 21b7a2
        
Packit 21b7a2
def addFunction(name, module, desc = ""):
    """Record name as a symbol of type 'function' via updateSymbol()."""
    return updateSymbol(name, module, 'function', desc)
Packit 21b7a2
Packit 21b7a2
def addMacro(name, module, desc = ""):
    """Record name as a symbol of type 'macro' via updateSymbol()."""
    return updateSymbol(name, module, 'macro', desc)
Packit 21b7a2
Packit 21b7a2
def addEnum(name, module, desc = ""):
    """Record name as a symbol of type 'enum' via updateSymbol()."""
    return updateSymbol(name, module, 'enum', desc)
Packit 21b7a2
Packit 21b7a2
def addStruct(name, module, desc = ""):
    """Record name as a symbol of type 'struct' via updateSymbol()."""
    return updateSymbol(name, module, 'struct', desc)
Packit 21b7a2
Packit 21b7a2
def addConst(name, module, desc = ""):
    """Record name as a symbol of type 'const' via updateSymbol()."""
    return updateSymbol(name, module, 'const', desc)
Packit 21b7a2
Packit 21b7a2
def addType(name, module, desc = ""):
    """Record name as a symbol of type 'type' via updateSymbol()."""
    return updateSymbol(name, module, 'type', desc)
Packit 21b7a2
Packit 21b7a2
def addFunctype(name, module, desc = ""):
    """Record name as a symbol of type 'functype' via updateSymbol()."""
    return updateSymbol(name, module, 'functype', desc)
Packit 21b7a2
Packit 21b7a2
def addPage(resource, title):
    """Store the title of a page in the pages table.

    Opens the database through openMySQL() if no connection exists yet.
    A None title is stored as an empty string.

    Returns the result of the executed statement, or -1 when there is no
    connection, resource is None, or both statements fail.
    """
    global DB

    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if resource is None:
        return -1
    if title is None:
        title = ""

    c = DB.cursor()
    try:
        # Parameters are quoted by the driver, so a title containing an
        # apostrophe no longer breaks the statement.
        ret = c.execute(
            "INSERT INTO pages (resource, title) VALUES (%s, %s)",
            (resource, title))
    except Exception:
        # resource is a unique key, so a failed insert is retried as an
        # update of the existing row.
        try:
            ret = c.execute(
                "UPDATE pages SET title=%s WHERE resource=%s",
                (title, resource))
        except Exception:
            print("Update page (%s, %s) failed command" % (resource, title))
            print("""UPDATE pages SET title='%s' WHERE resource='%s'""" % (title, resource))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit 21b7a2
Packit 21b7a2
def updateWordHTML(name, resource, desc, id, relevance):
    """Store the relevance of a word for an HTML resource in wordsHTML.

    desc is saved in the section column after apostrophes are replaced by
    spaces and the text is cut to 99 characters; None becomes "". A None
    id becomes "".

    Returns the result of the executed statement, or -1 when there is no
    connection, name or resource is None, or both statements fail.
    """
    global DB

    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or resource is None:
        return -1
    if id is None:
        id = ""
    if desc is None:
        desc = ""
    else:
        try:
            desc = desc.replace("'", " ")[0:99]
        except Exception:
            desc = ""

    c = DB.cursor()
    try:
        ret = c.execute(
            "INSERT INTO wordsHTML (name, resource, section, id, relevance) VALUES (%s, %s, %s, %s, %s)",
            (name, resource, desc, id, relevance))
    except Exception:
        # (name, resource) is a unique key, so a failed insert is retried
        # as an update of the existing row.
        try:
            ret = c.execute(
                "UPDATE wordsHTML SET section=%s, id=%s, relevance=%s WHERE name=%s AND resource=%s",
                (desc, id, relevance, name, resource))
        except Exception:
            print("Update symbol (%s, %s, %d) failed command" % (name, resource, relevance))
            print("""UPDATE wordsHTML SET section='%s', id='%s', relevance='%d' where name='%s' and resource='%s'""" % (desc, id, relevance, name, resource))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit 21b7a2
Packit 21b7a2
def checkXMLMsgArchive(url):
    """Look up the ID of an archived message by its URL.

    Opens the database through openMySQL() if no connection exists yet.

    Returns the ID column of the matching archives row, or -1 when there
    is no connection, url is None, the query fails or nothing matches.
    """
    global DB

    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if url is None:
        return -1

    c = DB.cursor()
    try:
        # The URL is passed as a parameter so the driver does the quoting.
        c.execute("SELECT ID FROM archives WHERE resource=%s", (url,))
        row = c.fetchone()
    except Exception:
        return -1
    if row is None:
        return -1

    return row[0]
Packit 21b7a2
    
Packit 21b7a2
def addXMLMsgArchive(url, title):
    """Insert an archived message and return the ID assigned to it.

    The title has apostrophes replaced by spaces and is cut to 99
    characters; None becomes "". After the insert the ID is read back by
    selecting on the URL.

    Returns the ID as an int, or -1 when there is no connection, url is
    None, a statement fails or the row cannot be read back.
    """
    global DB

    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if url is None:
        return -1
    if title is None:
        title = ""
    else:
        title = title.replace("'", " ")[0:99]

    c = DB.cursor()
    # cmd/args always describe the statement being attempted so that the
    # failure message can show which one went wrong.
    cmd = "INSERT INTO archives (resource, title) VALUES (%s, %s)"
    args = (url, title)
    try:
        c.execute(cmd, args)
        cmd = "SELECT ID FROM archives WHERE resource=%s"
        args = (url,)
        c.execute(cmd, args)
        row = c.fetchone()
        if row is None:
            print("addXMLMsgArchive failed to get the ID: %s" % (url))
            return -1
    except Exception:
        shown = cmd % tuple(["'%s'" % (value,) for value in args])
        print("addXMLMsgArchive failed command: %s" % (shown))
        return -1

    return int(row[0])
Packit 21b7a2
Packit 21b7a2
def updateWordArchive(name, id, relevance):
    """Store the relevance of a word for an archived message.

    Opens the database through openMySQL() if no connection exists yet.

    Returns the result of the executed statement, or -1 when there is no
    connection, name or id is None, or both statements fail.
    """
    global DB

    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or id is None:
        return -1

    c = DB.cursor()
    try:
        ret = c.execute(
            "INSERT INTO wordsArchive (name, id, relevance) VALUES (%s, %s, %s)",
            (name, id, relevance))
    except Exception:
        # (name, ID) is a unique key, so a failed insert is retried as an
        # update of the existing row.
        try:
            ret = c.execute(
                "UPDATE wordsArchive SET relevance=%s WHERE name=%s AND ID=%s",
                (relevance, name, id))
        except Exception:
            print("Update word archive (%s, %d, %d) failed command" % (name, id, relevance))
            print("""UPDATE wordsArchive SET relevance='%d' where name='%s' and ID='%d'""" % (relevance, name, id))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit 21b7a2
Packit 21b7a2
#########################################################################
Packit 21b7a2
#									#
Packit 21b7a2
#                  Word dictionary and analysis routines		#
Packit 21b7a2
#									#
Packit 21b7a2
#########################################################################
Packit 21b7a2
Packit 21b7a2
#
Packit 21b7a2
# Top 100 English words, minus those shorter than 3 letters, plus our own set
Packit 21b7a2
#
Packit 21b7a2
# Words that are never indexed. Only key membership is tested; the value is
# always 0. Lookups are case-sensitive.
dropWords = {
    'the':0, 'this':0, 'can':0, 'man':0, 'had':0, 'him':0, 'only':0,
    'and':0, 'not':0, 'been':0, 'other':0, 'even':0, 'are':0, 'was':0,
    'new':0, 'most':0, 'but':0, 'when':0, 'some':0, 'made':0, 'from':0,
    'who':0, 'could':0, 'after':0, 'that':0, 'will':0, 'time':0, 'also':0,
    'have':0, 'more':0, 'these':0, 'did':0, 'two':0, 'many':0,
    'they':0, 'may':0, 'before':0, 'for':0, 'which':0, 'out':0, 'then':0,
    'must':0, 'one':0, 'through':0, 'with':0, 'you':0, 'said':0,
    'first':0, 'back':0, 'were':0, 'what':0, 'any':0, 'years':0, 'his':0,
    'her':0, 'where':0, 'all':0, 'its':0, 'now':0, 'much':0, 'she':0,
    'about':0, 'such':0, 'your':0, 'there':0, 'into':0, 'like':0,
    'would':0, 'than':0, 'our':0, 'well':0, 'their':0, 'them':0, 'over':0,
    'down':0,
    'net':0, 'www':0, 'bad':0, 'Okay':0, 'bin':0, 'cur':0,
}
Packit 21b7a2
Packit 21b7a2
# In-memory word indexes filled while analysing the inputs:
#   wordsDict:        word -> {(module, symbol): relevance}, or None once
#                     addWord() has dropped the word as too common
#   wordsDictHTML:    word -> {resource: (relevance, id, section)}
#   wordsDictArchive: word -> {id: relevance}
wordsDict = {}
wordsDictHTML = {}
wordsDictArchive = {}
Packit 21b7a2
Packit 21b7a2
def cleanupWordsString(str):
    """Return str with punctuation and line breaks turned into spaces.

    Each listed character is replaced by a single space, so the length of
    the text is unchanged and it can then be split on whitespace.
    """
    for separator in (".", "!", "?", ",", "'", '"', ";", "(", ")", "{", "}",
                      "<", ">", "=", "/", "*", ":", "#", "\\", "\n", "\r",
                      "\xc2", "\xa0"):
        str = str.replace(separator, " ")
    return str
Packit 21b7a2
    
Packit 21b7a2
def cleanupDescrString(str):
    """Return str as a single line with normalised spacing.

    Apostrophes, line breaks and the characters "\\xc2" and "\\xa0" are
    replaced by spaces, then runs of whitespace are collapsed to one space
    and leading/trailing whitespace is removed.
    """
    for separator in ("'", "\n", "\r", "\xc2", "\xa0"):
        str = str.replace(separator, " ")
    # Join the split words; joining the string itself would put a space
    # between every single character.
    return " ".join(str.split())
Packit 21b7a2
Packit 21b7a2
def splitIdentifier(str):
    """Split an identifier into its lowercase word components.

    A word starts at an ASCII letter and continues with a run of uppercase
    letters followed by a run of lowercase letters; digits right after it
    are skipped. Any other character separates words. For example
    "xmlParseFile" gives ["xml", "parse", "file"].
    """
    ret = []
    pos = 0
    end = len(str)
    while pos < end:
        cur = str[pos].lower()
        pos = pos + 1
        if cur < 'a' or cur > 'z':
            # Not a letter: separator, skip it.
            continue
        while pos < end and 'A' <= str[pos] <= 'Z':
            cur = cur + str[pos].lower()
            pos = pos + 1
        while pos < end and 'a' <= str[pos] <= 'z':
            cur = cur + str[pos]
            pos = pos + 1
        while pos < end and '0' <= str[pos] <= '9':
            pos = pos + 1
        ret.append(cur)
    return ret
Packit 21b7a2
Packit 21b7a2
def addWord(word, module, symbol, relevance):
    """Add relevance for word to the (module, symbol) entry of wordsDict.

    Returns -1 when word is None or shorter than 3 characters, or when
    module or symbol is None. Returns 0 when the word is in dropWords,
    when its first character has a code above 0x80, or when it has been
    dropped as too common. Otherwise returns the accumulated relevance
    stored for (module, symbol).
    """
    if word is None or len(word) < 3:
        return -1
    if module is None or symbol is None:
        return -1
    if word in dropWords:
        return 0
    if ord(word[0]) > 0x80:
        return 0

    if word in wordsDict:
        d = wordsDict[word]
        if d is None:
            return 0
        if len(d) > 500:
            # Too many symbols use this word: mark it as dropped for good.
            wordsDict[word] = None
            return 0
        try:
            relevance = relevance + d[(module, symbol)]
        except KeyError:
            pass
    else:
        wordsDict[word] = {}
    wordsDict[word][(module, symbol)] = relevance
    return relevance
Packit 21b7a2
    
Packit 21b7a2
def addString(str, module, symbol, relevance):
    """Index every word of str for (module, symbol) through addWord().

    Returns -1 when str is None or shorter than 3 characters, otherwise
    the sum of the addWord() results for the words longer than 2
    characters.
    """
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for word in cleanupWordsString(str).split():
        if len(word) > 2:
            # NOTE(review): the relevance argument is not used; each word
            # is added with a fixed relevance of 5. Confirm with callers
            # before changing.
            ret = ret + addWord(word, module, symbol, 5)

    return ret
Packit 21b7a2
Packit 21b7a2
def addWordHTML(word, resource, id, section, relevance):
    """Add relevance for word to the resource entry of wordsDictHTML.

    The section text is normalised with cleanupDescrString(). When the
    word is already recorded for the resource, the stored id and section
    are kept if they are not None and the relevances are summed.

    Returns -1 when word is None or shorter than 3 characters, or when
    resource or section is None. Returns 0 when the word is in dropWords,
    when its first character has a code above 0x80, or when its entry is
    None. Otherwise returns the accumulated relevance.
    """
    if word is None or len(word) < 3:
        return -1
    if resource is None or section is None:
        return -1
    if word in dropWords:
        return 0
    if ord(word[0]) > 0x80:
        return 0

    section = cleanupDescrString(section)

    if word in wordsDictHTML:
        d = wordsDictHTML[word]
        if d is None:
            print("skipped %s" % (word))
            return 0
        try:
            (r, i, s) = d[resource]
        except KeyError:
            pass
        else:
            if i is not None:
                id = i
            if s is not None:
                section = s
            relevance = relevance + r
    else:
        d = wordsDictHTML[word] = {}
    d[resource] = (relevance, id, section)
    return relevance
Packit 21b7a2
    
Packit 21b7a2
def addStringHTML(str, resource, id, section, relevance):
    """Index every word of str for an HTML resource through addWordHTML().

    Returns -1 when str is None or shorter than 3 characters, otherwise
    the sum of the addWordHTML() results for the words longer than 2
    characters. Words whose indexing raises are reported and skipped.
    """
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for word in cleanupWordsString(str).split():
        if len(word) > 2:
            try:
                r = addWordHTML(word, resource, id, section, relevance)
                if r < 0:
                    print("addWordHTML failed: %s %s" % (word, resource))
                ret = ret + r
            except Exception:
                # Best effort: report the word and carry on with the rest.
                print("addWordHTML failed: %s %s %d" % (word, resource, relevance))
                print("%s %s" % sys.exc_info()[:2])

    return ret
Packit 21b7a2
Packit 21b7a2
def addWordArchive(word, id, relevance):
    """Add relevance for word to the archive id entry of wordsDictArchive.

    Returns -1 when word is None or shorter than 3 characters, or when id
    is None or -1. Returns 0 when the word is in dropWords, when its first
    character has a code above 0x80, or when its entry is None. Otherwise
    returns the accumulated relevance stored for id.
    """
    if word is None or len(word) < 3:
        return -1
    if id is None or id == -1:
        return -1
    if word in dropWords:
        return 0
    if ord(word[0]) > 0x80:
        return 0

    if word in wordsDictArchive:
        d = wordsDictArchive[word]
        if d is None:
            print("skipped %s" % (word))
            return 0
        try:
            relevance = relevance + d[id]
        except KeyError:
            pass
    else:
        d = wordsDictArchive[word] = {}
    d[id] = relevance
    return relevance
Packit 21b7a2
    
Packit 21b7a2
def addStringArchive(str, id, relevance):
    """Index every word of str for an archive id through addWordArchive().

    Returns -1 when str is None or shorter than 3 characters, otherwise
    the sum of the non-negative addWordArchive() results for the words
    longer than 2 characters. Failures are reported and skipped.
    """
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for word in cleanupWordsString(str).split():
        if len(word) > 2:
            try:
                r = addWordArchive(word, id, relevance)
                if r < 0:
                    print("addWordArchive failed: %s %s" % (word, id))
                else:
                    ret = ret + r
            except Exception:
                # Best effort: report the word and carry on with the rest.
                print("addWordArchive failed: %s %s %d" % (word, id, relevance))
                print("%s %s" % sys.exc_info()[:2])
    return ret
Packit 21b7a2
Packit 21b7a2
#########################################################################
Packit 21b7a2
#									#
Packit 21b7a2
#                  XML API description analysis				#
Packit 21b7a2
#									#
Packit 21b7a2
#########################################################################
Packit 21b7a2
Packit 21b7a2
def loadAPI(filename):
    """Parse filename with libxml2.parseFile() and return the document.

    Prints a "loaded" message once parsing has returned.
    """
    doc = libxml2.parseFile(filename)
    print("loaded %s" % (filename))
    return doc
Packit 21b7a2
Packit 21b7a2
def foundExport(file, symbol):
    """Record a symbol exported by file and index the words of its name.

    The symbol is stored with addFunction() and every component returned
    by splitIdentifier() is added with relevance 10.

    Returns 0 when file or symbol is None, 1 otherwise.
    """
    if file is None or symbol is None:
        return 0
    addFunction(symbol, file)
    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)
    return 1
Packit 21b7a2
     
Packit 21b7a2
def analyzeAPIFile(top):
    """Process the children of one <file> element of the API description.

    Each <exports> child is passed to foundExport() with the file's name
    and its "symbol" attribute. Text nodes are skipped; any other element
    is reported.

    Returns the number of exports recorded.
    """
    count = 0
    name = top.prop("name")
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            if cur.name == "exports":
                count = count + foundExport(name, cur.prop("symbol"))
            else:
                # Both placeholders need a value: the element found and
                # the file it was found in.
                print("unexpected element %s in API doc <file name='%s'>" % (cur.name, name))
        cur = cur.next
    return count
Packit 21b7a2
Packit 21b7a2
def analyzeAPIFiles(top):
    """Process the children of the <files> element of the API description.

    Each <file> child is handed to analyzeAPIFile(). Text nodes are
    skipped; any other element is reported.

    Returns the sum of the analyzeAPIFile() results.
    """
    count = 0
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            if cur.name == "file":
                count = count + analyzeAPIFile(cur)
            else:
                print("unexpected element %s in API doc <files>" % (cur.name))
        cur = cur.next
    return count
Packit 21b7a2
Packit 21b7a2
def analyzeAPIEnum(top):
    """Record an enum element and index the words of its name.

    Reads the "file" and "name" attributes of top, stores the symbol with
    addEnum() and adds each name component with relevance 10.

    Returns 0 when either attribute is missing, 1 otherwise.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addEnum(symbol, file)
    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    return 1
Packit 21b7a2
Packit 21b7a2
def analyzeAPIConst(top):
    """Record a const element and index the words of its name.

    Reads the "file" and "name" attributes of top, stores the symbol with
    addConst() and adds each name component with relevance 10.

    Returns 0 when either attribute is missing, 1 otherwise.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addConst(symbol, file)
    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    return 1
Packit 21b7a2
Packit 21b7a2
def analyzeAPIType(top):
    """Record a type element and index the words of its name.

    Reads the "file" and "name" attributes of top, stores the symbol with
    addType() and adds each name component with relevance 10.

    Returns 0 when either attribute is missing, 1 otherwise.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addType(symbol, file)
    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)
    return 1
Packit 21b7a2
Packit 21b7a2
def analyzeAPIFunctype(top):
    """Record a functype element and index the words of its name.

    Reads the "file" and "name" attributes of top, stores the symbol with
    addFunctype() and adds each name component with relevance 10.

    Returns 0 when either attribute is missing, 1 otherwise.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addFunctype(symbol, file)
    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)
    return 1
Packit 21b7a2
Packit 21b7a2
def analyzeAPIStruct(top):
    """Record the struct described by the API description node *top*.

    Returns 0 without recording anything when the node lacks a "file" or a
    "name" attribute.  Otherwise the struct is passed to addStruct() and
    the fragments of its name are passed to addWord() with a weight of 10.
    When the node carries an "info" attribute, its single quotes are
    replaced by spaces and every whitespace-separated token longer than two
    characters is passed to addWord() with a weight of 5.  Returns 1.
    """
    module = top.prop("file")
    if module is None:
        return 0
    name = top.prop("name")
    if name is None:
        return 0

    addStruct(name, module)
    for fragment in splitIdentifier(name):
        addWord(fragment, module, name, 10)

    description = top.prop("info")
    if description is not None:
        cleaned = description.replace("'", " ").strip()
        for token in cleaned.split():
            if len(token) > 2:
                addWord(token, module, name, 5)
    return 1
Packit 21b7a2
Packit 21b7a2
def analyzeAPIMacro(top):
    """Record the macro described by the API description node *top*.

    Returns 0 without recording anything when the node lacks a "file" or a
    "name" attribute.  Single quotes in the name are replaced by spaces and
    the result is stripped before use.  The fragments of the name are
    passed to addWord() with a weight of 10.

    The content of the first non-text child named "info" is used as the
    description.  Without one, the macro is registered through addMacro()
    with no description, a diagnostic is printed and 0 is returned.  With
    one, the quote-cleaned description is registered with the macro, each
    of its tokens longer than two characters is passed to addWord() with a
    weight of 5, and 1 is returned.
    """
    module = top.prop("file")
    if module is None:
        return 0
    raw_name = top.prop("name")
    if raw_name is None:
        return 0
    name = raw_name.replace("'", " ").strip()

    # Look for the first <info> element child, ignoring text nodes.
    description = None
    node = top.children
    while node is not None:
        if node.type != 'text' and node.name == "info":
            description = node.content
            break
        node = node.next

    for fragment in splitIdentifier(name):
        addWord(fragment, module, name, 10)

    if description is None:
        addMacro(name, module)
        print("Macro %s description has no <info>" % (name))
        return 0

    description = description.replace("'", " ").strip()
    addMacro(name, module, description)
    for token in description.split():
        if len(token) > 2:
            addWord(token, module, name, 5)
    return 1
Packit 21b7a2
Packit 21b7a2
def analyzeAPIFunction(top):
    """Record the function described by the API description node *top*.

    Returns 0 without recording anything when the node lacks a "file" or a
    "name" attribute.  Single quotes in the name are replaced by spaces and
    the result is stripped before use.

    The element children are then walked:
      - "info":   its content becomes the function description (the last
                  such child wins, since the walk does not stop);
      - "return": its "info" attribute, if any, goes to addString() with a
                  weight of 7;
      - "arg":    its "info" attribute, if any, goes to addString() with a
                  weight of 5, and its "name" attribute, if any, goes to
                  addWord() with a weight of 7.
    Every attribute value is quote-cleaned and stripped the same way as
    the name.

    The function is registered through addFunction(), with an empty
    description and a printed diagnostic when no <info> child was found,
    otherwise with the cleaned description, which is also passed to
    addString() with a weight of 5.  Finally the fragments of the name are
    passed to addWord() with a weight of 10.  Returns 1.
    """
    module = top.prop("file")
    if module is None:
        return 0
    name = top.prop("name")
    if name is None:
        return 0
    name = name.replace("'", " ").strip()

    description = None
    child = top.children
    while child is not None:
        if child.type != 'text':
            tag = child.name
            if tag == "info":
                description = child.content
            elif tag == "return":
                text = child.prop("info")
                if text is not None:
                    text = text.replace("'", " ").strip()
                    addString(text, module, name, 7)
            elif tag == "arg":
                text = child.prop("info")
                if text is not None:
                    text = text.replace("'", " ").strip()
                    addString(text, module, name, 5)
                arg_name = child.prop("name")
                if arg_name is not None:
                    arg_name = arg_name.replace("'", " ").strip()
                    addWord(arg_name, module, name, 7)
        child = child.next

    if description is None:
        print("Function %s description has no <info>" % (name))
        addFunction(name, module, "")
    else:
        description = description.replace("'", " ").strip()
        addFunction(name, module, description)
        addString(description, module, name, 5)

    for fragment in splitIdentifier(name):
        addWord(fragment, module, name, 10)

    return 1
Packit 21b7a2
Packit 21b7a2
def analyzeAPISymbols(top):
    """Analyze every symbol element that is a child of the node *top*.

    Text children are skipped.  Each "macro", "function", "const",
    "typedef", "struct", "enum" or "functype" child is handed to the
    matching analyzeAPI*() function, and the values those functions return
    are summed.  Any other child is reported on standard output and
    ignored.

    Returns the accumulated count.
    """
    handlers = {
        "macro": analyzeAPIMacro,
        "function": analyzeAPIFunction,
        "const": analyzeAPIConst,
        "typedef": analyzeAPIType,
        "struct": analyzeAPIStruct,
        "enum": analyzeAPIEnum,
        "functype": analyzeAPIFunctype,
    }

    count = 0
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            handler = handlers.get(cur.name)
            if handler is None:
                # analyzeAPI() calls this function for the <symbols>
                # element, so name that element in the diagnostic (the
                # message used to say <files>).
                print("unexpected element %s in API doc <symbols>" % (cur.name))
            else:
                count = count + handler(cur)
        cur = cur.next
    return count
Packit 21b7a2
Packit 21b7a2
def analyzeAPI(doc):
    """Analyze a parsed API description document.

    Returns -1 when *doc* is None or when its root element is not named
    "api" (a diagnostic is printed in the latter case).  Otherwise the
    root's children are walked: text nodes and "files" elements are
    skipped, "symbols" elements are passed to analyzeAPISymbols(), and any
    other child is reported on standard output.

    Returns the sum of the values returned by analyzeAPISymbols().
    """
    if doc is None:
        return -1
    root = doc.getRootElement()
    if root.name != "api":
        print("Unexpected root name")
        return -1

    total = 0
    node = root.children
    while node is not None:
        if node.type != 'text':
            tag = node.name
            if tag == "symbols":
                total = total + analyzeAPISymbols(node)
            elif tag != "files":
                # <files> sections are currently not indexed.
                print("unexpected element %s in API doc" % (tag))
        node = node.next
    return total
Packit 21b7a2
Packit 21b7a2
#########################################################################
Packit 21b7a2
#									#
Packit 21b7a2
#                  Web pages parsing and analysis			#
Packit 21b7a2
#									#
Packit 21b7a2
#########################################################################
Packit 21b7a2
Packit 21b7a2
import glob
Packit 21b7a2
Packit 21b7a2
def analyzeHTMLText(doc, resource, p, section, id):
    """Index the content of the text node *p* found in page *resource*.

    The node content is passed to addStringHTML() together with the
    resource, the anchor *id*, the current *section* title and a weight of
    5.  *doc* is accepted but not used.

    Returns the value reported by addStringHTML(), or -1 if reading the
    content or indexing it raised an exception.
    """
    words = 0
    try:
        words = words + addStringHTML(p.content, resource, id, section, 5)
    except Exception:
        # Only ordinary errors are turned into -1; KeyboardInterrupt and
        # SystemExit are left to propagate so the run can be interrupted.
        return -1
    return words
Packit 21b7a2
Packit 21b7a2
def analyzeHTMLPara(doc, resource, p, section, id):
    """Index the content of the paragraph node *p* found in page *resource*.

    The node content is passed to addStringHTML() together with the
    resource, the anchor *id*, the current *section* title and a weight of
    5.  *doc* is accepted but not used.

    Returns the value reported by addStringHTML(), or -1 if reading the
    content or indexing it raised an exception.
    """
    words = 0
    try:
        words = words + addStringHTML(p.content, resource, id, section, 5)
    except Exception:
        # Only ordinary errors are turned into -1; KeyboardInterrupt and
        # SystemExit are left to propagate so the run can be interrupted.
        return -1
    return words
Packit 21b7a2
Packit 21b7a2
def analyzeHTMLPre(doc, resource, p, section, id):
    """Index the content of the <pre> node *p* found in page *resource*.

    The node content is passed to addStringHTML() together with the
    resource, the anchor *id*, the current *section* title and a weight of
    5.  *doc* is accepted but not used.

    Returns the value reported by addStringHTML(), or -1 if reading the
    content or indexing it raised an exception.
    """
    words = 0
    try:
        words = words + addStringHTML(p.content, resource, id, section, 5)
    except Exception:
        # Only ordinary errors are turned into -1; KeyboardInterrupt and
        # SystemExit are left to propagate so the run can be interrupted.
        return -1
    return words
Packit 21b7a2
Packit 21b7a2
def analyzeHTML(doc, resource, p, section, id):
    """Index the content of node *p* found in page *resource*.

    NOTE: the two-argument analyzeHTML(doc, resource) defined right after
    this function rebinds the same module-level name, so this
    five-argument variant cannot be reached through the name analyzeHTML
    once the module has been loaded.

    The node content is passed to addStringHTML() together with the
    resource, the anchor *id*, the current *section* title and a weight of
    5.  *doc* is accepted but not used.

    Returns the value reported by addStringHTML(), or -1 if reading the
    content or indexing it raised an exception.
    """
    words = 0
    try:
        words = words + addStringHTML(p.content, resource, id, section, 5)
    except Exception:
        # Only ordinary errors are turned into -1; KeyboardInterrupt and
        # SystemExit are left to propagate so the run can be interrupted.
        return -1
    return words
Packit 21b7a2
Packit 21b7a2
def analyzeHTML(doc, resource):
    """Index the headings and text of the parsed page *doc*.

    The page is registered through addPage() under the content of its
    //head/title element, or under "Page <resource>" when that lookup
    fails.  The h1/h2/h3 elements and the text nodes of the document are
    then visited: a heading becomes the current section title (and its
    "id" or "name" attribute, when present, the current anchor), while a
    text node is passed to analyzeHTMLText() with the current section and
    anchor.

    A failure during the walk is reported on standard output and ends the
    walk; it is not propagated.  The XPath context created here is always
    released before returning.

    Returns the number of nodes that were handed to an analyzeHTML*()
    helper.
    """
    para = 0
    ctxt = doc.xpathNewContext()
    try:
        try:
            res = ctxt.xpathEval("//head/title")
            title = res[0].content
        except Exception:
            # No usable <title>: fall back to a name built from the file.
            title = "Page %s" % (resource)
        addPage(resource, title)
        try:
            items = ctxt.xpathEval("//h1 | //h2 | //h3 | //text()")
            section = title
            id = ""
            for item in items:
                if item.name in ('h1', 'h2', 'h3'):
                    section = item.content
                    if item.prop("id"):
                        id = item.prop("id")
                    elif item.prop("name"):
                        id = item.prop("name")
                elif item.type == 'text':
                    analyzeHTMLText(doc, resource, item, section, id)
                    para = para + 1
                # The expression above selects only headings and text
                # nodes, so the branches below are reached only if it is
                # widened.
                elif item.name == 'p':
                    analyzeHTMLPara(doc, resource, item, section, id)
                    para = para + 1
                elif item.name == 'pre':
                    analyzeHTMLPre(doc, resource, item, section, id)
                    para = para + 1
                else:
                    print("Page %s, unexpected %s element" % (resource, item.name))
        except Exception:
            print("Page %s: problem analyzing" % (resource))
            print("%s %s" % sys.exc_info()[:2])
    finally:
        # The libxml2 bindings do not release XPath contexts on their own.
        ctxt.xpathFreeContext()

    return para
Packit 21b7a2
Packit 21b7a2
def analyzeHTMLPages():
    """Parse and index the HTML pages of the documentation directory.

    Considers "*.html" and "tutorial/*.html" relative to the current
    directory, skipping "xml.html" and every path starting with "API".
    Each page is parsed with the XML parser first and with the HTML parser
    if that fails; a page that neither parser accepts is reported and
    skipped instead of aborting the run.  Parsed pages are passed to
    analyzeHTML() and freed afterwards.

    Returns the number of pages that were analyzed without error.
    """
    ret = 0
    HTMLfiles = glob.glob("*.html") + glob.glob("tutorial/*.html")
    for html in HTMLfiles:
        if html[0:3] == "API" or html == "xml.html":
            continue

        try:
            doc = libxml2.parseFile(html)
        except Exception:
            # Not well-formed XML: retry with the more lenient HTML parser.
            try:
                doc = libxml2.htmlParseFile(html, None)
            except Exception:
                doc = None
        if doc is None:
            print("could not parse %s" % (html))
            continue

        try:
            try:
                res = analyzeHTML(doc, html)
                print("Parsed %s : %d paragraphs" % (html, res))
                ret = ret + 1
            except Exception:
                print("could not parse %s" % (html))
        finally:
            # Documents returned by the libxml2 bindings must be freed
            # explicitly.
            doc.freeDoc()
    return ret
Packit 21b7a2
Packit 21b7a2
#########################################################################
Packit 21b7a2
#									#
Packit 21b7a2
#                  Mail archives parsing and analysis			#
Packit 21b7a2
#									#
Packit 21b7a2
#########################################################################
Packit 21b7a2
Packit 21b7a2
import time
Packit 21b7a2
Packit 21b7a2
def getXMLDateArchive(t = None):
    """Return the URL of the by-date index of one month of the xml archive.

    *t* is a time in seconds since the epoch and defaults to the current
    time; the month is the one containing *t* in UTC.  The month name in
    the URL is taken from a fixed English table rather than from
    strftime("%B"), so the result does not depend on the process locale.
    """
    month_names = ("January", "February", "March", "April", "May", "June",
                   "July", "August", "September", "October", "November",
                   "December")
    if t is None:
        t = time.time()
    T = time.gmtime(t)
    year = T[0]
    month = month_names[T[1] - 1]   # T[1] is the month number, 1..12
    url = "http://mail.gnome.org/archives/xml/%d-%s/date.html" % (year, month)
    return url
Packit 21b7a2
Packit 21b7a2
def scanXMLMsgArchive(url, title, force = 0):
    """Fetch one archived message and index its title and body text.

    Nothing is done, and 0 is returned, when *url* or *title* is None,
    when checkXMLMsgArchive() already knows the URL and *force* is 0, when
    addXMLMsgArchive() cannot register a new URL (it returned -1), or when
    the page cannot be loaded and parsed.

    Otherwise the title is passed to addStringArchive() with a weight of
    20 and every text node found under a <pre> element with a weight of 5.
    The parsed document and its XPath context are released before
    returning.  Returns 1 on success.
    """
    if url is None or title is None:
        return 0

    ID = checkXMLMsgArchive(url)
    if force == 0 and ID != -1:
        return 0

    if ID == -1:
        ID = addXMLMsgArchive(url, title)
        if ID == -1:
            return 0

    try:
        print("Loading %s" % (url))
        doc = libxml2.htmlParseFile(url, None)
    except Exception:
        doc = None
    if doc is None:
        print("Failed to parse %s" % (url))
        return 0

    try:
        addStringArchive(title, ID, 20)
        ctxt = doc.xpathNewContext()
        try:
            for text in ctxt.xpathEval("//pre//text()"):
                addStringArchive(text.content, ID, 5)
        finally:
            ctxt.xpathFreeContext()
    finally:
        # Documents returned by the libxml2 bindings must be freed
        # explicitly; one is loaded per message, so leaks add up quickly.
        doc.freeDoc()

    return 1
Packit 21b7a2
Packit 21b7a2
def scanXMLDateArchive(t = None, force = 0):
    """Index the messages listed in one monthly archive index page.

    Resets the global wordsDictArchive, loads the page whose URL
    getXMLDateArchive(t) returns, and calls scanXMLMsgArchive() for every
    anchor whose href starts with "msg".  A leading 'Re: ' and then a
    leading '[xml] ' are removed from the anchor text before it is used as
    the message title.

    A failure on one message is reported on standard output and does not
    stop the scan.  The parsed index page and its XPath context are
    released before returning.

    Returns -1 when the index page cannot be loaded, otherwise the sum of
    the values returned by scanXMLMsgArchive().
    """
    global wordsDictArchive

    wordsDictArchive = {}

    url = getXMLDateArchive(t)
    print("loading %s" % (url))
    try:
        doc = libxml2.htmlParseFile(url, None)
    except Exception:
        doc = None
    if doc is None:
        print("Failed to parse %s" % (url))
        return -1

    newmsg = 0
    try:
        ctxt = doc.xpathNewContext()
        try:
            for anchor in ctxt.xpathEval("//a[@href]"):
                href = anchor.prop("href")
                if href is None or href[0:3] != "msg":
                    continue
                try:
                    msg = libxml2.buildURI(href, url)
                    title = anchor.content
                    if title is not None and title[0:4] == 'Re: ':
                        title = title[4:]
                    if title is not None and title[0:6] == '[xml] ':
                        title = title[6:]
                    newmsg = newmsg + scanXMLMsgArchive(msg, title, force)
                except Exception:
                    # Best effort: report the failure and keep going with
                    # the remaining messages.
                    print("Failed to index message %s:" % (href))
                    print("%s %s" % sys.exc_info()[:2])
        finally:
            ctxt.xpathFreeContext()
    finally:
        doc.freeDoc()

    return newmsg
Packit 21b7a2
    
Packit 21b7a2
Packit 21b7a2
#########################################################################
Packit 21b7a2
#									#
Packit 21b7a2
#          Main code: open the DB, the API XML and analyze it		#
Packit 21b7a2
#									#
Packit 21b7a2
#########################################################################
Packit 21b7a2
def analyzeArchives(t = None, force = 0):
    """Index one month of the mail archive and store the word associations.

    Runs scanXMLDateArchive(t, force), which refills the global
    wordsDictArchive, then calls updateWordArchive() for every
    (word, id, relevance) association found in that dictionary.  Words
    whose entry is None are ignored.  Progress is reported on standard
    output.
    """
    global wordsDictArchive

    ret = scanXMLDateArchive(t, force)
    print("Indexed %d words in %d archive pages" % (len(wordsDictArchive), ret))

    i = 0
    for word in wordsDictArchive.keys():
        refs = wordsDictArchive[word]
        if refs is None:
            continue
        for id in refs.keys():
            relevance = refs[id]
            updateWordArchive(word, id, relevance)
            i = i + 1

    # These associations come from the mail archive, not from HTML pages.
    print("Found %d associations in archive pages" % (i))
Packit 21b7a2
Packit 21b7a2
def analyzeHTMLTop():
    """Index the HTML documentation pages and store the word associations.

    Runs analyzeHTMLPages(), then walks the global wordsDictHTML and calls
    updateWordHTML() for every (word, resource) association it holds,
    unpacking each entry as (relevance, id, section).  Words whose entry
    is None are ignored.  Progress is reported on standard output.
    """
    global wordsDictHTML

    pages = analyzeHTMLPages()
    print("Indexed %d words in %d HTML pages" % (len(wordsDictHTML), pages))

    associations = 0
    for word in wordsDictHTML.keys():
        refs = wordsDictHTML[word]
        if refs is None:
            continue
        for resource in refs.keys():
            relevance, id, section = refs[resource]
            updateWordHTML(word, resource, section, id, relevance)
            associations = associations + 1

    print("Found %d associations in HTML pages" % (associations))
Packit 21b7a2
Packit 21b7a2
def analyzeAPITop():
    """Index the API description and store the word associations.

    Loads the document named by the global API through loadAPI(), runs
    analyzeAPI() on it and frees it.  Any failure in that sequence is
    reported on standard output and terminates the process with exit
    status 1.

    Afterwards the global wordsDict is walked and updateWord() is called
    for every (word, symbol) association; words whose entry is None are
    counted as skipped.  Progress is reported on standard output.
    """
    global wordsDict
    global API

    try:
        doc = loadAPI(API)
        blocks = analyzeAPI(doc)
        print("Analyzed %d blocs" % (blocks))
        doc.freeDoc()
    except:
        print("Failed to parse and analyze %s" % (API))
        print("%s %s" % sys.exc_info()[:2])
        sys.exit(1)

    print("Indexed %d words" % (len(wordsDict)))
    associations = 0
    skipped = 0
    for word in wordsDict.keys():
        refs = wordsDict[word]
        if refs is None:
            skipped = skipped + 1
            continue
        for key in refs.keys():
            # Keys are (module, symbol) pairs; only the symbol is stored.
            (module, symbol) = key
            updateWord(word, symbol, refs[key])
            associations = associations + 1

    print("Found %d associations, skipped %d words" % (associations, skipped))
Packit 21b7a2
Packit 21b7a2
def usage():
    """Print the command-line synopsis and exit with status 1."""
    synopsis = ("Usage index.py [--force] [--archive]  [--archive-year year]"
                " [--archive-month month] [--API] [--docs]")
    print(synopsis)
    sys.exit(1)
Packit 21b7a2
Packit 21b7a2
def main():
    """Open the database and run the actions named on the command line.

    Options are processed from left to right:
      --force                 pass force=1 to the archive actions that
                              follow it on the command line
      --archive               index the archive of the current month
      --archive-year YEAR     index the twelve monthly archives of YEAR
      --archive-month Y-MONTH index one month, given as e.g. 2004-March
      --API                   index the API description
      --docs                  index the HTML pages

    usage() is called, which exits with status 1, when no argument is
    given, when an option is unknown, or when --archive-year or
    --archive-month is the last argument and so lacks its value.  A
    failure to open the database also exits with status 1.  A failure
    while indexing a month is reported and processing continues.
    """
    try:
        openMySQL()
    except Exception:
        print("Failed to open the database")
        print("%s %s" % sys.exc_info()[:2])
        sys.exit(1)

    args = sys.argv[1:]
    if not args:
        usage()

    months = ["January", "February", "March", "April", "May",
              "June", "July", "August", "September", "October",
              "November", "December"]
    force = 0
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == '--force':
            force = 1
        elif arg == '--archive':
            analyzeArchives(None, force)
        elif arg == '--archive-year' or arg == '--archive-month':
            i = i + 1
            if i >= len(args):
                # The option needs a value; show the synopsis rather than
                # failing with an IndexError.
                usage()
            if arg == '--archive-year':
                stamps = ["%s-%s" % (args[i], month) for month in months]
            else:
                stamps = [args[i]]
            for stamp in stamps:
                try:
                    T = time.strptime(stamp, "%Y-%B")
                    # Move ten days into the month so the date still falls
                    # in that month when it is later read back as UTC.
                    t = time.mktime(T) + 3600 * 24 * 10
                    analyzeArchives(t, force)
                except Exception:
                    # Exception, not a bare except, so that Ctrl-C stops
                    # the run instead of skipping to the next month.
                    print("Failed to index month archive:")
                    print("%s %s" % sys.exc_info()[:2])
        elif arg == '--API':
            analyzeAPITop()
        elif arg == '--docs':
            analyzeHTMLTop()
        else:
            usage()
        i = i + 1
Packit 21b7a2
Packit 21b7a2
# Run the indexer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()