Blame doc/index.py

Packit Service a31ea6
#!/usr/bin/python -u
Packit Service a31ea6
#
Packit Service a31ea6
# imports the API description and fills up a database with
Packit Service a31ea6
# name relevance to modules, functions or web pages
Packit Service a31ea6
#
Packit Service a31ea6
# Operation needed:
Packit Service a31ea6
# =================
Packit Service a31ea6
#
Packit Service a31ea6
# install mysqld, the python wrappers for mysql and libxml2, start mysqld
Packit Service a31ea6
# Change the root passwd of mysql:
Packit Service a31ea6
#    mysqladmin -u root password new_password
Packit Service a31ea6
# Create the new database xmlsoft
Packit Service a31ea6
#    mysqladmin -p create xmlsoft
Packit Service a31ea6
# Create a database user 'veillard' and give him password access
Packit Service a31ea6
# change veillard and abcde with the right user name and passwd
Packit Service a31ea6
#    mysql -p
Packit Service a31ea6
#    password:
Packit Service a31ea6
#    mysql> GRANT ALL PRIVILEGES ON xmlsoft TO veillard@localhost
Packit Service a31ea6
#           IDENTIFIED BY 'abcde' WITH GRANT OPTION;
Packit Service a31ea6
#
Packit Service a31ea6
# As the user check the access:
Packit Service a31ea6
#    mysql -p xmlsoft
Packit Service a31ea6
#    Enter password:
Packit Service a31ea6
#    Welcome to the MySQL monitor....
Packit Service a31ea6
#    mysql> use xmlsoft
Packit Service a31ea6
#    Database changed
Packit Service a31ea6
#    mysql> quit
Packit Service a31ea6
#    Bye
Packit Service a31ea6
#
Packit Service a31ea6
# Then run the script in the doc subdir, it will create the symbols and
Packit Service a31ea6
# word tables and populate them with information extracted from
Packit Service a31ea6
# the libxml2-api.xml API description, and make them accessible read-only
Packit Service a31ea6
# by nobody@localhost, the user Apache is expected to run as
Packit Service a31ea6
#
Packit Service a31ea6
# On the Apache configuration, make sure you have php support enabled
Packit Service a31ea6
#
Packit Service a31ea6
Packit Service a31ea6
import MySQLdb
Packit Service a31ea6
import libxml2
Packit Service a31ea6
import sys
Packit Service a31ea6
import string
Packit Service a31ea6
import os
Packit Service a31ea6
Packit Service a31ea6
#
Packit Service a31ea6
# We are not interested in parsing errors here
Packit Service a31ea6
#
Packit Service a31ea6
def callback(ctx, str):
    """Error handler that deliberately does nothing.

    Both arguments are ignored; the function exists only so that it can
    be registered as the libxml2 error handler and keep parsing errors
    from being reported.
    """
    return None
Packit Service a31ea6
# Register the no-op callback defined above as libxml2's error handler,
# passing None as its context argument.
libxml2.registerErrorHandler(callback, None)
Packit Service a31ea6
Packit Service a31ea6
#
Packit Service a31ea6
# The dictionary of tables required and the SQL command needed
Packit Service a31ea6
# to create them
Packit Service a31ea6
#
Packit Service a31ea6
# Maps each required table name to the SQL statement that creates it.
# createTable() executes these verbatim, so the statement text is kept
# exactly as it was (including its original whitespace).
TABLES = {
    # One row per symbol: name, owning module, type and description.
    "symbols": """CREATE TABLE symbols (
           name varchar(255) BINARY NOT NULL,
	   module varchar(255) BINARY NOT NULL,
           type varchar(25) NOT NULL,
	   descr varchar(255),
	   UNIQUE KEY name (name),
	   KEY module (module))""",

    # Word -> symbol index with a relevance score.
    "words": """CREATE TABLE words (
           name varchar(50) BINARY NOT NULL,
	   symbol varchar(255) BINARY NOT NULL,
           relevance int,
	   KEY name (name),
	   KEY symbol (symbol),
	   UNIQUE KEY ID (name, symbol))""",

    # Word -> resource index (with section and id) and a relevance score.
    "wordsHTML": """CREATE TABLE wordsHTML (
           name varchar(50) BINARY NOT NULL,
	   resource varchar(255) BINARY NOT NULL,
	   section varchar(255),
	   id varchar(50),
           relevance int,
	   KEY name (name),
	   KEY resource (resource),
	   UNIQUE KEY ref (name, resource))""",

    # Word -> archive entry ID index with a relevance score.
    "wordsArchive": """CREATE TABLE wordsArchive (
           name varchar(50) BINARY NOT NULL,
	   ID int(11) NOT NULL,
           relevance int,
	   KEY name (name),
	   UNIQUE KEY ref (name, ID))""",

    # Resource -> title.
    "pages": """CREATE TABLE pages (
           resource varchar(255) BINARY NOT NULL,
	   title varchar(255) BINARY NOT NULL,
	   UNIQUE KEY name (resource))""",

    # Archived resources, each given an auto-incremented ID.
    "archives": """CREATE TABLE archives (
           ID int(11) NOT NULL auto_increment,
           resource varchar(255) BINARY NOT NULL,
	   title varchar(255) BINARY NOT NULL,
	   UNIQUE KEY id (ID,resource(255)),
	   INDEX (ID),
	   INDEX (resource))""",

    # Value/Count pairs; presumably query statistics kept by the web
    # front-end -- nothing in this file writes to them.
    "Queries": """CREATE TABLE Queries (
           ID int(11) NOT NULL auto_increment,
	   Value varchar(50) NOT NULL,
	   Count int(11) NOT NULL,
	   UNIQUE KEY id (ID,Value(35)),
	   INDEX (ID))""",

    "AllQueries": """CREATE TABLE AllQueries (
           ID int(11) NOT NULL auto_increment,
	   Value varchar(50) NOT NULL,
	   Count int(11) NOT NULL,
	   UNIQUE KEY id (ID,Value(35)),
	   INDEX (ID))""",
}
Packit Service a31ea6
Packit Service a31ea6
#
Packit Service a31ea6
# The XML API description file to parse
Packit Service a31ea6
#
Packit Service a31ea6
# File name of the XML API description.
API = "libxml2-api.xml"
# Module-wide MySQL connection; None until openMySQL() assigns it.
DB = None
Packit Service a31ea6
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
#									#
Packit Service a31ea6
#                  MySQL database interfaces				#
Packit Service a31ea6
#									#
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
def createTable(db, name):
    """Drop and re-create the table called `name` on connection `db`.

    The CREATE statement is looked up in TABLES.

    Returns whatever the cursor's execute() returns for the CREATE
    statement, or -1 when `db` or `name` is None, when `name` has no
    entry in TABLES, or when the statement fails.
    """
    if db is None or name is None:
        return -1
    cursor = db.cursor()

    # A table name cannot be passed as a bound query parameter, so it is
    # interpolated here; the visible caller (checkTables) only passes
    # keys of TABLES.
    ret = cursor.execute("DROP TABLE IF EXISTS %s" % (name))
    if ret == 1:
        print("Removed table %s" % (name))
    print("Creating table %s" % (name))
    try:
        # The TABLES lookup stays inside the try so that an unknown name
        # is reported as a failure instead of raising KeyError.
        ret = cursor.execute(TABLES[name])
    except Exception:
        print("Failed to create table %s" % (name))
        return -1
    return ret
Packit Service a31ea6
Packit Service a31ea6
def checkTables(db, verbose = 1):
    """Ensure every table listed in TABLES exists and can be read.

    Missing tables are created with createTable().  Each table is then
    counted; if that fails, a "repair table" is attempted and the count
    is retried.  Finally read access is granted to nobody@localhost.

    db      -- open database connection
    verbose -- when true, print progress and record counts

    Returns 0 when done, or -1 if `db` is None.
    """
    if db is None:
        return -1
    cursor = db.cursor()
    nbtables = cursor.execute("show tables")
    if verbose:
        print("Found %d tables" % (nbtables))

    # Names of the tables currently present in the database.
    existing = {}
    for row in cursor.fetchall():
        existing[row[0]] = {}

    for table in TABLES:
        if table not in existing:
            print("table %s missing" % (table))
            createTable(db, table)
        try:
            cursor.execute("SELECT count(*) from %s" % table)
            row = cursor.fetchone()
            if verbose:
                print("Table %s contains %d records" % (table, row[0]))
        except Exception:
            print("Troubles with table %s : repairing" % (table))
            ret = cursor.execute("repair table %s" % table)
            print("repairing returned %d" % (ret))
            cursor.execute("SELECT count(*) from %s" % table)
            row = cursor.fetchone()
            print("Table %s contains %d records" % (table, row[0]))
    if verbose:
        print("checkTables finished")

    # make sure apache can access the tables read-only
    try:
        cursor.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost")
        cursor.execute("GRANT INSERT,SELECT,UPDATE  ON xmlsoft.Queries TO nobody@localhost")
    except Exception:
        # Deliberately best effort: a failed GRANT does not abort the
        # check, and the function still returns 0.
        pass
    return 0
Packit Service a31ea6
    
Packit Service a31ea6
def openMySQL(db="xmlsoft", passwd=None, verbose = 1):
    """Open the MySQL connection, store it in the global DB and check it.

    db      -- database name
    passwd  -- password; when None it is read from the MySQL_PASS
               environment variable, and the process exits with status 1
               if that variable is not set
    verbose -- passed through to checkTables()

    Returns the result of checkTables(), or -1 if no connection object
    was obtained.
    """
    global DB

    if passwd is None:
        try:
            passwd = os.environ["MySQL_PASS"]
        except KeyError:
            print("No password available, set environment MySQL_PASS")
            sys.exit(1)

    DB = MySQLdb.connect(passwd=passwd, db=db)
    if DB is None:
        return -1
    return checkTables(DB, verbose)
Packit Service a31ea6
Packit Service a31ea6
def updateWord(name, symbol, relevance):
    """Store the relevance of word `name` for `symbol` in the words table.

    An INSERT is tried first; if it fails (for instance because the
    (name, symbol) row already exists) the row is UPDATEd instead.
    The connection is opened on demand with openMySQL().

    Returns the cursor's execute() result, or -1 when there is no
    connection, `name` or `symbol` is None, or both statements fail.
    """
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or symbol is None:
        return -1

    cursor = DB.cursor()
    try:
        # Values are bound by the driver rather than pasted into the
        # SQL text, so quotes in the data cannot break the statement.
        ret = cursor.execute(
            "INSERT INTO words (name, symbol, relevance) VALUES (%s, %s, %s)",
            (name, symbol, relevance))
    except Exception:
        try:
            ret = cursor.execute(
                "UPDATE words SET relevance = %s where name = %s and symbol = %s",
                (relevance, name, symbol))
        except Exception:
            print("Update word (%s, %s, %s) failed command" % (name, symbol, relevance))
            print("UPDATE words SET relevance = %d where name = '%s' and symbol = '%s'" % (relevance, name, symbol))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit Service a31ea6
Packit Service a31ea6
def updateSymbol(name, module, type, desc):
    """Insert or update the symbols-table row for `name`.

    The symbol name is first indexed as a word for itself with relevance
    50 (this happens before the arguments are validated).  The
    description is reduced to its text up to the first '.', with single
    quotes replaced by spaces, truncated to 99 characters; anything that
    cannot be processed that way becomes "".

    Returns the cursor's execute() result, or -1 when there is no
    connection, a required argument is None, or both the INSERT and the
    fallback UPDATE fail.
    """
    updateWord(name, name, 50)
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or module is None or type is None:
        return -1

    try:
        desc = desc.replace("'", " ").split(".")[0][0:99]
    except Exception:
        # e.g. desc is None
        desc = ""

    cursor = DB.cursor()
    try:
        ret = cursor.execute(
            "INSERT INTO symbols (name, module, type, descr) VALUES (%s, %s, %s, %s)",
            (name, module, type, desc))
    except Exception:
        try:
            ret = cursor.execute(
                "UPDATE symbols SET module=%s, type=%s, descr=%s where name=%s",
                (module, type, desc, name))
        except Exception:
            print("Update symbol (%s, %s, %s) failed command" % (name, module, type))
            print("""UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'""" % (module, type, desc, name))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit Service a31ea6
        
Packit Service a31ea6
def addFunction(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'function'."""
    kind = 'function'
    return updateSymbol(name, module, kind, desc)
Packit Service a31ea6
Packit Service a31ea6
def addMacro(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'macro'."""
    kind = 'macro'
    return updateSymbol(name, module, kind, desc)
Packit Service a31ea6
Packit Service a31ea6
def addEnum(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'enum'."""
    kind = 'enum'
    return updateSymbol(name, module, kind, desc)
Packit Service a31ea6
Packit Service a31ea6
def addStruct(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'struct'."""
    kind = 'struct'
    return updateSymbol(name, module, kind, desc)
Packit Service a31ea6
Packit Service a31ea6
def addConst(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'const'."""
    kind = 'const'
    return updateSymbol(name, module, kind, desc)
Packit Service a31ea6
Packit Service a31ea6
def addType(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'type'."""
    kind = 'type'
    return updateSymbol(name, module, kind, desc)
Packit Service a31ea6
Packit Service a31ea6
def addFunctype(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'functype'."""
    kind = 'functype'
    return updateSymbol(name, module, kind, desc)
Packit Service a31ea6
Packit Service a31ea6
def addPage(resource, title):
    """Insert or update the pages-table row mapping `resource` to `title`.

    An INSERT is tried first and an UPDATE of the title is the fallback.
    A `title` of None is stored as "" (the column is NOT NULL), which
    matches what addXMLMsgArchive() does for its title.

    Returns the cursor's execute() result, or -1 when there is no
    connection, `resource` is None, or both statements fail.
    """
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if resource is None:
        return -1
    if title is None:
        title = ""

    cursor = DB.cursor()
    try:
        # Bound parameters: titles may contain quotes.
        ret = cursor.execute(
            "INSERT INTO pages (resource, title) VALUES (%s, %s)",
            (resource, title))
    except Exception:
        try:
            ret = cursor.execute(
                "UPDATE pages SET title=%s WHERE resource=%s",
                (title, resource))
        except Exception:
            # The previous message formatted name/module/type, which do
            # not exist in this function, so reporting the failure
            # raised NameError instead.
            print("Update page (%s, %s) failed command" % (resource, title))
            print("""UPDATE pages SET title='%s' WHERE resource='%s'""" % (title, resource))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit Service a31ea6
Packit Service a31ea6
def updateWordHTML(name, resource, desc, id, relevance):
    """Insert or update the wordsHTML row for word `name` in `resource`.

    name      -- the indexed word
    resource  -- the resource the word was found in
    desc      -- section text; None becomes "", otherwise single quotes
                 are replaced by spaces and the text is cut to 99 chars
    id        -- identifier stored in the id column; None becomes ""
    relevance -- relevance score

    Returns the cursor's execute() result, or -1 when there is no
    connection, `name` or `resource` is None, or both the INSERT and the
    fallback UPDATE fail.
    """
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or resource is None:
        return -1
    if id is None:
        id = ""
    if desc is None:
        desc = ""
    else:
        try:
            desc = desc.replace("'", " ")[0:99]
        except Exception:
            desc = ""

    cursor = DB.cursor()
    try:
        ret = cursor.execute(
            "INSERT INTO wordsHTML (name, resource, section, id, relevance) VALUES (%s, %s, %s, %s, %s)",
            (name, resource, desc, id, relevance))
    except Exception:
        try:
            ret = cursor.execute(
                "UPDATE wordsHTML SET section=%s, id=%s, relevance=%s where name=%s and resource=%s",
                (desc, id, relevance, name, resource))
        except Exception:
            print("Update symbol (%s, %s, %d) failed command" % (name, resource, relevance))
            print("""UPDATE wordsHTML SET section='%s', id='%s', relevance='%d' where name='%s' and resource='%s'""" % (desc, id, relevance, name, resource))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit Service a31ea6
Packit Service a31ea6
def checkXMLMsgArchive(url):
    """Look up the ID of the archives-table row whose resource is `url`.

    Returns the ID column of the first matching row, or -1 when there is
    no connection, `url` is None, the query fails, or no row matches.
    """
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if url is None:
        return -1

    cursor = DB.cursor()
    try:
        # The URL is bound as a parameter, not interpolated into the SQL.
        cursor.execute("SELECT ID FROM archives WHERE resource=%s", (url,))
        row = cursor.fetchone()
    except Exception:
        return -1
    if row is None:
        return -1
    return row[0]
Packit Service a31ea6
    
Packit Service a31ea6
def addXMLMsgArchive(url, title):
    """Insert `url` into the archives table and return its ID.

    A `title` of None is stored as ""; otherwise single quotes are
    replaced by spaces and the title is cut to 99 characters.  After the
    INSERT the row is read back to obtain its ID.

    Returns the ID as an int, or -1 when there is no connection, `url`
    is None, a statement fails, or the row cannot be read back.
    """
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if url is None:
        return -1
    if title is None:
        title = ""
    else:
        title = title.replace("'", " ")[0:99]

    cursor = DB.cursor()
    # `cmd` is only the human-readable form shown if a statement fails;
    # the statements themselves are executed with bound parameters.
    cmd = """INSERT INTO archives (resource, title) VALUES ('%s','%s')""" % (url, title)
    try:
        cursor.execute(
            "INSERT INTO archives (resource, title) VALUES (%s, %s)",
            (url, title))
        cmd = """SELECT ID FROM archives WHERE resource='%s'""" % (url)
        cursor.execute("SELECT ID FROM archives WHERE resource=%s", (url,))
        row = cursor.fetchone()
        if row is None:
            print("addXMLMsgArchive failed to get the ID: %s" % (url))
            return -1
    except Exception:
        print("addXMLMsgArchive failed command: %s" % (cmd))
        return -1

    return int(row[0])
Packit Service a31ea6
Packit Service a31ea6
def updateWordArchive(name, id, relevance):
    """Insert or update the wordsArchive row for word `name` and archive `id`.

    An INSERT is tried first and an UPDATE of the relevance is the
    fallback.

    Returns the cursor's execute() result, or -1 when there is no
    connection, `name` or `id` is None, or both statements fail.
    """
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or id is None:
        return -1

    cursor = DB.cursor()
    try:
        ret = cursor.execute(
            "INSERT INTO wordsArchive (name, id, relevance) VALUES (%s, %s, %s)",
            (name, id, relevance))
    except Exception:
        try:
            ret = cursor.execute(
                "UPDATE wordsArchive SET relevance=%s where name=%s and ID=%s",
                (relevance, name, id))
        except Exception:
            print("Update word archive (%s, %d, %d) failed command" % (name, id, relevance))
            print("""UPDATE wordsArchive SET relevance='%d' where name='%s' and ID='%d'""" % (relevance, name, id))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
Packit Service a31ea6
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
#									#
Packit Service a31ea6
#                  Word dictionary and analysis routines		#
Packit Service a31ea6
#									#
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
Packit Service a31ea6
#
Packit Service a31ea6
# Top 100 English words, minus those shorter than 3 letters, plus our own set
Packit Service a31ea6
#
Packit Service a31ea6
# Words that are never indexed.  Only key membership is tested, so every
# value is 0.  ('was' and 'may' were listed twice in the literal this
# replaces; the resulting dictionary is the same.)
dropWords = dict.fromkeys((
    'the', 'this', 'can', 'man', 'had', 'him', 'only',
    'and', 'not', 'been', 'other', 'even', 'are', 'was',
    'new', 'most', 'but', 'when', 'some', 'made', 'from',
    'who', 'could', 'after', 'that', 'will', 'time', 'also',
    'have', 'more', 'these', 'did', 'two', 'many',
    'they', 'may', 'before', 'for', 'which', 'out', 'then',
    'must', 'one', 'through', 'with', 'you', 'said',
    'first', 'back', 'were', 'what', 'any', 'years', 'his',
    'her', 'where', 'all', 'its', 'now', 'much', 'she',
    'about', 'such', 'your', 'there', 'into', 'like',
    'would', 'than', 'our', 'well', 'their', 'them', 'over',
    'down',
    'net', 'www', 'bad', 'Okay', 'bin', 'cur',
), 0)
Packit Service a31ea6
Packit Service a31ea6
# In-memory indexes filled by addWord(), addWordHTML() and
# addWordArchive() respectively; each maps a word to a dictionary of
# references (or to None once a word has been blacklisted).
wordsDict, wordsDictHTML, wordsDictArchive = {}, {}, {}
Packit Service a31ea6
Packit Service a31ea6
def cleanupWordsString(str):
    """Return `str` with punctuation and line breaks turned into spaces.

    Each listed separator is replaced by a single space, so the result
    can be split on whitespace to obtain words.  Nothing is collapsed or
    stripped: the result has the same length as the input.
    """
    text = str
    # "\xc2" and "\xa0" are presumably the two bytes of a UTF-8 encoded
    # no-break space -- TODO confirm against the indexed documents.
    for separator in (".", "!", "?", ",", "'", '"', ";", "(", ")",
                      "{", "}", "<", ">", "=", "/", "*", ":", "#",
                      "\\", "\n", "\r", "\xc2", "\xa0"):
        text = text.replace(separator, " ")
    return text
Packit Service a31ea6
    
Packit Service a31ea6
def cleanupDescrString(str):
    """Return `str` cleaned up for use as a one-line description.

    Single quotes, line breaks and the "\\xc2"/"\\xa0" characters are
    replaced by spaces, then runs of whitespace are collapsed to one
    space and leading/trailing whitespace is dropped.
    """
    text = str
    for unwanted in ("'", "\n", "\r", "\xc2", "\xa0"):
        text = text.replace(unwanted, " ")
    # Join the split words.  The previous code joined the string itself,
    # which put a space between every character ("abc" -> "a b c").
    return " ".join(text.split())
Packit Service a31ea6
Packit Service a31ea6
def splitIdentifier(str):
    """Split an identifier into lower-cased words.

    A word starts at any ASCII letter and extends over the run of
    upper-case letters that follows it, then the run of lower-case
    letters after that.  Digits directly after a word are skipped, and
    any other character is ignored.  For example "xmlParseFile" gives
    ['xml', 'parse', 'file'] and "XMLParse" gives ['xmlparse'].

    Returns the list of words in order of appearance.
    """
    words = []
    pos = 0
    end = len(str)
    # Scan with an index rather than re-slicing the string for every
    # character consumed.
    while pos < end:
        first = str[pos].lower()
        pos = pos + 1
        if first < 'a' or first > 'z':
            continue
        start = pos
        while pos < end and 'A' <= str[pos] <= 'Z':
            pos = pos + 1
        upper_run = str[start:pos].lower()
        start = pos
        while pos < end and 'a' <= str[pos] <= 'z':
            pos = pos + 1
        lower_run = str[start:pos]
        while pos < end and '0' <= str[pos] <= '9':
            pos = pos + 1
        words.append(first + upper_run + lower_run)
    return words
Packit Service a31ea6
Packit Service a31ea6
def addWord(word, module, symbol, relevance):
    """Add `relevance` to the score of `word` for (`module`, `symbol`).

    Scores accumulate in wordsDict[word][(module, symbol)].  Once a word
    has more than 500 references it is blacklisted by setting its entry
    to None, and it is ignored from then on.

    Returns the accumulated relevance; 0 when the word is skipped (it is
    in dropWords, its first character has a code above 0x80, or it is
    blacklisted); -1 when `word` is None or shorter than 3 characters,
    or `module` or `symbol` is None.
    """
    if word is None or len(word) < 3:
        return -1
    if module is None or symbol is None:
        return -1
    if word in dropWords:
        return 0
    if ord(word[0]) > 0x80:
        return 0

    key = (module, symbol)
    if word in wordsDict:
        refs = wordsDict[word]
        if refs is None:
            return 0
        if len(refs) > 500:
            wordsDict[word] = None
            return 0
        relevance = relevance + refs.get(key, 0)
    else:
        refs = wordsDict[word] = {}
    refs[key] = relevance
    return relevance
Packit Service a31ea6
    
Packit Service a31ea6
def addString(str, module, symbol, relevance):
    """Index every word of `str` for (`module`, `symbol`).

    The text is cleaned with cleanupWordsString(), split on whitespace,
    and each word longer than 2 characters is passed to addWord().

    Returns the sum of the addWord() results, or -1 when `str` is None
    or shorter than 3 characters.
    """
    if str is None or len(str) < 3:
        return -1
    total = 0
    for word in cleanupWordsString(str).split():
        if len(word) > 2:
            # NOTE(review): every word is added with a fixed relevance
            # of 5; the `relevance` argument is not used.  Left as is in
            # case callers rely on it -- confirm the intent.
            total = total + addWord(word, module, symbol, 5)

    return total
Packit Service a31ea6
Packit Service a31ea6
def addWordHTML(word, resource, id, section, relevance):
    """Add `relevance` to the score of `word` for the page `resource`.

    wordsDictHTML[word][resource] holds a (relevance, id, section)
    tuple.  When an entry already exists, its id and section are kept
    if they are not None, and the relevance is accumulated.

    Returns the accumulated relevance; 0 when the word is skipped (it is
    in dropWords, its first character has a code above 0x80, or its
    entry is None); -1 when `word` is None or shorter than 3 characters,
    or `resource` or `section` is None.
    """
    if word is None or len(word) < 3:
        return -1
    if resource is None or section is None:
        return -1
    if word in dropWords:
        return 0
    if ord(word[0]) > 0x80:
        return 0

    section = cleanupDescrString(section)

    if word in wordsDictHTML:
        refs = wordsDictHTML[word]
        if refs is None:
            print("skipped %s" % (word))
            return 0
        if resource in refs:
            (r, i, s) = refs[resource]
            if i is not None:
                id = i
            if s is not None:
                section = s
            relevance = relevance + r
    else:
        refs = wordsDictHTML[word] = {}
    refs[resource] = (relevance, id, section)
    return relevance
Packit Service a31ea6
    
Packit Service a31ea6
def addStringHTML(str, resource, id, section, relevance):
    """Index every word of `str` for the page `resource`.

    The text is cleaned with cleanupWordsString(), split on whitespace,
    and each word longer than 2 characters is passed to addWordHTML()
    with the given id, section and relevance.  A failure on one word is
    reported and does not stop the remaining words.

    Returns the sum of the addWordHTML() results (negative results
    included), or -1 when `str` is None or shorter than 3 characters.
    """
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for word in cleanupWordsString(str).split():
        if len(word) <= 2:
            continue
        try:
            r = addWordHTML(word, resource, id, section, relevance)
            if r < 0:
                print("addWordHTML failed: %s %s" % (word, resource))
            ret = ret + r
        except Exception:
            print("addWordHTML failed: %s %s %d" % (word, resource, relevance))
            print("%s %s" % sys.exc_info()[:2])

    return ret
Packit Service a31ea6
Packit Service a31ea6
def addWordArchive(word, id, relevance):
    """Add `relevance` to the score of `word` for the archive entry `id`.

    Scores accumulate in wordsDictArchive[word][id].

    Returns the accumulated relevance; 0 when the word is skipped (it is
    in dropWords, its first character has a code above 0x80, or its
    entry is None); -1 when `word` is None or shorter than 3 characters,
    or `id` is None or -1.
    """
    if word is None or len(word) < 3:
        return -1
    if id is None or id == -1:
        return -1
    if word in dropWords:
        return 0
    if ord(word[0]) > 0x80:
        return 0

    if word in wordsDictArchive:
        refs = wordsDictArchive[word]
        if refs is None:
            print("skipped %s" % (word))
            return 0
        relevance = relevance + refs.get(id, 0)
    else:
        refs = wordsDictArchive[word] = {}
    refs[id] = relevance
    return relevance
Packit Service a31ea6
    
Packit Service a31ea6
def addStringArchive(str, id, relevance):
    """Index every word of a string for one archive message.

    The string is passed through cleanupWordsString(), split on
    whitespace, and each word longer than 2 characters is handed to
    addWordArchive(word, id, relevance).

    Returns -1 when str is None or shorter than 3 characters, otherwise
    the sum of the non-negative values returned by addWordArchive().
    Failures on individual words are reported on stdout and skipped.
    """
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for word in cleanupWordsString(str).split():
        if len(word) <= 2:
            continue
        try:
            r = addWordArchive(word, id, relevance)
        except Exception:
            # Best effort: report the word that failed and keep going.
            print("addWordArchive failed: %s %s %d" % (word, id, relevance))
            print("%s %s" % sys.exc_info()[:2])
            continue
        if r < 0:
            print("addWordArchive failed: %s %s" % (word, id))
        else:
            ret = ret + r
    return ret
Packit Service a31ea6
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
#									#
Packit Service a31ea6
#                  XML API description analysis				#
Packit Service a31ea6
#									#
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
Packit Service a31ea6
def loadAPI(filename):
    """Parse filename with libxml2.parseFile() and return the document.

    A "loaded ..." trace line is printed once the parse call returned.
    """
    api_doc = libxml2.parseFile(filename)
    print("loaded %s" % (filename))
    return api_doc
Packit Service a31ea6
Packit Service a31ea6
def foundExport(file, symbol):
    """Record symbol as a function of module file and index its name.

    Calls addFunction(symbol, file), then addWord() with relevance 10
    for every piece returned by splitIdentifier(symbol).

    Returns 1 on success, 0 when file or symbol is None.
    """
    if file is None or symbol is None:
        return 0
    addFunction(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)
    return 1
Packit Service a31ea6
     
Packit Service a31ea6
def analyzeAPIFile(top):
    """Process the <exports> children of one <file> element.

    top is the <file> node; its "name" attribute is used as the module
    name. Text children are ignored, each <exports> child is passed to
    foundExport(), and any other element is reported on stdout.

    Returns the sum of the values returned by foundExport().
    """
    count = 0
    name = top.prop("name")
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            if cur.name == "exports":
                count = count + foundExport(name, cur.prop("symbol"))
            else:
                # Two placeholders: the offending element, then the
                # name of the enclosing <file>.
                print("unexpected element %s in API doc <file name='%s'>"
                      % (cur.name, name))
        cur = cur.next
    return count
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPIFiles(top):
    """Walk the children of a <files> element.

    Text nodes are skipped, every <file> child is handed to
    analyzeAPIFile(), anything else is reported on stdout.

    Returns the sum of the values returned by analyzeAPIFile().
    """
    total = 0
    node = top.children
    while node is not None:
        if node.type != 'text':
            if node.name == "file":
                total = total + analyzeAPIFile(node)
            else:
                print("unexpected element %s in API doc <files>" % (node.name))
        node = node.next
    return total
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPIEnum(top):
    """Register one <enum> element and index the pieces of its name.

    Reads the "file" and "name" attributes of top, calls addEnum(), then
    addWord() with relevance 10 for each piece of splitIdentifier(name).

    Returns 1 when registered, 0 when either attribute is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    name = top.prop("name")
    if name is None:
        return 0

    addEnum(name, module)
    for part in splitIdentifier(name):
        addWord(part, module, name, 10)
    return 1
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPIConst(top):
    """Register one <const> element and index the pieces of its name.

    Reads the "file" and "name" attributes of top, calls addConst(),
    then addWord() with relevance 10 for each piece of
    splitIdentifier(name).

    Returns 1 when registered, 0 when either attribute is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    name = top.prop("name")
    if name is None:
        return 0

    addConst(name, module)
    for part in splitIdentifier(name):
        addWord(part, module, name, 10)
    return 1
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPIType(top):
    """Register one <typedef> element and index the pieces of its name.

    Reads the "file" and "name" attributes of top, calls addType(), then
    addWord() with relevance 10 for each piece of splitIdentifier(name).

    Returns 1 when registered, 0 when either attribute is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    name = top.prop("name")
    if name is None:
        return 0

    addType(name, module)
    for part in splitIdentifier(name):
        addWord(part, module, name, 10)
    return 1
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPIFunctype(top):
    """Register one <functype> element and index the pieces of its name.

    Reads the "file" and "name" attributes of top, calls addFunctype(),
    then addWord() with relevance 10 for each piece of
    splitIdentifier(name).

    Returns 1 when registered, 0 when either attribute is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    name = top.prop("name")
    if name is None:
        return 0

    addFunctype(name, module)
    for part in splitIdentifier(name):
        addWord(part, module, name, 10)
    return 1
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPIStruct(top):
    """Register one <struct> element and index its name and description.

    Reads the "file" and "name" attributes of top, calls addStruct(),
    then addWord() with relevance 10 for each piece of
    splitIdentifier(name). When an "info" attribute is present, single
    quotes are replaced by spaces and every whitespace-separated word
    longer than 2 characters is added with relevance 5.

    Returns 1 when registered, 0 when "file" or "name" is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    name = top.prop("name")
    if name is None:
        return 0

    addStruct(name, module)
    for part in splitIdentifier(name):
        addWord(part, module, name, 10)

    description = top.prop("info")
    if description is not None:
        cleaned = description.replace("'", " ").strip()
        for token in cleaned.split():
            if len(token) > 2:
                addWord(token, module, name, 5)
    return 1
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPIMacro(top):
    """Register one <macro> element and index its name and description.

    The macro name comes from the "name" attribute (single quotes
    replaced by spaces, then stripped); the description is the content
    of the first <info> child, if any. The pieces of the name are
    indexed with relevance 10, the description words longer than 2
    characters with relevance 5.

    Returns 1 when a description was found, 0 when "file" or "name" is
    missing or when there is no <info> child (the macro is still
    registered through addMacro() in that last case).
    """
    module = top.prop("file")
    if module is None:
        return 0
    macro = top.prop("name")
    if macro is None:
        return 0
    macro = macro.replace("'", " ").strip()

    # Only the first <info> child is considered.
    description = None
    node = top.children
    while node is not None:
        if node.type != 'text' and node.name == "info":
            description = node.content
            break
        node = node.next

    for part in splitIdentifier(macro):
        addWord(part, module, macro, 10)

    if description is None:
        addMacro(macro, module)
        print("Macro %s description has no <info>" % (macro))
        return 0

    description = description.replace("'", " ").strip()
    addMacro(macro, module, description)
    for token in description.split():
        if len(token) > 2:
            addWord(token, module, macro, 5)
    return 1
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPIFunction(top):
    """Register one <function> element and index everything about it.

    While walking the children of top:
      - <info>   supplies the description (the last one seen wins),
      - <return> has its "info" attribute indexed via addString()
                 with relevance 7,
      - <arg>    has its "info" attribute indexed via addString() with
                 relevance 5 and its "name" via addWord() with
                 relevance 7.
    Single quotes are replaced by spaces in every indexed text. The
    function is then registered with addFunction() and the pieces of
    its name are indexed with relevance 10.

    Returns 1 when processed, 0 when "file" or "name" is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    func = top.prop("name")
    if func is None:
        return 0
    func = func.replace("'", " ").strip()

    description = None
    node = top.children
    while node is not None:
        if node.type != 'text':
            tag = node.name
            if tag == "info":
                description = node.content
            elif tag == "return":
                text = node.prop("info")
                if text is not None:
                    text = text.replace("'", " ").strip()
                    addString(text, module, func, 7)
            elif tag == "arg":
                text = node.prop("info")
                if text is not None:
                    text = text.replace("'", " ").strip()
                    addString(text, module, func, 5)
                argname = node.prop("name")
                if argname is not None:
                    argname = argname.replace("'", " ").strip()
                    addWord(argname, module, func, 7)
        node = node.next

    if description is None:
        print("Function %s description has no <info>" % (func))
        addFunction(func, module, "")
    else:
        description = description.replace("'", " ").strip()
        addFunction(func, module, description)
        addString(description, module, func, 5)

    for part in splitIdentifier(func):
        addWord(part, module, func, 10)
    return 1
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPISymbols(top):
    """Dispatch every child of the <symbols> element to its analyzer.

    Text nodes are skipped. <macro>, <function>, <const>, <typedef>,
    <struct>, <enum> and <functype> children are handed to the matching
    analyzeAPI*() function; any other element is reported on stdout.

    Returns the sum of the values returned by the analyzers.
    """
    count = 0
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            kind = cur.name
            if kind == "macro":
                count = count + analyzeAPIMacro(cur)
            elif kind == "function":
                count = count + analyzeAPIFunction(cur)
            elif kind == "const":
                count = count + analyzeAPIConst(cur)
            elif kind == "typedef":
                count = count + analyzeAPIType(cur)
            elif kind == "struct":
                count = count + analyzeAPIStruct(cur)
            elif kind == "enum":
                count = count + analyzeAPIEnum(cur)
            elif kind == "functype":
                count = count + analyzeAPIFunctype(cur)
            else:
                # This walks <symbols>, not <files>: name the right
                # container in the diagnostic.
                print("unexpected element %s in API doc <symbols>" % (kind))
        cur = cur.next
    return count
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPI(doc):
    """Analyze a parsed API description document.

    The root element must be named "api". Its <symbols> children are
    handed to analyzeAPISymbols(); <files> children are accepted but
    deliberately ignored (the call to analyzeAPIFiles() is disabled);
    any other element is reported on stdout.

    Returns the sum of the values returned by analyzeAPISymbols(), or
    -1 when doc is None or the root element is not <api>.
    """
    if doc is None:
        return -1
    root = doc.getRootElement()
    if root.name != "api":
        print("Unexpected root name")
        return -1

    total = 0
    node = root.children
    while node is not None:
        if node.type != 'text':
            if node.name == "symbols":
                total = total + analyzeAPISymbols(node)
            elif node.name != "files":
                print("unexpected element %s in API doc" % (node.name))
        node = node.next
    return total
Packit Service a31ea6
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
#									#
Packit Service a31ea6
#                  Web pages parsing and analysis			#
Packit Service a31ea6
#									#
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
Packit Service a31ea6
import glob
Packit Service a31ea6
Packit Service a31ea6
def analyzeHTMLText(doc, resource, p, section, id):
    """Index the content of text node p with relevance 5.

    Returns the value of addStringHTML() for p.content, or -1 when
    anything goes wrong while reading or indexing the content.
    doc is accepted but not used.
    """
    try:
        indexed = 0 + addStringHTML(p.content, resource, id, section, 5)
    except:
        return -1
    return indexed
Packit Service a31ea6
Packit Service a31ea6
def analyzeHTMLPara(doc, resource, p, section, id):
    """Index the content of paragraph node p with relevance 5.

    Returns the value of addStringHTML() for p.content, or -1 when
    anything goes wrong while reading or indexing the content.
    doc is accepted but not used.
    """
    try:
        indexed = 0 + addStringHTML(p.content, resource, id, section, 5)
    except:
        return -1
    return indexed
Packit Service a31ea6
Packit Service a31ea6
def analyzeHTMLPre(doc, resource, p, section, id):
    """Index the content of preformatted node p with relevance 5.

    Returns the value of addStringHTML() for p.content, or -1 when
    anything goes wrong while reading or indexing the content.
    doc is accepted but not used.
    """
    try:
        indexed = 0 + addStringHTML(p.content, resource, id, section, 5)
    except:
        return -1
    return indexed
Packit Service a31ea6
Packit Service a31ea6
def analyzeHTML(doc, resource, p, section, id):
    """Index the content of node p with relevance 5.

    NOTE: the two-argument analyzeHTML(doc, resource) defined right
    after this function rebinds the same name, so this five-argument
    version is not reachable once the module has finished loading.

    Returns the value of addStringHTML() for p.content, or -1 when
    anything goes wrong while reading or indexing the content.
    """
    try:
        indexed = 0 + addStringHTML(p.content, resource, id, section, 5)
    except:
        return -1
    return indexed
Packit Service a31ea6
Packit Service a31ea6
def analyzeHTML(doc, resource):
    """Register one HTML page and index its text, section by section.

    The page title is the content of the first //head/title node, or
    "Page <resource>" when there is none; the page is recorded with
    addPage(). The h1/h2/h3 headings and text nodes selected by XPath
    are then processed in the order returned: a heading starts a new
    section (and anchor id, taken from its "id" or "name" attribute
    when present), a text node is indexed under the current section.

    Returns the number of nodes handed to the analyzeHTML*() helpers.
    Problems during the walk are reported on stdout, not raised.
    """
    para = 0
    ctxt = doc.xpathNewContext()
    try:
        try:
            title = ctxt.xpathEval("//head/title")[0].content
        except Exception:
            title = "Page %s" % (resource)
        addPage(resource, title)
        try:
            section = title
            id = ""
            for item in ctxt.xpathEval("//h1 | //h2 | //h3 | //text()"):
                if item.name in ('h1', 'h2', 'h3'):
                    section = item.content
                    if item.prop("id"):
                        id = item.prop("id")
                    elif item.prop("name"):
                        id = item.prop("name")
                elif item.type == 'text':
                    analyzeHTMLText(doc, resource, item, section, id)
                    para = para + 1
                elif item.name == 'p':
                    analyzeHTMLPara(doc, resource, item, section, id)
                    para = para + 1
                elif item.name == 'pre':
                    analyzeHTMLPre(doc, resource, item, section, id)
                    para = para + 1
                else:
                    print("Page %s, unexpected %s element"
                          % (resource, item.name))
        except Exception:
            print("Page %s: problem analyzing" % (resource))
            print("%s %s" % sys.exc_info()[:2])
    finally:
        # The libxml2 bindings do not release XPath contexts on their
        # own; free it explicitly on every exit path.
        ctxt.xpathFreeContext()

    return para
Packit Service a31ea6
Packit Service a31ea6
def analyzeHTMLPages():
    """Index every HTML page of the current and tutorial/ directories.

    Files whose name starts with "API" and the file "xml.html" are
    skipped. Each page is first parsed as XML; when that fails the HTML
    parser is used instead. Pages that cannot be parsed or analyzed are
    reported on stdout and skipped.

    Returns the number of pages successfully analyzed.
    """
    ret = 0
    HTMLfiles = glob.glob("*.html") + glob.glob("tutorial/*.html")
    for html in HTMLfiles:
        if html.startswith("API") or html == "xml.html":
            continue
        doc = None
        try:
            try:
                doc = libxml2.parseFile(html)
            except Exception:
                # Not well-formed XML: fall back to the HTML parser.
                doc = libxml2.htmlParseFile(html, None)
            res = analyzeHTML(doc, html)
            print("Parsed %s : %d paragraphs" % (html, res))
            ret = ret + 1
        except Exception:
            print("could not parse %s" % (html))
        if doc is not None:
            # Release the tree before moving on to the next page.
            doc.freeDoc()
    return ret
Packit Service a31ea6
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
#									#
Packit Service a31ea6
#                  Mail archives parsing and analysis			#
Packit Service a31ea6
#									#
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
Packit Service a31ea6
import time
Packit Service a31ea6
Packit Service a31ea6
def getXMLDateArchive(t = None):
    """Return the URL of the by-date index of one monthly list archive.

    t is a timestamp in seconds since the epoch (default: now). It is
    converted with time.gmtime(); the year and the full month name (as
    produced by strftime "%B") select the archive page.
    """
    if t is None:
        t = time.time()
    stamp = time.gmtime(t)
    return "http://mail.gnome.org/archives/xml/%d-%s/date.html" % (
        stamp[0], time.strftime("%B", stamp))
Packit Service a31ea6
Packit Service a31ea6
def scanXMLMsgArchive(url, title, force = 0):
    """Fetch one archived message and index its title and body text.

    The message is looked up with checkXMLMsgArchive(); when it is
    already known and force is 0 nothing is done. Unknown messages are
    first registered with addXMLMsgArchive(). The title is indexed with
    relevance 20 and every text node below a <pre> with relevance 5.

    Returns 1 when the message was indexed, 0 otherwise (missing
    argument, already indexed, registration or parsing failure).
    """
    if url is None or title is None:
        return 0

    ID = checkXMLMsgArchive(url)
    if force == 0 and ID != -1:
        return 0

    if ID == -1:
        ID = addXMLMsgArchive(url, title)
        if ID == -1:
            return 0

    try:
        print("Loading %s" % (url))
        doc = libxml2.htmlParseFile(url, None)
    except Exception:
        doc = None
    if doc is None:
        print("Failed to parse %s" % (url))
        return 0

    # One document and one XPath context are created per message;
    # release both explicitly so that indexing a whole month (or year)
    # of messages does not accumulate parsed trees.
    try:
        addStringArchive(title, ID, 20)
        ctxt = doc.xpathNewContext()
        try:
            for text in ctxt.xpathEval("//pre//text()"):
                addStringArchive(text.content, ID, 5)
        finally:
            ctxt.xpathFreeContext()
    finally:
        doc.freeDoc()

    return 1
Packit Service a31ea6
Packit Service a31ea6
def scanXMLDateArchive(t = None, force = 0):
    """Scan the by-date index of one monthly archive.

    Resets the module-level wordsDictArchive, loads the index page
    returned by getXMLDateArchive(t) and calls scanXMLMsgArchive() for
    every link whose href starts with "msg". A leading "Re: " and then
    a leading "[xml] " are stripped from the link text before it is
    used as the message title.

    Returns the number of messages newly indexed, or -1 when the index
    page could not be parsed. A failure on a single message is reported
    on stdout and does not stop the scan.
    """
    global wordsDictArchive

    wordsDictArchive = {}

    url = getXMLDateArchive(t)
    print("loading %s" % (url))
    try:
        doc = libxml2.htmlParseFile(url, None)
    except Exception:
        doc = None
    if doc is None:
        print("Failed to parse %s" % (url))
        return -1

    newmsg = 0
    try:
        ctxt = doc.xpathNewContext()
        try:
            for anchor in ctxt.xpathEval("//a[@href]"):
                href = anchor.prop("href")
                if href is None or href[0:3] != "msg":
                    continue
                try:
                    msg = libxml2.buildURI(href, url)
                    title = anchor.content
                    if title is not None and title[0:4] == 'Re: ':
                        title = title[4:]
                    if title is not None and title[0:6] == '[xml] ':
                        title = title[6:]
                    newmsg = newmsg + scanXMLMsgArchive(msg, title, force)
                except Exception:
                    # Best effort: keep scanning, but say what was lost.
                    print("Failed to index message %s: %s"
                          % (href, sys.exc_info()[1]))
        finally:
            ctxt.xpathFreeContext()
    finally:
        doc.freeDoc()

    return newmsg
Packit Service a31ea6
    
Packit Service a31ea6
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
#									#
Packit Service a31ea6
#          Main code: open the DB, the API XML and analyze it		#
Packit Service a31ea6
#									#
Packit Service a31ea6
#########################################################################
Packit Service a31ea6
def analyzeArchives(t = None, force = 0):
    """Index one monthly mail archive and store the word associations.

    Runs scanXMLDateArchive(t, force), which fills the module-level
    wordsDictArchive, then calls updateWordArchive() for every
    (word, message id, relevance) triple collected. Words whose entry
    is None are ignored. Progress is reported on stdout.
    """
    global wordsDictArchive

    ret = scanXMLDateArchive(t, force)
    print("Indexed %d words in %d archive pages" % (len(wordsDictArchive), ret))

    i = 0
    # Iterate over snapshots so the loop does not depend on whether
    # updateWordArchive() touches the dictionaries.
    for word, refs in list(wordsDictArchive.items()):
        if refs is None:
            continue
        for id, relevance in list(refs.items()):
            updateWordArchive(word, id, relevance)
            i = i + 1

    print("Found %d associations in archive pages" % (i))
Packit Service a31ea6
Packit Service a31ea6
def analyzeHTMLTop():
    """Index the HTML pages and store the word associations.

    Runs analyzeHTMLPages(), then calls updateWordHTML() for every
    entry of the module-level wordsDictHTML, whose values map a
    resource to a (relevance, id, section) tuple. Words whose entry is
    None are ignored. Progress is reported on stdout.
    """
    global wordsDictHTML

    pages = analyzeHTMLPages()
    print("Indexed %d words in %d HTML pages" % (len(wordsDictHTML), pages))

    associations = 0
    for word in wordsDictHTML.keys():
        entries = wordsDictHTML[word]
        if entries is None:
            continue
        for resource in entries.keys():
            (relevance, id, section) = entries[resource]
            updateWordHTML(word, resource, section, id, relevance)
            associations = associations + 1

    print("Found %d associations in HTML pages" % (associations))
Packit Service a31ea6
Packit Service a31ea6
def analyzeAPITop():
    """Index the XML API description and store the word associations.

    Loads the file named by the module-level API variable, analyzes it
    and frees the document; any failure there is reported and ends the
    program with exit status 1. Then updateWord() is called for every
    (word, symbol) pair recorded in the module-level wordsDict, whose
    values are keyed by (module, symbol) tuples. Words whose entry is
    None are counted as skipped.
    """
    global wordsDict
    global API

    try:
        doc = loadAPI(API)
        blocs = analyzeAPI(doc)
        print("Analyzed %d blocs" % (blocs))
        doc.freeDoc()
    except:
        print("Failed to parse and analyze %s" % (API))
        print("%s %s" % sys.exc_info()[:2])
        sys.exit(1)

    print("Indexed %d words" % (len(wordsDict)))
    associations = 0
    skipped = 0
    for word in wordsDict.keys():
        entries = wordsDict[word]
        if entries is None:
            skipped = skipped + 1
            continue
        for key in entries.keys():
            (module, symbol) = key
            updateWord(word, symbol, entries[key])
            associations = associations + 1

    print("Found %d associations, skipped %d words" % (associations, skipped))
Packit Service a31ea6
Packit Service a31ea6
def usage():
    """Print the command line synopsis and exit with status 1."""
    synopsis = "Usage index.py [--force] [--archive]  [--archive-year year] [--archive-month month] [--API] [--docs]"
    print(synopsis)
    sys.exit(1)
Packit Service a31ea6
Packit Service a31ea6
def main():
    """Command line entry point.

    Opens the database, then processes the arguments left to right:
      --force                 re-index messages that are already known
                              (affects the archive options after it)
      --archive               index the archive of the current month
      --archive-year YEAR     index the twelve monthly archives of YEAR
      --archive-month Y-MONTH index one archive, e.g. 2003-January
      --API                   index the XML API description
      --docs                  index the HTML pages
    No argument, an unknown argument, or an option missing its value
    prints the usage and exits with status 1.
    """
    try:
        openMySQL()
    except Exception:
        print("Failed to open the database")
        print("%s %s" % sys.exc_info()[:2])
        sys.exit(1)

    months = ["January", "February", "March", "April", "May",
              "June", "July", "August", "September", "October",
              "November", "December"]
    # Offset added to the first instant of a month before it is handed
    # to analyzeArchives(); presumably keeps the timestamp inside the
    # month whatever the local time zone is -- TODO confirm.
    ten_days = 3600 * 24 * 10

    args = sys.argv[1:]
    if not args:
        usage()

    force = 0
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == '--force':
            force = 1
        elif arg == '--archive':
            analyzeArchives(None, force)
        elif arg == '--archive-year':
            i = i + 1
            if i >= len(args):
                # The option needs a value; do not crash on args[i].
                usage()
            year = args[i]
            for month in months:
                try:
                    T = time.strptime("%s-%s" % (year, month), "%Y-%B")
                    analyzeArchives(time.mktime(T) + ten_days, force)
                except Exception:
                    print("Failed to index month archive:")
                    print("%s %s" % sys.exc_info()[:2])
        elif arg == '--archive-month':
            i = i + 1
            if i >= len(args):
                usage()
            month = args[i]
            try:
                T = time.strptime(month, "%Y-%B")
                analyzeArchives(time.mktime(T) + ten_days, force)
            except Exception:
                print("Failed to index month archive:")
                print("%s %s" % sys.exc_info()[:2])
        elif arg == '--API':
            analyzeAPITop()
        elif arg == '--docs':
            analyzeHTMLTop()
        else:
            usage()
        i = i + 1


if __name__ == "__main__":
    main()