Blame addon/doxysearch/doxysearch.cpp

/******************************************************************************
 *
 * Copyright (C) 1997-2015 by Dimitri van Heesch.
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation under the terms of the GNU General Public License is hereby
 * granted. No representations are made about the suitability of this software
 * for any purpose. It is provided "as is" without express or implied warranty.
 * See the GNU General Public License for more details.
 *
 * Documents produced by Doxygen are derivative works derived from the
 * input used in their production; they are not affected by this license.
 *
 */
Packit 1c1d7e
Packit 1c1d7e
// STL includes
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <fstream>
#include <algorithm>

// Xapian includes
#include <xapian.h>

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/stat.h>
#endif
Packit 1c1d7e
Packit 1c1d7e
/** Value slot numbers of the fields stored with each document in the search
 *  index. They are passed to Xapian::Document::get_value() when a search
 *  result is written out.
 */
#define FIELD_TYPE 1
#define FIELD_NAME 2
#define FIELD_ARGS 3
#define FIELD_TAG  4
#define FIELD_URL  5
#define FIELD_KEYW 6
#define FIELD_DOC  7
Packit 1c1d7e
Packit 1c1d7e
/** Maps the hexadecimal digit character \a x to its numeric value (0..15);
 *  yields -1 when \a x is not a hexadecimal digit.
 *  \note \a x is evaluated several times, so do not pass an expression with
 *        side effects.
 */
#define HEX2DEC(x) (((x)>='0' && (x)<='9')?((x)-'0'):\
                    ((x)>='a' && (x)<='f')?((x)-'a'+10):\
                    ((x)>='A' && (x)<='F')?((x)-'A'+10):-1)
Packit 1c1d7e
Packit 1c1d7e
Packit 1c1d7e
/** Returns true if \a dirName refers to an existing directory.
 *
 *  Any failure to query the path (missing, inaccessible, ...) is reported as
 *  false, as is a path that exists but is not a directory.
 */
bool dirExists(const std::string& dirName)
{
#ifdef _WIN32
  const DWORD attributes = GetFileAttributesA(dirName.c_str());
  // INVALID_FILE_ATTRIBUTES means the path could not be queried at all
  return attributes != INVALID_FILE_ATTRIBUTES &&
         (attributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
#else
  struct stat sb;
  return stat(dirName.c_str(), &sb)==0 && S_ISDIR(sb.st_mode);
#endif
}
Packit 1c1d7e
Packit 1c1d7e
Packit 1c1d7e
/** Returns the numeric value (0..15) of the hexadecimal digit \a c,
 *  or -1 if \a c is not a hexadecimal digit.
 */
static int hexDigitValue(unsigned char c)
{
  if (c>='0' && c<='9') return c-'0';
  if (c>='a' && c<='f') return c-'a'+10;
  if (c>='A' && c<='F') return c-'A'+10;
  return -1;
}

/** Decodes a URI encoded string into a normal string.
 *
 *  - "%XY", with X and Y hexadecimal digits, is replaced by the byte 0xXY.
 *  - '+' is replaced by a space, at any position in the string.
 *  - A '%' that is not followed by two hexadecimal digits is copied
 *    unchanged (RFC1630 reserves such sequences for future extension).
 *
 *  @param sSrc the encoded string.
 *  @return the decoded string; it is never longer than \a sSrc.
 */
static std::string uriDecode(const std::string & sSrc)
{
  const std::string::size_type len = sSrc.length();
  std::string result;
  result.reserve(len); // decoding never grows the string

  std::string::size_type i = 0;
  while (i<len)
  {
    const char ch = sSrc[i];
    if (ch=='%' && len-i>=3) // enough room for two hex digits
    {
      // use int so the -1 "invalid" marker survives on platforms where
      // plain char is unsigned
      const int hi = hexDigitValue(static_cast<unsigned char>(sSrc[i+1]));
      const int lo = hexDigitValue(static_cast<unsigned char>(sSrc[i+2]));
      if (hi>=0 && lo>=0)
      {
        result += static_cast<char>((hi<<4) | lo);
        i += 3;
        continue;
      }
      // not a valid escape: fall through and copy the '%' literally
    }
    else if (ch=='+') // replace '+' with space
    {
      result += ' ';
      ++i;
      continue;
    }
    result += ch;
    ++i;
  }
  return result;
}
Packit 1c1d7e
Packit 1c1d7e
/** Returns the list of strings that result when splitting \a s using
 *  delimiter \a delim.
 *
 *  Empty fields between two consecutive delimiters are kept. An empty input
 *  yields an empty list, and a delimiter at the very end of \a s does not
 *  produce a trailing empty field.
 */
static std::vector<std::string> split(const std::string &s, char delim)
{
  std::vector<std::string> elems;
  std::istringstream input(s);
  std::string item;
  while (std::getline(input, item, delim))
  {
    elems.push_back(item);
  }
  return elems;
}
Packit 1c1d7e
Packit 1c1d7e
/** Reads a value of type T from string \a s using stream extraction.
 *
 *  @return the extracted value. If nothing can be extracted (for instance
 *          because \a s is empty) a value-initialized T is returned, i.e. 0
 *          for arithmetic types.
 */
template<class T>
T fromString(const std::string& s)
{
  std::istringstream stream (s);
  // value-initialize: on an empty stream operator>> never touches the target,
  // which would otherwise be returned uninitialized
  T t = T();
  stream >> t;
  return t;
}
Packit 1c1d7e
Packit 1c1d7e
/** Class that holds the starting position of a word.
 *
 *  \a start is the offset at which the word was found in the searched text;
 *  \a index identifies which word of the search word list was found there.
 */
struct WordPosition
{
  WordPosition(int s,int i) : start(s), index(i) {}
  int start;
  int index;
};
Packit 1c1d7e
Packit 1c1d7e
/** Class representing the '<' operator for WordPosition objects based on position. */
Packit 1c1d7e
struct WordPosition_less
Packit 1c1d7e
{
Packit 1c1d7e
  bool operator()(const WordPosition &p1,const WordPosition &p2)
Packit 1c1d7e
  {
Packit 1c1d7e
    return p1.start
Packit 1c1d7e
  }
Packit 1c1d7e
};
Packit 1c1d7e
Packit 1c1d7e
/** Class that holds a text fragment together with the number of
 *  highlighted word occurrences it contains.
 */
struct Fragment
{
  Fragment(const std::string &t,int occ) : text(t), occurrences(occ) {}
  std::string text;
  int occurrences;
};
Packit 1c1d7e
Packit 1c1d7e
/** Class representing the '>' operator for Fragment objects based on occurrence. */
Packit 1c1d7e
struct Fragment_greater
Packit 1c1d7e
{
Packit 1c1d7e
  bool operator()(const Fragment &p1,const Fragment &p2)
Packit 1c1d7e
  {
Packit 1c1d7e
    return p1.occurrences>p2.occurrences;
Packit 1c1d7e
  }
Packit 1c1d7e
};
Packit 1c1d7e
Packit 1c1d7e
/** Class representing a range within a string, given by a \a start and an
 *  \a end offset.
 */
struct Range
{
  Range(int s,int e) : start(s), end(e) {}
  int start;
  int end;
};
Packit 1c1d7e
Packit 1c1d7e
/** Returns true if [start..start+len] is inside one of the \a ranges. */
Packit 1c1d7e
static bool insideRange(const std::vector<Range> &ranges,int start,int len)
Packit 1c1d7e
{
Packit 1c1d7e
  for (std::vector<Range>::const_iterator it = ranges.begin();
Packit 1c1d7e
       it!=ranges.end(); ++it
Packit 1c1d7e
      )
Packit 1c1d7e
  {
Packit 1c1d7e
    Range r = *it;
Packit 1c1d7e
    if (start>=r.start && start+len
Packit 1c1d7e
    {
Packit 1c1d7e
      return true;
Packit 1c1d7e
    }
Packit 1c1d7e
  }
Packit 1c1d7e
  return false;
Packit 1c1d7e
}
Packit 1c1d7e
Packit 1c1d7e
/** Returns a list of text \a fragments from \a s containing one or
Packit 1c1d7e
 *  more \a words. The list is sorted according to the 
Packit 1c1d7e
 *  number of occurrences of words within the fragment.
Packit 1c1d7e
 */ 
Packit 1c1d7e
static void highlighter(const std::string &s,
Packit 1c1d7e
                 const std::vector<std::string> &words,
Packit 1c1d7e
                 std::vector<Fragment> &fragments)
Packit 1c1d7e
{
Packit 1c1d7e
  const std::string spanStart="";
Packit 1c1d7e
  const std::string spanEnd="";
Packit 1c1d7e
  const std::string dots="...";
Packit 1c1d7e
  const int fragLen = 60;
Packit 1c1d7e
  int sl=s.length();
Packit 1c1d7e
Packit 1c1d7e
  // find positions of words in s
Packit 1c1d7e
  size_t j=0;
Packit 1c1d7e
  std::vector<WordPosition> positions;
Packit 1c1d7e
  for (std::vector<std::string>::const_iterator it=words.begin();
Packit 1c1d7e
       it!=words.end();
Packit 1c1d7e
       ++it,++j
Packit 1c1d7e
      )
Packit 1c1d7e
  {
Packit 1c1d7e
    int pos=0;
Packit 1c1d7e
    size_t i;
Packit 1c1d7e
    std::string word = *it;
Packit 1c1d7e
    while ((i=s.find(word,pos))!=std::string::npos) 
Packit 1c1d7e
    {
Packit 1c1d7e
      positions.push_back(WordPosition(i,j));
Packit 1c1d7e
      pos=i+word.length();
Packit 1c1d7e
    }
Packit 1c1d7e
  }
Packit 1c1d7e
  // sort on position
Packit 1c1d7e
  std::sort(positions.begin(),positions.end(),WordPosition_less());
Packit 1c1d7e
  // get fragments around words
Packit 1c1d7e
  std::vector<Range> ranges;
Packit 1c1d7e
  for (std::vector<WordPosition>::const_iterator it=positions.begin();
Packit 1c1d7e
       it!=positions.end();
Packit 1c1d7e
       ++it)
Packit 1c1d7e
  {
Packit 1c1d7e
    WordPosition wp = *it;
Packit 1c1d7e
    std::string w = words[wp.index];
Packit 1c1d7e
    int i=wp.start;
Packit 1c1d7e
    int wl=w.length();
Packit 1c1d7e
    if (!insideRange(ranges,i,wl))
Packit 1c1d7e
    {
Packit 1c1d7e
      if (wl>fragLen)
Packit 1c1d7e
      {
Packit 1c1d7e
        fragments.push_back(Fragment(spanStart+w+spanEnd,1));
Packit 1c1d7e
        ranges.push_back(Range(i,i+wl));
Packit 1c1d7e
      }
Packit 1c1d7e
      else
Packit 1c1d7e
      {
Packit 1c1d7e
        std::string startFragment,endFragment;
Packit 1c1d7e
        int bi=i-(fragLen-wl)/2;
Packit 1c1d7e
        int ei=i+wl+(fragLen-wl)/2;
Packit 1c1d7e
        int occ=0;
Packit 1c1d7e
        if (bi<0)  { ei-=bi; bi=0; } else startFragment=dots;
Packit 1c1d7e
        if (ei>sl) { ei=sl; }        else endFragment=dots;
Packit 1c1d7e
        while (bi>0  && !isspace(s[bi])) bi--; // round to start of the word
Packit 1c1d7e
        while (ei
Packit 1c1d7e
        // highlight any word in s between indexes bi and ei
Packit 1c1d7e
        std::string fragment=startFragment;
Packit 1c1d7e
        int pos=bi;
Packit 1c1d7e
        for (std::vector<WordPosition>::const_iterator it2=positions.begin();
Packit 1c1d7e
            it2!=positions.end();
Packit 1c1d7e
            ++it2)
Packit 1c1d7e
        {
Packit 1c1d7e
          WordPosition wp2 = *it2;
Packit 1c1d7e
          std::string w2 = words[wp2.index];
Packit 1c1d7e
          int wl2 = w2.length();
Packit 1c1d7e
          if (wp2.start>=bi && wp2.start+wl2<=ei) // word is inside the range!
Packit 1c1d7e
          {
Packit 1c1d7e
            fragment+=s.substr(pos,wp2.start-pos)+
Packit 1c1d7e
              spanStart+
Packit 1c1d7e
              s.substr(wp2.start,wl2)+
Packit 1c1d7e
              spanEnd;
Packit 1c1d7e
            pos=wp2.start+wl2;
Packit 1c1d7e
            occ++;
Packit 1c1d7e
          }
Packit 1c1d7e
        }
Packit 1c1d7e
        fragment+=s.substr(pos,ei-pos)+endFragment;
Packit 1c1d7e
        fragments.push_back(Fragment(fragment,occ));
Packit 1c1d7e
        ranges.push_back(Range(bi,ei));
Packit 1c1d7e
      }
Packit 1c1d7e
    }
Packit 1c1d7e
  }
Packit 1c1d7e
  std::sort(fragments.begin(),fragments.end(),Fragment_greater());
Packit 1c1d7e
}
Packit 1c1d7e
Packit 1c1d7e
/** Escapes a string such that it can be included in a JSON structure.
 *
 *  Double quotes and backslashes are preceded by a backslash, and control
 *  characters (below 0x20) are written as their short escape (\\b, \\f,
 *  \\n, \\r, \\t) or as \\u00XX. All other bytes are copied unchanged.
 *
 *  @param s the raw string.
 *  @return the escaped string, without surrounding quotes.
 */
static std::string escapeString(const std::string &s)
{
  static const char hexDigits[] = "0123456789abcdef";
  std::string dst;
  dst.reserve(s.length());
  for (std::string::size_type i=0;i<s.length();i++)
  {
    const unsigned char ch = static_cast<unsigned char>(s[i]);
    switch (ch)
    {
      case '\"': dst += "\\\""; break;
      case '\\': dst += "\\\\"; break;
      case '\b': dst += "\\b";  break;
      case '\f': dst += "\\f";  break;
      case '\n': dst += "\\n";  break;
      case '\r': dst += "\\r";  break;
      case '\t': dst += "\\t";  break;
      default:
        if (ch<0x20) // remaining control characters may not appear raw in JSON
        {
          dst += "\\u00";
          dst += hexDigits[ch>>4];
          dst += hexDigits[ch&0x0f];
        }
        else
        {
          dst += static_cast<char>(ch);
        }
        break;
    }
  }
  return dst;
}
Packit 1c1d7e
Packit 1c1d7e
static void showError(const std::string &callback,const std::string &error)
Packit 1c1d7e
{
Packit 1c1d7e
  std::cout << callback << "({\"error\":\"" << error << "\"})";
Packit 1c1d7e
  exit(0);
Packit 1c1d7e
}
Packit 1c1d7e
Packit 1c1d7e
/** Main routine */
Packit 1c1d7e
int main(int argc,char **argv)
Packit 1c1d7e
{
Packit 1c1d7e
  // process inputs that were passed to us via QUERY_STRING
Packit 1c1d7e
  std::cout << "Content-Type:application/javascript;charset=utf-8\r\n\n";
Packit 1c1d7e
  std::string callback;
Packit 1c1d7e
  try
Packit 1c1d7e
  {
Packit 1c1d7e
    // get input parameters
Packit 1c1d7e
    const char *queryEnv = getenv("QUERY_STRING");
Packit 1c1d7e
    std::string queryString;
Packit 1c1d7e
    if (queryEnv)
Packit 1c1d7e
    {
Packit 1c1d7e
      queryString = queryEnv;
Packit 1c1d7e
    }
Packit 1c1d7e
    else if (argc>=2)
Packit 1c1d7e
    {
Packit 1c1d7e
      queryString = argv[1];
Packit 1c1d7e
    }
Packit 1c1d7e
    else
Packit 1c1d7e
    {
Packit 1c1d7e
      std::cout << "No input!\n";
Packit 1c1d7e
      exit(1);
Packit 1c1d7e
    }
Packit 1c1d7e
Packit 1c1d7e
    // parse query string
Packit 1c1d7e
    std::vector<std::string> parts = split(queryString,'&';;
Packit 1c1d7e
    std::string searchFor,callback;
Packit 1c1d7e
    int num=1,page=0;
Packit 1c1d7e
    for (std::vector<std::string>::const_iterator it=parts.begin();it!=parts.end();++it)
Packit 1c1d7e
    {
Packit 1c1d7e
      std::vector<std::string> kv = split(*it,'=');
Packit 1c1d7e
      if (kv.size()==2)
Packit 1c1d7e
      {
Packit 1c1d7e
        std::string val = uriDecode(kv[1]);
Packit 1c1d7e
        if      (kv[0]=="q")  searchFor = val; 
Packit 1c1d7e
        else if (kv[0]=="n")  num       = fromString<int>(val);
Packit 1c1d7e
        else if (kv[0]=="p")  page      = fromString<int>(val);
Packit 1c1d7e
        else if (kv[0]=="cb") callback  = val;
Packit 1c1d7e
      }
Packit 1c1d7e
    }
Packit 1c1d7e
Packit 1c1d7e
    std::string indexDir = "doxysearch.db";
Packit 1c1d7e
Packit 1c1d7e
    if (queryString=="test") // user test
Packit 1c1d7e
    {
Packit 1c1d7e
      bool dbOk = dirExists(indexDir);
Packit 1c1d7e
      if (dbOk)
Packit 1c1d7e
      {
Packit 1c1d7e
        std::cout << "Test successful.";
Packit 1c1d7e
      }
Packit 1c1d7e
      else
Packit 1c1d7e
      {
Packit 1c1d7e
        std::cout << "Test failed: cannot find search index " << indexDir;
Packit 1c1d7e
      }
Packit 1c1d7e
      exit(0);
Packit 1c1d7e
    }
Packit 1c1d7e
Packit 1c1d7e
    // create query
Packit 1c1d7e
    Xapian::Database db(indexDir);
Packit 1c1d7e
    Xapian::Enquire enquire(db);
Packit 1c1d7e
    Xapian::Query query;
Packit 1c1d7e
    std::vector<std::string> words = split(searchFor,' ');
Packit 1c1d7e
    for (std::vector<std::string>::const_iterator it=words.begin();it!=words.end();++it)
Packit 1c1d7e
    {
Packit 1c1d7e
      query = Xapian::Query(Xapian::Query::OP_OR,query,Xapian::Query(*it));
Packit 1c1d7e
    }
Packit 1c1d7e
    enquire.set_query(query);
Packit 1c1d7e
Packit 1c1d7e
    // get results
Packit 1c1d7e
    Xapian::MSet matches = enquire.get_mset(page*num,num);
Packit 1c1d7e
    unsigned int hits    = matches.get_matches_estimated();
Packit 1c1d7e
    unsigned int offset  = page*num;
Packit 1c1d7e
    unsigned int pages   = num>0 ? (hits+num-1)/num : 0;
Packit 1c1d7e
    if (offset>hits)     offset=hits;
Packit 1c1d7e
    if (offset+num>hits) num=hits-offset;
Packit 1c1d7e
Packit 1c1d7e
    // write results as JSONP
Packit 1c1d7e
    std::cout << callback.c_str() << "(";
Packit 1c1d7e
    std::cout << "{" << std::endl 
Packit 1c1d7e
              << "  \"hits\":"   << hits   << "," << std::endl
Packit 1c1d7e
              << "  \"first\":"  << offset << "," << std::endl
Packit 1c1d7e
              << "  \"count\":"  << num    << "," << std::endl
Packit 1c1d7e
              << "  \"page\":"   << page   << "," << std::endl
Packit 1c1d7e
              << "  \"pages\":"  << pages  << "," << std::endl
Packit 1c1d7e
              << "  \"query\": \""  << escapeString(searchFor)  << "\"," << std::endl
Packit 1c1d7e
              << "  \"items\":[" << std::endl;
Packit 1c1d7e
    // foreach search result
Packit 1c1d7e
    unsigned int o = offset;
Packit 1c1d7e
    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i,++o) 
Packit 1c1d7e
    {
Packit 1c1d7e
      std::vector<Fragment> hl;
Packit 1c1d7e
      Xapian::Document doc = i.get_document();
Packit 1c1d7e
      highlighter(doc.get_value(FIELD_DOC),words,hl);
Packit 1c1d7e
      std::cout << "  {\"type\": \"" << doc.get_value(FIELD_TYPE) << "\"," << std::endl
Packit 1c1d7e
                << "   \"name\": \"" << doc.get_value(FIELD_NAME) << escapeString(doc.get_value(FIELD_ARGS)) << "\"," << std::endl
Packit 1c1d7e
                << "   \"tag\": \""  << doc.get_value(FIELD_TAG) << "\"," << std::endl
Packit 1c1d7e
                << "   \"url\": \""  << doc.get_value(FIELD_URL) << "\"," << std::endl;
Packit 1c1d7e
      std::cout << "   \"fragments\":[" << std::endl;
Packit 1c1d7e
      int c=0;
Packit 1c1d7e
      bool first=true;
Packit 1c1d7e
      for (std::vector<Fragment>::const_iterator it = hl.begin();it!=hl.end() && c<3;++it,++c)
Packit 1c1d7e
      {
Packit 1c1d7e
        if (!first) std::cout << "," << std::endl;
Packit 1c1d7e
        std::cout << "     \"" << escapeString((*it).text) << "\"";
Packit 1c1d7e
        first=false;
Packit 1c1d7e
      }
Packit 1c1d7e
      if (!first) std::cout << std::endl;
Packit 1c1d7e
      std::cout << "   ]" << std::endl;
Packit 1c1d7e
      std::cout << "  }";
Packit 1c1d7e
      if (o
Packit 1c1d7e
      std::cout << std::endl;
Packit 1c1d7e
    }
Packit 1c1d7e
    std::cout << " ]" << std::endl << "})" << std::endl;
Packit 1c1d7e
  } 
Packit 1c1d7e
  catch (const Xapian::Error &e) // Xapian exception
Packit 1c1d7e
  {
Packit 1c1d7e
    showError(callback,e.get_description());
Packit 1c1d7e
  } 
Packit 1c1d7e
  catch (...) // Any other exception
Packit 1c1d7e
  {
Packit 1c1d7e
    showError(callback,"Unknown Exception!");
Packit 1c1d7e
    exit(1);
Packit 1c1d7e
  }
Packit 1c1d7e
  return 0;
Packit 1c1d7e
}