|
Packit |
1184b9 |
#include <string.h>
|
|
Packit |
1184b9 |
#include <stdlib.h>
|
|
Packit |
1184b9 |
#include <stdio.h>
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
#include "mythes.hxx"
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
#include <hunspell.hxx>
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
extern char * mystrdup(const char * s);
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
void myfreelist(char *** list, int n)
|
|
Packit |
1184b9 |
{
|
|
Packit |
1184b9 |
if (list && (n > 0)) {
|
|
Packit |
1184b9 |
for (int i = 0; i < n; i++) if ((*list)[i]) free((*list)[i]);
|
|
Packit |
1184b9 |
free(*list);
|
|
Packit |
1184b9 |
*list = NULL;
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
int main(int argc, char** argv)
|
|
Packit |
1184b9 |
{
|
|
Packit |
1184b9 |
FILE* wtclst;
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
/* first parse the command line options
|
|
Packit |
1184b9 |
* arg1 - index file, arg2 thesaurus data file, arg3 - file of words to check,
|
|
Packit |
1184b9 |
* arg4, arg5 - opt. Hunspell affix and dic file for stemming and
|
|
Packit |
1184b9 |
* morphological generation
|
|
Packit |
1184b9 |
*/
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
if (argc < 3) {
|
|
Packit |
1184b9 |
fprintf(stderr,"correct syntax is:\n");
|
|
Packit |
1184b9 |
fprintf(stderr,"example index_file thesaurus_file file_of_words_to_check [affix_file dic_file]\n");
|
|
Packit |
1184b9 |
exit(1);
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
/* open the words to check list */
|
|
Packit |
1184b9 |
wtclst = fopen(argv[3], "r");
|
|
Packit |
1184b9 |
if (!wtclst) {
|
|
Packit |
1184b9 |
fprintf(stderr,"Error - could not open file of words to check\n");
|
|
Packit |
1184b9 |
exit(1);
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
// Hunspell for stemming and morphological generation of affixes synonyms
|
|
Packit |
1184b9 |
Hunspell * pH = NULL;
|
|
Packit |
1184b9 |
if (argc >= 5) pH = new Hunspell(argv[4], argv[5], (const char *) NULL);
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
// open a new thesaurus object
|
|
Packit |
1184b9 |
MyThes * pMT = new MyThes(argv[1], argv[2]);
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
// get the encoding used for the thesaurus data
|
|
Packit |
1184b9 |
char * encoding = pMT->get_th_encoding();
|
|
Packit |
1184b9 |
printf("Thesaurus uses encoding %s\n\n", encoding);
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
int k;
|
|
Packit |
1184b9 |
char buf[101];
|
|
Packit |
1184b9 |
char oldbuf[101];
|
|
Packit |
1184b9 |
mentry * pmean;
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
while(fgets(buf,100,wtclst)) {
|
|
Packit |
1184b9 |
oldbuf[0] = '\0';
|
|
Packit |
1184b9 |
k = strlen(buf);
|
|
Packit |
1184b9 |
*(buf + k - 1) = '\0';
|
|
Packit |
1184b9 |
int len = strlen(buf);
|
|
Packit |
1184b9 |
int count = pMT->Lookup(buf,len,&pmean);
|
|
Packit |
1184b9 |
// don't change value of pmean
|
|
Packit |
1184b9 |
// or count since needed for CleanUpAfterLookup routine
|
|
Packit |
1184b9 |
if (!count) {
|
|
Packit |
1184b9 |
int stemcount = 0;
|
|
Packit |
1184b9 |
char **stem;
|
|
Packit |
1184b9 |
if (pH) stemcount = pH->stem(&stem, buf); else stemcount = 0;
|
|
Packit |
1184b9 |
if (stemcount) {
|
|
Packit |
1184b9 |
printf("stem: %s\n", stem[0]);
|
|
Packit |
1184b9 |
strncpy(oldbuf,buf, sizeof(oldbuf)-1);
|
|
Packit |
1184b9 |
oldbuf[sizeof(oldbuf)-1] = 0;
|
|
Packit |
1184b9 |
strncpy(buf, stem[0], sizeof(buf)-1);
|
|
Packit |
1184b9 |
buf[sizeof(buf)-1] = 0;
|
|
Packit |
1184b9 |
len = strlen(buf);
|
|
Packit |
1184b9 |
count = pMT->Lookup(buf, len, &pmean);
|
|
Packit |
1184b9 |
myfreelist(&stem, stemcount);
|
|
Packit |
1184b9 |
} else oldbuf[0] = '\0';
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
mentry* pm = pmean;
|
|
Packit |
1184b9 |
if (count) {
|
|
Packit |
1184b9 |
printf("%s has %d meanings\n",buf,count);
|
|
Packit |
1184b9 |
for (int i=0; i < count; i++) {
|
|
Packit |
1184b9 |
printf(" meaning %d: %s\n",i,pm->defn);
|
|
Packit |
1184b9 |
for (int j=0; j < pm->count; j++) {
|
|
Packit |
1184b9 |
char ** gen;
|
|
Packit |
1184b9 |
int l = 0;
|
|
Packit |
1184b9 |
if (pH && oldbuf[0]) l = pH->generate(&gen, pm->psyns[j], oldbuf);
|
|
Packit |
1184b9 |
if (l) {
|
|
Packit |
1184b9 |
int k;
|
|
Packit |
1184b9 |
printf(" %s",gen[0]);
|
|
Packit |
1184b9 |
for (k = 1; k < l; k++) printf(", %s",gen[k]);
|
|
Packit |
1184b9 |
printf("\n");
|
|
Packit |
1184b9 |
myfreelist(&gen, l);
|
|
Packit |
1184b9 |
} else printf(" %s\n",pm->psyns[j]);
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
printf("\n");
|
|
Packit |
1184b9 |
pm++;
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
printf("\n\n");
|
|
Packit |
1184b9 |
// now clean up all allocated memory
|
|
Packit |
1184b9 |
pMT->CleanUpAfterLookup(&pmean,count);
|
|
Packit |
1184b9 |
} else {
|
|
Packit |
1184b9 |
printf("\"%s\" is not in thesaurus!\n",buf);
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
|
|
Packit |
1184b9 |
fclose(wtclst);
|
|
Packit |
1184b9 |
delete pMT;
|
|
Packit |
1184b9 |
if (pH) delete pH;
|
|
Packit |
1184b9 |
return 0;
|
|
Packit |
1184b9 |
}
|
|
Packit |
1184b9 |
|