Blame example.c

Packit 0d49a1
#include <string.h>
Packit 0d49a1
#include <stdlib.h>
Packit 0d49a1
#include <stdio.h>
Packit 0d49a1
#include <ctype.h>
Packit 0d49a1
Packit 0d49a1
#include "hyphen.h"
Packit 0d49a1
Packit 0d49a1
#define BUFSIZE 1000
Packit 0d49a1
Packit 0d49a1
/*
 * Print the command-line usage summary to stderr.
 *
 * The options are listed in the order main() tests for them
 * (-o, then -n, then -d); each one is optional.
 */
void help(void) {
    fprintf(stderr,"correct syntax is:\n");
    fprintf(stderr,"example [-o] [-n] [-d] hyphen_dictionary_file file_of_words_to_check\n");
    fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n");
    fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n");
    fprintf(stderr,"-n = print hyphenation vector\n");
}
Packit 0d49a1
Packit 0d49a1
/*
 * Get a pointer to the nth 8-bit or UTF-8 character of the word.
 *
 * word - NUL-terminated string to walk
 * n    - number of characters to advance; n <= 0 returns word unchanged
 * utf8 - non-zero: bytes of the form 10xxxxxx (UTF-8 continuation bytes)
 *        are skipped as part of the preceding character;
 *        zero: every byte counts as one character
 *
 * Returns a pointer into word. The walk stops at the NUL terminator, so
 * when the word holds fewer than n characters the result points at the
 * terminator instead of running past the end of the buffer.
 */
char * hindex(char * word, int n, int utf8) {
    int j;
    for (j = 0; j < n && *word != '\0'; j++) {
        word++;
        /* a NUL byte is not a continuation byte, so this also stops at the end */
        while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++;
    }
    return word;
}
Packit 0d49a1
Packit 0d49a1
/* list possible hyphenations with -dd option (example for the usage of the hyphenate2() function) */
Packit 0d49a1
void single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) {
Packit 0d49a1
    int i, k, j = 0;
Packit 0d49a1
    char r;
Packit 0d49a1
    for (i = 0; (i + 1) < strlen(word); i++) {
Packit 0d49a1
        if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue;
Packit 0d49a1
        if ((hyphen[j] & 1)) {
Packit 0d49a1
            if (rep && rep[j]) {
Packit 0d49a1
              k = hindex(word, j - pos[j] + 1, utf8) - word;
Packit 0d49a1
              r = word[k];
Packit 0d49a1
              word[k] = 0;
Packit 0d49a1
              printf(" - %s%s", word, rep[j]);
Packit 0d49a1
              word[k] = r;
Packit 0d49a1
              printf("%s\n", hindex(word + k, cut[j], utf8));
Packit 0d49a1
            } else {
Packit 0d49a1
              k = hindex(word, j + 1, utf8) - word;
Packit 0d49a1
              r = word[k];
Packit 0d49a1
              word[k] = 0;
Packit 0d49a1
              printf(" - %s=", word);
Packit 0d49a1
              word[k] = r;
Packit 0d49a1
              printf("%s\n", word + k);
Packit 0d49a1
            }
Packit 0d49a1
        }
Packit 0d49a1
        j++;
Packit 0d49a1
    }
Packit 0d49a1
}
Packit 0d49a1
Packit 0d49a1
int 
Packit 0d49a1
main(int argc, char** argv)
Packit 0d49a1
{
Packit 0d49a1
Packit 0d49a1
    HyphenDict *dict;
Packit 0d49a1
    int df;
Packit 0d49a1
    int wtc;
Packit 0d49a1
    FILE* wtclst;
Packit 0d49a1
    int k, n, i, j, c;
Packit 0d49a1
    char buf[BUFSIZE + 1];
Packit 0d49a1
    int  nHyphCount;
Packit 0d49a1
    char *hyphens;
Packit 0d49a1
    char *lcword;
Packit 0d49a1
    char *hyphword;
Packit 0d49a1
    char hword[BUFSIZE * 2];
Packit 0d49a1
    int arg = 1;
Packit 0d49a1
    int optd = 1;
Packit 0d49a1
    int optn = 0;
Packit 0d49a1
    int optdd = 0;
Packit 0d49a1
    char ** rep;
Packit 0d49a1
    int * pos;
Packit 0d49a1
    int * cut;
Packit 0d49a1
Packit 0d49a1
  /* first parse the command line options */
Packit 0d49a1
  /* arg1 - hyphen dictionary file, arg2 - file of words to check */
Packit 0d49a1
Packit 0d49a1
  if (argv[arg]) {
Packit 0d49a1
       if (strcmp(argv[arg], "-o") == 0) {
Packit 0d49a1
            optd = 0;
Packit 0d49a1
            arg++;
Packit 0d49a1
       }
Packit 0d49a1
       if (strcmp(argv[arg], "-n") == 0) {
Packit 0d49a1
            optn = 1;
Packit 0d49a1
            arg++;
Packit 0d49a1
       }
Packit 0d49a1
       if (argv[arg] && strcmp(argv[arg], "-d") == 0) {
Packit 0d49a1
            optd = 1;
Packit 0d49a1
            optdd = 1;
Packit 0d49a1
            arg++;
Packit 0d49a1
       }
Packit 0d49a1
  }
Packit 0d49a1
Packit 0d49a1
  if (argv[arg]) {
Packit 0d49a1
       df = arg++;
Packit 0d49a1
  } else {
Packit 0d49a1
    help();
Packit 0d49a1
    exit(1);
Packit 0d49a1
  }
Packit 0d49a1
Packit 0d49a1
  if (argv[arg]) {
Packit 0d49a1
       wtc = arg++;
Packit 0d49a1
  } else {
Packit 0d49a1
    help();
Packit 0d49a1
    exit(1);
Packit 0d49a1
  }
Packit 0d49a1
Packit 0d49a1
  /* load the hyphenation dictionary */  
Packit 0d49a1
  if ((dict = hnj_hyphen_load(argv[df])) == NULL) {
Packit 0d49a1
       fprintf(stderr, "Couldn't find file %s\n", argv[df]);
Packit 0d49a1
       fflush(stderr);
Packit 0d49a1
       exit(1);
Packit 0d49a1
  }
Packit 0d49a1
Packit 0d49a1
  /* open the words to check list */
Packit 0d49a1
  wtclst = fopen(argv[wtc],"r");
Packit 0d49a1
  if (!wtclst) {
Packit 0d49a1
    fprintf(stderr,"Error - could not open file of words to check\n");
Packit 0d49a1
    exit(1);
Packit 0d49a1
  }
Packit 0d49a1
Packit 0d49a1
    
Packit 0d49a1
  /* now read each word from the wtc file */
Packit 0d49a1
    while(fgets(buf,BUFSIZE,wtclst) != NULL) {
Packit 0d49a1
       k = strlen(buf);
Packit 0d49a1
       if (k && buf[k - 1] == '\n') buf[k - 1] = '\0';
Packit 0d49a1
       if (k >=2 && buf[k - 2] == '\r') buf[k-- - 2] = '\0';
Packit 0d49a1
Packit 0d49a1
       /* set aside some buffers to hold lower cased */
Packit 0d49a1
       /* and hyphen information */
Packit 0d49a1
       lcword = (char *) malloc(k+1);
Packit 0d49a1
       hyphens = (char *)malloc(k+5);
Packit 0d49a1
       /* basic ascii lower-case, not suitable for real-world usage*/
Packit 0d49a1
       for (i = 0; i < k; ++i) {
Packit 0d49a1
         lcword[i] = buf[i];
Packit 0d49a1
         if ( (lcword[i] >= 'A') && (lcword[i] <= 'Z') )
Packit 0d49a1
           lcword[i] += 32;
Packit 0d49a1
       }
Packit 0d49a1
Packit 0d49a1
       /* first remove any trailing periods */
Packit 0d49a1
       n = k-1;
Packit 0d49a1
       while((n >=0) && (lcword[n] == '.')) n--;
Packit 0d49a1
       n++;
Packit 0d49a1
Packit 0d49a1
       /* now actually try to hyphenate the word */
Packit 0d49a1
       
Packit 0d49a1
       rep = NULL;
Packit 0d49a1
       pos = NULL;
Packit 0d49a1
       cut = NULL;
Packit 0d49a1
       hword[0] = '\0';
Packit 0d49a1
Packit 0d49a1
       if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) ||
Packit 0d49a1
	    (optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &rep, &pos, &cut))) {
Packit 0d49a1
             free(hyphens);
Packit 0d49a1
             free(lcword);
Packit 0d49a1
             fprintf(stderr, "hyphenation error\n");
Packit 0d49a1
             exit(1);
Packit 0d49a1
       }
Packit 0d49a1
Packit 0d49a1
       if (optn) fprintf(stderr, "%s\n", hyphens); 
Packit 0d49a1
Packit 0d49a1
       if (!optd) {
Packit 0d49a1
         /* now backfill hyphens[] for any removed periods */
Packit 0d49a1
         for (c = n; c < k; c++) hyphens[c] = '0';
Packit 0d49a1
         hyphens[k] = '\0';
Packit 0d49a1
Packit 0d49a1
         /* now create a new char string showing hyphenation positions */
Packit 0d49a1
         /* count the hyphens and allocate space for the new hypehanted string */
Packit 0d49a1
         nHyphCount = 0;
Packit 0d49a1
         for (i = 0; i < n; i++)
Packit 0d49a1
           if (hyphens[i]&1)
Packit 0d49a1
             nHyphCount++;
Packit 0d49a1
         hyphword = (char *) malloc(k+1+nHyphCount);
Packit 0d49a1
         j = 0;
Packit 0d49a1
         for (i = 0; i < n; i++) {
Packit 0d49a1
	   hyphword[j++] = buf[i];
Packit 0d49a1
           if (hyphens[i]&1) {
Packit 0d49a1
	      hyphword[j++] = '-';
Packit 0d49a1
	   }
Packit 0d49a1
         }
Packit 0d49a1
         hyphword[j] = '\0';
Packit 0d49a1
         fprintf(stdout,"%s\n",hyphword);
Packit 0d49a1
         fflush(stdout);
Packit 0d49a1
         free(hyphword);
Packit 0d49a1
      } else {
Packit 0d49a1
         fprintf(stdout,"%s\n", hword);
Packit 0d49a1
Packit 0d49a1
Packit 0d49a1
         if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->utf8);
Packit 0d49a1
         if (rep) {
Packit 0d49a1
            for (i = 0; i < n - 1; i++) {
Packit 0d49a1
                if (rep[i]) free(rep[i]);
Packit 0d49a1
            }
Packit 0d49a1
            free(rep);
Packit 0d49a1
            free(pos);
Packit 0d49a1
            free(cut);
Packit 0d49a1
         }
Packit 0d49a1
      }
Packit 0d49a1
      free(hyphens);
Packit 0d49a1
      free(lcword);
Packit 0d49a1
    }
Packit 0d49a1
Packit 0d49a1
    fclose(wtclst);
Packit 0d49a1
    hnj_hyphen_free(dict);
Packit 0d49a1
    return 0;
Packit 0d49a1
}