Blame src/libopts/tokenize.c

Packit Service f5fc51
/** \file tokenize.c
Packit Service f5fc51
 *
Packit Service f5fc51
 *  Tokenize a string, accommodating quoted strings.
Packit Service f5fc51
 *
Packit Service f5fc51
 * @addtogroup autoopts
Packit Service f5fc51
 * @{
Packit Service f5fc51
 */
Packit Service f5fc51
/*
Packit Service f5fc51
 *  This file defines the string_tokenize interface
Packit Service f5fc51
 *  This file is part of AutoOpts, a companion to AutoGen.
Packit Service f5fc51
 *  AutoOpts is free software.
Packit Service f5fc51
 *  AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
Packit Service f5fc51
 *
Packit Service f5fc51
 *  AutoOpts is available under any one of two licenses.  The license
Packit Service f5fc51
 *  in use must be one of these two and the choice is under the control
Packit Service f5fc51
 *  of the user of the license.
Packit Service f5fc51
 *
Packit Service f5fc51
 *   The GNU Lesser General Public License, version 3 or later
Packit Service f5fc51
 *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
Packit Service f5fc51
 *
Packit Service f5fc51
 *   The Modified Berkeley Software Distribution License
Packit Service f5fc51
 *      See the file "COPYING.mbsd"
Packit Service f5fc51
 *
Packit Service f5fc51
 *  These files have the following sha256 sums:
Packit Service f5fc51
 *
Packit Service f5fc51
 *  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
Packit Service f5fc51
 *  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
Packit Service f5fc51
 *  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
Packit Service f5fc51
 */
Packit Service f5fc51
Packit Service f5fc51
static void
Packit Service f5fc51
copy_cooked(ch_t ** ppDest, char const ** ppSrc)
Packit Service f5fc51
{
Packit Service f5fc51
    ch_t * pDest = (ch_t *)*ppDest;
Packit Service f5fc51
    const ch_t * pSrc  = (const ch_t *)(*ppSrc + 1);
Packit Service f5fc51
Packit Service f5fc51
    for (;;) {
Packit Service f5fc51
        ch_t ch = *(pSrc++);
Packit Service f5fc51
        switch (ch) {
Packit Service f5fc51
        case NUL:   *ppSrc = NULL; return;
Packit Service f5fc51
        case '"':   goto done;
Packit Service f5fc51
        case '\\':
Packit Service f5fc51
            pSrc += ao_string_cook_escape_char((char *)pSrc, (char *)&ch, 0x7F);
Packit Service f5fc51
            if (ch == 0x7F)
Packit Service f5fc51
                break;
Packit Service f5fc51
            /* FALLTHROUGH */
Packit Service f5fc51
Packit Service f5fc51
        default:
Packit Service f5fc51
            *(pDest++) = ch;
Packit Service f5fc51
        }
Packit Service f5fc51
    }
Packit Service f5fc51
Packit Service f5fc51
 done:
Packit Service f5fc51
    *ppDest = (ch_t *)pDest; /* next spot for storing character */
Packit Service f5fc51
    *ppSrc  = (char const *)pSrc;  /* char following closing quote    */
Packit Service f5fc51
}
Packit Service f5fc51
Packit Service f5fc51
Packit Service f5fc51
static void
Packit Service f5fc51
copy_raw(ch_t ** ppDest, char const ** ppSrc)
Packit Service f5fc51
{
Packit Service f5fc51
    ch_t * pDest = *ppDest;
Packit Service f5fc51
    cc_t * pSrc  = (cc_t *) (*ppSrc + 1);
Packit Service f5fc51
Packit Service f5fc51
    for (;;) {
Packit Service f5fc51
        ch_t ch = *(pSrc++);
Packit Service f5fc51
        switch (ch) {
Packit Service f5fc51
        case NUL:   *ppSrc = NULL; return;
Packit Service f5fc51
        case '\'':  goto done;
Packit Service f5fc51
        case '\\':
Packit Service f5fc51
            /*
Packit Service f5fc51
             *  *Four* escapes are handled:  newline removal, escape char
Packit Service f5fc51
             *  quoting and apostrophe quoting
Packit Service f5fc51
             */
Packit Service f5fc51
            switch (*pSrc) {
Packit Service f5fc51
            case NUL:   *ppSrc = NULL; return;
Packit Service f5fc51
            case '\r':
Packit Service f5fc51
                if (*(++pSrc) == NL)
Packit Service f5fc51
                    ++pSrc;
Packit Service f5fc51
                continue;
Packit Service f5fc51
Packit Service f5fc51
            case NL:
Packit Service f5fc51
                ++pSrc;
Packit Service f5fc51
                continue;
Packit Service f5fc51
Packit Service f5fc51
            case '\'':
Packit Service f5fc51
                ch = '\'';
Packit Service f5fc51
                /* FALLTHROUGH */
Packit Service f5fc51
Packit Service f5fc51
            case '\\':
Packit Service f5fc51
                ++pSrc;
Packit Service f5fc51
                break;
Packit Service f5fc51
            }
Packit Service f5fc51
            /* FALLTHROUGH */
Packit Service f5fc51
Packit Service f5fc51
        default:
Packit Service f5fc51
            *(pDest++) = ch;
Packit Service f5fc51
        }
Packit Service f5fc51
    }
Packit Service f5fc51
Packit Service f5fc51
 done:
Packit Service f5fc51
    *ppDest = pDest; /* next spot for storing character */
Packit Service f5fc51
    *ppSrc  = (char const *) pSrc;  /* char following closing quote    */
Packit Service f5fc51
}
Packit Service f5fc51
Packit Service f5fc51
static token_list_t *
Packit Service f5fc51
alloc_token_list(char const * str)
Packit Service f5fc51
{
Packit Service f5fc51
    token_list_t * res;
Packit Service f5fc51
Packit Service f5fc51
    int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */
Packit Service f5fc51
Packit Service f5fc51
    if (str == NULL) goto enoent_res;
Packit Service f5fc51
Packit Service f5fc51
    /*
Packit Service f5fc51
     *  Trim leading white space.  Use "ENOENT" and a NULL return to indicate
Packit Service f5fc51
     *  an empty string was passed.
Packit Service f5fc51
     */
Packit Service f5fc51
    str = SPN_WHITESPACE_CHARS(str);
Packit Service f5fc51
    if (*str == NUL)  goto enoent_res;
Packit Service f5fc51
Packit Service f5fc51
    /*
Packit Service f5fc51
     *  Take an approximate count of tokens.  If no quoted strings are used,
Packit Service f5fc51
     *  it will be accurate.  If quoted strings are used, it will be a little
Packit Service f5fc51
     *  high and we'll squander the space for a few extra pointers.
Packit Service f5fc51
     */
Packit Service f5fc51
    {
Packit Service f5fc51
        char const * pz = str;
Packit Service f5fc51
Packit Service f5fc51
        do {
Packit Service f5fc51
            max_token_ct++;
Packit Service f5fc51
            pz = BRK_WHITESPACE_CHARS(pz+1);
Packit Service f5fc51
            pz = SPN_WHITESPACE_CHARS(pz);
Packit Service f5fc51
        } while (*pz != NUL);
Packit Service f5fc51
Packit Service f5fc51
        res = malloc(sizeof(*res) + (size_t)(pz - str)
Packit Service f5fc51
                     + ((size_t)max_token_ct * sizeof(ch_t *)));
Packit Service f5fc51
    }
Packit Service f5fc51
Packit Service f5fc51
    if (res == NULL)
Packit Service f5fc51
        errno = ENOMEM;
Packit Service f5fc51
    else res->tkn_list[0] = (ch_t *)(res->tkn_list + (max_token_ct - 1));
Packit Service f5fc51
Packit Service f5fc51
    return res;
Packit Service f5fc51
Packit Service f5fc51
    enoent_res:
Packit Service f5fc51
Packit Service f5fc51
    errno = ENOENT;
Packit Service f5fc51
    return NULL;
Packit Service f5fc51
}
Packit Service f5fc51
Packit Service f5fc51
/*=export_func ao_string_tokenize
Packit Service f5fc51
 *
Packit Service f5fc51
 * what: tokenize an input string
Packit Service f5fc51
 *
Packit Service f5fc51
 * arg:  + char const * + string + string to be tokenized +
Packit Service f5fc51
 *
Packit Service f5fc51
 * ret_type:  token_list_t *
Packit Service f5fc51
 * ret_desc:  pointer to a structure that lists each token
Packit Service f5fc51
 *
Packit Service f5fc51
 * doc:
Packit Service f5fc51
 *
Packit Service f5fc51
 * This function will convert one input string into a list of strings.
Packit Service f5fc51
 * The list of strings is derived by separating the input based on
Packit Service f5fc51
 * white space separation.  However, if the input contains either single
Packit Service f5fc51
 * or double quote characters, then the text after that character up to
Packit Service f5fc51
 * a matching quote will become the string in the list.
Packit Service f5fc51
 *
Packit Service f5fc51
 *  The returned pointer should be deallocated with @code{free(3C)} when
Packit Service f5fc51
 *  you are done using the data.  The data are placed in a single block of
Packit Service f5fc51
 *  allocated memory.  Do not deallocate individual token/strings.
Packit Service f5fc51
 *
Packit Service f5fc51
 *  The structure pointed to will contain at least these two fields:
Packit Service f5fc51
 *  @table @samp
Packit Service f5fc51
 *  @item tkn_ct
Packit Service f5fc51
 *  The number of tokens found in the input string.
Packit Service f5fc51
 *  @item tkn_list
Packit Service f5fc51
 *  An array of @code{tkn_ct + 1} pointers to substring tokens, with
Packit Service f5fc51
 *  the last pointer set to NULL.
Packit Service f5fc51
 *  @end table
Packit Service f5fc51
 *
Packit Service f5fc51
 * There are two types of quoted strings: single quoted (@code{'}) and
Packit Service f5fc51
 * double quoted (@code{"}).  Singly quoted strings are fairly raw in that
Packit Service f5fc51
 * escape characters (@code{\\}) are simply another character, except when
Packit Service f5fc51
 * preceding the following characters:
Packit Service f5fc51
 * @example
Packit Service f5fc51
 * @code{\\}  double backslashes reduce to one
Packit Service f5fc51
 * @code{'}   incorporates the single quote into the string
Packit Service f5fc51
 * @code{\n}  suppresses both the backslash and newline character
Packit Service f5fc51
 * @end example
Packit Service f5fc51
 *
Packit Service f5fc51
 * Double quote strings are formed according to the rules of string
Packit Service f5fc51
 * constants in ANSI-C programs.
Packit Service f5fc51
 *
Packit Service f5fc51
 * example:
Packit Service f5fc51
 * @example
Packit Service f5fc51
 *    #include <stdlib.h>
Packit Service f5fc51
 *    int ix;
Packit Service f5fc51
 *    token_list_t * ptl = ao_string_tokenize(some_string);
Packit Service f5fc51
 *    for (ix = 0; ix < ptl->tkn_ct; ix++)
Packit Service f5fc51
 *       do_something_with_tkn(ptl->tkn_list[ix]);
Packit Service f5fc51
 *    free(ptl);
Packit Service f5fc51
 * @end example
Packit Service f5fc51
 * Note that everything is freed with the one call to @code{free(3C)}.
Packit Service f5fc51
 *
Packit Service f5fc51
 * err:
Packit Service f5fc51
 *  NULL is returned and @code{errno} will be set to indicate the problem:
Packit Service f5fc51
 *  @itemize @bullet
Packit Service f5fc51
 *  @item
Packit Service f5fc51
 *  @code{EINVAL} - There was an unterminated quoted string.
Packit Service f5fc51
 *  @item
Packit Service f5fc51
 *  @code{ENOENT} - The input string was empty.
Packit Service f5fc51
 *  @item
Packit Service f5fc51
 *  @code{ENOMEM} - There is not enough memory.
Packit Service f5fc51
 *  @end itemize
Packit Service f5fc51
=*/
Packit Service f5fc51
token_list_t *
Packit Service f5fc51
ao_string_tokenize(char const * str)
Packit Service f5fc51
{
Packit Service f5fc51
    token_list_t * res = alloc_token_list(str);
Packit Service f5fc51
    ch_t * pzDest;
Packit Service f5fc51
Packit Service f5fc51
    /*
Packit Service f5fc51
     *  Now copy each token into the output buffer.
Packit Service f5fc51
     */
Packit Service f5fc51
    if (res == NULL)
Packit Service f5fc51
        return res;
Packit Service f5fc51
Packit Service f5fc51
    pzDest = (ch_t *)(res->tkn_list[0]);
Packit Service f5fc51
    res->tkn_ct  = 0;
Packit Service f5fc51
Packit Service f5fc51
    do  {
Packit Service f5fc51
        res->tkn_list[ res->tkn_ct++ ] = pzDest;
Packit Service f5fc51
        for (;;) {
Packit Service f5fc51
            int ch = (ch_t)*str;
Packit Service f5fc51
            if (IS_WHITESPACE_CHAR(ch)) {
Packit Service f5fc51
            found_white_space:
Packit Service f5fc51
                str = SPN_WHITESPACE_CHARS(str+1);
Packit Service f5fc51
                break;
Packit Service f5fc51
            }
Packit Service f5fc51
Packit Service f5fc51
            switch (ch) {
Packit Service f5fc51
            case '"':
Packit Service f5fc51
                copy_cooked(&pzDest, &str);
Packit Service f5fc51
                if (str == NULL) {
Packit Service f5fc51
                    free(res);
Packit Service f5fc51
                    errno = EINVAL;
Packit Service f5fc51
                    return NULL;
Packit Service f5fc51
                }
Packit Service f5fc51
                if (IS_WHITESPACE_CHAR(*str))
Packit Service f5fc51
                    goto found_white_space;
Packit Service f5fc51
                break;
Packit Service f5fc51
Packit Service f5fc51
            case '\'':
Packit Service f5fc51
                copy_raw(&pzDest, &str);
Packit Service f5fc51
                if (str == NULL) {
Packit Service f5fc51
                    free(res);
Packit Service f5fc51
                    errno = EINVAL;
Packit Service f5fc51
                    return NULL;
Packit Service f5fc51
                }
Packit Service f5fc51
                if (IS_WHITESPACE_CHAR(*str))
Packit Service f5fc51
                    goto found_white_space;
Packit Service f5fc51
                break;
Packit Service f5fc51
Packit Service f5fc51
            case NUL:
Packit Service f5fc51
                goto copy_done;
Packit Service f5fc51
Packit Service f5fc51
            default:
Packit Service f5fc51
                str++;
Packit Service f5fc51
                *(pzDest++) = (unsigned char)ch;
Packit Service f5fc51
            }
Packit Service f5fc51
        } copy_done:;
Packit Service f5fc51
Packit Service f5fc51
        /*
Packit Service f5fc51
         * NUL terminate the last token and see if we have any more tokens.
Packit Service f5fc51
         */
Packit Service f5fc51
        *(pzDest++) = NUL;
Packit Service f5fc51
    } while (*str != NUL);
Packit Service f5fc51
Packit Service f5fc51
    res->tkn_list[ res->tkn_ct ] = NULL;
Packit Service f5fc51
Packit Service f5fc51
    return res;
Packit Service f5fc51
}
Packit Service f5fc51
Packit Service f5fc51
#ifdef TEST
#include <stdio.h>
#include <string.h>

/*
 *  Test driver, compiled only when TEST is defined.  Every command line
 *  argument is passed to ao_string_tokenize() and either the resulting
 *  tokens or the errno value from the failure is printed.
 *  Returns 1 after printing a usage message when there is no argument
 *  and returns 0 otherwise.
 */
int
main(int argc, char ** argv)
{
    int arg_ix;

    if (argc == 1) {
        printf("USAGE:  %s arg [ ... ]\n", *argv);
        return 1;
    }

    for (arg_ix = 1; arg_ix < argc; arg_ix++) {
        char *         text = argv[arg_ix];
        token_list_t * toks = ao_string_tokenize(text);
        int            tkn_ix = 0;

        if (toks == NULL) {
            printf("Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
                   text, errno, strerror(errno));
            continue;
        }

        printf("Parsed string ``%s''\ninto %d tokens:\n", text, toks->tkn_ct);

        /* the first entry is printed before the count is checked */
        do {
            printf(" %3d:  ``%s''\n", tkn_ix+1, toks->tkn_list[tkn_ix]);
        } while (++tkn_ix < toks->tkn_ct);

        free(toks);
    }

    return 0;
}
#endif
Packit Service f5fc51
Packit Service f5fc51
/** @}
Packit Service f5fc51
 *
Packit Service f5fc51
 * Local Variables:
Packit Service f5fc51
 * mode: C
Packit Service f5fc51
 * c-file-style: "stroustrup"
Packit Service f5fc51
 * indent-tabs-mode: nil
Packit Service f5fc51
 * End:
Packit Service f5fc51
 * end of autoopts/tokenize.c */