Blob Blame History Raw
/*
 * lens.h: Repreentation of lenses
 *
 * Copyright (C) 2007-2016 David Lutterkort
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 *
 * Author: David Lutterkort <dlutter@redhat.com>
 */

#ifndef LENS_H_
#define LENS_H_

#include "syntax.h"
#include "fa.h"
#include "jmt.h"

/* keep in sync with tag name table */
enum lens_tag {
    L_DEL = 42,    /* Shift tag values so we fail fast(er) on bad pointers */
    L_STORE,
    L_VALUE,
    L_KEY,
    L_LABEL,
    L_SEQ,
    L_COUNTER,
    L_CONCAT,
    L_UNION,
    L_SUBTREE,
    L_STAR,
    L_MAYBE,
    L_REC,
    L_SQUARE
};

/* A lens. The way the type information is computed is a little
 * delicate. There are various regexps involved to form the final type:
 *
 * CTYPE - the concrete type, used to parse file -> tree
 * ATYPE - the abstract type, used to parse tree -> file
 * KTYPE - the 'key' type, matching the label that this lens
 *         can produce, or NULL if no label is produced
 * VTYPE - the 'value' type, matching the value that this lens
 *         can produce, or NULL if no value is produce
 *
 * We distinguish between regular and recursive (context-free) lenses. Only
 * L_REC and the combinators can be marked recursive.
 *
 * Types are computed at different times, depending on whether the lens is
 * recursive or not. For non-recursive lenses, types are computed when the
 * lens is constructed by one of the LNS_MAKE_* functions; for recursive
 * lenses, we never compute an explicit ctype (since regular approximations
 * of it are pretty much useless), we do however compute regular
 * approximations of the ktype, vtype, and atype in LNS_CHECK_REC. That
 * means that recursive lenses accept context free languages in the string
 * -> tree direction, but only regular tree languages in the tree -> string
 * direction.
 *
 * Any lens that uses a recursive lens somehow is marked as recursive
 * itself.
 */
struct lens {
    unsigned int              ref;
    enum lens_tag             tag;
    struct info              *info;
    struct regexp            *ctype;  /* NULL when recursive == 1 */
    struct regexp            *atype;
    struct regexp            *ktype;
    struct regexp            *vtype;
    struct jmt               *jmt;    /* When recursive == 1, might have jmt */
    unsigned int              value : 1;
    unsigned int              key : 1;
    unsigned int              recursive : 1;
    unsigned int              consumes_value : 1;
    /* Whether we are inside a recursive lens or outside */
    unsigned int              rec_internal : 1;
    unsigned int              ctype_nullable : 1;
    union {
        /* Primitive lenses */
        struct {                   /* L_DEL uses both */
            struct regexp *regexp; /* L_STORE, L_KEY */
            struct string *string; /* L_VALUE, L_LABEL, L_SEQ, L_COUNTER */
        };
        /* Combinators */
        struct lens *child;         /* L_SUBTREE, L_STAR, L_MAYBE, L_SQUARE */
        struct {                    /* L_UNION, L_CONCAT */
            unsigned int nchildren;
            struct lens **children;
        };
        struct {
            struct lens *body;      /* L_REC */
            /* We represent a recursive lens as two instances of struct
             * lens with L_REC. One has rec_internal set to 1, the other
             * has it set to 0. The one with rec_internal is used within
             * the body, the other is what is used from the 'outside'. This
             * is necessary to break the cycles inherent in recursive
             * lenses with reference counting. The link through alias is
             * set up in lns_check_rec, and not reference counted.
             *
             * Generally, any lens used in the body of a recursive lens is
             * marked with rec_internal == 1; lenses that use the recursive
             * lens 'from the outside' are marked with rec_internal ==
             * 0. In the latter case, we can assign types right away,
             * except for the ctype, which we never have for any recursive
             * lens.
             */
            struct lens *alias;
        };
    };
};

/* Constructors for various lens types. Constructor assumes ownership of
 * arguments without incrementing. Caller owns returned lenses.
 *
 * The return type is VALUE instead of LENS so that we can return an
 * exception iftypechecking fails.
 */
struct value *lns_make_prim(enum lens_tag tag, struct info *info,
                            struct regexp *regexp, struct string *string);
struct value *lns_make_union(struct info *, struct lens *, struct lens *,
                             int check);
struct value *lns_make_concat(struct info *, struct lens *, struct lens *,
                              int check);
struct value *lns_make_subtree(struct info *, struct lens *);
struct value *lns_make_star(struct info *, struct lens *,
                            int check);
struct value *lns_make_plus(struct info *, struct lens *,
                            int check);
struct value *lns_make_maybe(struct info *, struct lens *,
                             int check);
struct value *lns_make_square(struct info *, struct lens *, struct lens *,
                              struct lens *lens, int check);


/* Pretty-print a lens */
char *format_lens(struct lens *l);

/* Pretty-print the atype of a lens. Allocates BUF, which must be freed by
 * the caller */
int lns_format_atype(struct lens *, char **buf);

/* Recursive lenses */
struct value *lns_make_rec(struct info *info);
struct value *lns_check_rec(struct info *info,
                            struct lens *body, struct lens *rec,
                            int check);

/* Auxiliary data structures used during get/put/create */
struct skel {
    struct skel *next;
    enum lens_tag tag;
    union {
        char        *text;    /* L_DEL */
        struct skel *skels;   /* L_CONCAT, L_STAR, L_SQUARE */
    };
    /* Also tag == L_SUBTREE, with no data in the union */
};

struct lns_error {
    struct lens  *lens;
    struct lens  *last;       /* The last lens that matched */
    struct lens  *next;       /* The next lens that should match but doesn't */
    int           pos;        /* Errors from get/parse */
    char         *path;       /* Errors from put, pos will be -1 */
    char         *message;
};

struct dict *make_dict(char *key, struct skel *skel, struct dict *subdict);
void dict_lookup(const char *key, struct dict *dict,
                 struct skel **skel, struct dict **subdict);
int dict_append(struct dict **dict, struct dict *d2);
void free_skel(struct skel *skel);
void free_dict(struct dict *dict);
void free_lns_error(struct lns_error *err);

/* Parse text TEXT with LENS. INFO indicates where TEXT was read from.
 *
 * If ERR is non-NULL, *ERR is set to NULL on success, and to an error
 * message on failure; the constructed tree is always returned. If ERR is
 * NULL, return the tree on success, and NULL on failure.
 *
 * ENABLE_SPAN indicates whether span information should be collected or not
 */
struct tree *lns_get(struct info *info, struct lens *lens, const char *text,
                     int enable_span, struct lns_error **err);
struct skel *lns_parse(struct lens *lens, const char *text,
                       struct dict **dict, struct lns_error **err);

/* Write tree TREE that was initially read from TEXT (but might have been
 * modified) into file OUT using LENS.
 *
 * If ERR is non-NULL, *ERR is set to NULL on success, and to an error
 * message on failure.
 *
 * INFO indicates where we are writing to, and its flags indicate whether
 * to update spans or not.
 */
void lns_put(struct info *info, FILE *out, struct lens *lens, struct tree *tree,
             const char *text, int enable_span, struct lns_error **err);

/* Free up temporary data structures, most importantly compiled
   regular expressions */
void lens_release(struct lens *lens);
void free_lens(struct lens *lens);

/*
 * Encoding of tree levels into strings
 */

/* Special characters used when encoding one level of the tree as a string.
 * We encode one tree node as KEY . ENC_EQ . VALUE . ENC_SLASH; if KEY or
 * VALUE are NULL, we use ENC_NULL, which is the empty string. This has the
 * effect that NULL strings are treated the same as empty strings.
 *
 * This encoding is used both for actual trees in the put direction, and to
 * produce regular expressions describing one level in the tree (we
 * disregard subtrees)
 *
 * For this to work, neither ENC_EQ nor ENC_SLASH can be allowed in a
 * VALUE; we do this behind the scenes by rewriting regular expressions for
 * values.
 */
#define ENC_EQ        "\003"
#define ENC_SLASH     "\004"
#define ENC_NULL      ""
#define ENC_EQ_CH     (ENC_EQ[0])
#define ENC_SLASH_CH  (ENC_SLASH[0])

/* The reserved range of characters that we do not allow in user-supplied
   regular expressions, since we need them for internal bookkeeping.

   This range must include the ENC_* characters
*/
#define RESERVED_FROM "\001"
#define RESERVED_TO   ENC_SLASH
#define RESERVED_FROM_CH (RESERVED_FROM[0])
#define RESERVED_TO_CH   ENC_SLASH_CH
/* The range of reserved chars as it appears in a regex */
#define RESERVED_RANGE_RX RESERVED_FROM "-" RESERVED_TO
/* The equivalent of "." in a regexp for display */
#define RESERVED_DOT_RX "[^" RESERVED_RANGE_RX "\n]"

/* The length of the string S encoded */
#define ENCLEN(s) ((s) == NULL ? strlen(ENC_NULL) : strlen(s))
#define ENCSTR(s) ((s) == NULL ? ENC_NULL : s)

/* helper to access first and last child */
#define child_first(l) (l)->children[0]
#define child_last(l) (l)->children[(l)->nchildren - 1]

/* Format an encoded level as
 *    { key1 = value1 } { key2 = value2 } .. { keyN = valueN }
 */
char *enc_format(const char *e, size_t len);
/* Format an encoded level similar to ENC_FORMAT, but put each tree node
 * on a new line indented by INDENT spaces. If INDENT is negative, produce the
 * same output as ENC_FORMAT
 *    { key1 = value1 } { key2 = value2 } .. { keyN = valueN }
 */
char *enc_format_indent(const char *e, size_t len, int indent);

#if ENABLE_DEBUG
void dump_lens_tree(struct lens *lens);
void dump_lens(FILE *out, struct lens *lens);
#endif

#endif


/*
 * Local variables:
 *  indent-tabs-mode: nil
 *  c-indent-level: 4
 *  c-basic-offset: 4
 *  tab-width: 4
 * End:
 */