/**
 * @file xml.c
 * @author Radek Krejci <rkrejci@cesnet.cz>
 * @brief XML parser implementation for libyang
 *
 * Copyright (c) 2015 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */

#include <assert.h>
#include <errno.h>
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <fcntl.h>

#include "common.h"
#include "hash_table.h"
#include "printer.h"
#include "parser.h"
#include "tree_schema.h"
#include "xml_internal.h"

/*
 * Advance the pointer p past a run of characters accepted by is_xmlws().
 * The argument is evaluated more than once and is modified in place, so it
 * must be a side-effect-free pointer lvalue. It is parenthesized in the
 * expansion so that an expression argument still dereferences and increments
 * the intended object.
 */
#define ign_xmlws(p)                                                    \
    while (is_xmlws(*(p))) {                                            \
        (p)++;                                                          \
    }

/* forward declaration: lyxml_correct_attr_ns() calls lyxml_dup_attr() before the latter is defined */
static struct lyxml_attr *lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr);

/*
 * Find the namespace definition in scope for elem that matches prefix.
 *
 * The attributes of elem are searched first, then those of each ancestor in
 * turn. A NULL prefix asks for the default namespace (a namespace attribute
 * without a name); a default namespace definition without a value stops the
 * search and yields NULL.
 *
 * Returns the matching definition, or NULL when elem is NULL or nothing
 * matches up to the root.
 */
API const struct lyxml_ns *
lyxml_get_ns(const struct lyxml_elem *elem, const char *prefix)
{
    const struct lyxml_elem *scope;
    struct lyxml_attr *def;

    /* walk from the element itself up through all its ancestors */
    for (scope = elem; scope; scope = scope->parent) {
        for (def = scope->attr; def; def = def->next) {
            if (def->type != LYXML_ATTR_NS) {
                /* only namespace definitions are of interest */
                continue;
            }

            if (def->name) {
                if (prefix && !strcmp(def->name, prefix)) {
                    /* definition of the requested prefix */
                    return (struct lyxml_ns *)def;
                }
            } else if (!prefix) {
                /* default namespace; an empty one means "no default namespace" */
                return def->value ? (struct lyxml_ns *)def : NULL;
            }
        }
    }

    return NULL;
}

/*
 * Make sure the namespace referenced by a (non-namespace) attribute is
 * defined in the same tree as attr_parent.
 *
 * When the referenced definition lives in a different tree, it is either
 * replaced (copy_ns != 0) by a definition with the same prefix visible from
 * attr_parent - duplicating it into attr_parent if none is visible - or
 * dropped (copy_ns == 0) by setting attr->ns to NULL.
 */
static void
lyxml_correct_attr_ns(struct ly_ctx *ctx, struct lyxml_attr *attr, struct lyxml_elem *attr_parent, int copy_ns)
{
    const struct lyxml_ns *orig_ns;
    struct lyxml_elem *ns_top, *attr_top;

    if ((attr->type == LYXML_ATTR_NS) || !attr->ns) {
        /* namespace definitions and attributes without a namespace need no fixing */
        return;
    }

    /* topmost ancestor of the attribute's element */
    attr_top = attr_parent;
    while (attr_top->parent) {
        attr_top = attr_top->parent;
    }

    /* topmost ancestor of the element holding the namespace definition */
    ns_top = attr->ns->parent;
    while (ns_top->parent) {
        ns_top = ns_top->parent;
    }

    if (ns_top == attr_top) {
        /* both live in the same tree, nothing to do */
        return;
    }

    if (!copy_ns) {
        attr->ns = NULL;
        return;
    }

    orig_ns = attr->ns;
    /* a definition with this prefix may already be visible from attr_parent */
    attr->ns = lyxml_get_ns(attr_parent, orig_ns->prefix);
    if (!attr->ns) {
        /* not yet, so duplicate the original definition into attr_parent */
        attr->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, attr_parent, (struct lyxml_attr *)orig_ns);
    }
}

/*
 * Duplicate attr and append the copy to the end of parent's attribute list.
 *
 * Namespace definitions are allocated as struct lyxml_ns and get parent as
 * their owning element; standard attributes with a namespace keep pointing to
 * it after it has been corrected for the new parent.
 *
 * Returns the new attribute, or NULL when attr or parent is NULL or the
 * allocation fails.
 */
static struct lyxml_attr *
lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
{
    struct lyxml_attr *copy, *tail;
    size_t copy_size;

    if (!parent || !attr) {
        return NULL;
    }

    /* namespace definitions are stored in the list as attributes, but they
     * are really struct lyxml_ns objects and must be allocated as such */
    copy_size = (attr->type == LYXML_ATTR_NS) ? sizeof (struct lyxml_ns) : sizeof (struct lyxml_attr);
    copy = calloc(1, copy_size);
    LY_CHECK_ERR_RETURN(!copy, LOGMEM(ctx), NULL);

    copy->value = lydict_insert(ctx, attr->value, 0);
    copy->name = lydict_insert(ctx, attr->name, 0);
    copy->type = attr->type;

    /* standard attribute: inherit the namespace and fix it for the new parent */
    if (copy->type == LYXML_ATTR_STD && attr->ns) {
        copy->ns = attr->ns;
        lyxml_correct_attr_ns(ctx, copy, parent, 1);
    }

    /* namespace definition: remember the element it belongs to */
    if (copy->type == LYXML_ATTR_NS) {
        ((struct lyxml_ns *)copy)->parent = parent;
    }

    /* append the copy to the parent's attribute list */
    if (!parent->attr) {
        parent->attr = copy;
    } else {
        tail = parent->attr;
        while (tail->next) {
            tail = tail->next;
        }
        tail->next = copy;
    }

    return copy;
}

/*
 * Correct namespace references of the elements visited by the
 * LY_TREE_DFS_BEGIN/LY_TREE_DFS_END traversal started at elem, so that none
 * of them refers to a namespace definition located in a different tree than
 * elem.
 *
 * For each visited element whose namespace is defined outside of elem's tree:
 *  - copy_ns != 0: use a definition with the same prefix visible from the
 *    element, duplicating the original definition into the element when no
 *    such definition is visible;
 *  - copy_ns == 0: clear the element's namespace.
 * When correct_attrs is non-zero, the attributes of each visited element are
 * corrected as well by lyxml_correct_attr_ns().
 */
void
lyxml_correct_elem_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns, int correct_attrs)
{
    const struct lyxml_ns *tmp_ns;
    struct lyxml_elem *elem_root, *ns_root, *tmp, *iter;
    struct lyxml_attr *attr;

    /* find the root of elem */
    for (elem_root = elem; elem_root->parent; elem_root = elem_root->parent);

    LY_TREE_DFS_BEGIN(elem, tmp, iter) {
        if (iter->ns) {
            /* find the root of elem NS; the loop must stop ON the topmost
             * element (not run past it to NULL), otherwise the comparison
             * with elem_root below could never succeed */
            for (ns_root = iter->ns->parent; ns_root->parent; ns_root = ns_root->parent);

            /* elem NS is defined outside elem subtree */
            if (ns_root != elem_root) {
                if (copy_ns) {
                    tmp_ns = iter->ns;
                    /* we may have already copied the NS over? */
                    iter->ns = lyxml_get_ns(iter, tmp_ns->prefix);

                    /* we haven't copied it over, copy it now */
                    if (!iter->ns) {
                        iter->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, iter, (struct lyxml_attr *)tmp_ns);
                    }
                } else {
                    iter->ns = NULL;
                }
            }
        }
        if (correct_attrs) {
            LY_TREE_FOR(iter->attr, attr) {
                lyxml_correct_attr_ns(ctx, attr, elem_root, copy_ns);
            }
        }
        LY_TREE_DFS_END(elem, tmp, iter);
    }
}

/*
 * Duplicate elem, optionally together with its whole subtree.
 *
 * The copy gets its own name, content, flags and attributes. When parent is
 * given, the copy is appended as its last child. The namespace pointer is
 * first taken over from elem and then corrected for the copy's new position.
 * With recursive set, every child of elem is duplicated under the copy.
 *
 * Returns the copy, or NULL when elem is NULL or the allocation fails.
 */
struct lyxml_elem *
lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive)
{
    struct lyxml_elem *copy, *kid;
    struct lyxml_attr *src_attr;

    if (!elem) {
        return NULL;
    }

    copy = calloc(1, sizeof *copy);
    LY_CHECK_ERR_RETURN(!copy, LOGMEM(ctx), NULL);
    copy->content = lydict_insert(ctx, elem->content, 0);
    copy->name = lydict_insert(ctx, elem->name, 0);
    copy->flags = elem->flags;
    /* a lone element is its own last sibling */
    copy->prev = copy;

    if (parent) {
        lyxml_add_child(ctx, parent, copy);
    }

    /* start with the original namespace, it is corrected below */
    copy->ns = elem->ns;

    /* copy all the attributes */
    src_attr = elem->attr;
    while (src_attr) {
        lyxml_dup_attr(ctx, copy, src_attr);
        src_attr = src_attr->next;
    }

    /* make the namespace valid in the copy's tree */
    lyxml_correct_elem_ns(ctx, copy, 1, 0);

    if (recursive) {
        /* copy the subtree */
        LY_TREE_FOR(elem->child, kid) {
            lyxml_dup_elem(ctx, kid, copy, 1);
        }
    }

    return copy;
}

/*
 * Public entry point for duplicating an XML tree: copies root including all
 * its descendants; the copy has no parent.
 */
API struct lyxml_elem *
lyxml_dup(struct ly_ctx *ctx, struct lyxml_elem *root)
{
    struct lyxml_elem *copy;

    copy = lyxml_dup_elem(ctx, root, NULL, 1);

    return copy;
}

/*
 * Detach elem from its parent and from its siblings.
 *
 * copy_ns selects how namespaces are treated after the parent link is cut:
 * values below 2 are passed on to lyxml_correct_elem_ns() (together with
 * attribute correction enabled), any other value skips the namespace
 * correction entirely (lyxml_free() uses 2 before destroying the element).
 *
 * The sibling list is linked through ->next (NULL-terminated) and ->prev,
 * where the first sibling's ->prev points to the last sibling. After the
 * call elem is a standalone element (next == NULL, prev == elem).
 * A NULL elem is silently ignored.
 */
void
lyxml_unlink_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns)
{
    struct lyxml_elem *parent, *first;

    if (!elem) {
        return;
    }

    /* store pointers to important nodes */
    parent = elem->parent;

    /* unlink from parent */
    if (parent) {
        if (parent->child == elem) {
            /* we unlink the first child */
            /* update the parent's link */
            parent->child = elem->next;
        }
        /* forget about the parent */
        elem->parent = NULL;
    }

    /* must run after the parent link is cut, so that elem is already the root of its own tree */
    if (copy_ns < 2) {
        lyxml_correct_elem_ns(ctx, elem, copy_ns, 1);
    }

    /* unlink from siblings */
    if (elem->prev == elem) {
        /* there are no more siblings */
        return;
    }
    if (elem->next) {
        elem->next->prev = elem->prev;
    } else {
        /* unlinking the last element */
        if (parent) {
            /* parent->child was already updated above, so this is the first remaining sibling */
            first = parent->child;
        } else {
            /* no parent to ask - walk backwards until the element whose prev is the last one */
            first = elem;
            while (first->prev->next) {
                first = first->prev;
            }
        }
        /* the first sibling's prev always points to the (new) last sibling */
        first->prev = elem->prev;
    }
    /* elem->prev->next is NULL only when elem was the first sibling (prev is the last one) */
    if (elem->prev->next) {
        elem->prev->next = elem->next;
    }

    /* clean up the unlinked element */
    elem->next = NULL;
    elem->prev = elem;
}

/*
 * Public wrapper around lyxml_unlink_elem(): detaches elem from its tree,
 * asking for namespace copying (copy_ns == 1). A NULL elem is ignored.
 */
API void
lyxml_unlink(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    if (elem) {
        lyxml_unlink_elem(ctx, elem, 1);
    }
}

/*
 * Free a single attribute.
 *
 * When parent is given, attr is first removed from parent's attribute list;
 * if it is not found there, nothing is freed. With a NULL parent the
 * attribute is freed without touching any list. A NULL attr is ignored.
 */
void
lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
{
    struct lyxml_attr **link;

    if (!attr) {
        return;
    }

    if (parent) {
        /* locate the pointer (list head or some ->next) that refers to attr */
        link = &parent->attr;
        while (*link && (*link != attr)) {
            link = &(*link)->next;
        }
        if (!*link) {
            /* attribute to remove not found */
            return;
        }

        /* bypass the attribute in the list */
        *link = attr->next;
    }

    lydict_remove(ctx, attr->name);
    lydict_remove(ctx, attr->value);
    free(attr);
}

/*
 * Free all attributes of elem.
 *
 * Name and value of every attribute are released with lydict_remove() and
 * the attribute itself is freed. The list head in elem is reset to NULL
 * afterwards so that the element does not keep a dangling pointer to freed
 * memory (e.g. when the element itself is used or freed later).
 * A NULL elem or an element without attributes is left untouched.
 */
void
lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    struct lyxml_attr *a, *next;

    if (!elem) {
        return;
    }

    for (a = elem->attr; a; a = next) {
        /* remember the successor before the current item is freed */
        next = a->next;

        lydict_remove(ctx, a->name);
        lydict_remove(ctx, a->value);
        free(a);
    }

    elem->attr = NULL;
}

/*
 * Recursively free elem: its attributes, all its children, its name and
 * content, and finally the element structure itself. The element is NOT
 * unlinked from its parent or siblings here. A NULL elem is ignored.
 */
static void
lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    struct lyxml_elem *kid, *kid_next;

    if (elem) {
        lyxml_free_attrs(ctx, elem);

        /* the "safe" iteration keeps the successor, since kid is freed in the body */
        LY_TREE_FOR_SAFE(elem->child, kid_next, kid) {
            lyxml_free_elem(ctx, kid);
        }

        lydict_remove(ctx, elem->name);
        lydict_remove(ctx, elem->content);
        free(elem);
    }
}

/*
 * Free elem together with its subtree. The element is first unlinked from
 * its parent and siblings; namespace correction is skipped (copy_ns == 2)
 * because the element is destroyed right away. A NULL elem is ignored.
 */
API void
lyxml_free(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    if (elem) {
        lyxml_unlink_elem(ctx, elem, 2);
        lyxml_free_elem(ctx, elem);
    }
}

/*
 * Free elem and all of its siblings (preceding as well as following ones),
 * each including its subtree. A NULL elem is ignored.
 */
API void
lyxml_free_withsiblings(struct ly_ctx *ctx, struct lyxml_elem *elem)
{
    struct lyxml_elem *cur, *other;

    if (!elem) {
        return;
    }

    /* optimization - avoid freeing (unlinking) the last node of the siblings list:
     * go backwards from elem's predecessor and free nodes until the node without
     * a successor (the last sibling) is reached ... */
    cur = elem->prev;
    while (cur->next) {
        other = cur->prev;
        lyxml_free(ctx, cur);
        cur = other;
    }

    /* ... elem is now the first sibling, so free everything from it to the end */
    LY_TREE_FOR_SAFE(elem, other, cur) {
        lyxml_free(ctx, cur);
    }
}

/*
 * Get the value of the standard (non-namespace) attribute called name.
 *
 * With ns == NULL only attributes without a namespace match; otherwise the
 * attribute's namespace value must be equal to ns. Both elem and name must
 * not be NULL (asserted).
 *
 * Returns the value of the first matching attribute or NULL if there is none.
 */
API const char *
lyxml_get_attr(const struct lyxml_elem *elem, const char *name, const char *ns)
{
    struct lyxml_attr *cur;

    assert(elem);
    assert(name);

    for (cur = elem->attr; cur; cur = cur->next) {
        if ((cur->type != LYXML_ATTR_STD) || strcmp(name, cur->name)) {
            /* namespace definition or a different name */
            continue;
        }

        if (ns) {
            /* namespace requested - the attribute must be in exactly that one */
            if (cur->ns && !strcmp(ns, cur->ns->value)) {
                return cur->value;
            }
        } else if (!cur->ns) {
            /* no namespace requested and the attribute has none */
            return cur->value;
        }
    }

    return NULL;
}

/*
 * Append elem as the last child of parent.
 *
 * If elem already has a parent, it is unlinked from it first (with namespace
 * copying). Neither parent nor elem may be NULL (asserted).
 *
 * Always returns EXIT_SUCCESS.
 */
int
lyxml_add_child(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_elem *elem)
{
    struct lyxml_elem *head;

    assert(parent);
    assert(elem);

    /* detach from the previous parent, if any, and attach to the new one */
    if (elem->parent) {
        lyxml_unlink_elem(ctx, elem, 1);
    }
    elem->parent = parent;

    head = parent->child;
    if (!head) {
        /* the only child: it is the first and also the last sibling */
        parent->child = elem;
        elem->prev = elem;
        elem->next = NULL;
    } else {
        /* head->prev is the current last sibling, put elem behind it */
        elem->prev = head->prev;
        elem->next = NULL;
        elem->prev->next = elem;
        head->prev = elem;
    }

    return EXIT_SUCCESS;
}

/*
 * Decode one UTF-8 encoded character from the NUL-terminated string buf.
 *
 * The number of bytes the character occupies is stored into *read
 * (0 when buf is empty). The decoded value is checked:
 *  - continuation bytes must have the form 10xxxxxx,
 *  - overlong encodings are refused (2-byte sequences below 0x80, 3-byte
 *    sequences below 0x800, 4-byte sequences below 0x10000),
 *  - 3-byte values in the range 0xd800-0xdfff and above 0xfffd are refused,
 *  - 4-byte values above 0x10ffff are refused,
 *  - single-byte values below 0x20 other than 0x9, 0xa and 0xd are refused.
 *
 * Logs directly. Returns the decoded code point, or 0 on end of input or on
 * an invalid character.
 */
int
lyxml_getutf8(struct ly_ctx *ctx, const char *buf, unsigned int *read)
{
    int c, aux;
    int i;

    c = buf[0];
    *read = 0;

    /* buf is NULL terminated string, so 0 means EOF */
    if (!c) {
        LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL);
        return 0;
    }
    *read = 1;

    /* process character byte(s) */
    if ((c & 0xf8) == 0xf0) {
        /* four bytes character */
        *read = 4;

        c &= 0x07;
        for (i = 1; i <= 3; i++) {
            aux = buf[i];
            if ((aux & 0xc0) != 0x80) {
                LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
                return 0;
            }

            c = (c << 6) | (aux & 0x3f);
        }

        /* everything below 0x10000 fits into at most three bytes, so a
         * four-byte form of such a value is an overlong encoding */
        if (c < 0x10000 || c > 0x10ffff) {
            LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
            return 0;
        }
    } else if ((c & 0xf0) == 0xe0) {
        /* three bytes character */
        *read = 3;

        c &= 0x0f;
        for (i = 1; i <= 2; i++) {
            aux = buf[i];
            if ((aux & 0xc0) != 0x80) {
                LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
                return 0;
            }

            c = (c << 6) | (aux & 0x3f);
        }

        /* refuse overlong forms, the 0xd800-0xdfff block and 0xfffe/0xffff */
        if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
            LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
            return 0;
        }
    } else if ((c & 0xe0) == 0xc0) {
        /* two bytes character */
        *read = 2;

        aux = buf[1];
        if ((aux & 0xc0) != 0x80) {
            LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
            return 0;
        }
        c = ((c & 0x1f) << 6) | (aux & 0x3f);

        /* refuse overlong forms of single-byte values */
        if (c < 0x80) {
            LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
            return 0;
        }
    } else if (!(c & 0x80)) {
        /* one byte character */
        if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
            /* invalid character */
            LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
            return 0;
        }
    } else {
        /* invalid character */
        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
        return 0;
    }

    return c;
}

/*
 * Skip input up to and including the first occurrence of endstr.
 *
 * On success the number of skipped bytes (terminating sequence included) is
 * stored into *len and EXIT_SUCCESS is returned. When the input ends before
 * endstr is found, EXIT_FAILURE is returned and *len is left untouched.
 *
 * logs directly
 */
static int
parse_ignore(struct ly_ctx *ctx, const char *data, const char *endstr, unsigned int *len)
{
    unsigned int end_size;
    const char *hit;

    end_size = strlen(endstr);

    /* hitting the terminating NUL of data (possible only with an empty
     * endstr) counts as "not found", too */
    hit = strstr(data, endstr);
    if (!hit || !*hit) {
        LOGVAL(ctx, LYE_XML_MISS, LY_VLOG_NONE, NULL, "closing sequence", endstr);
        return EXIT_FAILURE;
    }

    /* the closing sequence itself is skipped as well */
    hit += end_size;
    *len = hit - data;

    return EXIT_SUCCESS;
}

/*
 * Parse character data (element content or attribute value) up to, but not
 * including, the delimiter delim.
 *
 * The five predefined entity references, decimal/hexadecimal character
 * references and CDATA sections are resolved into the output. The number of
 * consumed input bytes is stored into *len.
 *
 * Returns a newly allocated NUL-terminated string owned by the caller.
 *
 * logs directly, fails when return == NULL and *len == 0 (this holds for
 * every failure, memory allocation failures included)
 */
static char *
parse_text(struct ly_ctx *ctx, const char *data, char delim, unsigned int *len)
{
#define BUFSIZE 1024

    char buf[BUFSIZE];
    char *result = NULL, *aux;
    unsigned int r;
    int o, size = 0;
    int cdsect = 0;
    int32_t n;

    /* o is the write offset in buf; it is incremented by the for statement,
     * so branches writing more/less than one byte adjust it accordingly */
    for (*len = o = 0; cdsect || data[*len] != delim; o++) {
        if (!data[*len] || (!cdsect && !strncmp(&data[*len], "]]>", 3))) {
            LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element content, \"]]>\" found");
            goto error;
        }

loop:

        /* keep room for the longest possible single write (4 bytes) */
        if (o > BUFSIZE - 4) {
            /* add buffer into the result */
            if (result) {
                size = size + o;
                aux = realloc(result, size + 1);
            } else {
                size = o;
                aux = malloc((size + 1) * sizeof *aux);
            }
            if (!aux) {
                /* result (if any) is still valid here and is freed in error */
                LOGMEM(ctx);
                goto error;
            }
            result = aux;
            memcpy(&result[size - o], buf, o);

            /* write again into the beginning of the buffer */
            o = 0;
        }

        if (cdsect || !strncmp(&data[*len], "<![CDATA[", 9)) {
            /* CDSect */
            if (!cdsect) {
                cdsect = 1;
                *len += 9;
            }
            if (data[*len] && !strncmp(&data[*len], "]]>", 3)) {
                *len += 3;
                cdsect = 0;
                o--;            /* we don't write any data in this iteration */
            } else {
                buf[o] = data[*len];
                (*len)++;
            }
        } else if (data[*len] == '&') {
            (*len)++;
            if (data[*len] != '#') {
                /* entity reference - only predefined refs are supported */
                if (!strncmp(&data[*len], "lt;", 3)) {
                    buf[o] = '<';
                    *len += 3;
                } else if (!strncmp(&data[*len], "gt;", 3)) {
                    buf[o] = '>';
                    *len += 3;
                } else if (!strncmp(&data[*len], "amp;", 4)) {
                    buf[o] = '&';
                    *len += 4;
                } else if (!strncmp(&data[*len], "apos;", 5)) {
                    buf[o] = '\'';
                    *len += 5;
                } else if (!strncmp(&data[*len], "quot;", 5)) {
                    buf[o] = '\"';
                    *len += 5;
                } else {
                    LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "entity reference (only predefined references are supported)");
                    goto error;
                }
            } else {
                /* character reference */
                (*len)++;
                if (isdigit((unsigned char)data[*len])) {
                    for (n = 0; isdigit((unsigned char)data[*len]); (*len)++) {
                        n = (10 * n) + (data[*len] - '0');
                        if (n > 0x10ffff) {
                            /* beyond the last code point; stop before n can overflow */
                            LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value");
                            goto error;
                        }
                    }
                    if (data[*len] != ';') {
                        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference, missing semicolon");
                        goto error;
                    }
                } else if (data[(*len)++] == 'x' && isxdigit((unsigned char)data[*len])) {
                    for (n = 0; isxdigit((unsigned char)data[*len]); (*len)++) {
                        if (isdigit((unsigned char)data[*len])) {
                            r = (data[*len] - '0');
                        } else if (data[*len] > 'F') {
                            r = 10 + (data[*len] - 'a');
                        } else {
                            r = 10 + (data[*len] - 'A');
                        }
                        n = (16 * n) + r;
                        if (n > 0x10ffff) {
                            /* beyond the last code point; stop before n can overflow */
                            LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value");
                            goto error;
                        }
                    }
                    /* the terminator is skipped below, so make sure it really
                     * is the semicolon and not e.g. the end of the input */
                    if (data[*len] != ';') {
                        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference, missing semicolon");
                        goto error;
                    }
                } else {
                    LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference");
                    goto error;

                }
                r = pututf8(ctx, &buf[o], n);
                if (!r) {
                    LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value");
                    goto error;
                }
                o += r - 1;     /* o is ++ in for loop */
                (*len)++;       /* skip the terminating semicolon */
            }
        } else {
            r = copyutf8(ctx, &buf[o], &data[*len]);
            if (!r) {
                goto error;
            }

            o += r - 1;     /* o is ++ in for loop */
            (*len) = (*len) + r;
        }
    }

    if (delim == '<' && !strncmp(&data[*len], "<![CDATA[", 9)) {
        /* ignore loop's end condition on beginning of CDSect */
        goto loop;
    }
#undef BUFSIZE

    if (o) {
        /* flush the rest of the buffer into the result */
        if (result) {
            size = size + o;
            aux = realloc(result, size + 1);
        } else {
            size = o;
            aux = malloc((size + 1) * sizeof *aux);
        }
        if (!aux) {
            LOGMEM(ctx);
            goto error;
        }
        result = aux;
        memcpy(&result[size - o], buf, o);
    }
    if (result) {
        result[size] = '\0';
    } else {
        /* nothing was parsed, return an empty string */
        result = strdup("");
        if (!result) {
            LOGMEM(ctx);
            goto error;
        }
    }

    return result;

error:
    *len = 0;
    free(result);
    return NULL;
}

/*
 * Parse one attribute (or namespace definition) starting at data and append
 * it to the attribute list of parent.
 *
 * Input beginning with "xmlns" produces a namespace definition (struct
 * lyxml_ns, type LYXML_ATTR_NS); "xmlns" not followed by ':' is the default
 * namespace and gets no name. Anything else produces a standard attribute.
 * A standard attribute with a "prefix:" part gets its namespace resolved by
 * lyxml_get_ns() from parent and only the part after the colon is stored as
 * the name; the exception is the "xml:" prefix, which is kept as a part of
 * the name.
 *
 * The number of consumed bytes (closing quote included) is stored into *len.
 * Returns the new attribute or NULL on error.
 *
 * logs directly
 */
static struct lyxml_attr *
parse_attr(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
{
    const char *c = data, *start, *delim;
    char *prefix = NULL, xml_flag, *str;
    int uc;
    struct lyxml_attr *attr = NULL, *a;
    unsigned int size;

    /* check if it is attribute or namespace */
    if (!strncmp(c, "xmlns", 5)) {
        /* namespace */
        attr = calloc(1, sizeof (struct lyxml_ns));
        LY_CHECK_ERR_RETURN(!attr, LOGMEM(ctx), NULL);

        attr->type = LYXML_ATTR_NS;
        ((struct lyxml_ns *)attr)->parent = parent;
        c += 5;
        if (*c != ':') {
            /* default namespace, prefix will be empty */
            goto equal;
        }
        c++;                    /* go after ':' to the prefix value */
    } else {
        /* attribute */
        attr = calloc(1, sizeof *attr);
        LY_CHECK_ERR_RETURN(!attr, LOGMEM(ctx), NULL);

        attr->type = LYXML_ATTR_STD;
    }

    /* process name part of the attribute */
    start = c;
    uc = lyxml_getutf8(ctx, c, &size);
    if (!is_xmlnamestartchar(uc)) {
        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the attribute");
        free(attr);
        return NULL;
    }
    /* xml_flag tracks how much of the "xml" prefix was matched so far:
     * 1 = "x", 2 = "xm", 3 = "xml", 4 = the name does not start with "xml" */
    xml_flag = 4;
    if (*c == 'x') {
        xml_flag = 1;
    }
    c += size;
    uc = lyxml_getutf8(ctx, c, &size);
    while (is_xmlnamechar(uc)) {
        if (attr->type == LYXML_ATTR_STD) {
            if ((*c == ':') && (xml_flag != 3)) {
                /* attribute in a namespace (but disregard the special "xml" namespace) */
                start = c + 1;

                /* look for the prefix in namespaces; a name with more than one
                 * colon gets here repeatedly, so release the previous buffer */
                free(prefix);
                prefix = malloc((c - data + 1) * sizeof *prefix);
                LY_CHECK_ERR_GOTO(!prefix, LOGMEM(ctx), error);
                memcpy(prefix, data, c - data);
                prefix[c - data] = '\0';
                attr->ns = lyxml_get_ns(parent, prefix);
            } else if (((*c == 'm') && (xml_flag == 1)) ||
                    ((*c == 'l') && (xml_flag == 2))) {
                ++xml_flag;
            } else {
                xml_flag = 4;
            }
        }
        c += size;
        uc = lyxml_getutf8(ctx, c, &size);
    }

    /* store the name */
    size = c - start;
    attr->name = lydict_insert(ctx, start, size);

equal:
    /* check Eq mark that can be surrounded by whitespaces */
    ign_xmlws(c);
    if (*c != '=') {
        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute definition, \"=\" expected");
        goto error;
    }
    c++;
    ign_xmlws(c);

    /* process value part of the attribute */
    if (!*c || (*c != '"' && *c != '\'')) {
        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute value, \" or \' expected");
        goto error;
    }
    /* the value ends with the same quote character it started with */
    delim = c;
    str = parse_text(ctx, ++c, *delim, &size);
    if (!str && !size) {
        goto error;
    }
    attr->value = lydict_insert_zc(ctx, str);

    *len = c + size + 1 - data; /* +1 is delimiter size */

    /* put attribute into the parent's attributes list */
    if (parent->attr) {
        /* go to the end of the list */
        for (a = parent->attr; a->next; a = a->next);
        /* and append new attribute */
        a->next = attr;
    } else {
        /* add the first attribute in the list */
        parent->attr = attr;
    }

    free(prefix);
    return attr;

error:
    /* attr is not linked into parent's list yet, so free it as a standalone attribute */
    lyxml_free_attr(ctx, NULL, attr);
    free(prefix);
    return NULL;
}

/*
 * Parse one XML element (start tag, attributes, content, end tag) starting
 * at data, recursively including its child elements.
 *
 * data must point at the opening '<' of the element, otherwise NULL is
 * returned without logging anything. When parent is given, the new element
 * is appended as its last child. Processing instructions and comments inside
 * of the content are skipped. Text mixed with child elements is stored in
 * nameless child elements and the element is flagged LYXML_ELEM_MIXED; with
 * LYXML_PARSE_NOMIXEDCONTENT in options, mixed content is an error instead.
 *
 * The number of consumed bytes is stored into *len (it is set to 0 on entry).
 * Returns the parsed element, or NULL on error (whatever was built of the
 * element so far is freed).
 *
 * logs directly
 */
struct lyxml_elem *
lyxml_parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent, int options)
{
    const char *c = data, *start, *e;
    const char *lws;    /* leading white space for handling mixed content */
    int uc;
    char *str;
    char *prefix = NULL;
    unsigned int prefix_len = 0;
    struct lyxml_elem *elem = NULL, *child;
    struct lyxml_attr *attr;
    unsigned int size;
    int nons_flag = 0, closed_flag = 0;

    *len = 0;

    if (*c != '<') {
        return NULL;
    }

    /* locate element name */
    c++;
    e = c;

    uc = lyxml_getutf8(ctx, e, &size);
    if (!is_xmlnamestartchar(uc)) {
        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the element");
        return NULL;
    }
    e += size;
    uc = lyxml_getutf8(ctx, e, &size);
    while (is_xmlnamechar(uc)) {
        if (*e == ':') {
            if (prefix_len) {
                LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element name, multiple colons found");
                goto error;
            }
            /* element in a namespace */
            start = e + 1;

            /* look for the prefix in namespaces */
            prefix_len = e - c;
            LY_CHECK_ERR_GOTO(prefix, LOGVAL(ctx, LYE_XML_INCHAR, LY_VLOG_NONE, NULL, e), error);
            prefix = malloc((prefix_len + 1) * sizeof *prefix);
            LY_CHECK_ERR_GOTO(!prefix, LOGMEM(ctx), error);
            memcpy(prefix, c, prefix_len);
            prefix[prefix_len] = '\0';
            c = start;
        }
        e += size;
        uc = lyxml_getutf8(ctx, e, &size);
    }
    if (!*e) {
        LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL);
        free(prefix);
        return NULL;
    }

    /* allocate element structure */
    elem = calloc(1, sizeof *elem);
    LY_CHECK_ERR_RETURN(!elem, free(prefix); LOGMEM(ctx), NULL);

    elem->next = NULL;
    elem->prev = elem;
    if (parent) {
        lyxml_add_child(ctx, parent, elem);
    }

    /* store the name into the element structure */
    elem->name = lydict_insert(ctx, c, e - c);
    c = e;

process:
    /* we are inside of the start tag: either it ends here, or an attribute follows */
    ign_xmlws(c);
    if (!strncmp("/>", c, 2)) {
        /* we are done, it was EmptyElemTag */
        c += 2;
        elem->content = lydict_insert(ctx, "", 0);
        closed_flag = 1;
    } else if (*c == '>') {
        /* process element content */
        c++;
        lws = NULL;

        while (*c) {
            if (!strncmp(c, "</", 2)) {
                if (lws && !elem->child) {
                    /* leading white spaces were actually content */
                    goto store_content;
                }

                /* Etag */
                c += 2;
                /* get name and check it */
                e = c;
                uc = lyxml_getutf8(ctx, e, &size);
                if (!is_xmlnamestartchar(uc)) {
                    LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "NameStartChar of the element");
                    goto error;
                }
                e += size;
                uc = lyxml_getutf8(ctx, e, &size);
                while (is_xmlnamechar(uc)) {
                    if (*e == ':') {
                        /* element in a namespace */
                        start = e + 1;

                        /* look for the prefix in namespaces; the lengths must
                         * be equal as well, otherwise a shorter closing prefix
                         * would match and a longer one would make memcmp()
                         * read behind the end of the prefix buffer */
                        if (!prefix || ((unsigned int)(e - c) != prefix_len) || memcmp(prefix, c, prefix_len)) {
                            LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem,
                                   "Invalid (different namespaces) opening (%s) and closing element tags.", elem->name);
                            goto error;
                        }
                        c = start;
                    }
                    e += size;
                    uc = lyxml_getutf8(ctx, e, &size);
                }
                if (!*e) {
                    LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL);
                    goto error;
                }

                /* check that it corresponds to opening tag */
                size = e - c;
                str = malloc((size + 1) * sizeof *str);
                LY_CHECK_ERR_GOTO(!str, LOGMEM(ctx), error);
                memcpy(str, c, e - c);
                str[e - c] = '\0';
                if (size != strlen(elem->name) || memcmp(str, elem->name, size)) {
                    LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem,
                           "Invalid (mixed names) opening (%s) and closing (%s) element tags.", elem->name, str);
                    free(str);
                    goto error;
                }
                free(str);
                c = e;

                ign_xmlws(c);
                if (*c != '>') {
                    LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem, "Data after closing element tag \"%s\".", elem->name);
                    goto error;
                }
                c++;
                if (!(elem->flags & LYXML_ELEM_MIXED) && !elem->content) {
                    /* there was no content, but we don't want NULL (only if mixed content) */
                    elem->content = lydict_insert(ctx, "", 0);
                }
                closed_flag = 1;
                break;

            } else if (!strncmp(c, "<?", 2)) {
                if (lws) {
                    /* leading white spaces were only formatting */
                    lws = NULL;
                }
                /* PI - ignore it */
                c += 2;
                if (parse_ignore(ctx, c, "?>", &size)) {
                    goto error;
                }
                c += size;
            } else if (!strncmp(c, "<!--", 4)) {
                if (lws) {
                    /* leading white spaces were only formatting */
                    lws = NULL;
                }
                /* Comment - ignore it */
                c += 4;
                if (parse_ignore(ctx, c, "-->", &size)) {
                    goto error;
                }
                c += size;
            } else if (!strncmp(c, "<![CDATA[", 9)) {
                /* CDSect */
                goto store_content;
            } else if (*c == '<') {
                if (lws) {
                    if (elem->flags & LYXML_ELEM_MIXED) {
                        /* we have a mixed content */
                        goto store_content;
                    } else {
                        /* leading white spaces were only formatting */
                        lws = NULL;
                    }
                }
                if (elem->content) {
                    /* we have a mixed content */
                    if (options & LYXML_PARSE_NOMIXEDCONTENT) {
                        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content");
                        goto error;
                    }
                    /* move the text parsed so far into a nameless child element */
                    child = calloc(1, sizeof *child);
                    LY_CHECK_ERR_GOTO(!child, LOGMEM(ctx), error);
                    child->content = elem->content;
                    elem->content = NULL;
                    lyxml_add_child(ctx, elem, child);
                    elem->flags |= LYXML_ELEM_MIXED;
                }
                child = lyxml_parse_elem(ctx, c, &size, elem, options);
                if (!child) {
                    goto error;
                }
                c += size;      /* move after processed child element */
            } else if (is_xmlws(*c)) {
                /* remember where the white spaces start, they may turn out to be content */
                lws = c;
                ign_xmlws(c);
            } else {
store_content:
                /* store text content */
                if (lws) {
                    /* process content including the leading white spaces */
                    c = lws;
                    lws = NULL;
                }
                str = parse_text(ctx, c, '<', &size);
                if (!str && !size) {
                    goto error;
                }
                elem->content = lydict_insert_zc(ctx, str);
                c += size;      /* move after processed text content */

                if (elem->child) {
                    /* we have a mixed content */
                    if (options & LYXML_PARSE_NOMIXEDCONTENT) {
                        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content");
                        goto error;
                    }
                    /* move the text into a nameless child element */
                    child = calloc(1, sizeof *child);
                    LY_CHECK_ERR_GOTO(!child, LOGMEM(ctx), error);
                    child->content = elem->content;
                    elem->content = NULL;
                    lyxml_add_child(ctx, elem, child);
                    elem->flags |= LYXML_ELEM_MIXED;
                }
            }
        }
    } else {
        /* process attribute */
        attr = parse_attr(ctx, c, &size, elem);
        if (!attr) {
            goto error;
        }
        c += size;              /* move after processed attribute */

        /* check namespace */
        if (attr->type == LYXML_ATTR_NS) {
            if ((!prefix || !prefix[0]) && !attr->name) {
                if (attr->value) {
                    /* default prefix */
                    elem->ns = (struct lyxml_ns *)attr;
                } else {
                    /* xmlns="" -> no namespace */
                    nons_flag = 1;
                }
            } else if (prefix && prefix[0] && attr->name && !strncmp(attr->name, prefix, prefix_len + 1)) {
                /* matching namespace with prefix */
                elem->ns = (struct lyxml_ns *)attr;
            }
        }

        /* go back to finish element processing */
        goto process;
    }

    *len = c - data;

    if (!closed_flag) {
        LOGVAL(ctx, LYE_XML_MISS, LY_VLOG_XML, elem, "closing element tag", elem->name);
        goto error;
    }

    /* namespace not defined by the element itself, inherit it from the ancestors */
    if (!elem->ns && !nons_flag && parent) {
        elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL);
    }
    free(prefix);
    return elem;

error:
    lyxml_free(ctx, elem);
    free(prefix);
    return NULL;
}

/**
 * @brief Parse XML data stored in a NUL-terminated string. Logs directly.
 *
 * White spaces, XML declarations/processing instructions and comments in front
 * of a root element are skipped, DOCTYPE is refused. When
 * #LYXML_PARSE_MULTIROOT is set in @p options, all the following top-level
 * elements are parsed as well and connected behind the first one as its
 * siblings. Without the flag, any data left behind the first root element
 * cause only a warning.
 *
 * @param[in] ctx Context to use, must not be NULL.
 * @param[in] data NUL-terminated XML data, NULL is handled as empty data.
 * @param[in] options Parser options, passed also to lyxml_parse_elem().
 * @return The first parsed root element, NULL on error and also when there is
 * no element in @p data.
 */
API struct lyxml_elem *
lyxml_parse_mem(struct ly_ctx *ctx, const char *data, int options)
{
    const char *c = data;
    unsigned int len;
    struct lyxml_elem *root, *first = NULL, *next;

    if (!ctx) {
        LOGARG;
        return NULL;
    }

    if (!data) {
        /* nothing to parse, same result as for an empty string */
        return NULL;
    }

repeat:
    /* process document */
    while (1) {
        if (!*c) {
            /* eof */
            return first;
        } else if (is_xmlws(*c)) {
            /* skip whitespaces */
            ign_xmlws(c);
        } else if (!strncmp(c, "<?", 2)) {
            /* XMLDecl or PI - ignore it */
            c += 2;
            if (parse_ignore(ctx, c, "?>", &len)) {
                goto error;
            }
            c += len;
        } else if (!strncmp(c, "<!--", 4)) {
            /* Comment - ignore it; skip the whole opening delimiter so that its
             * dashes cannot be taken as a part of the closing "-->" */
            c += 4;
            if (parse_ignore(ctx, c, "-->", &len)) {
                goto error;
            }
            c += len;
        } else if (!strncmp(c, "<!", 2)) {
            /* DOCTYPE */
            /* TODO - standalone ignore counting < and > */
            LOGERR(ctx, LY_EINVAL, "DOCTYPE not supported in XML documents.");
            goto error;
        } else if (*c == '<') {
            /* element - process it in next loop to strictly follow XML
             * format
             */
            break;
        } else {
            LOGVAL(ctx, LYE_XML_INCHAR, LY_VLOG_NONE, NULL, c);
            goto error;
        }
    }

    root = lyxml_parse_elem(ctx, c, &len, NULL, options);
    if (!root) {
        goto error;
    } else if (!first) {
        first = root;
    } else {
        /* append the new root behind the last one, first->prev is used as
         * the pointer to the last sibling */
        first->prev->next = root;
        root->prev = first->prev;
        first->prev = root;
    }
    c += len;

    /* ignore the rest of document where can be comments, PIs and whitespaces,
     * note that we are not detecting syntax errors in these parts
     */
    ign_xmlws(c);
    if (*c) {
        if (options & LYXML_PARSE_MULTIROOT) {
            goto repeat;
        } else {
            LOGWRN(ctx, "There are some not parsed data:\n%s", c);
        }
    }

    return first;

error:
    /* free all the roots parsed so far */
    LY_TREE_FOR_SAFE(first, next, root) {
        lyxml_free(ctx, root);
    }
    return NULL;
}

/**
 * @brief Parse XML data from a file.
 *
 * The file is opened read-only, mapped into memory by lyp_mmap() and the
 * mapped data are passed to lyxml_parse_mem().
 *
 * @param[in] ctx Context to use, must not be NULL.
 * @param[in] filename Path to the file to parse, must not be NULL.
 * @param[in] options Parser options passed to lyxml_parse_mem().
 * @return Result of lyxml_parse_mem(), NULL when an argument is missing, the
 * file cannot be opened or mapped, or when the mapping is empty.
 */
API struct lyxml_elem *
lyxml_parse_path(struct ly_ctx *ctx, const char *filename, int options)
{
    struct lyxml_elem *tree = NULL;
    size_t map_len;
    char *map;
    int fd;

    if (!filename || !ctx) {
        LOGARG;
        return NULL;
    }

    fd = open(filename, O_RDONLY);
    if (fd == -1) {
        LOGERR(ctx, LY_EINVAL,"Opening file \"%s\" failed.", filename);
        return NULL;
    }

    if (lyp_mmap(ctx, fd, 0, &map_len, (void **)&map)) {
        LOGERR(ctx, LY_ESYS, "Mapping file descriptor into memory failed (%s()).", __func__);
    } else if (map) {
        /* non-empty XML file */
        tree = lyxml_parse_mem(ctx, map, options);
        lyp_munmap(map, map_len);
    }

    /* the descriptor is released on every path that opened it */
    close(fd);

    return tree;
}

/**
 * @brief Print a text with the XML special characters replaced by entity
 * references.
 *
 * '&', '<' and '>' are always replaced, '"' only when @p type is
 * #LYXML_DATA_ATTR. All the other characters are written as they are, one
 * character per ly_write() call.
 *
 * @param[in] out Output handler.
 * @param[in] text Text to print, NULL prints nothing.
 * @param[in] type Kind of the printed data (element content or attribute value).
 * @return Sum of the ly_print() return values plus one for each character
 * written unchanged.
 */
int
lyxml_dump_text(struct lyout *out, const char *text, LYXML_DATA_TYPE type)
{
    const char *cur;
    unsigned int written = 0;

    if (!text) {
        return 0;
    }

    for (cur = text; *cur; ++cur) {
        if (*cur == '&') {
            written += ly_print(out, "&amp;");
        } else if (*cur == '<') {
            written += ly_print(out, "&lt;");
        } else if (*cur == '>') {
            /* not needed, just for readability */
            written += ly_print(out, "&gt;");
        } else if ((*cur == '"') && (type == LYXML_DATA_ATTR)) {
            written += ly_print(out, "&quot;");
        } else {
            /* ordinary character, including '"' outside of attribute values */
            ly_write(out, cur, 1);
            written++;
        }
    }

    return written;
}

static int
dump_elem(struct lyout *out, const struct lyxml_elem *e, int level, int options, int last_elem)
{
    int size = 0;
    struct lyxml_attr *a;
    struct lyxml_elem *child;
    const char *delim, *delim_outer;
    int indent;

    if (!e->name) {
        /* mixed content */
        if (e->content) {
            return lyxml_dump_text(out, e->content, LYXML_DATA_ELEM);
        } else {
            return 0;
        }
    }

    delim = delim_outer = (options & LYXML_PRINT_FORMAT) ? "\n" : "";
    indent = 2 * level;
    if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
        delim = "";
    }
    if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
        delim_outer = "";
        indent = 0;
    }
    if (last_elem && (options & LYXML_PRINT_NO_LAST_NEWLINE)) {
        delim_outer = "";
    }

    if (!(options & (LYXML_PRINT_OPEN | LYXML_PRINT_CLOSE | LYXML_PRINT_ATTRS)) || (options & LYXML_PRINT_OPEN))  {
        /* opening tag */
        if (e->ns && e->ns->prefix) {
            size += ly_print(out, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
        } else {
            size += ly_print(out, "%*s<%s", indent, "", e->name);
        }
    } else if (options & LYXML_PRINT_CLOSE) {
        indent = 0;
        goto close;
    }

    /* attributes */
    for (a = e->attr; a; a = a->next) {
        if (a->type == LYXML_ATTR_NS) {
            if (a->name) {
                size += ly_print(out, " xmlns:%s=\"%s\"", a->name, a->value ? a->value : "");
            } else {
                size += ly_print(out, " xmlns=\"%s\"", a->value ? a->value : "");
            }
        } else if (a->ns && a->ns->prefix) {
            size += ly_print(out, " %s:%s=\"%s\"", a->ns->prefix, a->name, a->value);
        } else {
            size += ly_print(out, " %s=\"%s\"", a->name, a->value);
        }
    }

    /* apply options */
    if ((options & LYXML_PRINT_CLOSE) && (options & LYXML_PRINT_OPEN)) {
        size += ly_print(out, "/>%s", delim);
        return size;
    } else if (options & LYXML_PRINT_OPEN) {
        ly_print(out, ">");
        return ++size;
    } else if (options & LYXML_PRINT_ATTRS) {
        return size;
    }

    if (!e->child && (!e->content || !e->content[0])) {
        size += ly_print(out, "/>%s", delim);
        return size;
    } else if (e->content && e->content[0]) {
        ly_print(out, ">");
        size++;

        size += lyxml_dump_text(out, e->content, LYXML_DATA_ELEM);

        if (e->ns && e->ns->prefix) {
            size += ly_print(out, "</%s:%s>%s", e->ns->prefix, e->name, delim);
        } else {
            size += ly_print(out, "</%s>%s", e->name, delim);
        }
        return size;
    } else {
        size += ly_print(out, ">%s", delim);
    }

    /* go recursively */
    LY_TREE_FOR(e->child, child) {
        if (options & LYXML_PRINT_FORMAT) {
            size += dump_elem(out, child, level + 1, LYXML_PRINT_FORMAT, 0);
        } else {
            size += dump_elem(out, child, level, 0, 0);
        }
    }

close:
    /* closing tag */
    if (e->ns && e->ns->prefix) {
        size += ly_print(out, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name, delim_outer);
    } else {
        size += ly_print(out, "%*s</%s>%s", indent, "", e->name, delim_outer);
    }

    return size;
}

/**
 * @brief Print an element together with all its siblings.
 *
 * The printing starts at the first sibling: the first child of the parent, or,
 * for an element without a parent, the element found by following the prev
 * pointers.
 *
 * @param[in] out Output handler.
 * @param[in] e Any of the siblings to print.
 * @param[in] options Print options passed to dump_elem().
 * @return Sum of the dump_elem() return values.
 */
static int
dump_siblings(struct lyout *out, const struct lyxml_elem *e, int options)
{
    const struct lyxml_elem *first, *sibling, *following;
    int total = 0;

    if (e->parent) {
        first = e->parent->child;
    } else {
        /* go back while the previous element still has a next pointer set */
        for (first = e; first->prev && first->prev->next; first = first->prev) {
        }
    }

    LY_TREE_FOR_SAFE(first, following, sibling) {
        /* the element with no following sibling is the last one */
        total += dump_elem(out, sibling, 0, options, !following);
    }

    return total;
}

/**
 * @brief Print an XML element (tree) into a stdio stream.
 *
 * @param[in] stream Stream to print to.
 * @param[in] elem Element to print; with #LYXML_PRINT_SIBLINGS in @p options
 * its siblings are printed as well.
 * @param[in] options Print options.
 * @return Value returned by the used dump function, 0 when @p stream or
 * @p elem is missing.
 */
API int
lyxml_print_file(FILE *stream, const struct lyxml_elem *elem, int options)
{
    struct lyout out;

    if (!elem || !stream) {
        return 0;
    }

    /* prepare the output handler writing into the stream */
    memset(&out, 0, sizeof out);
    out.method.f = stream;
    out.type = LYOUT_STREAM;

    return (options & LYXML_PRINT_SIBLINGS)
            ? dump_siblings(&out, elem, options)
            : dump_elem(&out, elem, 0, options, 1);
}

/**
 * @brief Print an XML element (tree) into a file descriptor.
 *
 * @param[in] fd File descriptor to print to, must not be negative.
 * @param[in] elem Element to print; with #LYXML_PRINT_SIBLINGS in @p options
 * its siblings are printed as well.
 * @param[in] options Print options.
 * @return Value returned by the used dump function, 0 when @p fd is negative
 * or @p elem is missing.
 */
API int
lyxml_print_fd(int fd, const struct lyxml_elem *elem, int options)
{
    struct lyout out;

    if (!elem || fd < 0) {
        return 0;
    }

    /* prepare the output handler writing into the descriptor */
    memset(&out, 0, sizeof out);
    out.method.fd = fd;
    out.type = LYOUT_FD;

    return (options & LYXML_PRINT_SIBLINGS)
            ? dump_siblings(&out, elem, options)
            : dump_elem(&out, elem, 0, options, 1);
}

/**
 * @brief Print an XML element (tree) into a memory buffer.
 *
 * @param[out] strp Set to the buffer of the memory output handler after the
 * printing. It is left untouched when an argument is missing.
 * NOTE(review): the buffer is presumably dynamically allocated and supposed to
 * be freed by the caller - confirm against the memory output implementation.
 * @param[in] elem Element to print; with #LYXML_PRINT_SIBLINGS in @p options
 * its siblings are printed as well.
 * @param[in] options Print options.
 * @return Value returned by the used dump function, 0 when @p strp or
 * @p elem is missing.
 */
API int
lyxml_print_mem(char **strp, const struct lyxml_elem *elem, int options)
{
    struct lyout out;
    int printed;

    if (!elem || !strp) {
        return 0;
    }

    /* prepare the output handler collecting the data in memory */
    memset(&out, 0, sizeof out);
    out.type = LYOUT_MEMORY;

    printed = (options & LYXML_PRINT_SIBLINGS)
            ? dump_siblings(&out, elem, options)
            : dump_elem(&out, elem, 0, options, 1);

    /* hand the collected data over */
    *strp = out.method.mem.buf;

    return printed;
}

/**
 * @brief Print an XML element (tree) using a caller's write callback.
 *
 * @param[in] writeclb Callback stored in the output handler together with
 * @p arg.
 * @param[in] arg Caller's data stored in the output handler for the callback.
 * @param[in] elem Element to print; with #LYXML_PRINT_SIBLINGS in @p options
 * its siblings are printed as well.
 * @param[in] options Print options.
 * @return Value returned by the used dump function, 0 when @p writeclb or
 * @p elem is missing.
 */
API int
lyxml_print_clb(ssize_t (*writeclb)(void *arg, const void *buf, size_t count), void *arg, const struct lyxml_elem *elem, int options)
{
    struct lyout out;

    if (!elem || !writeclb) {
        return 0;
    }

    /* prepare the output handler passing the data to the callback */
    memset(&out, 0, sizeof out);
    out.method.clb.arg = arg;
    out.method.clb.f = writeclb;
    out.type = LYOUT_CALLBACK;

    return (options & LYXML_PRINT_SIBLINGS)
            ? dump_siblings(&out, elem, options)
            : dump_elem(&out, elem, 0, options, 1);
}