Blame src/enc_csv.c

Packit 1422b7
/**
Packit 1422b7
 * @file enc_csv.c
Packit 1422b7
 * Encoder for CSV format. Note: CEE is currently thinking about what a
Packit 1422b7
 * CEE-compliant CSV format may look like. As such, the format of
Packit 1422b7
 * this output will most probably change once the final decision
Packit 1422b7
 * has been made. At this time (2010-12), I do NOT even try to
Packit 1422b7
 * stay in line with the discussion.
Packit 1422b7
 *
Packit 1422b7
 * This file contains code from all related objects that is required in
Packit 1422b7
 * order to encode this format. The core idea of putting all of this into
Packit 1422b7
 * a single file is that this makes it very straightforward to write
Packit 1422b7
 * encoders for different encodings, as all is in one place.
Packit 1422b7
 *
Packit 1422b7
 */
Packit 1422b7
/*
Packit 1422b7
 * liblognorm - a fast samples-based log normalization library
Packit 1422b7
 * Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH.
Packit 1422b7
 *
Packit 1422b7
 * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013
Packit 1422b7
 *
Packit 1422b7
 * This file is part of liblognorm.
Packit 1422b7
 *
Packit 1422b7
 * This library is free software; you can redistribute it and/or
Packit 1422b7
 * modify it under the terms of the GNU Lesser General Public
Packit 1422b7
 * License as published by the Free Software Foundation; either
Packit 1422b7
 * version 2.1 of the License, or (at your option) any later version.
Packit 1422b7
 *
Packit 1422b7
 * This library is distributed in the hope that it will be useful,
Packit 1422b7
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 1422b7
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 1422b7
 * Lesser General Public License for more details.
Packit 1422b7
 *
Packit 1422b7
 * You should have received a copy of the GNU Lesser General Public
Packit 1422b7
 * License along with this library; if not, write to the Free Software
Packit 1422b7
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Packit 1422b7
 *
Packit 1422b7
 * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution.
Packit 1422b7
 */
Packit 1422b7
#include "config.h"
Packit 1422b7
#include <stdlib.h>
Packit 1422b7
#include <stdio.h>
Packit 1422b7
#include <stdarg.h>
Packit 1422b7
#include <assert.h>
Packit 1422b7
#include <string.h>
Packit 1422b7
Packit 1422b7
#include <libestr.h>
Packit 1422b7
Packit 1422b7
#include "lognorm.h"
Packit 1422b7
#include "internal.h"
Packit 1422b7
#include "enc.h"
Packit 1422b7
Packit 1422b7
/* Upper-case hexadecimal digits, indexed by nibble value (0..15).
 * Read-only lookup table, hence const.
 */
static const char hexdigit[16] =
	{'0', '1', '2', '3', '4', '5', '6', '7', '8',
	 '9', 'A', 'B', 'C', 'D', 'E', 'F' };
Packit 1422b7
Packit 1422b7
/* TODO: CSV encoding for Unicode characters is as of RFC4627 not fully
Packit 1422b7
 * supported. The algorithm is that we must build the wide character from
Packit 1422b7
 * UTF-8 (if char > 127) and build the full 4-octet Unicode character out
Packit 1422b7
 * of it. Then, this needs to be encoded. Currently, we work on a
Packit 1422b7
 * byte-by-byte basis, which simply is incorrect.
Packit 1422b7
 * rgerhards, 2010-11-09
Packit 1422b7
 */
Packit 1422b7
static int
Packit 1422b7
ln_addValue_CSV(const char *buf, es_str_t **str)
Packit 1422b7
{
Packit 1422b7
	int r;
Packit 1422b7
	unsigned char c;
Packit 1422b7
	es_size_t i;
Packit 1422b7
	char numbuf[4];
Packit 1422b7
	int j;
Packit 1422b7
Packit 1422b7
	assert(str != NULL);
Packit 1422b7
	assert(*str != NULL);
Packit 1422b7
	assert(buf != NULL);
Packit 1422b7
Packit 1422b7
	for(i = 0; i < strlen(buf); i++) {
Packit 1422b7
		c = buf[i];
Packit 1422b7
		if((c >= 0x23 && c <= 0x5b)
Packit 1422b7
		   || (c >= 0x5d /* && c <= 0x10FFFF*/)
Packit 1422b7
		   || c == 0x20 || c == 0x21) {
Packit 1422b7
			/* no need to escape */
Packit 1422b7
			es_addChar(str, c);
Packit 1422b7
		} else {
Packit 1422b7
			/* we must escape, try RFC4627-defined special sequences first */
Packit 1422b7
			switch(c) {
Packit 1422b7
			case '\0':
Packit 1422b7
				es_addBuf(str, "\\u0000", 6);
Packit 1422b7
				break;
Packit 1422b7
			case '\"':
Packit 1422b7
				es_addBuf(str, "\\\"", 2);
Packit 1422b7
				break;
Packit 1422b7
			case '\\':
Packit 1422b7
				es_addBuf(str, "\\\\", 2);
Packit 1422b7
				break;
Packit 1422b7
			case '\010':
Packit 1422b7
				es_addBuf(str, "\\b", 2);
Packit 1422b7
				break;
Packit 1422b7
			case '\014':
Packit 1422b7
				es_addBuf(str, "\\f", 2);
Packit 1422b7
				break;
Packit 1422b7
			case '\n':
Packit 1422b7
				es_addBuf(str, "\\n", 2);
Packit 1422b7
				break;
Packit 1422b7
			case '\r':
Packit 1422b7
				es_addBuf(str, "\\r", 2);
Packit 1422b7
				break;
Packit 1422b7
			case '\t':
Packit 1422b7
				es_addBuf(str, "\\t", 2);
Packit 1422b7
				break;
Packit 1422b7
			default:
Packit 1422b7
				/* TODO : proper Unicode encoding (see header comment) */
Packit 1422b7
				for(j = 0 ; j < 4 ; ++j) {
Packit 1422b7
					numbuf[3-j] = hexdigit[c % 16];
Packit 1422b7
					c = c / 16;
Packit 1422b7
				}
Packit 1422b7
				es_addBuf(str, "\\u", 2);
Packit 1422b7
				es_addBuf(str, numbuf, 4);
Packit 1422b7
				break;
Packit 1422b7
			}
Packit 1422b7
		}
Packit 1422b7
	}
Packit 1422b7
	r = 0;
Packit 1422b7
Packit 1422b7
	return r;
Packit 1422b7
}
Packit 1422b7
Packit 1422b7
Packit 1422b7
static int
Packit 1422b7
ln_addField_CSV(struct json_object *field, es_str_t **str)
Packit 1422b7
{
Packit 1422b7
	int r, i;
Packit 1422b7
	struct json_object *obj;
Packit 1422b7
	int needComma = 0;
Packit 1422b7
	const char *value;
Packit 1422b7
	
Packit 1422b7
	assert(field != NULL);
Packit 1422b7
	assert(str != NULL);
Packit 1422b7
	assert(*str != NULL);
Packit 1422b7
Packit 1422b7
	switch(json_object_get_type(field)) {
Packit 1422b7
	case json_type_array:
Packit 1422b7
		CHKR(es_addChar(str, '['));
Packit 1422b7
		for (i = json_object_array_length(field) - 1; i >= 0; i--) {
Packit 1422b7
			if(needComma)
Packit 1422b7
				es_addChar(str, ',');
Packit 1422b7
			else
Packit 1422b7
				needComma = 1;
Packit 1422b7
			CHKN(obj = json_object_array_get_idx(field, i));
Packit 1422b7
			CHKN(value = json_object_get_string(obj));
Packit 1422b7
			CHKR(ln_addValue_CSV(value, str));
Packit 1422b7
		}
Packit 1422b7
		CHKR(es_addChar(str, ']'));
Packit 1422b7
		break;
Packit 1422b7
	case json_type_string:
Packit 1422b7
	case json_type_int:
Packit 1422b7
		CHKN(value = json_object_get_string(field));
Packit 1422b7
		CHKR(ln_addValue_CSV(value, str));
Packit 1422b7
		break;
Packit 1422b7
	case json_type_null:
Packit 1422b7
	case json_type_boolean:
Packit 1422b7
	case json_type_double:
Packit 1422b7
	case json_type_object:
Packit 1422b7
		CHKR(es_addBuf(str, "***unsupported type***", sizeof("***unsupported type***")-1));
Packit 1422b7
		break;
Packit 1422b7
	default:
Packit 1422b7
		CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1));
Packit 1422b7
	}
Packit 1422b7
Packit 1422b7
	r = 0;
Packit 1422b7
Packit 1422b7
done:
Packit 1422b7
	return r;
Packit 1422b7
}
Packit 1422b7
Packit 1422b7
Packit 1422b7
int
Packit 1422b7
ln_fmtEventToCSV(struct json_object *json, es_str_t **str, es_str_t *extraData)
Packit 1422b7
{
Packit 1422b7
	int r = -1;
Packit 1422b7
	int needComma = 0;
Packit 1422b7
	struct json_object *field;
Packit 1422b7
	char *namelist = NULL, *name, *nn;
Packit 1422b7
Packit 1422b7
	assert(json != NULL);
Packit 1422b7
	assert(json_object_is_type(json, json_type_object));
Packit 1422b7
	
Packit 1422b7
	if((*str = es_newStr(256)) == NULL)
Packit 1422b7
		goto done;
Packit 1422b7
	if(extraData == NULL)
Packit 1422b7
		goto done;
Packit 1422b7
Packit 1422b7
	CHKN(namelist = es_str2cstr(extraData, NULL));
Packit 1422b7
Packit 1422b7
	for (name = namelist; name != NULL; name = nn) {
Packit 1422b7
		for (nn = name; *nn != '\0' && *nn != ',' && *nn != ' '; nn++)
Packit 1422b7
			{ /* do nothing */ }
Packit 1422b7
		if (*nn == '\0') {
Packit 1422b7
			nn = NULL;
Packit 1422b7
		} else {
Packit 1422b7
			*nn = '\0';
Packit 1422b7
			nn++;
Packit 1422b7
		}
Packit 1422b7
		json_object_object_get_ex(json, name, &field);
Packit 1422b7
		if (needComma) {
Packit 1422b7
			CHKR(es_addChar(str, ','));
Packit 1422b7
		} else {
Packit 1422b7
			needComma = 1;
Packit 1422b7
		}
Packit 1422b7
		if (field != NULL) {
Packit 1422b7
			CHKR(es_addChar(str, '"'));
Packit 1422b7
			ln_addField_CSV(field, str);
Packit 1422b7
			CHKR(es_addChar(str, '"'));
Packit 1422b7
		}
Packit 1422b7
	}
Packit 1422b7
	r = 0;
Packit 1422b7
done:
Packit 1422b7
	if (namelist != NULL)
Packit 1422b7
		free(namelist);
Packit 1422b7
	return r;
Packit 1422b7
}