/**
* @file enc_csv.c
* Encoder for CSV format. Note: CEE currently think about what a
* CEE-compliant CSV format may look like. As such, the format of
* this output will most probably change once the final decision
* has been made. At this time (2010-12), I do NOT even try to
* stay inline with the discussion.
*
* This file contains code from all related objects that is required in
* order to encode this format. The core idea of putting all of this into
* a single file is that this makes it very straightforward to write
* encoders for different encodings, as all is in one place.
*
*/
/*
* liblognorm - a fast samples-based log normalization library
* Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH.
*
* Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013
*
* This file is part of liblognorm.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution.
*/
#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <string.h>
#include <libestr.h>
#include "lognorm.h"
#include "internal.h"
#include "enc.h"
static char hexdigit[16] =
{'0', '1', '2', '3', '4', '5', '6', '7', '8',
'9', 'A', 'B', 'C', 'D', 'E', 'F' };
/* TODO: CSV encoding for Unicode characters is as of RFC4627 not fully
* supported. The algorithm is that we must build the wide character from
* UTF-8 (if char > 127) and build the full 4-octet Unicode character out
* of it. Then, this needs to be encoded. Currently, we work on a
* byte-by-byte basis, which simply is incorrect.
* rgerhards, 2010-11-09
*/
static int
ln_addValue_CSV(const char *buf, es_str_t **str)
{
int r;
unsigned char c;
es_size_t i;
char numbuf[4];
int j;
assert(str != NULL);
assert(*str != NULL);
assert(buf != NULL);
for(i = 0; i < strlen(buf); i++) {
c = buf[i];
if((c >= 0x23 && c <= 0x5b)
|| (c >= 0x5d /* && c <= 0x10FFFF*/)
|| c == 0x20 || c == 0x21) {
/* no need to escape */
es_addChar(str, c);
} else {
/* we must escape, try RFC4627-defined special sequences first */
switch(c) {
case '\0':
es_addBuf(str, "\\u0000", 6);
break;
case '\"':
es_addBuf(str, "\\\"", 2);
break;
case '\\':
es_addBuf(str, "\\\\", 2);
break;
case '\010':
es_addBuf(str, "\\b", 2);
break;
case '\014':
es_addBuf(str, "\\f", 2);
break;
case '\n':
es_addBuf(str, "\\n", 2);
break;
case '\r':
es_addBuf(str, "\\r", 2);
break;
case '\t':
es_addBuf(str, "\\t", 2);
break;
default:
/* TODO : proper Unicode encoding (see header comment) */
for(j = 0 ; j < 4 ; ++j) {
numbuf[3-j] = hexdigit[c % 16];
c = c / 16;
}
es_addBuf(str, "\\u", 2);
es_addBuf(str, numbuf, 4);
break;
}
}
}
r = 0;
return r;
}
static int
ln_addField_CSV(struct json_object *field, es_str_t **str)
{
int r, i;
struct json_object *obj;
int needComma = 0;
const char *value;
assert(field != NULL);
assert(str != NULL);
assert(*str != NULL);
switch(json_object_get_type(field)) {
case json_type_array:
CHKR(es_addChar(str, '['));
for (i = json_object_array_length(field) - 1; i >= 0; i--) {
if(needComma)
es_addChar(str, ',');
else
needComma = 1;
CHKN(obj = json_object_array_get_idx(field, i));
CHKN(value = json_object_get_string(obj));
CHKR(ln_addValue_CSV(value, str));
}
CHKR(es_addChar(str, ']'));
break;
case json_type_string:
case json_type_int:
CHKN(value = json_object_get_string(field));
CHKR(ln_addValue_CSV(value, str));
break;
case json_type_null:
case json_type_boolean:
case json_type_double:
case json_type_object:
CHKR(es_addBuf(str, "***unsupported type***", sizeof("***unsupported type***")-1));
break;
default:
CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1));
}
r = 0;
done:
return r;
}
int
ln_fmtEventToCSV(struct json_object *json, es_str_t **str, es_str_t *extraData)
{
int r = -1;
int needComma = 0;
struct json_object *field;
char *namelist = NULL, *name, *nn;
assert(json != NULL);
assert(json_object_is_type(json, json_type_object));
if((*str = es_newStr(256)) == NULL)
goto done;
if(extraData == NULL)
goto done;
CHKN(namelist = es_str2cstr(extraData, NULL));
for (name = namelist; name != NULL; name = nn) {
for (nn = name; *nn != '\0' && *nn != ',' && *nn != ' '; nn++)
{ /* do nothing */ }
if (*nn == '\0') {
nn = NULL;
} else {
*nn = '\0';
nn++;
}
json_object_object_get_ex(json, name, &field);
if (needComma) {
CHKR(es_addChar(str, ','));
} else {
needComma = 1;
}
if (field != NULL) {
CHKR(es_addChar(str, '"'));
ln_addField_CSV(field, str);
CHKR(es_addChar(str, '"'));
}
}
r = 0;
done:
if (namelist != NULL)
free(namelist);
return r;
}