|
Packit |
cdaae3 |
/*
|
|
Packit |
cdaae3 |
Copyright 2016-2017 David Anderson. All rights reserved.
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
This program is free software; you can redistribute it and/or modify it
|
|
Packit |
cdaae3 |
under the terms of version 2 of the GNU General Public License as
|
|
Packit |
cdaae3 |
published by the Free Software Foundation.
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
This program is distributed in the hope that it would be useful, but
|
|
Packit |
cdaae3 |
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
cdaae3 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
Further, this software is distributed without any warranty that it is
|
|
Packit |
cdaae3 |
free of the rightful claim of any third person regarding infringement
|
|
Packit |
cdaae3 |
or the like. Any license provided herein, whether implied or
|
|
Packit |
cdaae3 |
otherwise, applies only to this software file. Patent licenses, if
|
|
Packit |
cdaae3 |
any, provided herein do not apply to combinations of this program with
|
|
Packit |
cdaae3 |
other software, or any other product whatsoever.
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
You should have received a copy of the GNU General Public License along
|
|
Packit |
cdaae3 |
with this program; if not, write the Free Software Foundation, Inc., 51
|
|
Packit |
cdaae3 |
Franklin Street - Fifth Floor, Boston MA 02110-1301, USA.
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
*/
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
#include "globals.h"
|
|
Packit |
cdaae3 |
#include "naming.h"
|
|
Packit |
cdaae3 |
#include "dwconf.h"
|
|
Packit |
cdaae3 |
#include "esb.h"
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
/* This does a uri-style conversion of control characters.
|
|
Packit |
cdaae3 |
So SOH prints as %01 for example.
|
|
Packit |
cdaae3 |
Which stops corrupted or crafted strings from
|
|
Packit |
cdaae3 |
doing things to the terminal the string is routed to.
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
We do not translate an input % to %% (as in real uri)
|
|
Packit |
cdaae3 |
as that would be a bit confusing for most readers.
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
The conversion makes it possible to print UTF-8 strings
|
|
Packit |
cdaae3 |
reproducibly, sort of (not showing the
|
|
Packit |
cdaae3 |
real glyph!).
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
Only call this in a printf or sprintf, and
|
|
Packit |
cdaae3 |
only call it once in any single printf/sprintf.
|
|
Packit |
cdaae3 |
Otherwise you will get bogus results and confusion. */
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
/* ASCII control codes:
|
|
Packit |
cdaae3 |
We leave newline as is, NUL is end of string,
|
|
Packit |
cdaae3 |
the others are translated.
|
|
Packit |
cdaae3 |
NUL Null 0 00 Ctrl-@ ^@
|
|
Packit |
cdaae3 |
SOH Start of heading 1 01 Alt-1 Ctrl-A ^A
|
|
Packit |
cdaae3 |
STX Start of text 2 02 Alt-2 Ctrl-B ^B
|
|
Packit |
cdaae3 |
ETX End of text 3 03 Alt-3 Ctrl-C ^C
|
|
Packit |
cdaae3 |
EOT End of transmission 4 04 Alt-4 Ctrl-D ^D
|
|
Packit |
cdaae3 |
ENQ Enquiry 5 05 Alt-5 Ctrl-E ^E
|
|
Packit |
cdaae3 |
ACK Acknowledge 6 06 Alt-6 Ctrl-F ^F
|
|
Packit |
cdaae3 |
BEL Bell 7 07 Alt-7 Ctrl-G ^G
|
|
Packit |
cdaae3 |
BS Backspace 8 08 Alt-8 Ctrl-H ^H
|
|
Packit |
cdaae3 |
HT Horizontal tab 9 09 Alt-9 Ctrl-I ^I
|
|
Packit |
cdaae3 |
LF Line feed 10 0A Alt-10 Ctrl-J ^J
|
|
Packit |
cdaae3 |
VT Vertical tab 11 0B Alt-11 Ctrl-K ^K
|
|
Packit |
cdaae3 |
FF Form feed 12 0C Alt-12 Ctrl-L ^L
|
|
Packit |
cdaae3 |
CR Carriage return 13 0D Alt-13 Ctrl-M ^M
|
|
Packit |
cdaae3 |
SO Shift out 14 0E Alt-14 Ctrl-N ^N
|
|
Packit |
cdaae3 |
SI Shift in 15 0F Alt-15 Ctrl-O ^O
|
|
Packit |
cdaae3 |
DLE Data line escape 16 10 Alt-16 Ctrl-P ^P
|
|
Packit |
cdaae3 |
DC1 Device control 1 17 11 Alt-17 Ctrl-Q ^Q
|
|
Packit |
cdaae3 |
DC2 Device control 2 18 12 Alt-18 Ctrl-R ^R
|
|
Packit |
cdaae3 |
DC3 Device control 3 19 13 Alt-19 Ctrl-S ^S
|
|
Packit |
cdaae3 |
DC4 Device control 4 20 14 Alt-20 Ctrl-T ^T
|
|
Packit |
cdaae3 |
NAK Negative acknowledge 21 15 Alt-21 Ctrl-U ^U
|
|
Packit |
cdaae3 |
SYN Synchronous idle 22 16 Alt-22 Ctrl-V ^V
|
|
Packit |
cdaae3 |
ETB End transmission block 23 17 Alt-23 Ctrl-W ^W
|
|
Packit |
cdaae3 |
CAN Cancel 24 18 Alt-24 Ctrl-X ^X
|
|
Packit |
cdaae3 |
EM End of medium 25 19 Alt-25 Ctrl-Y ^Y
|
|
Packit |
cdaae3 |
SUB Substitute 26 1A Alt-26 Ctrl-Z ^Z
|
|
Packit |
cdaae3 |
ESC Escape 27 1B Alt-27 Ctrl-[ ^[
|
|
Packit |
cdaae3 |
FS File separator 28 1C Alt-28 Ctrl-\ ^\
|
|
Packit |
cdaae3 |
GS Group separator 29 1D Alt-29 Ctrl-] ^]
|
|
Packit |
cdaae3 |
RS Record separator 30 1E Alt-30 Ctrl-^ ^^
|
|
Packit |
cdaae3 |
US Unit separator 31 1F Alt-31 Ctrl-_ ^_
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
In addition, characters decimal 141, 157, 127,128, 129
|
|
Packit |
cdaae3 |
143,144,157
|
|
Packit |
cdaae3 |
appear to be questionable too.
|
|
Packit |
cdaae3 |
Not in iso-8859-1 nor in html character entities list.
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
We translate all strings with a % to do sanitizing and
|
|
Packit |
cdaae3 |
we change a literal ASCII '%' char to %25 so readers
|
|
Packit |
cdaae3 |
know any % is a sanitized char. We could double up
|
|
Packit |
cdaae3 |
a % into %% on output, but switching to %25 is simpler
|
|
Packit |
cdaae3 |
for readers and prevents ambiguity.
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
Since we do not handle utf-8 properly nor detect it
|
|
Packit |
cdaae3 |
we turn all non-ASCII to %xx below.
|
|
Packit |
cdaae3 |
*/
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
static struct esb_s localesb = {0,0,0};
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
#define FALSE 0
|
|
Packit |
cdaae3 |
#define TRUE 1
|
|
Packit |
cdaae3 |
boolean no_sanitize_string_garbage = FALSE;
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
/*  Formats the low eight bits of c as the three
    character escape "%xx" (two lowercase hex digits)
    and returns a pointer to a function-local static
    buffer.

    Every call overwrites the previous result. That is
    acceptable because the function is only callable
    from this file and each caller copies the text
    out of the static buffer immediately. */
static const char *
as_number(int c)
{
    static const char hexdigits[] = "0123456789abcdef";
    static char tmpbuf[4];
    unsigned low8 = (unsigned)c & 0xffu;

    tmpbuf[0] = '%';
    tmpbuf[1] = hexdigits[low8 >> 4];
    tmpbuf[2] = hexdigits[low8 & 0xfu];
    tmpbuf[3] = '\0';
    return tmpbuf;
}
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
/*  do_sanity_insert() and no_questionable_chars()
    absolutely must have the same idea of
    questionable characters. Be Careful.

    Appends a sanitized copy of the NUL-terminated
    string s to mesb, one input byte at a time:

    '%'                     becomes "%25", so every '%'
                            in the output introduces
                            an escape.
    0x20 through 0x7e       copied unchanged.
    LF (0x0A), HT (0x09)    copied unchanged, matching
                            what no_questionable_chars()
                            accepts as safe.
    CR (0x0D)               copied unchanged only when
                            built with _WIN32.
    anything else           (other control characters,
                            DEL, and all bytes >= 0x80,
                            i.e. ISO-8859 or UTF-8 data
                            we do not handle well)
                            written as "%xx".

    The '%' test must come before the printable-range
    test: '%' is 0x25 and would otherwise be copied
    through as an ordinary printable character. */
static void
do_sanity_insert( const char *s,struct esb_s *mesb)
{
    const char *cp = s;

    for ( ; *cp; cp++) {
        /*  Mask so a negative plain char cannot
            produce a huge unsigned value. */
        unsigned c = *cp & 0xff;

        if (c == '%') {
            /* %xx for this too. Simple and unambiguous */
            esb_append(mesb,as_number((int)c));
            continue;
        }
        if (c >= 0x20 && c <= 0x7e) {
            /* Usual case, ASCII printable characters. */
            esb_appendn(mesb,cp,1);
            continue;
        }
        if (c == 0x0A || c == 0x09) {
            /*  Newline and tab are left as is, exactly
                as no_questionable_chars() treats them. */
            esb_appendn(mesb,cp,1);
            continue;
        }
#ifdef _WIN32
        if (c == 0x0D) {
            esb_appendn(mesb,cp,1);
            continue;
        }
#endif /* _WIN32 */
        /*  Whatever reaches here is a remaining control
            character (< 0x20), DEL (0x7f) or a byte
            >= 0x80. */
        esb_append(mesb,as_number((int)c));
    }
}
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
/*  This routine improves overall dwarfdump
    run times a lot by separating strings
    that might print badly from strings that
    will print fine.
    In one large test case it reduces run time
    from 140 seconds to 13 seconds.

    Returns TRUE when every byte of the NUL-terminated
    string s can be printed as is: ASCII 0x20 through
    0x7e other than '%', plus LF and HT (and CR when
    built with _WIN32). Returns FALSE as soon as any
    other byte is seen. An empty string returns TRUE.

    '%' is always reported as questionable, so it has
    to be tested before the printable-range test:
    '%' is 0x25 and would otherwise be accepted as an
    ordinary printable character. */
static int
no_questionable_chars(const char *s) {
    const char *cp = s;

    for ( ; *cp; cp++) {
        /*  Mask so a negative plain char cannot
            produce a huge unsigned value. */
        unsigned c = *cp & 0xff;

        if (c == '%') {
            /* Always sanitize a % ASCII char. */
            return FALSE;
        }
        if (c >= 0x20 && c <= 0x7e) {
            /* Usual case, ASCII printable characters */
            continue;
        }
        if (c == 0x0A || c == 0x09) {
            continue;
        }
#ifdef _WIN32
        if (c == 0x0D) {
            continue;
        }
#endif /* _WIN32 */
        /*  What is left is a control character, DEL,
            or iso-8859 / UTF-8 data (>= 0x80), none of
            which we deal with properly in dwarfdump. */
        return FALSE;
    }
    return TRUE;
}
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
void
|
|
Packit |
cdaae3 |
sanitized_string_destructor(void)
|
|
Packit |
cdaae3 |
{
|
|
Packit |
cdaae3 |
esb_destructor(&localesb);
|
|
Packit |
cdaae3 |
}
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
const char *
|
|
Packit |
cdaae3 |
sanitized(const char *s)
|
|
Packit |
cdaae3 |
{
|
|
Packit |
cdaae3 |
const char *sout = 0;
|
|
Packit |
cdaae3 |
|
|
Packit |
cdaae3 |
if (no_sanitize_string_garbage) {
|
|
Packit |
cdaae3 |
return s;
|
|
Packit |
cdaae3 |
}
|
|
Packit |
cdaae3 |
if (no_questionable_chars(s)) {
|
|
Packit |
cdaae3 |
/* The original string is safe as is. */
|
|
Packit |
cdaae3 |
return s;
|
|
Packit |
cdaae3 |
}
|
|
Packit |
cdaae3 |
/* Using esb_destructor is quite expensive in cpu time
|
|
Packit |
cdaae3 |
when we build the next sanitized string
|
|
Packit |
cdaae3 |
so we just empty the localesb.
|
|
Packit |
cdaae3 |
One reason it's expensive is that we do the appends
|
|
Packit |
cdaae3 |
in such small batches in do_sanity-insert().
|
|
Packit |
cdaae3 |
*/
|
|
Packit |
cdaae3 |
esb_empty_string(&localesb);
|
|
Packit |
cdaae3 |
do_sanity_insert(s,&localesb);
|
|
Packit |
cdaae3 |
sout = esb_get_string(&localesb);
|
|
Packit |
cdaae3 |
return sout;
|
|
Packit |
cdaae3 |
}
|