Index: bogofilter/src/lexer.c
===================================================================
--- bogofilter/src/lexer.c (revision 7029)
+++ bogofilter/src/lexer.c (revision 7030)
@@ -329,7 +329,7 @@
         count += cnt;
 
         /* Note: some malformed messages can cause xfgetsl() to report
-        ** "Invalid buffer size, exiting." ** and then abort. This
+        ** "Invalid buffer size, exiting." and then abort. This
         ** can happen when the parser is in html mode and there's a
         ** leading '<' but no closing '>'.
         **
@@ -343,9 +343,12 @@
 
         if (count >= MAX_TOKEN_LEN * 2 &&
             long_token(buff.t.u.text, (uint) count)) {
-            uint start = buff.t.leng - count;
-            uint length = count - max_token_len;
-            buff_shift(&buff, start, length);
+            /* Make sure not to shift bytes outside the buffer */
+            if (buff.t.leng >= (uint) count) {
+                uint start = buff.t.leng - count;
+                uint length = count - max_token_len;
+                buff_shift(&buff, start, length);
+            }
             count = buff.t.leng;
         }
         else
Index: bogofilter/NEWS
===================================================================
--- bogofilter/NEWS (revision 7029)
+++ bogofilter/NEWS (revision 7030)
@@ -15,6 +15,14 @@
 
 -------------------------------------------------------------------------------
 
+    2015-02-28
+
+    * Fix the lexer to not try to delete parts from HTML tokens if it is
+      reading garbage (for instance, binary files misdeclared as HTML).
+      This was exposed on Fedora 20 and 21 but not Ubuntu 14.04 (x86_64),
+      and is possibly related to its newer flex 2.5.37 that may have
+      changed the way it uses yyinput() a bit. Reported by Matt Garretson.
+
     2015-02-25
 
     * Fix the lexer to handle MIME multipart messages properly when the