Adrian Reber 80a818
Index: bogofilter/src/lexer.c
Adrian Reber 80a818
===================================================================
Adrian Reber 80a818
--- bogofilter/src/lexer.c	(revision 7029)
Adrian Reber 80a818
+++ bogofilter/src/lexer.c	(revision 7030)
Adrian Reber 80a818
@@ -329,7 +329,7 @@
Adrian Reber 80a818
 	count += cnt;
Adrian Reber 80a818
 
Adrian Reber 80a818
 	/* Note: some malformed messages can cause xfgetsl() to report
Adrian Reber 80a818
-	** "Invalid buffer size, exiting."  ** and then abort.  This
Adrian Reber 80a818
+	** "Invalid buffer size, exiting."  and then abort.  This
Adrian Reber 80a818
 	** can happen when the parser is in html mode and there's a
Adrian Reber 80a818
 	** leading '<' but no closing '>'.
Adrian Reber 80a818
 	**
Adrian Reber 80a818
@@ -343,9 +343,12 @@
Adrian Reber 80a818
 
Adrian Reber 80a818
 	if (count >= MAX_TOKEN_LEN * 2 && 
Adrian Reber 80a818
 	    long_token(buff.t.u.text, (uint) count)) {
Adrian Reber 80a818
-	    uint start = buff.t.leng - count;
Adrian Reber 80a818
-	    uint length = count - max_token_len;
Adrian Reber 80a818
-	    buff_shift(&buff, start, length);
Adrian Reber 80a818
+	    /* Only shift if leng >= count; else start wraps (uint) and the shift leaves the buffer */
Adrian Reber 80a818
+	    if (buff.t.leng >= (uint) count) {
Adrian Reber 80a818
+		    uint start = buff.t.leng - count;
Adrian Reber 80a818
+		    uint length = count - max_token_len;
Adrian Reber 80a818
+		    buff_shift(&buff, start, length);
Adrian Reber 80a818
+	    }
Adrian Reber 80a818
 	    count = buff.t.leng;
Adrian Reber 80a818
 	}
Adrian Reber 80a818
 	else
Adrian Reber 80a818
Index: bogofilter/NEWS
Adrian Reber 80a818
===================================================================
Adrian Reber 80a818
--- bogofilter/NEWS	(revision 7029)
Adrian Reber 80a818
+++ bogofilter/NEWS	(revision 7030)
Adrian Reber 80a818
@@ -15,6 +15,14 @@
Adrian Reber 80a818
 
Adrian Reber 80a818
 -------------------------------------------------------------------------------
Adrian Reber 80a818
 
Adrian Reber 80a818
+	2015-02-28
Adrian Reber 80a818
+
Adrian Reber 80a818
+	* Fix the lexer to not try to delete parts from HTML tokens if it is
Adrian Reber 80a818
+	  reading garbage (for instance, binary files misdeclared as HTML).
Adrian Reber 80a818
+	  This was exposed on Fedora 20 and 21 but not Ubuntu 14.04 (x86_64),
Adrian Reber 80a818
+	  and is possibly related to Fedora's newer flex 2.5.37, which may have
Adrian Reber 80a818
+	  changed the way it uses yyinput() a bit.  Reported by Matt Garretson.
Adrian Reber 80a818
+
Adrian Reber 80a818
 	2015-02-25
Adrian Reber 80a818
 
Adrian Reber 80a818
 	* Fix the lexer to handle MIME multipart messages properly when the