Blob Blame History Raw
From 83a95bd8c8561875b948cadd417c653dbe7ef2e2 Mon Sep 17 00:00:00 2001
From: Yuliy Pisetsky <ypisetsky@fb.com>
Date: Thu, 01 Jan 2015 23:36:55 +0000
Subject: grep -F: fix a heap buffer (read) overrun

grep's read buffer is often filled to its full size, except when
reading the final buffer of a file.  In that case, the number of
bytes read may be far less than the size of the buffer.  However, for
certain unusual pattern/text combinations, grep -F would mistakenly
examine bytes in that uninitialized region of memory when searching
for a match.  With carefully chosen inputs, one can cause grep -F to
read beyond the end of that buffer altogether.  This problem arose via
commit v2.18-90-g73893ff with the introduction of a more efficient
heuristic using what is now the memchr_kwset function. The use of
that function in bmexec_trans could leave TP much larger than EP,
and the subsequent call to bm_delta2_search would mistakenly access
beyond end of the main input read buffer.

* src/kwset.c (bmexec_trans): When TP reaches or exceeds EP,
do not call bm_delta2_search.
* tests/kwset-abuse: New file.
* tests/Makefile.am (TESTS): Add it.
* THANKS.in: Update.
* NEWS (Bug fixes): Mention it.

Prior to this patch, this command would trigger a UMR:

  printf %0360db 0 | valgrind src/grep -F $(printf %019dXb 0)

  Use of uninitialised value of size 8
     at 0x4142BE: bmexec_trans (kwset.c:657)
     by 0x4143CA: bmexec (kwset.c:678)
     by 0x414973: kwsexec (kwset.c:848)
     by 0x414DC4: Fexecute (kwsearch.c:128)
     by 0x404E2E: grepbuf (grep.c:1238)
     by 0x4054BF: grep (grep.c:1417)
     by 0x405CEB: grepdesc (grep.c:1645)
     by 0x405EC1: grep_command_line_arg (grep.c:1692)
     by 0x4077D4: main (grep.c:2570)

See the accompanying test for how to trigger the heap buffer overrun.

Thanks to Nima Aghdaii for testing and finding numerous
ways to break early iterations of this patch.
---
--- a/THANKS.in
+++ b/THANKS.in
@@ -62,6 +62,7 @@ Michael Aichlmayr                   mikla@nx.com
 Miles Bader                         miles@ccs.mt.nec.co.jp
 Mirraz Mirraz                       mirraz1@rambler.ru
 Nelson H. F. Beebe                  beebe@math.utah.edu
+Nima Aghdaii                        naghdaii@fb.com
 Olaf Kirch                          okir@ns.lst.de
 Paul Kimoto                         kimoto@spacenet.tn.cornell.edu
 Péter Radics                        mitchnull@gmail.com
diff --git a/src/kwset.c b/src/kwset.c
index 6d21893..998dbfe 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -643,6 +643,8 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size)
                     if (! tp)
                       return -1;
                     tp++;
+                    if (ep <= tp)
+                      break;
                   }
               }
           }
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 217a731..2f69835 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -72,6 +72,7 @@ TESTS =						\
   inconsistent-range				\
   invalid-multibyte-infloop			\
   khadafy					\
+  kwset-abuse					\
   long-line-vs-2GiB-read			\
   match-lines					\
   max-count-overread				\
diff --git a/tests/Makefile.in b/tests/Makefile.in
index e40a070..9ecafe7 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -1376,6 +1376,7 @@ TESTS = \
   inconsistent-range				\
   invalid-multibyte-infloop			\
   khadafy					\
+  kwset-abuse					\
   long-line-vs-2GiB-read			\
   match-lines					\
   max-count-overread				\
@@ -2030,6 +2031,13 @@
 	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
+kwset-abuse.log: kwset-abuse
+	@p='kwset-abuse'; \
+	b='kwset-abuse'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
 long-line-vs-2GiB-read.log: long-line-vs-2GiB-read
 	@p='long-line-vs-2GiB-read'; \
diff --git a/tests/kwset-abuse b/tests/kwset-abuse
new file mode 100755
index 0000000..6d8ec0c
--- a/dev/null
+++ b/tests/kwset-abuse
@@ -0,0 +1,32 @@
+#! /bin/sh
+# Evoke a segfault in a hard-to-reach code path of kwset.c.
+# This bug affected grep versions 2.19 through 2.21.
+#
+# Copyright (C) 2015 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+fail=0
+
+# This test case chooses a haystack of size 260,000, since prodding
+# with gdb showed a reallocation slightly larger than that in fillbuf.
+# To reach the buggy code, the needle must have length < 1/11 that of
+# the haystack, and 10,000 is a nice round number that fits the bill.
+printf '%0260000dXy\n' 0 | grep -F $(printf %010000dy 0)
+
+test $? = 1 || fail=1
+
+Exit $fail
--
cgit v0.9.0.2