Tim Waugh a8485b
--- grep-2.5.1a/src/search.c.w	2006-02-20 14:27:27.000000000 +0000
Tim Waugh a8485b
+++ grep-2.5.1a/src/search.c	2006-02-20 14:32:07.000000000 +0000
Tim Waugh a8485b
@@ -507,10 +507,114 @@
Tim Waugh f416c4
 	      if (match_words)
Tim Waugh f416c4
 		while (start >= 0)
Tim Waugh f416c4
 		  {
Tim Waugh f416c4
-		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
Tim Waugh f416c4
-			&& (len == end - beg - 1
Tim Waugh f416c4
-			    || !WCHAR ((unsigned char) beg[start + len])))
Tim Waugh f416c4
-		      goto success_in_beg_and_end;
Tim Waugh f416c4
+		    int lword_match = 0;
Tim Waugh f416c4
+		    if (start == 0)
Tim Waugh f416c4
+		      lword_match = 1;
Tim Waugh f416c4
+		    else
Tim Waugh f416c4
+		      {
Tim Waugh f416c4
+			assert (start > 0);
Tim Waugh f416c4
+#ifdef MBS_SUPPORT
Tim Waugh f416c4
+			if (mb_cur_max > 1)
Tim Waugh f416c4
+			  {
Tim Waugh f416c4
+			    const char *s;
Tim Waugh a8485b
+			    size_t mr;
Tim Waugh f416c4
+			    wchar_t pwc;
Tim Waugh f416c4
+
Tim Waugh a8485b
+			    /* Locate the start of the multibyte character
Tim Waugh a8485b
+			       before the match position (== beg + start). */
Tim Waugh f416c4
+			    if (using_utf8)
Tim Waugh f416c4
+			      {
Tim Waugh a8485b
+				/* UTF-8 is a special case: scan backwards
Tim Waugh a8485b
+				   until we find a 7-bit character or a
Tim Waugh a8485b
+				   lead byte. */
Tim Waugh f416c4
+				s = beg + start - 1;
Tim Waugh f416c4
+				while (s > buf
Tim Waugh f416c4
+				       && (unsigned char) *s >= 0x80
Tim Waugh f416c4
+				       && (unsigned char) *s <= 0xbf)
Tim Waugh f416c4
+				  --s;
Tim Waugh f416c4
+			      }
Tim Waugh f416c4
+			    else
Tim Waugh a8485b
+			      {
Tim Waugh a8485b
+				/* Scan forwards to find the start of the
Tim Waugh a8485b
+				   last complete character before the
Tim Waugh a8485b
+				   match position.  */
Tim Waugh a8485b
+				size_t bytes_left = start - 1;
Tim Waugh a8485b
+				s = beg;
Tim Waugh a8485b
+				while (bytes_left > 0)
Tim Waugh a8485b
+				  {
Tim Waugh a8485b
+				    mr = mbrlen (s, bytes_left, &mbs);
Tim Waugh a8485b
+				    if (mr == (size_t) -1 || mr == 0)
Tim Waugh a8485b
+				      {
Tim Waugh a8485b
+					memset (&mbs, '\0', sizeof (mbs));
Tim Waugh a8485b
+					s++;
Tim Waugh a8485b
+					bytes_left--;
Tim Waugh a8485b
+					continue;
Tim Waugh a8485b
+				      }
Tim Waugh a8485b
+				    if (mr == (size_t) -2)
Tim Waugh a8485b
+				      {
Tim Waugh a8485b
+					memset (&mbs, '\0', sizeof (mbs));
Tim Waugh a8485b
+					break;
Tim Waugh a8485b
+				      }
Tim Waugh a8485b
+				    s += mr;
Tim Waugh a8485b
+				    bytes_left -= mr;
Tim Waugh a8485b
+				  }
Tim Waugh a8485b
+			      }
Tim Waugh a8485b
+			    mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
Tim Waugh a8485b
+			    if (mr == (size_t) -2 || mr == (size_t) -1 ||
Tim Waugh a8485b
+				mr == 0)
Tim Waugh f416c4
+			      {
Tim Waugh f416c4
+				memset (&mbs, '\0', sizeof (mbstate_t));
Tim Waugh f416c4
+				lword_match = 1;
Tim Waugh f416c4
+			      }
Tim Waugh f416c4
+			    else if (!(iswalnum (pwc) || pwc == L'_')
Tim Waugh a8485b
+				     && mr == beg + start - s)
Tim Waugh f416c4
+			      lword_match = 1;
Tim Waugh f416c4
+			  }
Tim Waugh f416c4
+			else
Tim Waugh f416c4
+#endif /* MBS_SUPPORT */
Tim Waugh f416c4
+			if (!WCHAR ((unsigned char) beg[start - 1]))
Tim Waugh f416c4
+			  lword_match = 1;
Tim Waugh f416c4
+		      }
Tim Waugh f416c4
+
Tim Waugh f416c4
+		    if (lword_match)
Tim Waugh f416c4
+		      {
Tim Waugh f416c4
+			int rword_match = 0;
Tim Waugh f416c4
+			if (start + len == end - beg - 1)
Tim Waugh f416c4
+			  rword_match = 1;
Tim Waugh f416c4
+			else
Tim Waugh f416c4
+			  {
Tim Waugh f416c4
+#ifdef MBS_SUPPORT
Tim Waugh f416c4
+			    if (mb_cur_max > 1)
Tim Waugh f416c4
+			      {
Tim Waugh f416c4
+				wchar_t nwc;
Tim Waugh f416c4
+				int mr;
Tim Waugh f416c4
+
Tim Waugh f416c4
+				mr = mbtowc (&nwc, beg + start + len,
Tim Waugh f416c4
+					     end - beg - start - len - 1);
Tim Waugh f416c4
+				if (mr <= 0)
Tim Waugh f416c4
+				  {
Tim Waugh f416c4
+				    memset (&mbs, '\0', sizeof (mbstate_t));
Tim Waugh f416c4
+				    rword_match = 1;
Tim Waugh f416c4
+				  }
Tim Waugh f416c4
+				else if (!iswalnum (nwc) && nwc != L'_')
Tim Waugh f416c4
+				  rword_match = 1;
Tim Waugh f416c4
+			      }
Tim Waugh f416c4
+			    else
Tim Waugh f416c4
+#endif /* MBS_SUPPORT */
Tim Waugh f416c4
+			    if (!WCHAR ((unsigned char) beg[start + len]))
Tim Waugh f416c4
+			      rword_match = 1;
Tim Waugh f416c4
+			  }
Tim Waugh f416c4
+
Tim Waugh f416c4
+			if (rword_match)
Tim Waugh f416c4
+			  {
Tim Waugh f416c4
+			    if (!exact)
Tim Waugh f416c4
+			      /* Returns the whole line. */
Tim Waugh f416c4
+			      goto success_in_beg_and_end;
Tim Waugh f416c4
+			    else
Tim Waugh f416c4
+			      /* Returns just this word match. */
Tim Waugh f416c4
+			      goto success_in_start_and_len;
Tim Waugh f416c4
+			  }
Tim Waugh f416c4
+		      }
Tim Waugh f416c4
 		    if (len > 0)
Tim Waugh f416c4
 		      {
Tim Waugh f416c4
 			/* Try a shorter length anchored at the same place. */