#From arnold Thu May 9 17:27:03 2002
#Return-Path:
#Received: (from arnold@localhost)
# by skeeve.com (8.11.6/8.11.6) id g49ER3K27925
# for arnold; Thu, 9 May 2002 17:27:03 +0300
#Date: Thu, 9 May 2002 17:27:03 +0300
#From: Aharon Robbins
#Message-Id: <200205091427.g49ER3K27925@skeeve.com>
#To: arnold@skeeve.com
#Subject: fixme
#X-SpamBouncer: 1.4 (10/07/01)
#X-SBRule: Pattern Match (Other Patterns) (Score: 4850)
#X-SBRule: Pattern Match (Spam Phone #) (Score: 0)
#X-SBClass: Blocked
#Status: O
#
#Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail
#From: laura@madonnaweb.com (laura fairhead)
#Newsgroups: comp.lang.awk
#Subject: bug in gawk3.1.0 regex code
#Date: Wed, 08 May 2002 23:31:40 GMT
#Organization: that'll be the daewooo :)
#Lines: 211
#Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE>
#Reply-To: laura@madonnaweb.com
#NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48)
#X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286])
#X-Newsreader: Forte Free Agent 1.21/32.243
#Xref: dfw-artgen comp.lang.awk:13059
#
#
#I believe I've just found a bug in gawk3.1.0 implementation of
#extended regular expressions. It seems to be down to the alternation
#operator; when using an end anchor '$' as a subexpression in an
#alternation and the entire matched RE is a nul-string it fails
#to match the end of string, for example;
#
#gsub(/$|2/,"x")
#print
#
#input = 12345
#expected output = 1x345x
#actual output = 1x345
#
#The start anchor '^' always works as expected;
#
#gsub(/^|2/,"x")
#print
#
#input = 12345
#expected output = x1x345
#actual output = x1x345
#
#This was with POSIX compliance enabled although that doesn't
#affect the result.
#
#I checked on gawk3.0.6 and got exactly the same results however
#gawk2.15.6 gives the expected results.
#
#I'm about to post a bug report about this into gnu.utils.bug
#but I thought I'd post it here first in case anyone has
#any input/comments/whatever ....
#
#Complete test results were as follows;
#
#input 12345
#output gsub(/regex/,"x",input)
#
#regex output
#(^) x12345
#($) 12345x
#(^)|($) x12345x
#($)|(^) x12345x
#(2) 1x345
#(^)|2 x1x345
#2|(^) x1x345
#($)|2 1x345
#2|($) 1x345
#(2)|(^) x1x345
#(^)|(2) x1x345
#(2)|($) 1x345
#($)|(2) 1x345
#.((2)|(^)) x345
#.((^)|(2)) x345
#.((2)|($)) x34x
#.(($)|(2)) x34x
#x{0}((2)|(^)) x1x345
#x{0}((^)|(2)) x1x345
#x{0}((2)|($)) 1x345
#x{0}(($)|(2)) 1x345
#x*((2)|(^)) x1x345
#x*((^)|(2)) x1x345
#x*((2)|($)) 1x345
#x*(($)|(2)) 1x345
#
#Here's the test program I used, a few of the cases use ERE {n[,[m]]}
#operators so that will have to be commented out or have a check
#added or something (should have put a conditional in I know... ;-)
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#

# Load a pristine copy of the test string into the current record so the
# next gsub() starts from unmodified input.
function fresh() {
	$0 = TESTSTR
}

# Emit one result row: the case label immediately followed by the record
# as the preceding gsub() left it.
function row(label) {
	print label $0
}

# Test driver.  Each case resets $0 to TESTSTR, replaces every match of one
# regex constant in $0 with "x", and prints the labelled result.  The cases
# combine the '^' and '$' anchors with a literal "2" through alternation.
BEGIN {
	TESTSTR = "12345"

	# Report header.
	print "input " TESTSTR
	print "output gsub(/regex/,\"x\",input)"
	print ""
	print "regex output"

	# Anchors on their own and alternated with each other.
	fresh(); gsub(/(^)/, "x"); row("(^) ")
	fresh(); gsub(/($)/, "x"); row("($) ")
	fresh(); gsub(/(^)|($)/, "x"); row("(^)|($) ")
	fresh(); gsub(/($)|(^)/, "x"); row("($)|(^) ")

	# Literal "2" alone, then alternated with an ungrouped/grouped anchor.
	fresh(); gsub(/2/, "x"); row("(2) ")
	fresh(); gsub(/(^)|2/, "x"); row("(^)|2 ")
	fresh(); gsub(/2|(^)/, "x"); row("2|(^) ")
	fresh(); gsub(/($)|2/, "x"); row("($)|2 ")
	fresh(); gsub(/2|($)/, "x"); row("2|($) ")

	# Both alternatives parenthesised.
	fresh(); gsub(/(2)|(^)/, "x"); row("(2)|(^) ")
	fresh(); gsub(/(^)|(2)/, "x"); row("(^)|(2) ")
	fresh(); gsub(/(2)|($)/, "x"); row("(2)|($) ")
	fresh(); gsub(/($)|(2)/, "x"); row("($)|(2) ")

	# Alternation preceded by a single-character wildcard.
	fresh(); gsub(/.((2)|(^))/, "x"); row(".((2)|(^)) ")
	fresh(); gsub(/.((^)|(2))/, "x"); row(".((^)|(2)) ")
	fresh(); gsub(/.((2)|($))/, "x"); row(".((2)|($)) ")
	fresh(); gsub(/.(($)|(2))/, "x"); row(".(($)|(2)) ")

	# Disabled: these cases use the ERE interval operator {n}.
	# fresh(); gsub(/x{0}((2)|(^))/, "x"); row("x{0}((2)|(^)) ")
	# fresh(); gsub(/x{0}((^)|(2))/, "x"); row("x{0}((^)|(2)) ")
	# fresh(); gsub(/x{0}((2)|($))/, "x"); row("x{0}((2)|($)) ")
	# fresh(); gsub(/x{0}(($)|(2))/, "x"); row("x{0}(($)|(2)) ")

	# Alternation preceded by a starred "x".
	fresh(); gsub(/x*((2)|(^))/, "x"); row("x*((2)|(^)) ")
	fresh(); gsub(/x*((^)|(2))/, "x"); row("x*((^)|(2)) ")
	fresh(); gsub(/x*((2)|($))/, "x"); row("x*((2)|($)) ")
	fresh(); gsub(/x*(($)|(2))/, "x"); row("x*(($)|(2)) ")

	# Disabled: more interval-operator cases, anchors directly after x{0}.
	# fresh(); gsub(/x{0}^/, "x"); row("x{0}^ ")
	# fresh(); gsub(/x{0}$/, "x"); row("x{0}$ ")
	# fresh(); gsub(/(x{0}^)|2/, "x"); row("(x{0}^)|2 ")
	# fresh(); gsub(/(x{0}$)|2/, "x"); row("(x{0}$)|2 ")
}
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
#byefrom
#
#--
#laura fairhead # laura@madonnaweb.com http://lf.8k.com
# # if you are bored crack my sig.
#1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
#EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
#630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
#8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
#80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492