/* regex.c: regular expression interface routines for the ed line editor. */
/*  GNU ed - The GNU line editor.
    Copyright (C) 1993, 1994 Andrew Moore, Talke Studio
    Copyright (C) 2006-2017 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
Packit 4e1bf9
Packit 4e1bf9
#include <stddef.h>
Packit 4e1bf9
#include <errno.h>
Packit 4e1bf9
#include <sys/types.h>
Packit 4e1bf9
#include <regex.h>
Packit 4e1bf9
#include <stdio.h>
Packit 4e1bf9
#include <stdlib.h>
Packit 4e1bf9
#include <string.h>
Packit 4e1bf9
Packit 4e1bf9
#include "ed.h"
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* compiled regex saved by set_subst_regex for use by line_replace;
   0 until a substitution regex has been set */
static regex_t * subst_regex_ = 0;

/* replacement template saved by extract_replacement */
static char * rbuf = 0;		/* replacement buffer */
static int rbufsz = 0;		/* replacement buffer size */
static int rlen = 0;		/* replacement length (terminating NUL excluded) */


/* return true if a substitution regex has been set */
bool subst_regex( void ) { return subst_regex_ != 0; }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* Translate characters in a buffer: replace every byte equal to 'from'
   with 'to' in the first 'len' bytes of 'p'.
   The scan is driven by 'len' only, so embedded NULs do not stop it.
   Nothing is done if 'len' <= 0. */
static void translit_text( char * p, int len, const char from, const char to )
  {
  while( --len >= 0 )
    {
    if( *p == from ) *p = to;
    ++p;
    }
  }


/* overwrite newlines with ASCII NULs in the first 'len' bytes of 's' */
static void newline_to_nul( char * const s, const int len )
  { translit_text( s, len, '\n', '\0' ); }

/* overwrite ASCII NULs with newlines in the first 'len' bytes of 's' */
static void nul_to_newline( char * const s, const int len )
  { translit_text( s, len, '\0', '\n' ); }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* Skip over a bracket expression (POSIX character class list).
   'p' must point to the character following the opening '['.
   Return a pointer to the closing ']', or 0 if a newline is found before
   the bracket expression is closed. */
static const char * parse_char_class( const char * p )
  {
  char c, d;

  if( *p == '^' ) ++p;		/* skip a leading '^' */
  if( *p == ']' ) ++p;		/* a ']' in first position does not close */
  for( ; *p != ']' && *p != '\n'; ++p )
    if( *p == '[' && ( ( d = p[1] ) == '.' || d == ':' || d == '=' ) )
      /* skip "[.", "[:" or "[=" up to the matching ".]", ":]" or "=]";
         'c' holds the character preceding '*p' */
      for( ++p, c = *++p; *p != ']' || c != d; ++p )
        if( ( c = *p ) == '\n' )
          return 0;
  return ( ( *p == ']' ) ? p : 0 );
  }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* Copy a pattern string from the command buffer; return pointer to the copy.
   The pattern starts at '*ibufpp' and extends up to (not including) the
   first unescaped 'delimiter' found outside of a bracket expression, or up
   to the newline. On success '*ibufpp' is advanced to that delimiter or
   newline. The copy lives in a static buffer which is reused by the next
   call. Return 0 (with the error message set for the first two cases) on
   unbalanced brackets, trailing backslash, or failure to resize the buffer. */
static char * extract_pattern( const char ** const ibufpp, const char delimiter )
  {
  static char * buf = 0;
  static int bufsz = 0;
  const char * nd = *ibufpp;
  int len;

  while( *nd != delimiter && *nd != '\n' )
    {
    if( *nd == '[' )
      {
      /* jump to the closing ']' so that a delimiter inside is not seen */
      nd = parse_char_class( nd + 1 );
      if( !nd ) { set_error_msg( "Unbalanced brackets ([])" ); return 0; }
      }
    else if( *nd == '\\' && *++nd == '\n' )
      { set_error_msg( "Trailing backslash (\\)" ); return 0; }
    ++nd;			/* skip ']', escaped char, or ordinary char */
    }
  len = nd - *ibufpp;
  if( !resize_buffer( &buf, &bufsz, len + 1 ) ) return 0;
  memcpy( buf, *ibufpp, len );
  buf[len] = 0;
  *ibufpp = nd;
  if( isbinary() ) nul_to_newline( buf, len );
  return buf;
  }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* return pointer to compiled regex from command buffer, or to previous
Packit 4e1bf9
   compiled regex if empty RE. return 0 if error */
Packit 4e1bf9
static regex_t * get_compiled_regex( const char ** const ibufpp,
Packit 4e1bf9
                                     const bool test_delimiter )
Packit 4e1bf9
  {
Packit 4e1bf9
  static regex_t store[2];		/* space for two compiled regexes */
Packit 4e1bf9
  static regex_t * exp = 0;
Packit 4e1bf9
  const char * pat;
Packit 4e1bf9
  const char delimiter = **ibufpp;
Packit 4e1bf9
  int n;
Packit 4e1bf9
Packit 4e1bf9
  if( delimiter == ' ' )
Packit 4e1bf9
    { set_error_msg( "Invalid pattern delimiter" ); return 0; }
Packit 4e1bf9
  if( delimiter == '\n' || *++*ibufpp == delimiter ||
Packit 4e1bf9
      ( **ibufpp == '\n' && !test_delimiter ) )
Packit 4e1bf9
    {
Packit 4e1bf9
    if( !exp ) set_error_msg( "No previous pattern" );
Packit 4e1bf9
    return exp;
Packit 4e1bf9
    }
Packit 4e1bf9
  pat = extract_pattern( ibufpp, delimiter );
Packit 4e1bf9
  if( !pat ) return 0;
Packit 4e1bf9
  if( test_delimiter && delimiter != **ibufpp )
Packit 4e1bf9
    { set_error_msg( "Missing pattern delimiter" ); return 0; }
Packit 4e1bf9
  /* exp compiled && not copied */
Packit 4e1bf9
  if( exp && exp != subst_regex_ ) regfree( exp );
Packit 4e1bf9
  else exp = ( &store[0] != subst_regex_ ) ? &store[0] : &store[1];
Packit 4e1bf9
  n = regcomp( exp, pat, 0 );
Packit 4e1bf9
  if( n )
Packit 4e1bf9
    {
Packit 4e1bf9
    char buf[80];
Packit 4e1bf9
    regerror( n, exp, buf, sizeof buf );
Packit 4e1bf9
    set_error_msg( buf );
Packit 4e1bf9
    exp = 0;
Packit 4e1bf9
    }
Packit 4e1bf9
  return exp;
Packit 4e1bf9
  }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
bool set_subst_regex( const char ** const ibufpp )
Packit 4e1bf9
  {
Packit 4e1bf9
  regex_t * exp;
Packit 4e1bf9
Packit 4e1bf9
  disable_interrupts();
Packit 4e1bf9
  exp = get_compiled_regex( ibufpp, true );
Packit 4e1bf9
  if( exp && exp != subst_regex_ )
Packit 4e1bf9
    {
Packit 4e1bf9
    if( subst_regex_ ) regfree( subst_regex_ );
Packit 4e1bf9
    subst_regex_ = exp;
Packit 4e1bf9
    }
Packit 4e1bf9
  enable_interrupts();
Packit 4e1bf9
  return ( exp ? true : false );
Packit 4e1bf9
  }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* add line matching a regular expression to the global-active list */
Packit 4e1bf9
bool build_active_list( const char ** const ibufpp, const int first_addr,
Packit 4e1bf9
                        const int second_addr, const bool match )
Packit 4e1bf9
  {
Packit 4e1bf9
  const regex_t * exp;
Packit 4e1bf9
  const line_t * lp;
Packit 4e1bf9
  int addr;
Packit 4e1bf9
  const char delimiter = **ibufpp;
Packit 4e1bf9
Packit 4e1bf9
  if( delimiter == ' ' || delimiter == '\n' )
Packit 4e1bf9
    { set_error_msg( "Invalid pattern delimiter" ); return false; }
Packit 4e1bf9
  exp = get_compiled_regex( ibufpp, false );
Packit 4e1bf9
  if( !exp ) return false;
Packit 4e1bf9
  if( **ibufpp == delimiter ) ++*ibufpp;
Packit 4e1bf9
  clear_active_list();
Packit 4e1bf9
  lp = search_line_node( first_addr );
Packit 4e1bf9
  for( addr = first_addr; addr <= second_addr; ++addr, lp = lp->q_forw )
Packit 4e1bf9
    {
Packit 4e1bf9
    char * const s = get_sbuf_line( lp );
Packit 4e1bf9
    if( !s ) return false;
Packit 4e1bf9
    if( isbinary() ) nul_to_newline( s, lp->len );
Packit 4e1bf9
    if( match == !regexec( exp, s, 0, 0, 0 ) && !set_active_node( lp ) )
Packit 4e1bf9
      return false;
Packit 4e1bf9
    }
Packit 4e1bf9
  return true;
Packit 4e1bf9
  }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* return the address of the next line matching a regular expression in a
Packit 4e1bf9
   given direction. wrap around begin/end of editor buffer if necessary */
Packit 4e1bf9
int next_matching_node_addr( const char ** const ibufpp, const bool forward )
Packit 4e1bf9
  {
Packit 4e1bf9
  const regex_t * const exp = get_compiled_regex( ibufpp, false );
Packit 4e1bf9
  int addr = current_addr();
Packit 4e1bf9
Packit 4e1bf9
  if( !exp ) return -1;
Packit 4e1bf9
  do {
Packit 4e1bf9
    addr = ( forward ? inc_addr( addr ) : dec_addr( addr ) );
Packit 4e1bf9
    if( addr )
Packit 4e1bf9
      {
Packit 4e1bf9
      const line_t * const lp = search_line_node( addr );
Packit 4e1bf9
      char * const s = get_sbuf_line( lp );
Packit 4e1bf9
      if( !s ) return -1;
Packit 4e1bf9
      if( isbinary() ) nul_to_newline( s, lp->len );
Packit 4e1bf9
      if( !regexec( exp, s, 0, 0, 0 ) ) return addr;
Packit 4e1bf9
      }
Packit 4e1bf9
    }
Packit 4e1bf9
  while( addr != current_addr() );
Packit 4e1bf9
  set_error_msg( "No match" );
Packit 4e1bf9
  return -1;
Packit 4e1bf9
  }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* extract substitution replacement from the command buffer */
Packit 4e1bf9
bool extract_replacement( const char ** const ibufpp, const bool isglobal )
Packit 4e1bf9
  {
Packit 4e1bf9
  static char * buf = 0;		/* temporary buffer */
Packit 4e1bf9
  static int bufsz = 0;
Packit 4e1bf9
  int i = 0;
Packit 4e1bf9
  const char delimiter = **ibufpp;
Packit 4e1bf9
Packit 4e1bf9
  if( delimiter == '\n' )
Packit 4e1bf9
    { set_error_msg( "Missing pattern delimiter" ); return false; }
Packit 4e1bf9
  ++*ibufpp;
Packit 4e1bf9
  if( **ibufpp == '%' && ( (*ibufpp)[1] == delimiter || (*ibufpp)[1] == '\n' ) )
Packit 4e1bf9
    {
Packit 4e1bf9
    ++*ibufpp;
Packit 4e1bf9
    if( !rbuf ) { set_error_msg( "No previous substitution" ); return false; }
Packit 4e1bf9
    return true;
Packit 4e1bf9
    }
Packit 4e1bf9
  while( **ibufpp != delimiter )
Packit 4e1bf9
    {
Packit 4e1bf9
    if( **ibufpp == '\n' )
Packit 4e1bf9
      {
Packit 4e1bf9
      if( isglobal && (*ibufpp)[1] != 0 )
Packit 4e1bf9
        { set_error_msg( "Invalid newline substitution" ); return false; }
Packit 4e1bf9
      break;
Packit 4e1bf9
      }
Packit 4e1bf9
    if( !resize_buffer( &buf, &bufsz, i + 2 ) ) return false;
Packit 4e1bf9
    if( ( buf[i++] = *(*ibufpp)++ ) == '\\' &&
Packit 4e1bf9
        ( buf[i++] = *(*ibufpp)++ ) == '\n' && !isglobal )
Packit 4e1bf9
      {
Packit 4e1bf9
      /* not reached if isglobal; in command-list, newlines are unescaped */
Packit 4e1bf9
      int size = 0;
Packit 4e1bf9
      *ibufpp = get_stdin_line( &size );
Packit 4e1bf9
      if( !*ibufpp ) return false;			/* error */
Packit 4e1bf9
      if( size <= 0 ) return false;			/* EOF */
Packit 4e1bf9
      }
Packit 4e1bf9
    }
Packit 4e1bf9
  /* make sure that buf gets allocated if empty replacement */
Packit 4e1bf9
  if( !resize_buffer( &buf, &bufsz, i + 1 ) ) return false;
Packit 4e1bf9
  buf[i] = 0;
Packit 4e1bf9
  disable_interrupts();
Packit 4e1bf9
  { char * p = buf; buf = rbuf; rbuf = p;		/* swap buffers */
Packit 4e1bf9
    rlen = i; i = bufsz; bufsz = rbufsz; rbufsz = i; }
Packit 4e1bf9
  enable_interrupts();
Packit 4e1bf9
  return true;
Packit 4e1bf9
  }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* Produce replacement text from matched text and replacement template.
Packit 4e1bf9
   Return new offset to end of replacement text, or -1 if error. */
Packit 4e1bf9
static int replace_matched_text( char ** txtbufp, int * const txtbufszp,
Packit 4e1bf9
                                 const char * const txt,
Packit 4e1bf9
                                 const regmatch_t * const rm, int offset,
Packit 4e1bf9
                                 const int re_nsub )
Packit 4e1bf9
  {
Packit 4e1bf9
  const char * sub = rbuf;
Packit 4e1bf9
Packit 4e1bf9
  for( ; sub - rbuf < rlen; ++sub )
Packit 4e1bf9
    {
Packit 4e1bf9
    int n;
Packit 4e1bf9
    if( *sub == '&' )
Packit 4e1bf9
      {
Packit 4e1bf9
      int j = rm[0].rm_so; int k = rm[0].rm_eo;
Packit 4e1bf9
      if( !resize_buffer( txtbufp, txtbufszp, offset + k - j ) ) return -1;
Packit 4e1bf9
      while( j < k ) (*txtbufp)[offset++] = txt[j++];
Packit 4e1bf9
      }
Packit 4e1bf9
    else if( *sub == '\\' && *++sub >= '1' && *sub <= '9' &&
Packit 4e1bf9
             ( n = *sub - '0' ) <= re_nsub )
Packit 4e1bf9
      {
Packit 4e1bf9
      int j = rm[n].rm_so; int k = rm[n].rm_eo;
Packit 4e1bf9
      if( !resize_buffer( txtbufp, txtbufszp, offset + k - j ) ) return -1;
Packit 4e1bf9
      while( j < k ) (*txtbufp)[offset++] = txt[j++];
Packit 4e1bf9
      }
Packit 4e1bf9
    else
Packit 4e1bf9
      {
Packit 4e1bf9
      if( !resize_buffer( txtbufp, txtbufszp, offset + 1 ) ) return -1;
Packit 4e1bf9
      (*txtbufp)[offset++] = *sub;
Packit 4e1bf9
      }
Packit 4e1bf9
    }
Packit 4e1bf9
  if( !resize_buffer( txtbufp, txtbufszp, offset + 1 ) ) return -1;
Packit 4e1bf9
  (*txtbufp)[offset] = 0;
Packit 4e1bf9
  return offset;
Packit 4e1bf9
  }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* Produce new text with one or all matches replaced in a line.
Packit 4e1bf9
   Return size of the new line text, 0 if no change, -1 if error */
Packit 4e1bf9
static int line_replace( char ** txtbufp, int * const txtbufszp,
Packit 4e1bf9
                         const line_t * const lp, const int snum )
Packit 4e1bf9
  {
Packit 4e1bf9
  enum { se_max = 30 };	/* max subexpressions in a regular expression */
Packit 4e1bf9
  regmatch_t rm[se_max];
Packit 4e1bf9
  char * txt = get_sbuf_line( lp );
Packit 4e1bf9
  const char * eot;
Packit 4e1bf9
  int i = 0, offset = 0;
Packit 4e1bf9
  const bool global = ( snum <= 0 );
Packit 4e1bf9
  bool changed = false;
Packit 4e1bf9
Packit 4e1bf9
  if( !txt ) return -1;
Packit 4e1bf9
  if( isbinary() ) nul_to_newline( txt, lp->len );
Packit 4e1bf9
  eot = txt + lp->len;
Packit 4e1bf9
  if( !regexec( subst_regex_, txt, se_max, rm, 0 ) )
Packit 4e1bf9
    {
Packit 4e1bf9
    int matchno = 0;
Packit 4e1bf9
    do {
Packit 4e1bf9
      if( global || snum == ++matchno )
Packit 4e1bf9
        {
Packit 4e1bf9
        changed = true; i = rm[0].rm_so;
Packit 4e1bf9
        if( !resize_buffer( txtbufp, txtbufszp, offset + i ) ) return -1;
Packit 4e1bf9
        if( isbinary() ) newline_to_nul( txt, rm[0].rm_eo );
Packit 4e1bf9
        memcpy( *txtbufp + offset, txt, i ); offset += i;
Packit 4e1bf9
        offset = replace_matched_text( txtbufp, txtbufszp, txt, rm, offset,
Packit 4e1bf9
                                       subst_regex_->re_nsub );
Packit 4e1bf9
        if( offset < 0 ) return -1;
Packit 4e1bf9
        }
Packit 4e1bf9
      else
Packit 4e1bf9
        {
Packit 4e1bf9
        i = rm[0].rm_eo;
Packit 4e1bf9
        if( !resize_buffer( txtbufp, txtbufszp, offset + i ) ) return -1;
Packit 4e1bf9
        if( isbinary() ) newline_to_nul( txt, i );
Packit 4e1bf9
        memcpy( *txtbufp + offset, txt, i ); offset += i;
Packit 4e1bf9
        }
Packit 4e1bf9
      txt += rm[0].rm_eo;
Packit 4e1bf9
      }
Packit 4e1bf9
    while( *txt && ( !changed || ( global && rm[0].rm_eo ) ) &&
Packit 4e1bf9
           !regexec( subst_regex_, txt, se_max, rm, REG_NOTBOL ) );
Packit 4e1bf9
    i = eot - txt;
Packit 4e1bf9
    if( !resize_buffer( txtbufp, txtbufszp, offset + i + 2 ) ) return -1;
Packit 4e1bf9
    if( global && i > 0 && !rm[0].rm_eo )
Packit 4e1bf9
      { set_error_msg( "Infinite substitution loop" ); return -1; }
Packit 4e1bf9
    if( isbinary() ) newline_to_nul( txt, i );
Packit 4e1bf9
    memcpy( *txtbufp + offset, txt, i );		/* tail copy */
Packit 4e1bf9
    memcpy( *txtbufp + offset + i, "\n", 2 );
Packit 4e1bf9
    }
Packit 4e1bf9
  return ( changed ? offset + i + 1 : 0 );
Packit 4e1bf9
  }
Packit 4e1bf9
Packit 4e1bf9
Packit 4e1bf9
/* for each line in a range, change text matching a regular expression
Packit 4e1bf9
   according to a substitution template (replacement); return false if error */
Packit 4e1bf9
bool search_and_replace( const int first_addr, const int second_addr,
Packit 4e1bf9
                         const int snum, const bool isglobal )
Packit 4e1bf9
  {
Packit 4e1bf9
  static char * txtbuf = 0;		/* new text of line buffer */
Packit 4e1bf9
  static int txtbufsz = 0;		/* new text of line buffer size */
Packit 4e1bf9
  int addr = first_addr;
Packit 4e1bf9
  int lc;
Packit 4e1bf9
  bool match_found = false;
Packit 4e1bf9
Packit 4e1bf9
  for( lc = 0; lc <= second_addr - first_addr; ++lc, ++addr )
Packit 4e1bf9
    {
Packit 4e1bf9
    const line_t * const lp = search_line_node( addr );
Packit 4e1bf9
    const int size = line_replace( &txtbuf, &txtbufsz, lp, snum );
Packit 4e1bf9
    if( size < 0 ) return false;
Packit 4e1bf9
    if( size )
Packit 4e1bf9
      {
Packit 4e1bf9
      const char * txt = txtbuf;
Packit 4e1bf9
      const char * const eot = txtbuf + size;
Packit 4e1bf9
      undo_t * up = 0;
Packit 4e1bf9
      disable_interrupts();
Packit 4e1bf9
      if( !delete_lines( addr, addr, isglobal ) )
Packit 4e1bf9
        { enable_interrupts(); return false; }
Packit 4e1bf9
      set_current_addr( addr - 1 );
Packit 4e1bf9
      do {
Packit 4e1bf9
        txt = put_sbuf_line( txt, eot - txt );
Packit 4e1bf9
        if( !txt ) { enable_interrupts(); return false; }
Packit 4e1bf9
        if( up ) up->tail = search_line_node( current_addr() );
Packit 4e1bf9
        else
Packit 4e1bf9
          {
Packit 4e1bf9
          up = push_undo_atom( UADD, current_addr(), current_addr() );
Packit 4e1bf9
          if( !up ) { enable_interrupts(); return false; }
Packit 4e1bf9
          }
Packit 4e1bf9
        }
Packit 4e1bf9
      while( txt != eot );
Packit 4e1bf9
      enable_interrupts();
Packit 4e1bf9
      addr = current_addr();
Packit 4e1bf9
      match_found = true;
Packit 4e1bf9
      }
Packit 4e1bf9
    }
Packit 4e1bf9
  if( !match_found && !isglobal )
Packit 4e1bf9
    { set_error_msg( "No match" ); return false; }
Packit 4e1bf9
  return true;
Packit 4e1bf9
  }