Tree - source-git/augeas - CentOS Git server

source-git / augeas

Blame gnulib/lib/rawmemchr.c

Blob History Raw

Packit	06dd63	`/* Searching in a string.`
Packit	06dd63	`Copyright (C) 2008-2019 Free Software Foundation, Inc.`
Packit	06dd63
Packit	06dd63	`This program is free software: you can redistribute it and/or modify`
Packit	06dd63	`it under the terms of the GNU Lesser General Public License as published by`
Packit	06dd63	`the Free Software Foundation; either version 2.1 of the License, or`
Packit	06dd63	`(at your option) any later version.`
Packit	06dd63
Packit	06dd63	`This program is distributed in the hope that it will be useful,`
Packit	06dd63	`but WITHOUT ANY WARRANTY; without even the implied warranty of`
Packit	06dd63	`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
Packit	06dd63	`GNU Lesser General Public License for more details.`
Packit	06dd63
Packit	06dd63	`You should have received a copy of the GNU Lesser General Public License`
Packit	06dd63	`along with this program. If not, see <https://www.gnu.org/licenses/>. */`
Packit	06dd63
Packit	06dd63	`#include <config.h>`
Packit	06dd63
Packit	06dd63	`/* Specification. */`
Packit	06dd63	`#include <string.h>`
Packit	06dd63
/* Find the first occurrence of C_IN (converted to unsigned char) in the
   bytes starting at S, and return a pointer to it.

   There is no length argument and no terminator: NUL bytes are not
   treated specially, and the scan only stops when the byte is found.
   The caller must therefore guarantee that the byte really occurs at
   or after S.  */
void *
rawmemchr (const void *s, int c_in)
{
  /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
     long instead of a 64-bit uintmax_t tends to give better
     performance.  On 64-bit hardware, unsigned long is generally 64
     bits already.  Change this typedef to experiment with
     performance.  */
  typedef unsigned long int longword;

  const unsigned char *char_ptr;
  const longword *longword_ptr;
  longword repeated_one;
  longword repeated_c;
  longword repeated_hibit;
  unsigned char c;

  c = (unsigned char) c_in;

  /* Handle the first few bytes by reading one byte at a time.
     Do this until CHAR_PTR is aligned on a longword boundary.  */
  for (char_ptr = (const unsigned char *) s;
       (size_t) char_ptr % sizeof (longword) != 0;
       ++char_ptr)
    if (*char_ptr == c)
      return (void *) char_ptr;

  longword_ptr = (const longword *) char_ptr;

  /* Compute auxiliary longword values (8-bit bytes are assumed, as in
     the rest of this algorithm):
       repeated_one   has 0x01 in every byte,
       repeated_c     has c    in every byte,
       repeated_hibit has 0x80 in every byte.
     Dividing the all-ones word by 0xff yields 0x01 in every byte for
     any word width, so no width-specific shifting is needed.  */
  repeated_one = (longword) -1 / 0xff;
  repeated_c = repeated_one * c;
  repeated_hibit = repeated_one << 7;

  /* Instead of the traditional loop which tests each byte, we test a
     longword at a time.  The tricky part is testing whether *any* byte
     of the longword equals c.  We first xor with repeated_c, which
     reduces the task to testing whether any byte of longword1 is zero.

     We compute tmp =
       ((longword1 - repeated_one) & ~longword1) & repeated_hibit.
     That is, we perform the following operations:
       1. Subtract repeated_one.
       2. & ~longword1.
       3. & a mask consisting of 0x80 in every byte.
     Consider what happens in each byte:
       - If a byte of longword1 is zero, step 1 and 2 transform it into
         0xff, and step 3 transforms it into 0x80.  A carry can also be
         propagated to more significant bytes.
       - If a byte of longword1 is nonzero, let its lowest 1 bit be at
         position k (0 <= k <= 7); so the lowest k bits are 0.  After
         step 1, the byte ends in a single bit of value 0 and k bits of
         value 1.  After step 2, the result is just k bits of value 1:
         2^k - 1.  After step 3, the result is 0.  And no carry is
         produced.
     So, if longword1 has only non-zero bytes, tmp is zero.
     Whereas if longword1 has a zero byte, call j the position of the
     least significant zero byte.  Then the result has a zero at
     positions 0, ..., j-1 and a 0x80 at position j.  We cannot predict
     the result at the more significant bytes, but it does not matter
     since we already have a non-zero bit at position 8*j+7.

     The test whether any byte in longword1 is zero is equivalent
     to testing whether tmp is nonzero.

     This test can read beyond the end of the caller's data, depending
     on where C_IN is encountered.  However, this is considered safe
     since the initialization phase ensured that the read will be
     aligned, therefore, the read will not cross page boundaries and
     will not cause a fault.  */
  for (;;)
    {
      longword longword1 = *longword_ptr ^ repeated_c;

      if (((longword1 - repeated_one) & ~longword1 & repeated_hibit) != 0)
        break;
      longword_ptr++;
    }

  /* At this point, we know that one of the sizeof (longword) bytes
     starting at longword_ptr is == c.  On little-endian machines, we
     could determine the first such byte without any further memory
     accesses, just by looking at the tmp result from the last loop
     iteration.  But this does not work on big-endian machines.
     Choose code that works in both cases.  */
  char_ptr = (const unsigned char *) longword_ptr;
  while (*char_ptr != c)
    char_ptr++;
  return (void *) char_ptr;
}

source-git / augeas

Source Code

Blame gnulib/lib/rawmemchr.c