diff --git a/deps/http-parser/LICENSE-MIT b/deps/http-parser/LICENSE-MIT new file mode 100644 index 0000000..58010b3 --- /dev/null +++ b/deps/http-parser/LICENSE-MIT @@ -0,0 +1,23 @@ +http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright +Igor Sysoev. + +Additional changes are licensed under the same terms as NGINX and +copyright Joyent, Inc. and other Node contributors. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. diff --git a/deps/http-parser/http_parser.c b/deps/http-parser/http_parser.c new file mode 100644 index 0000000..27bdd20 --- /dev/null +++ b/deps/http-parser/http_parser.c @@ -0,0 +1,2177 @@ +/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev + * + * Additional changes are licensed under the same terms as NGINX and + * copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include +#include +#include +#include +#include +#include + +#ifndef ULLONG_MAX +# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ +#endif + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +#ifndef ELEM_AT +# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v)) +#endif + +#define SET_ERRNO(e) \ +do { \ + parser->http_errno = (e); \ +} while(0) + + +/* Run the notify callback FOR, returning ER if it fails */ +#define CALLBACK_NOTIFY_(FOR, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser)) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + \ + /* We either errored above or got paused; get out */ \ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + return (ER); \ + } \ + } \ +} while (0) + +/* Run the notify callback FOR and consume the current byte */ +#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) + +/* Run the notify callback FOR and don't consume the current byte */ +#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) + +/* Run data callback FOR with LEN bytes, returning ER if it fails */ +#define CALLBACK_DATA_(FOR, LEN, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (FOR##_mark) { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + \ + /* We either errored above or got paused; get out */ \ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + return (ER); \ + } \ + } \ + FOR##_mark = NULL; \ + } \ +} while (0) + +/* Run the data callback FOR and consume the current byte */ +#define CALLBACK_DATA(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) + +/* Run the data callback FOR and don't consume the current byte */ +#define CALLBACK_DATA_NOADVANCE(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) + +/* Set the mark FOR; non-destructive if mark is already set */ +#define MARK(FOR) \ +do { \ + if (!FOR##_mark) { \ + FOR##_mark = p; \ + } \ +} while (0) + + +#define PROXY_CONNECTION "proxy-connection" +#define CONNECTION "connection" +#define CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" +#define UPGRADE "upgrade" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + + +static const char *method_strings[] = + { +#define XX(num, name, string) #string, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1* + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" | "=" + * | "{" | "}" | SP | HT + */ +static const char tokens[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0, '!', 0, '#', '$', '%', '&', '\'', +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 0, 0, '*', '+', 0, '-', '.', 0, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, '^', '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, '|', 0, '~', 0 }; + + +static const int8_t unhex[256] = + {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + }; + + +#if HTTP_PARSER_STRICT +# define T(v) 0 +#else +# define T(v) v +#endif + + +static const uint8_t normal_url_char[32] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; + +#undef T + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_first_http_major + , s_res_http_major + , s_res_first_http_minor + , s_res_http_minor + , s_res_first_status_code + , s_res_status_code + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_server_start + , s_req_server + , s_req_server_with_at + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_first_http_major + , s_req_http_major + , s_req_first_http_minor + , s_req_http_minor + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_start + , s_header_value + , s_header_value_lws + + , s_header_almost_done + + , s_chunk_size_start + , s_chunk_size + , s_chunk_parameters + , s_chunk_size_almost_done + + , s_headers_almost_done + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + + , s_message_done + }; + + +#define PARSING_HEADER(state) (state <= s_headers_done) + + +enum header_states + { h_general = 0 + , h_C + , h_CO + , h_CON + + , h_matching_connection + , h_matching_proxy_connection + , h_matching_content_length + , h_matching_transfer_encoding + , h_matching_upgrade + + , h_connection + , h_content_length + , h_transfer_encoding + , h_upgrade + + , h_matching_transfer_encoding_chunked + , h_matching_connection_keep_alive + , h_matching_connection_close + + , h_transfer_encoding_chunked + , h_connection_keep_alive + , h_connection_close + }; + +enum http_host_state + { + s_http_host_dead = 1 + , s_http_userinfo_start + , s_http_userinfo + , s_http_host_start + , s_http_host_v6_start + , s_http_host + , s_http_host_v6 + , s_http_host_v6_end + , s_http_host_port_start + , s_http_host_port +}; + +/* Macros for character classes; depends on strict-mode */ +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) +#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ + (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ + (c) == ')') +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ + (c) == '$' || (c) == ',') + +#if HTTP_PARSER_STRICT +#define TOKEN(c) (tokens[(unsigned char)c]) +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c]) +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + + +#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) + + +#if HTTP_PARSER_STRICT +# define STRICT_CHECK(cond) \ +do { \ + if (cond) { \ + SET_ERRNO(HPE_STRICT); \ + goto error; \ + } \ +} while (0) +# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead) +#else +# define STRICT_CHECK(cond) +# define NEW_MESSAGE() start_state +#endif + + +/* Map errno values to strings for human-readable output */ +#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, +static struct { + const char *name; + const char *description; +} http_strerror_tab[] = { + HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) +}; +#undef HTTP_STRERROR_GEN + +int http_message_needs_eof(const http_parser *parser); + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. + */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + /* The schema must start with an alpha character. After that, it may + * consist of digits, '+', '-' or '.', followed by a ':'. + */ + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHANUM(ch) || ch == '+' || ch == '-' || ch == '.') { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { + return s_dead; + } + + /* FALLTHROUGH */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) +{ + char c, ch; + int8_t unhex_val; + const char *p = data; + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *url_mark = 0; + const char *body_mark = 0; + + /* We're in an error state. Don't bother doing anything. */ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return 0; + } + + if (len == 0) { + switch (parser->state) { + case s_body_identity_eof: + /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if + * we got paused. + */ + CALLBACK_NOTIFY_NOADVANCE(message_complete); + return 0; + + case s_dead: + case s_start_req_or_res: + case s_start_res: + case s_start_req: + return 0; + + default: + SET_ERRNO(HPE_INVALID_EOF_STATE); + return 1; + } + } + + + if (parser->state == s_header_field) + header_field_mark = data; + if (parser->state == s_header_value) + header_value_mark = data; + switch (parser->state) { + case s_req_path: + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_server: + case s_req_server_with_at: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + url_mark = data; + break; + } + + for (p=data; p != data + len; p++) { + ch = *p; + + if (PARSING_HEADER(parser->state)) { + ++parser->nread; + /* Buffer overflow attack */ + if (parser->nread > HTTP_MAX_HEADER_SIZE) { + SET_ERRNO(HPE_HEADER_OVERFLOW); + goto error; + } + } + + reexecute_byte: + switch (parser->state) { + + case s_dead: + /* this state is used after a 'Connection: close' message + * the parser will error out if it reads another message + */ + if (ch == CR || ch == LF) + break; + + SET_ERRNO(HPE_CLOSED_CONNECTION); + goto error; + + case s_start_req_or_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (ch == 'H') { + parser->state = s_res_or_resp_H; + + CALLBACK_NOTIFY(message_begin); + } else { + parser->type = HTTP_REQUEST; + parser->state = s_start_req; + goto reexecute_byte; + } + + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + parser->state = s_res_HT; + } else { + if (ch != 'E') { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + parser->type = HTTP_REQUEST; + parser->method = HTTP_HEAD; + parser->index = 2; + parser->state = s_req_method; + } + break; + + case s_start_res: + { + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + switch (ch) { + case 'H': + parser->state = s_res_H; + break; + + case CR: + case LF: + break; + + default: + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + CALLBACK_NOTIFY(message_begin); + break; + } + + case s_res_H: + STRICT_CHECK(ch != 'T'); + parser->state = s_res_HT; + break; + + case s_res_HT: + STRICT_CHECK(ch != 'T'); + parser->state = s_res_HTT; + break; + + case s_res_HTT: + STRICT_CHECK(ch != 'P'); + parser->state = s_res_HTTP; + break; + + case s_res_HTTP: + STRICT_CHECK(ch != '/'); + parser->state = s_res_first_http_major; + break; + + case s_res_first_http_major: + if (ch < '0' || ch > '9') { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + parser->state = s_res_http_major; + break; + + /* major HTTP version or dot */ + case s_res_http_major: + { + if (ch == '.') { + parser->state = s_res_first_http_minor; + break; + } + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + /* first digit of minor HTTP version */ + case s_res_first_http_minor: + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + parser->state = s_res_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_res_http_minor: + { + if (ch == ' ') { + parser->state = s_res_first_status_code; + break; + } + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + case s_res_first_status_code: + { + if (!IS_NUM(ch)) { + if (ch == ' ') { + break; + } + + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + parser->status_code = ch - '0'; + parser->state = s_res_status_code; + break; + } + + case s_res_status_code: + { + if (!IS_NUM(ch)) { + switch (ch) { + case ' ': + parser->state = s_res_status; + break; + case CR: + parser->state = s_res_line_almost_done; + break; + case LF: + parser->state = s_header_field_start; + break; + default: + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + break; + } + + parser->status_code *= 10; + parser->status_code += ch - '0'; + + if (parser->status_code > 999) { + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + + break; + } + + case s_res_status: + /* the human readable status. e.g. "NOT FOUND" + * we are not humans so just ignore this */ + if (ch == CR) { + parser->state = s_res_line_almost_done; + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + break; + } + break; + + case s_res_line_almost_done: + STRICT_CHECK(ch != LF); + parser->state = s_header_field_start; + break; + + case s_start_req: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (!IS_ALPHA(ch)) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + parser->method = (enum http_method) 0; + parser->index = 1; + switch (ch) { + case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; + case 'D': parser->method = HTTP_DELETE; break; + case 'G': parser->method = HTTP_GET; break; + case 'H': parser->method = HTTP_HEAD; break; + case 'L': parser->method = HTTP_LOCK; break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break; + case 'N': parser->method = HTTP_NOTIFY; break; + case 'O': parser->method = HTTP_OPTIONS; break; + case 'P': parser->method = HTTP_POST; + /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ + break; + case 'R': parser->method = HTTP_REPORT; break; + case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break; + case 'T': parser->method = HTTP_TRACE; break; + case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; + default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + parser->state = s_req_method; + + CALLBACK_NOTIFY(message_begin); + + break; + } + + case s_req_method: + { + const char *matcher; + if (ch == '\0') { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + matcher = method_strings[parser->method]; + if (ch == ' ' && matcher[parser->index] == '\0') { + parser->state = s_req_spaces_before_url; + } else if (ch == matcher[parser->index]) { + ; /* nada */ + } else if (parser->method == HTTP_CONNECT) { + if (parser->index == 1 && ch == 'H') { + parser->method = HTTP_CHECKOUT; + } else if (parser->index == 2 && ch == 'P') { + parser->method = HTTP_COPY; + } else { + goto error; + } + } else if (parser->method == HTTP_MKCOL) { + if (parser->index == 1 && ch == 'O') { + parser->method = HTTP_MOVE; + } else if (parser->index == 1 && ch == 'E') { + parser->method = HTTP_MERGE; + } else if (parser->index == 1 && ch == '-') { + parser->method = HTTP_MSEARCH; + } else if (parser->index == 2 && ch == 'A') { + parser->method = HTTP_MKACTIVITY; + } else { + goto error; + } + } else if (parser->method == HTTP_SUBSCRIBE) { + if (parser->index == 1 && ch == 'E') { + parser->method = HTTP_SEARCH; + } else { + goto error; + } + } else if (parser->index == 1 && parser->method == HTTP_POST) { + if (ch == 'R') { + parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ + } else if (ch == 'U') { + parser->method = HTTP_PUT; /* or HTTP_PURGE */ + } else if (ch == 'A') { + parser->method = HTTP_PATCH; + } else { + goto error; + } + } else if (parser->index == 2) { + if (parser->method == HTTP_PUT) { + if (ch == 'R') parser->method = HTTP_PURGE; + } else if (parser->method == HTTP_UNLOCK) { + if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE; + } + } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { + parser->method = HTTP_PROPPATCH; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + ++parser->index; + break; + } + + case s_req_spaces_before_url: + { + if (ch == ' ') break; + + MARK(url); + if (parser->method == HTTP_CONNECT) { + parser->state = s_req_server_start; + } + + parser->state = parse_url_char((enum state)parser->state, ch); + if (parser->state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + + break; + } + + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + { + switch (ch) { + /* No whitespace allowed here */ + case ' ': + case CR: + case LF: + SET_ERRNO(HPE_INVALID_URL); + goto error; + default: + parser->state = parse_url_char((enum state)parser->state, ch); + if (parser->state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + + break; + } + + case s_req_server: + case s_req_server_with_at: + case s_req_path: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + { + switch (ch) { + case ' ': + parser->state = s_req_http_start; + CALLBACK_DATA(url); + break; + case CR: + case LF: + parser->http_major = 0; + parser->http_minor = 9; + parser->state = (ch == CR) ? + s_req_line_almost_done : + s_header_field_start; + CALLBACK_DATA(url); + break; + default: + parser->state = parse_url_char((enum state)parser->state, ch); + if (parser->state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + break; + } + + case s_req_http_start: + switch (ch) { + case 'H': + parser->state = s_req_http_H; + break; + case ' ': + break; + default: + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + break; + + case s_req_http_H: + STRICT_CHECK(ch != 'T'); + parser->state = s_req_http_HT; + break; + + case s_req_http_HT: + STRICT_CHECK(ch != 'T'); + parser->state = s_req_http_HTT; + break; + + case s_req_http_HTT: + STRICT_CHECK(ch != 'P'); + parser->state = s_req_http_HTTP; + break; + + case s_req_http_HTTP: + STRICT_CHECK(ch != '/'); + parser->state = s_req_first_http_major; + break; + + /* first digit of major HTTP version */ + case s_req_first_http_major: + if (ch < '1' || ch > '9') { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + parser->state = s_req_http_major; + break; + + /* major HTTP version or dot */ + case s_req_http_major: + { + if (ch == '.') { + parser->state = s_req_first_http_minor; + break; + } + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + /* first digit of minor HTTP version */ + case s_req_first_http_minor: + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + parser->state = s_req_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_req_http_minor: + { + if (ch == CR) { + parser->state = s_req_line_almost_done; + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + break; + } + + /* XXX allow spaces after digit? */ + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (ch != LF) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + + parser->state = s_header_field_start; + break; + } + + case s_header_field_start: + { + if (ch == CR) { + parser->state = s_headers_almost_done; + break; + } + + if (ch == LF) { + /* they might be just sending \n instead of \r\n so this would be + * the second \n to denote the end of headers*/ + parser->state = s_headers_almost_done; + goto reexecute_byte; + } + + c = TOKEN(ch); + + if (!c) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + MARK(header_field); + + parser->index = 0; + parser->state = s_header_field; + + switch (c) { + case 'c': + parser->header_state = h_C; + break; + + case 'p': + parser->header_state = h_matching_proxy_connection; + break; + + case 't': + parser->header_state = h_matching_transfer_encoding; + break; + + case 'u': + parser->header_state = h_matching_upgrade; + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + c = TOKEN(ch); + + if (c) { + switch (parser->header_state) { + case h_general: + break; + + case h_C: + parser->index++; + parser->header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + parser->index++; + parser->header_state = (c == 'n' ? h_CON : h_general); + break; + + case h_CON: + parser->index++; + switch (c) { + case 'n': + parser->header_state = h_matching_connection; + break; + case 't': + parser->header_state = h_matching_content_length; + break; + default: + parser->header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + parser->index++; + if (parser->index > sizeof(CONNECTION)-1 + || c != CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* proxy-connection */ + + case h_matching_proxy_connection: + parser->index++; + if (parser->index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + parser->index++; + if (parser->index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { + parser->header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + parser->index++; + if (parser->index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { + parser->header_state = h_transfer_encoding; + } + break; + + /* upgrade */ + + case h_matching_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE)-1 + || c != UPGRADE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(UPGRADE)-2) { + parser->header_state = h_upgrade; + } + break; + + case h_connection: + case h_content_length: + case h_transfer_encoding: + case h_upgrade: + if (ch != ' ') parser->header_state = h_general; + break; + + default: + assert(0 && "Unknown header_state"); + break; + } + break; + } + + if (ch == ':') { + parser->state = s_header_value_start; + CALLBACK_DATA(header_field); + break; + } + + if (ch == CR) { + parser->state = s_header_almost_done; + CALLBACK_DATA(header_field); + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + CALLBACK_DATA(header_field); + break; + } + + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + case s_header_value_start: + { + if (ch == ' ' || ch == '\t') break; + + MARK(header_value); + + parser->state = s_header_value; + parser->index = 0; + + if (ch == CR) { + parser->header_state = h_general; + parser->state = s_header_almost_done; + CALLBACK_DATA(header_value); + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + CALLBACK_DATA(header_value); + break; + } + + c = LOWER(ch); + + switch (parser->header_state) { + case h_upgrade: + parser->flags |= F_UPGRADE; + parser->header_state = h_general; + break; + + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + parser->header_state = h_matching_transfer_encoding_chunked; + } else { + parser->header_state = h_general; + } + break; + + case h_content_length: + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = ch - '0'; + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + parser->header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + parser->header_state = h_matching_connection_close; + } else { + parser->header_state = h_general; + } + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + + if (ch == CR) { + parser->state = s_header_almost_done; + CALLBACK_DATA(header_value); + break; + } + + if (ch == LF) { + parser->state = s_header_almost_done; + CALLBACK_DATA_NOADVANCE(header_value); + goto reexecute_byte; + } + + c = LOWER(ch); + + switch (parser->header_state) { + case h_general: + break; + + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; + + case h_content_length: + { + uint64_t t; + + if (ch == ' ') break; + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + t = parser->content_length; + t *= 10; + t += ch - '0'; + + /* Overflow? */ + if (t < parser->content_length || t == ULLONG_MAX) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = t; + break; + } + + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_chunked: + parser->index++; + if (parser->index > sizeof(CHUNKED)-1 + || c != CHUNKED[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CHUNKED)-2) { + parser->header_state = h_transfer_encoding_chunked; + } + break; + + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + parser->index++; + if (parser->index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(KEEP_ALIVE)-2) { + parser->header_state = h_connection_keep_alive; + } + break; + + /* looking for 'Connection: close' */ + case h_matching_connection_close: + parser->index++; + if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CLOSE)-2) { + parser->header_state = h_connection_close; + } + break; + + case h_transfer_encoding_chunked: + case h_connection_keep_alive: + case h_connection_close: + if (ch != ' ') parser->header_state = h_general; + break; + + default: + parser->state = s_header_value; + parser->header_state = h_general; + break; + } + break; + } + + case s_header_almost_done: + { + STRICT_CHECK(ch != LF); + + parser->state = s_header_value_lws; + + switch (parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } + + break; + } + + case s_header_value_lws: + { + if (ch == ' ' || ch == '\t') + parser->state = s_header_value_start; + else + { + parser->state = s_header_field_start; + goto reexecute_byte; + } + break; + } + + case s_headers_almost_done: + { + STRICT_CHECK(ch != LF); + + if (parser->flags & F_TRAILING) { + /* End of a chunked request */ + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + break; + } + + parser->state = s_headers_done; + + /* Set this here so that on_headers_complete() callbacks can see it */ + parser->upgrade = + (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT); + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. This + * is needed for the annoying case of recieving a response to a HEAD + * request. + * + * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so + * we have to simulate it by handling a change in errno below. + */ + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(parser)) { + case 0: + break; + + case 1: + parser->flags |= F_SKIPBODY; + break; + + default: + SET_ERRNO(HPE_CB_headers_complete); + return p - data; /* Error */ + } + } + + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return p - data; + } + + goto reexecute_byte; + } + + case s_headers_done: + { + STRICT_CHECK(ch != LF); + + parser->nread = 0; + + /* Exit, the rest of the connect is in a different protocol. */ + if (parser->upgrade) { + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + return (p - data) + 1; + } + + if (parser->flags & F_SKIPBODY) { + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header */ + parser->state = s_chunk_size_start; + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + } else if (parser->content_length != ULLONG_MAX) { + /* Content-Length header given and non-zero */ + parser->state = s_body_identity; + } else { + if (parser->type == HTTP_REQUEST || + !http_message_needs_eof(parser)) { + /* Assume content-length 0 - read the next */ + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + } else { + /* Read body until EOF */ + parser->state = s_body_identity_eof; + } + } + } + + break; + } + + case s_body_identity: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* The difference between advancing content_length and p is because + * the latter will automaticaly advance on the next loop iteration. + * Further, if content_length ends up at 0, we want to see the last + * byte again for our message complete callback. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + parser->state = s_message_done; + + /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. + * + * The alternative to doing this is to wait for the next byte to + * trigger the data callback, just as in every other case. The + * problem with this is that this makes it difficult for the test + * harness to distinguish between complete-on-EOF and + * complete-on-length. It's not clear that this distinction is + * important for applications, but let's keep it for now. + */ + CALLBACK_DATA_(body, p - body_mark + 1, p - data); + goto reexecute_byte; + } + + break; + } + + /* read until EOF */ + case s_body_identity_eof: + MARK(body); + p = data + len - 1; + + break; + + case s_message_done: + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + break; + + case s_chunk_size_start: + { + assert(parser->nread == 1); + assert(parser->flags & F_CHUNKED); + + unhex_val = unhex[(unsigned char)ch]; + if (unhex_val == -1) { + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + parser->content_length = unhex_val; + parser->state = s_chunk_size; + break; + } + + case s_chunk_size: + { + uint64_t t; + + assert(parser->flags & F_CHUNKED); + + if (ch == CR) { + parser->state = s_chunk_size_almost_done; + break; + } + + unhex_val = unhex[(unsigned char)ch]; + + if (unhex_val == -1) { + if (ch == ';' || ch == ' ') { + parser->state = s_chunk_parameters; + break; + } + + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + t = parser->content_length; + t *= 16; + t += unhex_val; + + /* Overflow? */ + if (t < parser->content_length || t == ULLONG_MAX) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = t; + break; + } + + case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); + /* just ignore this. TODO check for overflow */ + if (ch == CR) { + parser->state = s_chunk_size_almost_done; + break; + } + break; + } + + case s_chunk_size_almost_done: + { + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + + parser->nread = 0; + + if (parser->content_length == 0) { + parser->flags |= F_TRAILING; + parser->state = s_header_field_start; + } else { + parser->state = s_chunk_data; + } + break; + } + + case s_chunk_data: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->flags & F_CHUNKED); + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* See the explanation in s_body_identity for why the content + * length and data pointers are managed this way. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + parser->state = s_chunk_data_almost_done; + } + + break; + } + + case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); + assert(parser->content_length == 0); + STRICT_CHECK(ch != CR); + parser->state = s_chunk_data_done; + CALLBACK_DATA(body); + break; + + case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + parser->nread = 0; + parser->state = s_chunk_size_start; + break; + + default: + assert(0 && "unhandled state"); + SET_ERRNO(HPE_INVALID_INTERNAL_STATE); + goto error; + } + } + + /* Run callbacks for any marks that we have leftover after we ran our of + * bytes. There should be at most one of these set, so it's OK to invoke + * them in series (unset marks will not result in callbacks). + * + * We use the NOADVANCE() variety of callbacks here because 'p' has already + * overflowed 'data' and this allows us to correct for the off-by-one that + * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' + * value that's in-bounds). + */ + + assert(((header_field_mark ? 1 : 0) + + (header_value_mark ? 1 : 0) + + (url_mark ? 1 : 0) + + (body_mark ? 1 : 0)) <= 1); + + CALLBACK_DATA_NOADVANCE(header_field); + CALLBACK_DATA_NOADVANCE(header_value); + CALLBACK_DATA_NOADVANCE(url); + CALLBACK_DATA_NOADVANCE(body); + + return len; + +error: + if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { + SET_ERRNO(HPE_UNKNOWN); + } + + return (p - data); +} + + +/* Does the parser need to see an EOF to find the end of the message? */ +int +http_message_needs_eof (const http_parser *parser) +{ + if (parser->type == HTTP_REQUEST) { + return 0; + } + + /* See RFC 2616 section 4.4 */ + if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */ + parser->status_code == 204 || /* No Content */ + parser->status_code == 304 || /* Not Modified */ + parser->flags & F_SKIPBODY) { /* response to a HEAD request */ + return 0; + } + + if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { + return 0; + } + + return 1; +} + + +int +http_should_keep_alive (const http_parser *parser) +{ + if (parser->http_major > 0 && parser->http_minor > 0) { + /* HTTP/1.1 */ + if (parser->flags & F_CONNECTION_CLOSE) { + return 0; + } + } else { + /* HTTP/1.0 or earlier */ + if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { + return 0; + } + } + + return !http_message_needs_eof(parser); +} + + +const char * +http_method_str (enum http_method m) +{ + return ELEM_AT(method_strings, m, ""); +} + + +void +http_parser_init (http_parser *parser, enum http_parser_type t) +{ + void *data = parser->data; /* preserve application data */ + memset(parser, 0, sizeof(*parser)); + parser->data = data; + parser->type = t; + parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res)); + parser->http_errno = HPE_OK; +} + +const char * +http_errno_name(enum http_errno err) { + assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + return http_strerror_tab[err].name; +} + +const char * +http_errno_description(enum http_errno err) { + assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + return http_strerror_tab[err].description; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* FALLTHROUGH */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* FALLTHROUGH */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':') { + return s_http_host_v6; + } + + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + enum http_host_state s; + + const char *p; + size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = p - buf; + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = p - buf ; + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_port_start: + case s_http_userinfo: + case s_http_userinfo_start: + return 1; + default: + break; + } + + return 0; +} + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + uf = old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* FALLTROUGH */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = p - buf; + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + /* Don't bother with endp; we've already validated the string */ + unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + + u->port = (uint16_t) v; + } + + return 0; +} + +void +http_parser_pause(http_parser *parser, int paused) { + /* Users should only be pausing/unpausing a parser that is not in an error + * state. In non-debug builds, there's not much that we can do about this + * other than ignore it. + */ + if (HTTP_PARSER_ERRNO(parser) == HPE_OK || + HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { + SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); + } else { + assert(0 && "Attempting to pause parser in error state"); + } +} + +int +http_body_is_final(const struct http_parser *parser) { + return parser->state == s_message_done; +} diff --git a/deps/http-parser/http_parser.h b/deps/http-parser/http_parser.h new file mode 100644 index 0000000..67e1d95 --- /dev/null +++ b/deps/http-parser/http_parser.h @@ -0,0 +1,305 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef http_parser_h +#define http_parser_h +#ifdef __cplusplus +extern "C" { +#endif + +#define HTTP_PARSER_VERSION_MAJOR 2 +#define HTTP_PARSER_VERSION_MINOR 0 + +#include +#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) +#include +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +typedef SIZE_T size_t; +typedef SSIZE_T ssize_t; +#elif defined(__sun) || defined(__sun__) +#include +#else +#include +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#endif + +/* Maximium header size allowed */ +#define HTTP_MAX_HEADER_SIZE (80*1024) + + +typedef struct http_parser http_parser; +typedef struct http_parser_settings http_parser_settings; + + +/* Callbacks should return non-zero to indicate an error. The parser will + * then halt execution. + * + * The one exception is on_headers_complete. In a HTTP_RESPONSE parser + * returning '1' from on_headers_complete will tell the parser that it + * should not expect a body. This is used when receiving a response to a + * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: + * chunked' headers that indicate the presence of a body. + * + * http_data_cb does not return data chunks. It will be call arbitrarally + * many times for each string. E.G. you might get 10 callbacks for "on_url" + * each providing just a few characters more data. + */ +typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); +typedef int (*http_cb) (http_parser*); + + +/* Request Methods */ +#define HTTP_METHOD_MAP(XX) \ + XX(0, DELETE, DELETE) \ + XX(1, GET, GET) \ + XX(2, HEAD, HEAD) \ + XX(3, POST, POST) \ + XX(4, PUT, PUT) \ + /* pathological */ \ + XX(5, CONNECT, CONNECT) \ + XX(6, OPTIONS, OPTIONS) \ + XX(7, TRACE, TRACE) \ + /* webdav */ \ + XX(8, COPY, COPY) \ + XX(9, LOCK, LOCK) \ + XX(10, MKCOL, MKCOL) \ + XX(11, MOVE, MOVE) \ + XX(12, PROPFIND, PROPFIND) \ + XX(13, PROPPATCH, PROPPATCH) \ + XX(14, SEARCH, SEARCH) \ + XX(15, UNLOCK, UNLOCK) \ + /* subversion */ \ + XX(16, REPORT, REPORT) \ + XX(17, MKACTIVITY, MKACTIVITY) \ + XX(18, CHECKOUT, CHECKOUT) \ + XX(19, MERGE, MERGE) \ + /* upnp */ \ + XX(20, MSEARCH, M-SEARCH) \ + XX(21, NOTIFY, NOTIFY) \ + XX(22, SUBSCRIBE, SUBSCRIBE) \ + XX(23, UNSUBSCRIBE, UNSUBSCRIBE) \ + /* RFC-5789 */ \ + XX(24, PATCH, PATCH) \ + XX(25, PURGE, PURGE) \ + +enum http_method + { +#define XX(num, name, string) HTTP_##name = num, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; + + +/* Flag values for http_parser.flags field */ +enum flags + { F_CHUNKED = 1 << 0 + , F_CONNECTION_KEEP_ALIVE = 1 << 1 + , F_CONNECTION_CLOSE = 1 << 2 + , F_TRAILING = 1 << 3 + , F_UPGRADE = 1 << 4 + , F_SKIPBODY = 1 << 5 + }; + + +/* Map for errno-related constants + * + * The provided argument should be a macro that takes 2 arguments. + */ +#define HTTP_ERRNO_MAP(XX) \ + /* No error */ \ + XX(OK, "success") \ + \ + /* Callback-related errors */ \ + XX(CB_message_begin, "the on_message_begin callback failed") \ + XX(CB_url, "the on_url callback failed") \ + XX(CB_header_field, "the on_header_field callback failed") \ + XX(CB_header_value, "the on_header_value callback failed") \ + XX(CB_headers_complete, "the on_headers_complete callback failed") \ + XX(CB_body, "the on_body callback failed") \ + XX(CB_message_complete, "the on_message_complete callback failed") \ + \ + /* Parsing-related errors */ \ + XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ + XX(HEADER_OVERFLOW, \ + "too many header bytes seen; overflow detected") \ + XX(CLOSED_CONNECTION, \ + "data received after completed connection: close message") \ + XX(INVALID_VERSION, "invalid HTTP version") \ + XX(INVALID_STATUS, "invalid HTTP status code") \ + XX(INVALID_METHOD, "invalid HTTP method") \ + XX(INVALID_URL, "invalid URL") \ + XX(INVALID_HOST, "invalid host") \ + XX(INVALID_PORT, "invalid port") \ + XX(INVALID_PATH, "invalid path") \ + XX(INVALID_QUERY_STRING, "invalid query string") \ + XX(INVALID_FRAGMENT, "invalid fragment") \ + XX(LF_EXPECTED, "LF character expected") \ + XX(INVALID_HEADER_TOKEN, "invalid character in header") \ + XX(INVALID_CONTENT_LENGTH, \ + "invalid character in content-length header") \ + XX(INVALID_CHUNK_SIZE, \ + "invalid character in chunk size header") \ + XX(INVALID_CONSTANT, "invalid constant string") \ + XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ + XX(STRICT, "strict mode assertion failed") \ + XX(PAUSED, "parser is paused") \ + XX(UNKNOWN, "an unknown error occurred") + + +/* Define HPE_* values for each errno value above */ +#define HTTP_ERRNO_GEN(n, s) HPE_##n, +enum http_errno { + HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) +}; +#undef HTTP_ERRNO_GEN + + +/* Get an http_errno value from an http_parser */ +#define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) + + +struct http_parser { + /** PRIVATE **/ + unsigned char type : 2; /* enum http_parser_type */ + unsigned char flags : 6; /* F_* values from 'flags' enum; semi-public */ + unsigned char state; /* enum state from http_parser.c */ + unsigned char header_state; /* enum header_state from http_parser.c */ + unsigned char index; /* index into current matcher */ + + uint32_t nread; /* # bytes read in various scenarios */ + uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ + + /** READ-ONLY **/ + unsigned short http_major; + unsigned short http_minor; + unsigned short status_code; /* responses only */ + unsigned char method; /* requests only */ + unsigned char http_errno : 7; + + /* 1 = Upgrade header was present and the parser has exited because of that. + * 0 = No upgrade header present. + * Should be checked when http_parser_execute() returns in addition to + * error checking. + */ + unsigned char upgrade : 1; + + /** PUBLIC **/ + void *data; /* A pointer to get hook to the "connection" or "socket" object */ +}; + + +struct http_parser_settings { + http_cb on_message_begin; + http_data_cb on_url; + http_data_cb on_header_field; + http_data_cb on_header_value; + http_cb on_headers_complete; + http_data_cb on_body; + http_cb on_message_complete; +}; + + +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_USERINFO = 6 + , UF_MAX = 7 + }; + + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t. + */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + + +void http_parser_init(http_parser *parser, enum http_parser_type type); + + +size_t http_parser_execute(http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len); + + +/* If http_should_keep_alive() in the on_headers_complete or + * on_message_complete callback returns 0, then this should be + * the last message on the connection. + * If you are the server, respond with the "Connection: close" header. + * If you are the client, close the connection. + */ +int http_should_keep_alive(const http_parser *parser); + +/* Returns a string version of the HTTP method. */ +const char *http_method_str(enum http_method m); + +/* Return a string name of the given error */ +const char *http_errno_name(enum http_errno err); + +/* Return a string description of the given error */ +const char *http_errno_description(enum http_errno err); + +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, size_t buflen, + int is_connect, + struct http_parser_url *u); + +/* Pause or un-pause the parser; a nonzero value pauses */ +void http_parser_pause(http_parser *parser, int paused); + +/* Checks if this is the final chunk of the body. */ +int http_body_is_final(const http_parser *parser); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/deps/regex/config.h b/deps/regex/config.h new file mode 100644 index 0000000..9537069 --- /dev/null +++ b/deps/regex/config.h @@ -0,0 +1,7 @@ +#ifndef _REGEX_CONFIG_H_ +#define _REGEX_CONFIG_H_ + +# define GAWK +# define NO_MBSUPPORT + +#endif diff --git a/deps/regex/regcomp.c b/deps/regex/regcomp.c new file mode 100644 index 0000000..43bffbc --- /dev/null +++ b/deps/regex/regcomp.c @@ -0,0 +1,3857 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, + size_t length, reg_syntax_t syntax); +static void re_compile_fastmap_iter (regex_t *bufp, + const re_dfastate_t *init_state, + char *fastmap); +static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); +#ifdef RE_ENABLE_I18N +static void free_charset (re_charset_t *cset); +#endif /* RE_ENABLE_I18N */ +static void free_workarea_compile (regex_t *preg); +static reg_errcode_t create_initial_state (re_dfa_t *dfa); +#ifdef RE_ENABLE_I18N +static void optimize_utf8 (re_dfa_t *dfa); +#endif +static reg_errcode_t analyze (regex_t *preg); +static reg_errcode_t preorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t postorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); +static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); +static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, + bin_tree_t *node); +static reg_errcode_t calc_first (void *extra, bin_tree_t *node); +static reg_errcode_t calc_next (void *extra, bin_tree_t *node); +static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); +static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); +static int search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint); +static reg_errcode_t calc_eclosure (re_dfa_t *dfa); +static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, + int node, int root); +static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); +static int fetch_number (re_string_t *input, re_token_t *token, + reg_syntax_t syntax); +static int peek_token (re_token_t *token, re_string_t *input, + reg_syntax_t syntax) internal_function; +static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, + re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, + reg_errcode_t *err); +static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token, int token_len, + re_dfa_t *dfa, + reg_syntax_t syntax, + int accept_hyphen); +static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token); +#ifdef RE_ENABLE_I18N +static reg_errcode_t build_equiv_class (bitset_t sbcset, + re_charset_t *mbcset, + int *equiv_class_alloc, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + re_charset_t *mbcset, + int *char_class_alloc, + const char *class_name, + reg_syntax_t syntax); +#else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_equiv_class (bitset_t sbcset, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + const char *class_name, + reg_syntax_t syntax); +#endif /* not RE_ENABLE_I18N */ +static bin_tree_t *build_charclass_op (re_dfa_t *dfa, + RE_TRANSLATE_TYPE trans, + const char *class_name, + const char *extra, + int non_match, reg_errcode_t *err); +static bin_tree_t *create_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + re_token_type_t type); +static bin_tree_t *create_token_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + const re_token_t *token); +static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); +static void free_token (re_token_t *node); +static reg_errcode_t free_tree (void *extra, bin_tree_t *node); +static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? */ + +const char __re_error_msgid[] attribute_hidden = + { +#define REG_NOERROR_IDX 0 + gettext_noop ("Success") /* REG_NOERROR */ + "\0" +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") + gettext_noop ("No match") /* REG_NOMATCH */ + "\0" +#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") + gettext_noop ("Invalid regular expression") /* REG_BADPAT */ + "\0" +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") + gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ + "\0" +#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") + gettext_noop ("Invalid character class name") /* REG_ECTYPE */ + "\0" +#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") + gettext_noop ("Trailing backslash") /* REG_EESCAPE */ + "\0" +#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") + gettext_noop ("Invalid back reference") /* REG_ESUBREG */ + "\0" +#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") + gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + "\0" +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") + gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ + "\0" +#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") + gettext_noop ("Unmatched \\{") /* REG_EBRACE */ + "\0" +#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") + gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ + "\0" +#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") + gettext_noop ("Invalid range end") /* REG_ERANGE */ + "\0" +#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") + gettext_noop ("Memory exhausted") /* REG_ESPACE */ + "\0" +#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") + gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ + "\0" +#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") + gettext_noop ("Premature end of regular expression") /* REG_EEND */ + "\0" +#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") + gettext_noop ("Regular expression too big") /* REG_ESIZE */ + "\0" +#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") + gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ + }; + +const size_t __re_error_msgid_idx[] attribute_hidden = + { + REG_NOERROR_IDX, + REG_NOMATCH_IDX, + REG_BADPAT_IDX, + REG_ECOLLATE_IDX, + REG_ECTYPE_IDX, + REG_EESCAPE_IDX, + REG_ESUBREG_IDX, + REG_EBRACK_IDX, + REG_EPAREN_IDX, + REG_EBRACE_IDX, + REG_BADBR_IDX, + REG_ERANGE_IDX, + REG_ESPACE_IDX, + REG_BADRPT_IDX, + REG_EEND_IDX, + REG_ESIZE_IDX, + REG_ERPAREN_IDX + }; + +/* Entry points for GNU code. */ + + +#ifdef ZOS_USS + +/* For ZOS USS we must define btowc */ + +wchar_t +btowc (int c) +{ + wchar_t wtmp[2]; + char tmp[2]; + + tmp[0] = c; + tmp[1] = 0; + + mbtowc (wtmp, tmp, 1); + return wtmp[0]; +} +#endif + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length LENGTH) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. */ + +const char * +re_compile_pattern (const char *pattern, + size_t length, + struct re_pattern_buffer *bufp) +{ + reg_errcode_t ret; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub, unless RE_NO_SUB is set. */ + bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = re_compile_internal (bufp, pattern, length, re_syntax_options); + + if (!ret) + return NULL; + return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. */ + +reg_syntax_t +re_set_syntax (reg_syntax_t syntax) +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} +#ifdef _LIBC +weak_alias (__re_set_syntax, re_set_syntax) +#endif + +int +re_compile_fastmap (struct re_pattern_buffer *bufp) +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + char *fastmap = bufp->fastmap; + + memset (fastmap, '\0', sizeof (char) * SBC_MAX); + re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); + if (dfa->init_state != dfa->init_state_word) + re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); + if (dfa->init_state != dfa->init_state_nl) + re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); + if (dfa->init_state != dfa->init_state_begbuf) + re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); + bufp->fastmap_accurate = 1; + return 0; +} +#ifdef _LIBC +weak_alias (__re_compile_fastmap, re_compile_fastmap) +#endif + +static inline void +__attribute ((always_inline)) +re_set_fastmap (char *fastmap, int icase, int ch) +{ + fastmap[ch] = 1; + if (icase) + fastmap[tolower (ch)] = 1; +} + +/* Helper function for re_compile_fastmap. + Compile fastmap for the initial_state INIT_STATE. */ + +static void +re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, + char *fastmap) +{ + volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + int node_cnt; + int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); + for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) + { + int node = init_state->nodes.elems[node_cnt]; + re_token_type_t type = dfa->nodes[node].type; + + if (type == CHARACTER) + { + re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); +#ifdef RE_ENABLE_I18N + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p; + wchar_t wc; + mbstate_t state; + + p = buf; + *p++ = dfa->nodes[node].opr.c; + while (++node < dfa->nodes_len + && dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].mb_partial) + *p++ = dfa->nodes[node].opr.c; + memset (&state, '\0', sizeof (state)); + if (__mbrtowc (&wc, (const char *) buf, p - buf, + &state) == p - buf + && (__wcrtomb ((char *) buf, towlower (wc), &state) + != (size_t) -1)) + re_set_fastmap (fastmap, 0, buf[0]); + re_free (buf); + } +#endif + } + else if (type == SIMPLE_BRACKET) + { + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } + } +#ifdef RE_ENABLE_I18N + else if (type == COMPLEX_BRACKET) + { + re_charset_t *cset = dfa->nodes[node].opr.mbcset; + int i; + +# ifdef _LIBC + /* See if we have to try all bytes which start multiple collation + elements. + e.g. In da_DK, we want to catch 'a' since "aa" is a valid + collation element, and don't catch 'b' since 'b' is + the only collation element which starts from 'b' (and + it is caught by SIMPLE_BRACKET). */ + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 + && (cset->ncoll_syms || cset->nranges)) + { + const int32_t *table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); + } +# endif /* _LIBC */ + + /* See if we have to start the match at all multibyte characters, + i.e. where we would not find an invalid sequence. This only + applies to multibyte character sets; for single byte character + sets, the SIMPLE_BRACKET again suffices. */ + if (dfa->mb_cur_max > 1 + && (cset->nchar_classes || cset->non_match || cset->nranges +# ifdef _LIBC + || cset->nequiv_classes +# endif /* _LIBC */ + )) + { + unsigned char c = 0; + do + { + mbstate_t mbs; + memset (&mbs, 0, sizeof (mbs)); + if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) + re_set_fastmap (fastmap, false, (int) c); + } + while (++c != 0); + } + + else + { + /* ... Else catch all bytes which can start the mbchars. */ + for (i = 0; i < cset->nmbchars; ++i) + { + char buf[256]; + mbstate_t state; + memset (&state, '\0', sizeof (state)); + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) + != (size_t) -1) + re_set_fastmap (fastmap, false, *(unsigned char *) buf); + } + } + } + } +#endif /* RE_ENABLE_I18N */ + else if (type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == OP_UTF8_PERIOD +#endif /* RE_ENABLE_I18N */ + || type == END_OF_RE) + { + memset (fastmap, '\1', sizeof (char) * SBC_MAX); + if (type == END_OF_RE) + bufp->can_be_null = 1; + return; + } + } +} + +/* Entry point for POSIX code. */ +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (regex_t *__restrict preg, + const char *__restrict pattern, + int cflags) +{ + reg_errcode_t ret; + reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + preg->buffer = NULL; + preg->allocated = 0; + preg->used = 0; + + /* Try to allocate space for the fastmap. */ + preg->fastmap = re_malloc (char, SBC_MAX); + if (BE (preg->fastmap == NULL, 0)) + return REG_ESPACE; + + syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. */ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + preg->no_sub = !!(cflags & REG_NOSUB); + preg->translate = NULL; + + ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) + ret = REG_EPAREN; + + /* We have already checked preg->fastmap != NULL. */ + if (BE (ret == REG_NOERROR, 1)) + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. This function never fails in this implementation. */ + (void) re_compile_fastmap (preg); + else + { + /* Some error occurred while compiling the expression. */ + re_free (preg->fastmap); + preg->fastmap = NULL; + } + + return (int) ret; +} +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror(int errcode, UNUSED const regex_t *__restrict preg, + char *__restrict errbuf, size_t errbuf_size) +{ + const char *msg; + size_t msg_size; + + if (BE (errcode < 0 + || errcode >= (int) (sizeof (__re_error_msgid_idx) + / sizeof (__re_error_msgid_idx[0])), 0)) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. */ + + if (BE (errbuf_size != 0, 1)) + { + if (BE (msg_size > errbuf_size, 0)) + { + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; + } + else + memcpy (errbuf, msg, msg_size); + } + + return msg_size; +} +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif + + +#ifdef RE_ENABLE_I18N +/* This static array is used for the map to single-byte characters when + UTF-8 is used. Otherwise we would allocate memory just to initialize + it the same all the time. UTF-8 is the preferred encoding so this is + a worthwhile optimization. */ +#if __GNUC__ >= 3 +static const bitset_t utf8_sb_map = { + /* Set the first 128 bits. */ + [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX +}; +#else /* ! (__GNUC__ >= 3) */ +static bitset_t utf8_sb_map; +#endif /* __GNUC__ >= 3 */ +#endif /* RE_ENABLE_I18N */ + + +static void +free_dfa_content (re_dfa_t *dfa) +{ + unsigned int i; + int j; + + if (dfa->nodes) + for (i = 0; i < dfa->nodes_len; ++i) + free_token (dfa->nodes + i); + re_free (dfa->nexts); + for (i = 0; i < dfa->nodes_len; ++i) + { + if (dfa->eclosures != NULL) + re_node_set_free (dfa->eclosures + i); + if (dfa->inveclosures != NULL) + re_node_set_free (dfa->inveclosures + i); + if (dfa->edests != NULL) + re_node_set_free (dfa->edests + i); + } + re_free (dfa->edests); + re_free (dfa->eclosures); + re_free (dfa->inveclosures); + re_free (dfa->nodes); + + if (dfa->state_table) + for (i = 0; i <= dfa->state_hash_mask; ++i) + { + struct re_state_table_entry *entry = dfa->state_table + i; + for (j = 0; j < entry->num; ++j) + { + re_dfastate_t *state = entry->array[j]; + free_state (state); + } + re_free (entry->array); + } + re_free (dfa->state_table); +#ifdef RE_ENABLE_I18N + if (dfa->sb_char != utf8_sb_map) + re_free (dfa->sb_char); +#endif + re_free (dfa->subexp_map); +#ifdef DEBUG + re_free (dfa->re_str); +#endif + + re_free (dfa); +} + + +/* Free dynamically allocated space used by PREG. */ + +void +regfree (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + if (BE (dfa != NULL, 1)) + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + + re_free (preg->fastmap); + preg->fastmap = NULL; + + re_free (preg->translate); + preg->translate = NULL; +} +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +# ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec above without link errors. */ +weak_function +# endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + char *fastmap; + + if (!s) + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (re_comp_buf.buffer) + { + fastmap = re_comp_buf.fastmap; + re_comp_buf.fastmap = NULL; + __regfree (&re_comp_buf); + memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); + re_comp_buf.fastmap = fastmap; + } + + if (re_comp_buf.fastmap == NULL) + { + re_comp_buf.fastmap = (char *) malloc (SBC_MAX); + if (re_comp_buf.fastmap == NULL) + return (char *) gettext (__re_error_msgid + + __re_error_msgid_idx[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ + return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} + +#ifdef _LIBC +libc_freeres_fn (free_mem) +{ + __regfree (&re_comp_buf); +} +#endif + +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. + Compile the regular expression PATTERN, whose length is LENGTH. + SYNTAX indicate regular expression's syntax. */ + +static reg_errcode_t +re_compile_internal (regex_t *preg, const char * pattern, size_t length, + reg_syntax_t syntax) +{ + reg_errcode_t err = REG_NOERROR; + re_dfa_t *dfa; + re_string_t regexp; + + /* Initialize the pattern buffer. */ + preg->fastmap_accurate = 0; + preg->syntax = syntax; + preg->not_bol = preg->not_eol = 0; + preg->used = 0; + preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; + + /* Initialize the dfa. */ + dfa = (re_dfa_t *) preg->buffer; + if (BE (preg->allocated < sizeof (re_dfa_t), 0)) + { + /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. If ->buffer is NULL this + is a simple allocation. */ + dfa = re_realloc (preg->buffer, re_dfa_t, 1); + if (dfa == NULL) + return REG_ESPACE; + preg->allocated = sizeof (re_dfa_t); + preg->buffer = (unsigned char *) dfa; + } + preg->used = sizeof (re_dfa_t); + + err = init_dfa (dfa, length); + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } +#ifdef DEBUG + /* Note: length+1 will not overflow since it is checked in init_dfa. */ + dfa->re_str = re_malloc (char, length + 1); + strncpy (dfa->re_str, pattern, length + 1); +#endif + + __libc_lock_init (dfa->lock); + + err = re_string_construct (®exp, pattern, length, preg->translate, + syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + { + re_compile_internal_free_return: + free_workarea_compile (preg); + re_string_destruct (®exp); + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } + + /* Parse the regular expression, and build a structure tree. */ + preg->re_nsub = 0; + dfa->str_tree = parse (®exp, preg, syntax, &err); + if (BE (dfa->str_tree == NULL, 0)) + goto re_compile_internal_free_return; + + /* Analyze the tree and create the nfa. */ + err = analyze (preg); + if (BE (err != REG_NOERROR, 0)) + goto re_compile_internal_free_return; + +#ifdef RE_ENABLE_I18N + /* If possible, do searching in single byte encoding to speed things up. */ + if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) + optimize_utf8 (dfa); +#endif + + /* Then create the initial state of the dfa. */ + err = create_initial_state (dfa); + + /* Release work areas. */ + free_workarea_compile (preg); + re_string_destruct (®exp); + + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + } + + return err; +} + +/* Initialize DFA. We use the length of the regular expression PAT_LEN + as the initial length of some arrays. */ + +static reg_errcode_t +init_dfa (re_dfa_t *dfa, size_t pat_len) +{ + unsigned int table_size; + + memset (dfa, '\0', sizeof (re_dfa_t)); + + /* Force allocation of str_tree_storage the first time. */ + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + + /* Avoid overflows. */ + if (pat_len == SIZE_MAX) + return REG_ESPACE; + + dfa->nodes_alloc = pat_len + 1; + dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); + + /* table_size = 2 ^ ceil(log pat_len) */ + for (table_size = 1; ; table_size <<= 1) + if (table_size > pat_len) + break; + + dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); + dfa->state_hash_mask = table_size - 1; + + dfa->mb_cur_max = MB_CUR_MAX; +#ifdef _LIBC + if (dfa->mb_cur_max == 6 + && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) + dfa->is_utf8 = 1; + dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) + != 0); +#else + dfa->is_utf8 = 1; + /* We check exhaustively in the loop below if this charset is a + superset of ASCII. */ + dfa->map_notascii = 0; +#endif + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + if (dfa->is_utf8) + { +#if !defined(__GNUC__) || __GNUC__ < 3 + static short utf8_sb_map_inited = 0; + + if (! utf8_sb_map_inited) + { + int i; + + utf8_sb_map_inited = 0; + for (i = 0; i <= 0x80 / BITSET_WORD_BITS - 1; i++) + utf8_sb_map[i] = BITSET_WORD_MAX; + } +#endif + dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; + } + else + { + int i, j, ch; + + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); + if (BE (dfa->sb_char == NULL, 0)) + return REG_ESPACE; + + /* Set the bits corresponding to single byte chars. */ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + { + wint_t wch = __btowc (ch); + if (wch != WEOF) + dfa->sb_char[i] |= (bitset_word_t) 1 << j; +# ifndef _LIBC + if (isascii (ch) && wch != ch) + dfa->map_notascii = 1; +# endif + } + } + } +#endif + + if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +/* Initialize WORD_CHAR table, which indicate which character is + "word". In this case "word" means that it is the word construction + character used by some operators like "\<", "\>", etc. */ + +static void +internal_function +init_word_char (re_dfa_t *dfa) +{ + int i, j, ch; + dfa->word_ops_used = 1; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (isalnum (ch) || ch == '_') + dfa->word_char[i] |= (bitset_word_t) 1 << j; +} + +/* Free the work area which are only used while compiling. */ + +static void +free_workarea_compile (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_storage_t *storage, *next; + for (storage = dfa->str_tree_storage; storage; storage = next) + { + next = storage->next; + re_free (storage); + } + dfa->str_tree_storage = NULL; + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + dfa->str_tree = NULL; + re_free (dfa->org_indices); + dfa->org_indices = NULL; +} + +/* Create initial states for all contexts. */ + +static reg_errcode_t +create_initial_state (re_dfa_t *dfa) +{ + int first, i; + reg_errcode_t err; + re_node_set init_nodes; + + /* Initial states have the epsilon closure of the node which is + the first node of the regular expression. */ + first = dfa->str_tree->first->node_idx; + dfa->init_node = first; + err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* The back-references which are in initial states can epsilon transit, + since in this case all of the subexpressions can be null. + Then we add epsilon closures of the nodes which are the next nodes of + the back-references. */ + if (dfa->nbackref > 0) + for (i = 0; i < init_nodes.nelem; ++i) + { + int node_idx = init_nodes.elems[i]; + re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + if (type != OP_BACK_REF) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + + if (type == OP_BACK_REF) + { + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) + { + reg_errcode_t err = re_node_set_merge (&init_nodes, + dfa->eclosures + + dest_idx); + if (err != REG_NOERROR) + return err; + i = 0; + } + } + } + + /* It must be the first time to invoke acquire_state. */ + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. */ + if (BE (dfa->init_state == NULL, 0)) + return err; + if (dfa->init_state->has_constraint) + { + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_WORD); + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_NEWLINE); + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, + CONTEXT_NEWLINE + | CONTEXT_BEGBUF); + if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return err; + } + else + dfa->init_state_word = dfa->init_state_nl + = dfa->init_state_begbuf = dfa->init_state; + + re_node_set_free (&init_nodes); + return REG_NOERROR; +} + +#ifdef RE_ENABLE_I18N +/* If it is possible to do searching in single byte encoding instead of UTF-8 + to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change + DFA nodes where needed. */ + +static void +optimize_utf8 (re_dfa_t *dfa) +{ + int node, i, mb_chars = 0, has_period = 0; + + for (node = 0; node < dfa->nodes_len; ++node) + switch (dfa->nodes[node].type) + { + case CHARACTER: + if (dfa->nodes[node].opr.c >= 0x80) + mb_chars = 1; + break; + case ANCHOR: + switch (dfa->nodes[node].opr.ctx_type) + { + case LINE_FIRST: + case LINE_LAST: + case BUF_FIRST: + case BUF_LAST: + break; + default: + /* Word anchors etc. cannot be handled. It's okay to test + opr.ctx_type since constraints (for all DFA nodes) are + created by ORing one or more opr.ctx_type values. */ + return; + } + break; + case OP_PERIOD: + has_period = 1; + break; + case OP_BACK_REF: + case OP_ALT: + case END_OF_RE: + case OP_DUP_ASTERISK: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + break; + case COMPLEX_BRACKET: + return; + case SIMPLE_BRACKET: + /* Just double check. The non-ASCII range starts at 0x80. */ + assert (0x80 % BITSET_WORD_BITS == 0); + for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) + if (dfa->nodes[node].opr.sbcset[i]) + return; + break; + default: + abort (); + } + + if (mb_chars || has_period) + for (node = 0; node < dfa->nodes_len; ++node) + { + if (dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].opr.c >= 0x80) + dfa->nodes[node].mb_partial = 0; + else if (dfa->nodes[node].type == OP_PERIOD) + dfa->nodes[node].type = OP_UTF8_PERIOD; + } + + /* The search can be in single byte locale. */ + dfa->mb_cur_max = 1; + dfa->is_utf8 = 0; + dfa->has_mb_node = dfa->nbackref > 0 || has_period; +} +#endif + +/* Analyze the structure tree, and calculate "first", "next", "edest", + "eclosure", and "inveclosure". */ + +static reg_errcode_t +analyze (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + reg_errcode_t ret; + + /* Allocate arrays. */ + dfa->nexts = re_malloc (int, dfa->nodes_alloc); + dfa->org_indices = re_malloc (int, dfa->nodes_alloc); + dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); + dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); + if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL + || dfa->eclosures == NULL, 0)) + return REG_ESPACE; + + dfa->subexp_map = re_malloc (int, preg->re_nsub); + if (dfa->subexp_map != NULL) + { + unsigned int i; + for (i = 0; i < preg->re_nsub; i++) + dfa->subexp_map[i] = i; + preorder (dfa->str_tree, optimize_subexps, dfa); + for (i = 0; i < preg->re_nsub; i++) + if (dfa->subexp_map[i] != (int)i) + break; + if (i == preg->re_nsub) + { + free (dfa->subexp_map); + dfa->subexp_map = NULL; + } + } + + ret = postorder (dfa->str_tree, lower_subexps, preg); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = postorder (dfa->str_tree, calc_first, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + preorder (dfa->str_tree, calc_next, dfa); + ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = calc_eclosure (dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + /* We only need this during the prune_impossible_nodes pass in regexec.c; + skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ + if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) + || dfa->nbackref) + { + dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); + if (BE (dfa->inveclosures == NULL, 0)) + return REG_ESPACE; + ret = calc_inveclosure (dfa); + } + + return ret; +} + +/* Our parse trees are very unbalanced, so we cannot use a stack to + implement parse tree visits. Instead, we use parent pointers and + some hairy code in these two functions. */ +static reg_errcode_t +postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node, *prev; + + for (node = root; ; ) + { + /* Descend down the tree, preferably to the left (or to the right + if that's the only child). */ + while (node->left || node->right) + if (node->left) + node = node->left; + else + node = node->right; + + do + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + if (node->parent == NULL) + return REG_NOERROR; + prev = node; + node = node->parent; + } + /* Go up while we have a node that is reached from the right. */ + while (node->right == prev || node->right == NULL); + node = node->right; + } +} + +static reg_errcode_t +preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node; + + for (node = root; ; ) + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Go to the left node, or up and to the right. */ + if (node->left) + node = node->left; + else + { + bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + if (!node) + return REG_NOERROR; + } + node = node->right; + } + } +} + +/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell + re_search_internal to map the inner one's opr.idx to this one's. Adjust + backreferences as well. Requires a preorder visit. */ +static reg_errcode_t +optimize_subexps (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + + if (node->token.type == OP_BACK_REF && dfa->subexp_map) + { + int idx = node->token.opr.idx; + node->token.opr.idx = dfa->subexp_map[idx]; + dfa->used_bkref_map |= 1 << node->token.opr.idx; + } + + else if (node->token.type == SUBEXP + && node->left && node->left->token.type == SUBEXP) + { + int other_idx = node->left->token.opr.idx; + + node->left = node->left->left; + if (node->left) + node->left->parent = node; + + dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); + } + + return REG_NOERROR; +} + +/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation + of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */ +static reg_errcode_t +lower_subexps (void *extra, bin_tree_t *node) +{ + regex_t *preg = (regex_t *) extra; + reg_errcode_t err = REG_NOERROR; + + if (node->left && node->left->token.type == SUBEXP) + { + node->left = lower_subexp (&err, preg, node->left); + if (node->left) + node->left->parent = node; + } + if (node->right && node->right->token.type == SUBEXP) + { + node->right = lower_subexp (&err, preg, node->right); + if (node->right) + node->right->parent = node; + } + + return err; +} + +static bin_tree_t * +lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *body = node->left; + bin_tree_t *op, *cls, *tree1, *tree; + + if (preg->no_sub + /* We do not optimize empty subexpressions, because otherwise we may + have bad CONCAT nodes with NULL children. This is obviously not + very common, so we do not lose much. An example that triggers + this case is the sed "script" /\(\)/x. */ + && node->left != NULL + && (node->token.opr.idx >= BITSET_WORD_BITS + || !(dfa->used_bkref_map + & ((bitset_word_t) 1 << node->token.opr.idx)))) + return node->left; + + /* Convert the SUBEXP node to the concatenation of an + OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ + op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); + cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); + tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; + tree = create_tree (dfa, op, tree1, CONCAT); + if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + + op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; + op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; + return tree; +} + +/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton + nodes. Requires a postorder visit. */ +static reg_errcode_t +calc_first (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + if (node->token.type == CONCAT) + { + node->first = node->left->first; + node->node_idx = node->left->node_idx; + } + else + { + node->first = node; + node->node_idx = re_dfa_add_node (dfa, node->token); + if (BE (node->node_idx == -1, 0)) + return REG_ESPACE; + if (node->token.type == ANCHOR) + dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; + } + return REG_NOERROR; +} + +/* Pass 2: compute NEXT on the tree. Preorder visit. */ +static reg_errcode_t +calc_next (UNUSED void *extra, bin_tree_t *node) +{ + switch (node->token.type) + { + case OP_DUP_ASTERISK: + node->left->next = node; + break; + case CONCAT: + node->left->next = node->right->first; + node->right->next = node->next; + break; + default: + if (node->left) + node->left->next = node->next; + if (node->right) + node->right->next = node->next; + break; + } + return REG_NOERROR; +} + +/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */ +static reg_errcode_t +link_nfa_nodes (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + int idx = node->node_idx; + reg_errcode_t err = REG_NOERROR; + + switch (node->token.type) + { + case CONCAT: + break; + + case END_OF_RE: + assert (node->next == NULL); + break; + + case OP_DUP_ASTERISK: + case OP_ALT: + { + int left, right; + dfa->has_plural_match = 1; + if (node->left != NULL) + left = node->left->first->node_idx; + else + left = node->next->node_idx; + if (node->right != NULL) + right = node->right->first->node_idx; + else + right = node->next->node_idx; + assert (left > -1); + assert (right > -1); + err = re_node_set_init_2 (dfa->edests + idx, left, right); + } + break; + + case ANCHOR: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); + break; + + case OP_BACK_REF: + dfa->nexts[idx] = node->next->node_idx; + if (node->token.type == OP_BACK_REF) + err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); + break; + + default: + assert (!IS_EPSILON_NODE (node->token.type)); + dfa->nexts[idx] = node->next->node_idx; + break; + } + + return err; +} + +/* Duplicate the epsilon closure of the node ROOT_NODE. + Note that duplicated nodes have constraint INIT_CONSTRAINT in addition + to their own constraint. */ + +static reg_errcode_t +internal_function +duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, + int root_node, unsigned int init_constraint) +{ + int org_node, clone_node, ret; + unsigned int constraint = init_constraint; + for (org_node = top_org_node, clone_node = top_clone_node;;) + { + int org_dest, clone_dest; + if (dfa->nodes[org_node].type == OP_BACK_REF) + { + /* If the back reference epsilon-transit, its destination must + also have the constraint. Then duplicate the epsilon closure + of the destination of the back reference, and store it in + edests of the back reference. */ + org_dest = dfa->nexts[org_node]; + re_node_set_empty (dfa->edests + clone_node); + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + dfa->nexts[clone_node] = dfa->nexts[org_node]; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else if (dfa->edests[org_node].nelem == 0) + { + /* In case of the node can't epsilon-transit, don't duplicate the + destination and store the original destination as the + destination of the node. */ + dfa->nexts[clone_node] = dfa->nexts[org_node]; + break; + } + else if (dfa->edests[org_node].nelem == 1) + { + /* In case of the node can epsilon-transit, and it has only one + destination. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* If the node is root_node itself, it means the epsilon clsoure + has a loop. Then tie it to the destination of the root_node. */ + if (org_node == root_node && clone_node != org_node) + { + ret = re_node_set_insert (dfa->edests + clone_node, org_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + break; + } + /* In case of the node has another constraint, add it. */ + constraint |= dfa->nodes[org_node].constraint; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else /* dfa->edests[org_node].nelem == 2 */ + { + /* In case of the node can epsilon-transit, and it has two + destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* Search for a duplicated node which satisfies the constraint. */ + clone_dest = search_duplicated_node (dfa, org_dest, constraint); + if (clone_dest == -1) + { + /* There is no such duplicated node, create a new one. */ + reg_errcode_t err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + err = duplicate_node_closure (dfa, org_dest, clone_dest, + root_node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + { + /* There is a duplicated node which satisfies the constraint, + use it to avoid infinite loop. */ + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + + org_dest = dfa->edests[org_node].elems[1]; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + org_node = org_dest; + clone_node = clone_dest; + } + return REG_NOERROR; +} + +/* Search for a node which is duplicated from the node ORG_NODE, and + satisfies the constraint CONSTRAINT. */ + +static int +search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint) +{ + int idx; + for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) + { + if (org_node == dfa->org_indices[idx] + && constraint == dfa->nodes[idx].constraint) + return idx; /* Found. */ + } + return -1; /* Not found. */ +} + +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + Return the index of the new node, or -1 if insufficient storage is + available. */ + +static int +duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) +{ + int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); + if (BE (dup_idx != -1, 1)) + { + dfa->nodes[dup_idx].constraint = constraint; + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; + dfa->nodes[dup_idx].duplicated = 1; + + /* Store the index of the original node. */ + dfa->org_indices[dup_idx] = org_idx; + } + return dup_idx; +} + +static reg_errcode_t +calc_inveclosure (re_dfa_t *dfa) +{ + int ret; + unsigned int src, idx; + for (idx = 0; idx < dfa->nodes_len; ++idx) + re_node_set_init_empty (dfa->inveclosures + idx); + + for (src = 0; src < dfa->nodes_len; ++src) + { + int *elems = dfa->eclosures[src].elems; + int idx; + for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) + { + ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + } + + return REG_NOERROR; +} + +/* Calculate "eclosure" for all the node in DFA. */ + +static reg_errcode_t +calc_eclosure (re_dfa_t *dfa) +{ + size_t node_idx; + int incomplete; +#ifdef DEBUG + assert (dfa->nodes_len > 0); +#endif + incomplete = 0; + /* For each nodes, calculate epsilon closure. */ + for (node_idx = 0; ; ++node_idx) + { + reg_errcode_t err; + re_node_set eclosure_elem; + if (node_idx == dfa->nodes_len) + { + if (!incomplete) + break; + incomplete = 0; + node_idx = 0; + } + +#ifdef DEBUG + assert (dfa->eclosures[node_idx].nelem != -1); +#endif + + /* If we have already calculated, skip it. */ + if (dfa->eclosures[node_idx].nelem != 0) + continue; + /* Calculate epsilon closure of `node_idx'. */ + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (dfa->eclosures[node_idx].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + return REG_NOERROR; +} + +/* Calculate epsilon closure of NODE. */ + +static reg_errcode_t +calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) +{ + reg_errcode_t err; + int i; + re_node_set eclosure; + int ret; + int incomplete = 0; + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* This indicates that we are calculating this node now. + We reference this value to avoid infinite loop. */ + dfa->eclosures[node].nelem = -1; + + /* If the current node has constraints, duplicate all nodes + since they must inherit the constraints. */ + if (dfa->nodes[node].constraint + && dfa->edests[node].nelem + && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) + { + err = duplicate_node_closure (dfa, node, node, node, + dfa->nodes[node].constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Expand each epsilon destination nodes. */ + if (IS_EPSILON_NODE(dfa->nodes[node].type)) + for (i = 0; i < dfa->edests[node].nelem; ++i) + { + re_node_set eclosure_elem; + int edest = dfa->edests[node].elems[i]; + /* If calculating the epsilon closure of `edest' is in progress, + return intermediate result. */ + if (dfa->eclosures[edest].nelem == -1) + { + incomplete = 1; + continue; + } + /* If we haven't calculated the epsilon closure of `edest' yet, + calculate now. Otherwise use calculated epsilon closure. */ + if (dfa->eclosures[edest].nelem == 0) + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + eclosure_elem = dfa->eclosures[edest]; + /* Merge the epsilon closure of `edest'. */ + err = re_node_set_merge (&eclosure, &eclosure_elem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* If the epsilon closure of `edest' is incomplete, + the epsilon closure of this node is also incomplete. */ + if (dfa->eclosures[edest].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + + /* An epsilon closure includes itself. */ + ret = re_node_set_insert (&eclosure, node); + if (BE (ret < 0, 0)) + return REG_ESPACE; + if (incomplete && !root) + dfa->eclosures[node].nelem = 0; + else + dfa->eclosures[node] = eclosure; + *new_set = eclosure; + return REG_NOERROR; +} + +/* Functions for token which are used in the parser. */ + +/* Fetch a token from INPUT. + We must not use this function inside bracket expressions. */ + +static void +internal_function +fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) +{ + re_string_skip_bytes (input, peek_token (result, input, syntax)); +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function inside bracket expressions. */ + +static int +internal_function +peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + + c = re_string_peek_byte (input, 0); + token->opr.c = c; + + token->word_char = 0; +#ifdef RE_ENABLE_I18N + token->mb_partial = 0; + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + token->mb_partial = 1; + return 1; + } +#endif + if (c == '\\') + { + unsigned char c2; + if (re_string_cur_idx (input) + 1 >= re_string_length (input)) + { + token->type = BACK_SLASH; + return 1; + } + + c2 = re_string_peek_byte_case (input, 1); + token->opr.c = c2; + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, + re_string_cur_idx (input) + 1); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (c2) != 0; + + switch (c2) + { + case '|': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (!(syntax & RE_NO_BK_REFS)) + { + token->type = OP_BACK_REF; + token->opr.idx = c2 - '1'; + } + break; + case '<': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_FIRST; + } + break; + case '>': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_LAST; + } + break; + case 'b': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_DELIM; + } + break; + case 'B': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = NOT_WORD_DELIM; + } + break; + case 'w': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_WORD; + break; + case 'W': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTWORD; + break; + case 's': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_SPACE; + break; + case 'S': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTSPACE; + break; + case '`': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_FIRST; + } + break; + case '\'': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_LAST; + } + break; + case '(': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_CLOSE_SUBEXP; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_CLOSE_DUP_NUM; + break; + default: + break; + } + return 2; + } + + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (token->opr.c); + + switch (c) + { + case '\n': + if (syntax & RE_NEWLINE_ALT) + token->type = OP_ALT; + break; + case '|': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '*': + token->type = OP_DUP_ASTERISK; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_CLOSE_DUP_NUM; + break; + case '(': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_CLOSE_SUBEXP; + break; + case '[': + token->type = OP_OPEN_BRACKET; + break; + case '.': + token->type = OP_PERIOD; + break; + case '^': + if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && + re_string_cur_idx (input) != 0) + { + char prev = re_string_peek_byte (input, -1); + if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_FIRST; + break; + case '$': + if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && + re_string_cur_idx (input) + 1 != re_string_length (input)) + { + re_token_t next; + re_string_skip_bytes (input, 1); + peek_token (&next, input, syntax); + re_string_skip_bytes (input, -1); + if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_LAST; + break; + default: + break; + } + return 1; +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function out of bracket expressions. */ + +static int +internal_function +peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + c = re_string_peek_byte (input, 0); + token->opr.c = c; + +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + return 1; + } +#endif /* RE_ENABLE_I18N */ + + if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) + && re_string_cur_idx (input) + 1 < re_string_length (input)) + { + /* In this case, '\' escape a character. */ + unsigned char c2; + re_string_skip_bytes (input, 1); + c2 = re_string_peek_byte (input, 0); + token->opr.c = c2; + token->type = CHARACTER; + return 1; + } + if (c == '[') /* '[' is a special char in a bracket exps. */ + { + unsigned char c2; + int token_len; + if (re_string_cur_idx (input) + 1 < re_string_length (input)) + c2 = re_string_peek_byte (input, 1); + else + c2 = 0; + token->opr.c = c2; + token_len = 2; + switch (c2) + { + case '.': + token->type = OP_OPEN_COLL_ELEM; + break; + case '=': + token->type = OP_OPEN_EQUIV_CLASS; + break; + case ':': + if (syntax & RE_CHAR_CLASSES) + { + token->type = OP_OPEN_CHAR_CLASS; + break; + } + /* else fall through. */ + default: + token->type = CHARACTER; + token->opr.c = c; + token_len = 1; + break; + } + return token_len; + } + switch (c) + { + case '-': + token->type = OP_CHARSET_RANGE; + break; + case ']': + token->type = OP_CLOSE_BRACKET; + break; + case '^': + token->type = OP_NON_MATCH_LIST; + break; + default: + token->type = CHARACTER; + } + return 1; +} + +/* Functions for parser. */ + +/* Entry point of the parser. + Parse the regular expression REGEXP and return the structure tree. + If an error is occured, ERR is set by error code, and return NULL. + This function build the following tree, from regular expression : + CAT + / \ + / \ + EOR + + CAT means concatenation. + EOR means end of regular expression. */ + +static bin_tree_t * +parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, + reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *eor, *root; + re_token_t current_token; + dfa->syntax = syntax; + fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); + tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + eor = create_tree (dfa, NULL, NULL, END_OF_RE); + if (tree != NULL) + root = create_tree (dfa, tree, eor, CONCAT); + else + root = eor; + if (BE (eor == NULL || root == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + return root; +} + +/* This function build the following tree, from regular expression + |: + ALT + / \ + / \ + + + ALT means alternative, which represents the operator `|'. */ + +static bin_tree_t * +parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *branch = NULL; + tree = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type == OP_ALT) + { + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + if (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + branch = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && branch == NULL, 0)) + return NULL; + } + else + branch = NULL; + tree = create_tree (dfa, tree, branch, OP_ALT); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + return tree; +} + +/* This function build the following tree, from regular expression + : + CAT + / \ + / \ + + + CAT means concatenation. */ + +static bin_tree_t * +parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + bin_tree_t *tree, *exp; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + tree = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + exp = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && exp == NULL, 0)) + { + return NULL; + } + if (tree != NULL && exp != NULL) + { + tree = create_tree (dfa, tree, exp, CONCAT); + if (tree == NULL) + { + *err = REG_ESPACE; + return NULL; + } + } + else if (tree == NULL) + tree = exp; + /* Otherwise exp == NULL, we don't need to create new tree. */ + } + return tree; +} + +/* This function build the following tree, from regular expression a*: + * + | + a +*/ + +static bin_tree_t * +parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + switch (token->type) + { + case CHARACTER: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (!re_string_eoi (regexp) + && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) + { + bin_tree_t *mbc_remain; + fetch_token (token, regexp, syntax); + mbc_remain = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree, mbc_remain, CONCAT); + if (BE (mbc_remain == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + } +#endif + break; + case OP_OPEN_SUBEXP: + tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_OPEN_BRACKET: + tree = parse_bracket_exp (regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_BACK_REF: + if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) + { + *err = REG_ESUBREG; + return NULL; + } + dfa->used_bkref_map |= 1 << token->opr.idx; + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + ++dfa->nbackref; + dfa->has_mb_node = 1; + break; + case OP_OPEN_DUP_NUM: + if (syntax & RE_CONTEXT_INVALID_DUP) + { + *err = REG_BADRPT; + return NULL; + } + /* FALLTHROUGH */ + case OP_DUP_ASTERISK: + case OP_DUP_PLUS: + case OP_DUP_QUESTION: + if (syntax & RE_CONTEXT_INVALID_OPS) + { + *err = REG_BADRPT; + return NULL; + } + else if (syntax & RE_CONTEXT_INDEP_OPS) + { + fetch_token (token, regexp, syntax); + return parse_expression (regexp, preg, token, syntax, nest, err); + } + /* else fall through */ + case OP_CLOSE_SUBEXP: + if ((token->type == OP_CLOSE_SUBEXP) && + !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) + { + *err = REG_ERPAREN; + return NULL; + } + /* else fall through */ + case OP_CLOSE_DUP_NUM: + /* We treat it as a normal character. */ + + /* Then we can these characters as normal characters. */ + token->type = CHARACTER; + /* mb_partial and word_char bits should be initialized already + by peek_token. */ + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + break; + case ANCHOR: + if ((token->opr.ctx_type + & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) + && dfa->word_ops_used == 0) + init_word_char (dfa); + if (token->opr.ctx_type == WORD_DELIM + || token->opr.ctx_type == NOT_WORD_DELIM) + { + bin_tree_t *tree_first, *tree_last; + if (token->opr.ctx_type == WORD_DELIM) + { + token->opr.ctx_type = WORD_FIRST; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = WORD_LAST; + } + else + { + token->opr.ctx_type = INSIDE_WORD; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = INSIDE_NOTWORD; + } + tree_last = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree_first, tree_last, OP_ALT); + if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + else + { + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + /* We must return here, since ANCHORs can't be followed + by repetition operators. + eg. RE"^*" is invalid or "", + it must not be "". */ + fetch_token (token, regexp, syntax); + return tree; + case OP_PERIOD: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + if (dfa->mb_cur_max > 1) + dfa->has_mb_node = 1; + break; + case OP_WORD: + case OP_NOTWORD: + tree = build_charclass_op (dfa, regexp->trans, + "alnum", + "_", + token->type == OP_NOTWORD, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_SPACE: + case OP_NOTSPACE: + tree = build_charclass_op (dfa, regexp->trans, + "space", + "", + token->type == OP_NOTSPACE, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_ALT: + case END_OF_RE: + return NULL; + case BACK_SLASH: + *err = REG_EESCAPE; + return NULL; + default: + /* Must not happen? */ +#ifdef DEBUG + assert (0); +#endif + return NULL; + } + fetch_token (token, regexp, syntax); + + while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS + || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) + { + tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + /* In BRE consecutive duplications are not allowed. */ + if ((syntax & RE_CONTEXT_INVALID_DUP) + && (token->type == OP_DUP_ASTERISK + || token->type == OP_OPEN_DUP_NUM)) + { + *err = REG_BADRPT; + return NULL; + } + } + + return tree; +} + +/* This function build the following tree, from regular expression + (): + SUBEXP + | + +*/ + +static bin_tree_t * +parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + size_t cur_nsub; + cur_nsub = preg->re_nsub++; + + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + + /* The subexpression may be a null string. */ + if (token->type == OP_CLOSE_SUBEXP) + tree = NULL; + else + { + tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); + if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) + *err = REG_EPAREN; + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + + if (cur_nsub <= '9' - '1') + dfa->completed_bkref_map |= 1 << cur_nsub; + + tree = create_tree (dfa, tree, NULL, SUBEXP); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + tree->token.opr.idx = cur_nsub; + return tree; +} + +/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ + +static bin_tree_t * +parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) +{ + bin_tree_t *tree = NULL, *old_tree = NULL; + int i, start, end, start_idx = re_string_cur_idx (regexp); +#ifndef RE_TOKEN_INIT_BUG + re_token_t start_token = *token; +#else + re_token_t start_token; + + memcpy ((void *) &start_token, (void *) token, sizeof start_token); +#endif + + if (token->type == OP_OPEN_DUP_NUM) + { + end = 0; + start = fetch_number (regexp, token, syntax); + if (start == -1) + { + if (token->type == CHARACTER && token->opr.c == ',') + start = 0; /* We treat "{,m}" as "{0,m}". */ + else + { + *err = REG_BADBR; /* {} is invalid. */ + return NULL; + } + } + if (BE (start != -2, 1)) + { + /* We treat "{n}" as "{n,n}". */ + end = ((token->type == OP_CLOSE_DUP_NUM) ? start + : ((token->type == CHARACTER && token->opr.c == ',') + ? fetch_number (regexp, token, syntax) : -2)); + } + if (BE (start == -2 || end == -2, 0)) + { + /* Invalid sequence. */ + if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) + { + if (token->type == END_OF_RE) + *err = REG_EBRACE; + else + *err = REG_BADBR; + + return NULL; + } + + /* If the syntax bit is set, rollback. */ + re_string_set_index (regexp, start_idx); + *token = start_token; + token->type = CHARACTER; + /* mb_partial and word_char bits should be already initialized by + peek_token. */ + return elem; + } + + if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0)) + { + /* First number greater than second. */ + *err = REG_BADBR; + return NULL; + } + } + else + { + start = (token->type == OP_DUP_PLUS) ? 1 : 0; + end = (token->type == OP_DUP_QUESTION) ? 1 : -1; + } + + fetch_token (token, regexp, syntax); + + if (BE (elem == NULL, 0)) + return NULL; + if (BE (start == 0 && end == 0, 0)) + { + postorder (elem, free_tree, NULL); + return NULL; + } + + /* Extract "{n,m}" to "...{0,}". */ + if (BE (start > 0, 0)) + { + tree = elem; + for (i = 2; i <= start; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (start == end) + return tree; + + /* Duplicate ELEM before it is marked optional. */ + elem = duplicate_tree (elem, dfa); + old_tree = tree; + } + else + old_tree = NULL; + + if (elem->token.type == SUBEXP) + postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); + + tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + + /* This loop is actually executed only when end != -1, + to rewrite {0,n} as ((...?)?)?... We have + already created the start+1-th copy. */ + for (i = start + 2; i <= end; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + + tree = create_tree (dfa, tree, NULL, OP_ALT); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (old_tree) + tree = create_tree (dfa, old_tree, tree, CONCAT); + + return tree; + + parse_dup_op_espace: + *err = REG_ESPACE; + return NULL; +} + +/* Size of the names for collating symbol/equivalence_class/character_class. + I'm not sure, but maybe enough. */ +#define BRACKET_NAME_BUF_SIZE 32 + +#ifndef _LIBC + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) +# else /* not RE_ENABLE_I18N */ +build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, + bracket_elem_t *end_elem) +# endif /* not RE_ENABLE_I18N */ +{ + unsigned int start_ch, end_ch; + /* Equivalence Classes and Character Classes can't be a range start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + /* We can handle no multi character collating elements without libc + support. */ + if (BE ((start_elem->type == COLL_SYM + && strlen ((char *) start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM + && strlen ((char *) end_elem->opr.name) > 1), 0)) + return REG_ECOLLATE; + +# ifdef RE_ENABLE_I18N + { + wchar_t wc; + wint_t start_wc; + wint_t end_wc; + wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + + start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); +#ifdef GAWK + /* + * Fedora Core 2, maybe others, have broken `btowc' that returns -1 + * for any value > 127. Sigh. Note that `start_ch' and `end_ch' are + * unsigned, so we don't have sign extension problems. + */ + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? start_ch : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? end_ch : end_elem->opr.wch); +#else + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? __btowc (start_ch) : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? __btowc (end_ch) : end_elem->opr.wch); +#endif + if (start_wc == WEOF || end_wc == WEOF) + return REG_ECOLLATE; + cmp_buf[0] = start_wc; + cmp_buf[4] = end_wc; + if (wcscoll (cmp_buf, cmp_buf + 4) > 0) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, for !_LIBC we have no collation elements: if the + character set is single byte, the single byte character set + that we build below suffices. parse_bracket_exp passes + no MBCSET if dfa->mb_cur_max == 1. */ + if (mbcset) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. */ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; + } + + /* Build the table for single byte characters. */ + for (wc = 0; wc < SBC_MAX; ++wc) + { + cmp_buf[2] = wc; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + bitset_set (sbcset, wc); + } + } +# else /* not RE_ENABLE_I18N */ + { + unsigned int ch; + start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + if (start_ch > end_ch) + return REG_ERANGE; + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ++ch) + if (start_ch <= ch && ch <= end_ch) + bitset_set (sbcset, ch); + } +# endif /* not RE_ENABLE_I18N */ + return REG_NOERROR; +} +#endif /* not _LIBC */ + +#ifndef _LIBC +/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument since we may update it. */ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) +# else /* not RE_ENABLE_I18N */ +build_collating_symbol (bitset_t sbcset, const unsigned char *name) +# endif /* not RE_ENABLE_I18N */ +{ + size_t name_len = strlen ((const char *) name); + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } +} +#endif /* not _LIBC */ + +/* This function parse bracket expression like "[abc]", "[a-c]", + "[[.a-a.]]" etc. */ + +static bin_tree_t * +parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err) +{ +#ifdef _LIBC + const unsigned char *collseqmb; + const char *collseqwc; + uint32_t nrules; + int32_t table_size; + const int32_t *symb_table; + const unsigned char *extra; + + /* Local function for parse_bracket_exp used in _LIBC environement. + Seek the collating symbol entry correspondings to NAME. + Return the index of the symbol in the SYMB_TABLE. */ + + auto inline int32_t + __attribute ((always_inline)) + seek_collating_symbol_entry (name, name_len) + const unsigned char *name; + size_t name_len; + { + int32_t hash = elem_hash ((const char *) name, name_len); + int32_t elem = hash % table_size; + if (symb_table[2 * elem] != 0) + { + int32_t second = hash % (table_size - 2) + 1; + + do + { + /* First compare the hashing value. */ + if (symb_table[2 * elem] == hash + /* Compare the length of the name. */ + && name_len == extra[symb_table[2 * elem + 1]] + /* Compare the name. */ + && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], + name_len) == 0) + { + /* Yep, this is the entry. */ + break; + } + + /* Next entry. */ + elem += second; + } + while (symb_table[2 * elem] != 0); + } + return elem; + } + + /* Local function for parse_bracket_exp used in _LIBC environment. + Look up the collation sequence value of BR_ELEM. + Return the value if succeeded, UINT_MAX otherwise. */ + + auto inline unsigned int + __attribute ((always_inline)) + lookup_collation_sequence_value (br_elem) + bracket_elem_t *br_elem; + { + if (br_elem->type == SB_CHAR) + { + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + return collseqmb[br_elem->opr.ch]; + else + { + wint_t wc = __btowc (br_elem->opr.ch); + return __collseq_table_lookup (collseqwc, wc); + } + } + else if (br_elem->type == MB_CHAR) + { + if (nrules != 0) + return __collseq_table_lookup (collseqwc, br_elem->opr.wch); + } + else if (br_elem->type == COLL_SYM) + { + size_t sym_name_len = strlen ((char *) br_elem->opr.name); + if (nrules != 0) + { + int32_t elem, idx; + elem = seek_collating_symbol_entry (br_elem->opr.name, + sym_name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + /* Skip the byte sequence of the collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the multibyte collation sequence value. */ + idx += sizeof (unsigned int); + /* Skip the wide char sequence of the collating element. */ + idx += sizeof (unsigned int) * + (1 + *(unsigned int *) (extra + idx)); + /* Return the collation sequence value. */ + return *(unsigned int *) (extra + idx); + } + else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + { + /* No valid character. Match it as a single byte + character. */ + return collseqmb[br_elem->opr.name[0]]; + } + } + else if (sym_name_len == 1) + return collseqmb[br_elem->opr.name[0]]; + } + return UINT_MAX; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) + re_charset_t *mbcset; + int *range_alloc; + bitset_t sbcset; + bracket_elem_t *start_elem, *end_elem; + { + unsigned int ch; + uint32_t start_collseq; + uint32_t end_collseq; + + /* Equivalence Classes and Character Classes can't be a range + start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + start_collseq = lookup_collation_sequence_value (start_elem); + end_collseq = lookup_collation_sequence_value (end_elem); + /* Check start/end collation sequence values. */ + if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) + return REG_ECOLLATE; + if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, if we have no collation elements, and the character set + is single byte, the single byte character set that we + build below suffices. */ + if (nrules > 0 || dfa->mb_cur_max > 1) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + uint32_t *new_array_start; + uint32_t *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_collseq; + mbcset->range_ends[mbcset->nranges++] = end_collseq; + } + + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ch++) + { + uint32_t ch_collseq; + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + ch_collseq = collseqmb[ch]; + else + ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); + if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) + bitset_set (sbcset, ch); + } + return REG_NOERROR; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument sinse we may update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) + re_charset_t *mbcset; + int *coll_sym_alloc; + bitset_t sbcset; + const unsigned char *name; + { + int32_t elem, idx; + size_t name_len = strlen ((const char *) name); + if (nrules != 0) + { + elem = seek_collating_symbol_entry (name, name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + } + else if (symb_table[2 * elem] == 0 && name_len == 1) + { + /* No valid character, treat it as a normal + character. */ + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + else + return REG_ECOLLATE; + + /* Got valid collation sequence, add it as a new entry. */ + /* Check the space of the arrays. */ + if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->ncoll_syms is 0. */ + int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + /* Use realloc since mbcset->coll_syms is NULL + if *alloc == 0. */ + int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, + new_coll_sym_alloc); + if (BE (new_coll_syms == NULL, 0)) + return REG_ESPACE; + mbcset->coll_syms = new_coll_syms; + *coll_sym_alloc = new_coll_sym_alloc; + } + mbcset->coll_syms[mbcset->ncoll_syms++] = idx; + return REG_NOERROR; + } + else + { + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + } + } +#endif + + re_token_t br_token; + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; + int equiv_class_alloc = 0, char_class_alloc = 0; +#endif /* not RE_ENABLE_I18N */ + int non_match = 0; + bin_tree_t *work_tree; + int token_len; + int first_round = 1; +#ifdef _LIBC + collseqmb = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules) + { + /* + if (MB_CUR_MAX > 1) + */ + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); + symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_TABLEMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_EXTRAMB); + } +#endif + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else + if (BE (sbcset == NULL, 0)) +#endif /* RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_NON_MATCH_LIST) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + non_match = 1; + if (syntax & RE_HAT_LISTS_NOT_NEWLINE) + bitset_set (sbcset, '\n'); + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + } + + /* We treat the first ']' as a normal character. */ + if (token->type == OP_CLOSE_BRACKET) + token->type = CHARACTER; + + while (1) + { + bracket_elem_t start_elem, end_elem; + unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; + unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; + reg_errcode_t ret; + int token_len2 = 0, is_range_exp = 0; + re_token_t token2; + + start_elem.opr.name = start_name_buf; + ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, + syntax, first_round); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + first_round = 0; + + /* Get information about the next token. We need it in any case. */ + token_len = peek_token_bracket (token, regexp, syntax); + + /* Do not check for ranges if we know they are not allowed. */ + if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) + { + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CHARSET_RANGE) + { + re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ + token_len2 = peek_token_bracket (&token2, regexp, syntax); + if (BE (token2.type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token2.type == OP_CLOSE_BRACKET) + { + /* We treat the last '-' as a normal character. */ + re_string_skip_bytes (regexp, -token_len); + token->type = CHARACTER; + } + else + is_range_exp = 1; + } + } + + if (is_range_exp == 1) + { + end_elem.opr.name = end_name_buf; + ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, + dfa, syntax, 1); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + + token_len = peek_token_bracket (token, regexp, syntax); + +#ifdef _LIBC + *err = build_range_exp (sbcset, mbcset, &range_alloc, + &start_elem, &end_elem); +#else +# ifdef RE_ENABLE_I18N + *err = build_range_exp (sbcset, + dfa->mb_cur_max > 1 ? mbcset : NULL, + &range_alloc, &start_elem, &end_elem); +# else + *err = build_range_exp (sbcset, &start_elem, &end_elem); +# endif +#endif /* RE_ENABLE_I18N */ + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + } + else + { + switch (start_elem.type) + { + case SB_CHAR: + bitset_set (sbcset, start_elem.opr.ch); + break; +#ifdef RE_ENABLE_I18N + case MB_CHAR: + /* Check whether the array has enough space. */ + if (BE (mbchar_alloc == mbcset->nmbchars, 0)) + { + wchar_t *new_mbchars; + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nmbchars is 0. */ + mbchar_alloc = 2 * mbcset->nmbchars + 1; + /* Use realloc since array is NULL if *alloc == 0. */ + new_mbchars = re_realloc (mbcset->mbchars, wchar_t, + mbchar_alloc); + if (BE (new_mbchars == NULL, 0)) + goto parse_bracket_exp_espace; + mbcset->mbchars = new_mbchars; + } + mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; + break; +#endif /* RE_ENABLE_I18N */ + case EQUIV_CLASS: + *err = build_equiv_class (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &equiv_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case COLL_SYM: + *err = build_collating_symbol (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &coll_sym_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case CHAR_CLASS: + *err = build_charclass (regexp->trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &char_class_alloc, +#endif /* RE_ENABLE_I18N */ + (const char *) start_elem.opr.name, syntax); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + default: + assert (0); + break; + } + } + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CLOSE_BRACKET) + break; + } + + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); + + if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes + || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes + || mbcset->non_match))) + { + bin_tree_t *mbc_tree; + int sbc_idx; + /* Build a tree for complex bracket. */ + dfa->has_mb_node = 1; + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto parse_bracket_exp_espace; + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) + if (sbcset[sbc_idx]) + break; + /* If there are no bits set in sbcset, there is no point + of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ + if (sbc_idx < BITSET_WORDS) + { + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + + /* Then join them by ALT node. */ + work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + else + { + re_free (sbcset); + work_tree = mbc_tree; + } + } + else +#endif /* not RE_ENABLE_I18N */ + { +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + return work_tree; + + parse_bracket_exp_espace: + *err = REG_ESPACE; + parse_bracket_exp_free_return: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + return NULL; +} + +/* Parse an element in the bracket expression. */ + +static reg_errcode_t +parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token, int token_len, UNUSED re_dfa_t *dfa, + reg_syntax_t syntax, int accept_hyphen) +{ +#ifdef RE_ENABLE_I18N + int cur_char_size; + cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); + if (cur_char_size > 1) + { + elem->type = MB_CHAR; + elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); + re_string_skip_bytes (regexp, cur_char_size); + return REG_NOERROR; + } +#endif /* RE_ENABLE_I18N */ + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS + || token->type == OP_OPEN_EQUIV_CLASS) + return parse_bracket_symbol (elem, regexp, token); + if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) + { + /* A '-' must only appear as anything but a range indicator before + the closing bracket. Everything else is an error. */ + re_token_t token2; + (void) peek_token_bracket (&token2, regexp, syntax); + if (token2.type != OP_CLOSE_BRACKET) + /* The actual error value is not standardized since this whole + case is undefined. But ERANGE makes good sense. */ + return REG_ERANGE; + } + elem->type = SB_CHAR; + elem->opr.ch = token->opr.c; + return REG_NOERROR; +} + +/* Parse a bracket symbol in the bracket expression. Bracket symbols are + such as [::], [..], and + [==]. */ + +static reg_errcode_t +parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token) +{ + unsigned char ch, delim = token->opr.c; + int i = 0; + if (re_string_eoi(regexp)) + return REG_EBRACK; + for (;; ++i) + { + if (i >= BRACKET_NAME_BUF_SIZE) + return REG_EBRACK; + if (token->type == OP_OPEN_CHAR_CLASS) + ch = re_string_fetch_byte_case (regexp); + else + ch = re_string_fetch_byte (regexp); + if (re_string_eoi(regexp)) + return REG_EBRACK; + if (ch == delim && re_string_peek_byte (regexp, 0) == ']') + break; + elem->opr.name[i] = ch; + } + re_string_skip_bytes (regexp, 1); + elem->opr.name[i] = '\0'; + switch (token->type) + { + case OP_OPEN_COLL_ELEM: + elem->type = COLL_SYM; + break; + case OP_OPEN_EQUIV_CLASS: + elem->type = EQUIV_CLASS; + break; + case OP_OPEN_CHAR_CLASS: + elem->type = CHAR_CLASS; + break; + default: + break; + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the equivalence class which is represented by NAME. + The result are written to MBCSET and SBCSET. + EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, + int *equiv_class_alloc, const unsigned char *name) +#else /* not RE_ENABLE_I18N */ +build_equiv_class (bitset_t sbcset, const unsigned char *name) +#endif /* not RE_ENABLE_I18N */ +{ +#ifdef _LIBC + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + const int32_t *table, *indirect; + const unsigned char *weights, *extra, *cp; + unsigned char char_buf[2]; + int32_t idx1, idx2; + unsigned int ch; + size_t len; + /* This #include defines a local function! */ +# include + /* Calculate the index for equivalence class. */ + cp = name; + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + idx1 = findidx (&cp); + if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) + /* This isn't a valid character. */ + return REG_ECOLLATE; + + /* Build single byte matcing table for this equivalence class. */ + char_buf[1] = (unsigned char) '\0'; + len = weights[idx1 & 0xffffff]; + for (ch = 0; ch < SBC_MAX; ++ch) + { + char_buf[0] = ch; + cp = char_buf; + idx2 = findidx (&cp); +/* + idx2 = table[ch]; +*/ + if (idx2 == 0) + /* This isn't a valid character. */ + continue; + /* Compare only if the length matches and the collation rule + index is the same. */ + if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)) + { + int cnt = 0; + + while (cnt <= len && + weights[(idx1 & 0xffffff) + 1 + cnt] + == weights[(idx2 & 0xffffff) + 1 + cnt]) + ++cnt; + + if (cnt > len) + bitset_set (sbcset, ch); + } + } + /* Check whether the array has enough space. */ + if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nequiv_classes is 0. */ + int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + /* Use realloc since the array is NULL if *alloc == 0. */ + int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, + int32_t, + new_equiv_class_alloc); + if (BE (new_equiv_classes == NULL, 0)) + return REG_ESPACE; + mbcset->equiv_classes = new_equiv_classes; + *equiv_class_alloc = new_equiv_class_alloc; + } + mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; + } + else +#endif /* _LIBC */ + { + if (BE (strlen ((const char *) name) != 1, 0)) + return REG_ECOLLATE; + bitset_set (sbcset, *name); + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the character class which is represented by NAME. + The result are written to MBCSET and SBCSET. + CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, + const char *class_name, reg_syntax_t syntax) +#else /* not RE_ENABLE_I18N */ +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + const char *class_name, reg_syntax_t syntax) +#endif /* not RE_ENABLE_I18N */ +{ + int i; + + /* In case of REG_ICASE "upper" and "lower" match the both of + upper and lower cases. */ + if ((syntax & RE_ICASE) + && (strcmp (class_name, "upper") == 0 || strcmp (class_name, "lower") == 0)) + class_name = "alpha"; + +#ifdef RE_ENABLE_I18N + /* Check the space of the arrays. */ + if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nchar_classes is 0. */ + int new_char_class_alloc = 2 * mbcset->nchar_classes + 1; + /* Use realloc since array is NULL if *alloc == 0. */ + wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, + new_char_class_alloc); + if (BE (new_char_classes == NULL, 0)) + return REG_ESPACE; + mbcset->char_classes = new_char_classes; + *char_class_alloc = new_char_class_alloc; + } + mbcset->char_classes[mbcset->nchar_classes++] = __wctype (class_name); +#endif /* RE_ENABLE_I18N */ + +#define BUILD_CHARCLASS_LOOP(ctype_func) \ + do { \ + if (BE (trans != NULL, 0)) \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, trans[i]); \ + } \ + else \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, i); \ + } \ + } while (0) + + if (strcmp (class_name, "alnum") == 0) + BUILD_CHARCLASS_LOOP (isalnum); + else if (strcmp (class_name, "cntrl") == 0) + BUILD_CHARCLASS_LOOP (iscntrl); + else if (strcmp (class_name, "lower") == 0) + BUILD_CHARCLASS_LOOP (islower); + else if (strcmp (class_name, "space") == 0) + BUILD_CHARCLASS_LOOP (isspace); + else if (strcmp (class_name, "alpha") == 0) + BUILD_CHARCLASS_LOOP (isalpha); + else if (strcmp (class_name, "digit") == 0) + BUILD_CHARCLASS_LOOP (isdigit); + else if (strcmp (class_name, "print") == 0) + BUILD_CHARCLASS_LOOP (isprint); + else if (strcmp (class_name, "upper") == 0) + BUILD_CHARCLASS_LOOP (isupper); + else if (strcmp (class_name, "blank") == 0) +#ifndef GAWK + BUILD_CHARCLASS_LOOP (isblank); +#else + /* see comments above */ + BUILD_CHARCLASS_LOOP (is_blank); +#endif + else if (strcmp (class_name, "graph") == 0) + BUILD_CHARCLASS_LOOP (isgraph); + else if (strcmp (class_name, "punct") == 0) + BUILD_CHARCLASS_LOOP (ispunct); + else if (strcmp (class_name, "xdigit") == 0) + BUILD_CHARCLASS_LOOP (isxdigit); + else + return REG_ECTYPE; + + return REG_NOERROR; +} + +static bin_tree_t * +build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, + const char *class_name, + const char *extra, int non_match, + reg_errcode_t *err) +{ + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int alloc = 0; +#endif /* not RE_ENABLE_I18N */ + reg_errcode_t ret; + re_token_t br_token; + bin_tree_t *tree; + + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ + +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else /* not RE_ENABLE_I18N */ + if (BE (sbcset == NULL, 0)) +#endif /* not RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + if (non_match) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + } + + /* We don't care the syntax in this case. */ + ret = build_charclass (trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &alloc, +#endif /* RE_ENABLE_I18N */ + class_name, 0); + + if (BE (ret != REG_NOERROR, 0)) + { + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = ret; + return NULL; + } + /* \w match '_' also. */ + for (; *extra; extra++) + bitset_set (sbcset, *extra); + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); +#endif + + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (tree == NULL, 0)) + goto build_word_op_espace; + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + bin_tree_t *mbc_tree; + /* Build a tree for complex bracket. */ + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + dfa->has_mb_node = 1; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto build_word_op_espace; + /* Then join them by ALT node. */ + tree = create_tree (dfa, tree, mbc_tree, OP_ALT); + if (BE (mbc_tree != NULL, 1)) + return tree; + } + else + { + free_charset (mbcset); + return tree; + } +#else /* not RE_ENABLE_I18N */ + return tree; +#endif /* not RE_ENABLE_I18N */ + + build_word_op_espace: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = REG_ESPACE; + return NULL; +} + +/* This is intended for the expressions like "a{1,3}". + Fetch a number from `input', and return the number. + Return -1, if the number field is empty like "{,1}". + Return -2, If an error is occured. */ + +static int +fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) +{ + int num = -1; + unsigned char c; + while (1) + { + fetch_token (token, input, syntax); + c = token->opr.c; + if (BE (token->type == END_OF_RE, 0)) + return -2; + if (token->type == OP_CLOSE_DUP_NUM || c == ',') + break; + num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) + ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); + num = (num > RE_DUP_MAX) ? -2 : num; + } + return num; +} + +#ifdef RE_ENABLE_I18N +static void +free_charset (re_charset_t *cset) +{ + re_free (cset->mbchars); +# ifdef _LIBC + re_free (cset->coll_syms); + re_free (cset->equiv_classes); + re_free (cset->range_starts); + re_free (cset->range_ends); +# endif + re_free (cset->char_classes); + re_free (cset); +} +#endif /* RE_ENABLE_I18N */ + +/* Functions for binary tree operation. */ + +/* Create a tree node. */ + +static bin_tree_t * +create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + re_token_type_t type) +{ + re_token_t t; + t.type = type; + return create_token_tree (dfa, left, right, &t); +} + +static bin_tree_t * +create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + const re_token_t *token) +{ + bin_tree_t *tree; + if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) + { + bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); + + if (storage == NULL) + return NULL; + storage->next = dfa->str_tree_storage; + dfa->str_tree_storage = storage; + dfa->str_tree_storage_idx = 0; + } + tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; + + tree->parent = NULL; + tree->left = left; + tree->right = right; + tree->token = *token; + tree->token.duplicated = 0; + tree->token.opt_subexp = 0; + tree->first = NULL; + tree->next = NULL; + tree->node_idx = -1; + + if (left != NULL) + left->parent = tree; + if (right != NULL) + right->parent = tree; + return tree; +} + +/* Mark the tree SRC as an optional subexpression. + To be called from preorder or postorder. */ + +static reg_errcode_t +mark_opt_subexp (void *extra, bin_tree_t *node) +{ + int idx = (int) (long) extra; + if (node->token.type == SUBEXP && node->token.opr.idx == idx) + node->token.opt_subexp = 1; + + return REG_NOERROR; +} + +/* Free the allocated memory inside NODE. */ + +static void +free_token (re_token_t *node) +{ +#ifdef RE_ENABLE_I18N + if (node->type == COMPLEX_BRACKET && node->duplicated == 0) + free_charset (node->opr.mbcset); + else +#endif /* RE_ENABLE_I18N */ + if (node->type == SIMPLE_BRACKET && node->duplicated == 0) + re_free (node->opr.sbcset); +} + +/* Worker function for tree walking. Free the allocated memory inside NODE + and its children. */ + +static reg_errcode_t +free_tree (UNUSED void *extra, bin_tree_t *node) +{ + free_token (&node->token); + return REG_NOERROR; +} + + +/* Duplicate the node SRC, and return new node. This is a preorder + visit similar to the one implemented by the generic visitor, but + we need more infrastructure to maintain two parallel trees --- so, + it's easier to duplicate. */ + +static bin_tree_t * +duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) +{ + const bin_tree_t *node; + bin_tree_t *dup_root; + bin_tree_t **p_new = &dup_root, *dup_node = root->parent; + + for (node = root; ; ) + { + /* Create a new tree and link it back to the current parent. */ + *p_new = create_token_tree (dfa, NULL, NULL, &node->token); + if (*p_new == NULL) + return NULL; + (*p_new)->parent = dup_node; + (*p_new)->token.duplicated = 1; + dup_node = *p_new; + + /* Go to the left node, or up and to the right. */ + if (node->left) + { + node = node->left; + p_new = &dup_node->left; + } + else + { + const bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + dup_node = dup_node->parent; + if (!node) + return dup_root; + } + node = node->right; + p_new = &dup_node->right; + } + } +} diff --git a/deps/regex/regex.c b/deps/regex/regex.c new file mode 100644 index 0000000..225a001 --- /dev/null +++ b/deps/regex/regex.c @@ -0,0 +1,92 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#include "config.h" + +/* Make sure noone compiles this code with a C++ compiler. */ +#ifdef __cplusplus +# error "This is C code, use a C compiler" +#endif + +#ifdef _LIBC +/* We have to keep the namespace clean. */ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# include "../locale/localeinfo.h" +#endif + +#if defined (_MSC_VER) +#include /* for size_t */ +#endif + +/* On some systems, limits.h sets RE_DUP_MAX to a lower value than + GNU regex allows. Include it before , which correctly + #undefs RE_DUP_MAX and sets it to the right value. */ +#include + +#ifdef GAWK +#undef alloca +#define alloca alloca_is_bad_you_should_never_use_it +#endif +#include +#include "regex_internal.h" + +#include "regex_internal.c" + +#ifdef GAWK +# define bool int + +# ifndef true +# define true (1) +# endif + +# ifndef false +# define false (0) +# endif +#endif +#include "regcomp.c" +#include "regexec.c" + +/* Binary backward compatibility. */ +#if _LIBC +# include +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) +link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") +int re_max_failures = 2000; +# endif +#endif diff --git a/deps/regex/regex.h b/deps/regex/regex.h new file mode 100644 index 0000000..61c9683 --- /dev/null +++ b/deps/regex/regex.h @@ -0,0 +1,582 @@ +#include +#include + +/* Definitions for data structures and routines for the regular + expression library. + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifndef _REGEX_H +#define _REGEX_H 1 + +#ifdef HAVE_STDDEF_H +#include +#endif + +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#ifndef _LIBC +#define __USE_GNU 1 +#endif + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned long int reg_syntax_t; + +#ifdef __USE_GNU +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +# define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \ matches . + If not set, then \ is a back-reference. */ +# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ +# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +# define RE_INVALID_INTERVAL_ORD (RE_NO_GNU_OPS << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + +/* If this bit is set, then \{ cannot be first in an bre or + immediately after an alternation or begin-group operator. */ +# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) + +/* If this bit is set, then no_sub will be set to 1 during + re_compile_pattern. */ +#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) +#endif + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +#ifdef __USE_GNU +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INVALID_INTERVAL_ORD) \ + & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS \ + | RE_INVALID_INTERVAL_ORD) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. */ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +# ifdef RE_DUP_MAX +# undef RE_DUP_MAX +# endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +# define RE_DUP_MAX (0x7fff) +#endif + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. */ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + +/* Use PMATCH[0] to delimit the start and end of the search in the + buffer. */ +#define REG_STARTEND (1 << 2) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ +#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Inalid collating element. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +#ifndef RE_TRANSLATE_TYPE +# define __RE_TRANSLATE_TYPE unsigned char * +# ifdef __USE_GNU +# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE +# endif +#endif + +#ifdef __USE_GNU +# define __REPB_PREFIX(name) name +#else +# define __REPB_PREFIX(name) __##name +#endif + +struct re_pattern_buffer +{ + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ + unsigned char *__REPB_PREFIX(buffer); + + /* Number of bytes to which `buffer' points. */ + unsigned long int __REPB_PREFIX(allocated); + + /* Number of bytes actually used in `buffer'. */ + unsigned long int __REPB_PREFIX(used); + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t __REPB_PREFIX(syntax); + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ + char *__REPB_PREFIX(fastmap); + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ + __RE_TRANSLATE_TYPE __REPB_PREFIX(translate); + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ + unsigned __REPB_PREFIX(can_be_null) : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#ifdef __USE_GNU +# define REGS_UNALLOCATED 0 +# define REGS_REALLOCATE 1 +# define REGS_FIXED 2 +#endif + unsigned __REPB_PREFIX(regs_allocated) : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned __REPB_PREFIX(fastmap_accurate) : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned __REPB_PREFIX(no_sub) : 1; + + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ + unsigned __REPB_PREFIX(not_bol) : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned __REPB_PREFIX(not_eol) : 1; + + /* If true, an anchor at a newline matches. */ + unsigned __REPB_PREFIX(newline_anchor) : 1; +}; + +typedef struct re_pattern_buffer regex_t; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +#ifdef __USE_GNU +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +# ifndef RE_NREGS +# define RE_NREGS 30 +# endif +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +#ifdef __USE_GNU +/* Sets the current default syntax to SYNTAX, and return the old syntax. + You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search (struct re_pattern_buffer *__buffer, const char *__cstring, + int __length, int __start, int __range, + struct re_registers *__regs); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match (struct re_pattern_buffer *__buffer, const char *__cstring, + int __length, int __start, struct re_registers *__regs); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); +#endif /* Use GNU */ + +#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD) +# ifndef _CRAY +/* 4.2 bsd compatibility. */ +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". */ +#ifndef __restrict +# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) +# if defined restrict || 199901L <= __STDC_VERSION__ +# define __restrict restrict +# else +# define __restrict +# endif +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. */ +#ifndef __restrict_arr +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \ + && !defined __GNUG__ +# define __restrict_arr __restrict +# else +# define __restrict_arr +# endif +#endif + +/* POSIX compatibility. */ +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); + +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __cstring, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); + +extern size_t regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); + +extern void regfree (regex_t *__preg); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ diff --git a/deps/regex/regex_internal.c b/deps/regex/regex_internal.c new file mode 100644 index 0000000..ad57c20 --- /dev/null +++ b/deps/regex/regex_internal.c @@ -0,0 +1,1744 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +static void re_string_construct_common (const char *str, int len, + re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) internal_function; +static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int hash) internal_function; +static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int context, + unsigned int hash) internal_function; + +#ifdef GAWK +#undef MAX /* safety */ +static size_t +MAX(size_t a, size_t b) +{ + return (a > b ? a : b); +} +#endif + +/* Functions for string operation. */ + +/* This function allocate the buffers. It is necessary to call + re_string_reconstruct before using the object. */ + +static reg_errcode_t +internal_function +re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + int init_buf_len; + + /* Ensure at least one character fits into the buffers. */ + if (init_len < dfa->mb_cur_max) + init_len = dfa->mb_cur_max; + init_buf_len = (len + 1 < init_len) ? len + 1: init_len; + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + ret = re_string_realloc_buffers (pstr, init_buf_len); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + pstr->word_char = dfa->word_char; + pstr->word_ops_used = dfa->word_ops_used; + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; + pstr->valid_raw_len = pstr->valid_len; + return REG_NOERROR; +} + +/* This function allocate the buffers, and initialize them. */ + +static reg_errcode_t +internal_function +re_string_construct (re_string_t *pstr, const char *str, int len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + memset (pstr, '\0', sizeof (re_string_t)); + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + if (len > 0) + { + ret = re_string_realloc_buffers (pstr, len + 1); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + + if (icase) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + if (pstr->valid_raw_len >= len) + break; + if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) + break; + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (trans != NULL) + re_string_translate_buffer (pstr); + else + { + pstr->valid_len = pstr->bufs_len; + pstr->valid_raw_len = pstr->bufs_len; + } + } + } + + return REG_NOERROR; +} + +/* Helper functions for re_string_allocate, and re_string_construct. */ + +static reg_errcode_t +internal_function +re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) +{ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + wint_t *new_wcs; + + /* Avoid overflow in realloc. */ + const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int)); + if (BE (SIZE_MAX / max_object_size < new_buf_len, 0)) + return REG_ESPACE; + + new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); + if (BE (new_wcs == NULL, 0)) + return REG_ESPACE; + pstr->wcs = new_wcs; + if (pstr->offsets != NULL) + { + int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len); + if (BE (new_offsets == NULL, 0)) + return REG_ESPACE; + pstr->offsets = new_offsets; + } + } +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + { + unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, + new_buf_len); + if (BE (new_mbs == NULL, 0)) + return REG_ESPACE; + pstr->mbs = new_mbs; + } + pstr->bufs_len = new_buf_len; + return REG_NOERROR; +} + + +static void +internal_function +re_string_construct_common (const char *str, int len, re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) +{ + pstr->raw_mbs = (const unsigned char *) str; + pstr->len = len; + pstr->raw_len = len; + pstr->trans = trans; + pstr->icase = icase ? 1 : 0; + pstr->mbs_allocated = (trans != NULL || icase); + pstr->mb_cur_max = dfa->mb_cur_max; + pstr->is_utf8 = dfa->is_utf8; + pstr->map_notascii = dfa->map_notascii; + pstr->stop = pstr->len; + pstr->raw_stop = pstr->stop; +} + +#ifdef RE_ENABLE_I18N + +/* Build wide character buffer PSTR->WCS. + If the byte sequence of the string are: + (0), (1), (0), (1), + Then wide character buffer will be: + , WEOF , , WEOF , + We use WEOF for padding, they indicate that the position isn't + a first byte of a multibyte character. + + Note that this function assumes PSTR->VALID_LEN elements are already + built and starts from PSTR->VALID_LEN. */ + +static void +internal_function +build_wcs_buffer (re_string_t *pstr) +{ +#ifdef _LIBC + unsigned char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + unsigned char buf[64]; +#endif + mbstate_t prev_st; + int byte_idx, end_idx, remain_len; + size_t mbclen; + + /* Build the buffers from pstr->valid_len to either pstr->len or + pstr->bufs_len. */ + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + for (byte_idx = pstr->valid_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + /* Apply the translation if we need. */ + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; + buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; + mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2, 0)) + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a singlebyte character. */ + mbclen = 1; + wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + if (BE (pstr->trans != NULL, 0)) + wc = pstr->trans[wc]; + pstr->cur_state = prev_st; + } + + /* Write wide character and padding. */ + pstr->wcs[byte_idx++] = wc; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; +} + +/* Build wide character buffer PSTR->WCS like build_wcs_buffer, + but for REG_ICASE. */ + +static reg_errcode_t +internal_function +build_wcs_upper_buffer (re_string_t *pstr) +{ + mbstate_t prev_st; + int src_idx, byte_idx, end_idx, remain_len; + size_t mbclen; +#ifdef _LIBC + char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + char buf[64]; +#endif + + byte_idx = pstr->valid_len; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + /* The following optimization assumes that ASCII characters can be + mapped to wide characters with a simple cast. */ + if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) + { + while (byte_idx < end_idx) + { + wchar_t wc; + + if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) + && mbsinit (&pstr->cur_state)) + { + /* In case of a singlebyte character. */ + pstr->mbs[byte_idx] + = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); + /* The next step uses the assumption that wchar_t is encoded + ASCII-safe: all ASCII values can be converted like this. */ + pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; + ++byte_idx; + continue; + } + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + mbclen = __mbrtowc (&wc, + ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + + byte_idx), remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb (buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else + { + src_idx = byte_idx; + goto offsets_needed; + } + } + else + memcpy (pstr->mbs + byte_idx, + pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + pstr->mbs[byte_idx] = ch; + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; + return REG_NOERROR; + } + else + for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + offsets_needed: + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; + buf[i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; + mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else if (mbcdlen != (size_t) -1) + { + size_t i; + + if (byte_idx + mbcdlen > pstr->bufs_len) + { + pstr->cur_state = prev_st; + break; + } + + if (pstr->offsets == NULL) + { + pstr->offsets = re_malloc (int, pstr->bufs_len); + + if (pstr->offsets == NULL) + return REG_ESPACE; + } + if (!pstr->offsets_needed) + { + for (i = 0; i < (size_t) byte_idx; ++i) + pstr->offsets[i] = i; + pstr->offsets_needed = 1; + } + + memcpy (pstr->mbs + byte_idx, buf, mbcdlen); + pstr->wcs[byte_idx] = wcu; + pstr->offsets[byte_idx] = src_idx; + for (i = 1; i < mbcdlen; ++i) + { + pstr->offsets[byte_idx + i] + = src_idx + (i < mbclen ? i : mbclen - 1); + pstr->wcs[byte_idx + i] = WEOF; + } + pstr->len += mbcdlen - mbclen; + if (pstr->raw_stop > src_idx) + pstr->stop += mbcdlen - mbclen; + end_idx = (pstr->bufs_len > pstr->len) + ? pstr->len : pstr->bufs_len; + byte_idx += mbcdlen; + src_idx += mbclen; + continue; + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + + if (BE (pstr->offsets_needed != 0, 0)) + { + size_t i; + for (i = 0; i < mbclen; ++i) + pstr->offsets[byte_idx + i] = src_idx + i; + } + src_idx += mbclen; + + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; + + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans [ch]; + pstr->mbs[byte_idx] = ch; + + if (BE (pstr->offsets_needed != 0, 0)) + pstr->offsets[byte_idx] = src_idx; + ++src_idx; + + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = src_idx; + return REG_NOERROR; +} + +/* Skip characters until the index becomes greater than NEW_RAW_IDX. + Return the index. */ + +static int +internal_function +re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) +{ + mbstate_t prev_st; + int rawbuf_idx; + size_t mbclen; + wint_t wc = WEOF; + + /* Skip the characters which are not necessary to check. */ + for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len; + rawbuf_idx < new_raw_idx;) + { + wchar_t wc2; + int remain_len = pstr->len - rawbuf_idx; + prev_st = pstr->cur_state; + mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, + remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a single byte character. */ + if (mbclen == 0 || remain_len == 0) + wc = L'\0'; + else + wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); + mbclen = 1; + pstr->cur_state = prev_st; + } + else + wc = (wint_t) wc2; + /* Then proceed the next character. */ + rawbuf_idx += mbclen; + } + *last_wc = (wint_t) wc; + return rawbuf_idx; +} +#endif /* RE_ENABLE_I18N */ + +/* Build the buffer PSTR->MBS, and apply the translation if we need. + This function is used in case of REG_ICASE. */ + +static void +internal_function +build_upper_buffer (re_string_t *pstr) +{ + int char_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans[ch]; + if (islower (ch)) + pstr->mbs[char_idx] = toupper (ch); + else + pstr->mbs[char_idx] = ch; + } + pstr->valid_len = char_idx; + pstr->valid_raw_len = char_idx; +} + +/* Apply TRANS to the buffer in PSTR. */ + +static void +internal_function +re_string_translate_buffer (re_string_t *pstr) +{ + int buf_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; + pstr->mbs[buf_idx] = pstr->trans[ch]; + } + + pstr->valid_len = buf_idx; + pstr->valid_raw_len = buf_idx; +} + +/* This function re-construct the buffers. + Concretely, convert to wide character in case of pstr->mb_cur_max > 1, + convert to upper case in case of REG_ICASE, apply translation. */ + +static reg_errcode_t +internal_function +re_string_reconstruct (re_string_t *pstr, int idx, int eflags) +{ + int offset = idx - pstr->raw_mbs_idx; + if (BE (offset < 0, 0)) + { + /* Reset buffer. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#endif /* RE_ENABLE_I18N */ + pstr->len = pstr->raw_len; + pstr->stop = pstr->raw_stop; + pstr->valid_len = 0; + pstr->raw_mbs_idx = 0; + pstr->valid_raw_len = 0; + pstr->offsets_needed = 0; + pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (!pstr->mbs_allocated) + pstr->mbs = (unsigned char *) pstr->raw_mbs; + offset = idx; + } + + if (BE (offset != 0, 1)) + { + /* Should the already checked characters be kept? */ + if (BE (offset < pstr->valid_raw_len, 1)) + { + /* Yes, move them to the front of the buffer. */ +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + int low = 0, high = pstr->valid_len, mid; + do + { + mid = (high + low) / 2; + if (pstr->offsets[mid] > offset) + high = mid; + else if (pstr->offsets[mid] < offset) + low = mid + 1; + else + break; + } + while (low < high); + if (pstr->offsets[mid] < offset) + ++mid; + pstr->tip_context = re_string_context_at (pstr, mid - 1, + eflags); + /* This can be quite complicated, so handle specially + only the common and easy case where the character with + different length representation of lower and upper + case is present at or after offset. */ + if (pstr->valid_len > offset + && mid == offset && pstr->offsets[mid] == offset) + { + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); + memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; + for (low = 0; low < pstr->valid_len; low++) + pstr->offsets[low] = pstr->offsets[low + offset] - offset; + } + else + { + /* Otherwise, just find out how long the partial multibyte + character at offset is and fill it with WEOF/255. */ + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + while (mid > 0 && pstr->offsets[mid - 1] == offset) + --mid; + while (mid < pstr->valid_len) + if (pstr->wcs[mid] != WEOF) + break; + else + ++mid; + if (mid == pstr->valid_len) + pstr->valid_len = 0; + else + { + pstr->valid_len = pstr->offsets[mid] - offset; + if (pstr->valid_len) + { + for (low = 0; low < pstr->valid_len; ++low) + pstr->wcs[low] = WEOF; + memset (pstr->mbs, 255, pstr->valid_len); + } + } + pstr->valid_raw_len = pstr->valid_len; + } + } + else +#endif + { + pstr->tip_context = re_string_context_at (pstr, offset - 1, + eflags); +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + memmove (pstr->mbs, pstr->mbs + offset, + pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; +#if DEBUG + assert (pstr->valid_len > 0); +#endif + } + } + else + { +#ifdef RE_ENABLE_I18N + /* No, skip all characters until IDX. */ + int prev_valid_len = pstr->valid_len; + + if (BE (pstr->offsets_needed, 0)) + { + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + } +#endif + pstr->valid_len = 0; +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + int wcs_idx; + wint_t wc = WEOF; + + if (pstr->is_utf8) + { + const unsigned char *raw, *p, *end; + + /* Special case UTF-8. Multi-byte chars start with any + byte other than 0x80 - 0xbf. */ + raw = pstr->raw_mbs + pstr->raw_mbs_idx; + end = raw + (offset - pstr->mb_cur_max); + if (end < pstr->raw_mbs) + end = pstr->raw_mbs; + p = raw + offset - 1; +#ifdef _LIBC + /* We know the wchar_t encoding is UCS4, so for the simple + case, ASCII characters, skip the conversion step. */ + if (isascii (*p) && BE (pstr->trans == NULL, 1)) + { + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + /* pstr->valid_len = 0; */ + wc = (wchar_t) *p; + } + else +#endif + for (; p >= end; --p) + if ((*p & 0xc0) != 0x80) + { + mbstate_t cur_state; + wchar_t wc2; + int mlen = raw + pstr->len - p; + unsigned char buf[6]; + size_t mbclen; + + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + } + /* XXX Don't use mbrtowc, we know which conversion + to use (UTF-8 -> UCS4). */ + memset (&cur_state, 0, sizeof (cur_state)); + mbclen = __mbrtowc (&wc2, (const char *) p, mlen, + &cur_state); + if (raw + offset - p <= mbclen + && mbclen < (size_t) -2) + { + memset (&pstr->cur_state, '\0', + sizeof (mbstate_t)); + pstr->valid_len = mbclen - (raw + offset - p); + wc = wc2; + } + break; + } + } + + if (wc == WEOF) + pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + if (wc == WEOF) + pstr->tip_context + = re_string_context_at (pstr, prev_valid_len - 1, eflags); + else + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + if (BE (pstr->valid_len, 0)) + { + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; + if (pstr->mbs_allocated) + memset (pstr->mbs, 255, pstr->valid_len); + } + pstr->valid_raw_len = pstr->valid_len; + } + else +#endif /* RE_ENABLE_I18N */ + { + int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; + pstr->valid_raw_len = 0; + if (pstr->trans) + c = pstr->trans[c]; + pstr->tip_context = (bitset_contain (pstr->word_char, c) + ? CONTEXT_WORD + : ((IS_NEWLINE (c) && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + } + } + if (!BE (pstr->mbs_allocated, 0)) + pstr->mbs += offset; + } + pstr->raw_mbs_idx = idx; + pstr->len -= offset; + pstr->stop -= offset; + + /* Then build the buffers. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + if (pstr->icase) + { + reg_errcode_t ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else + build_wcs_buffer (pstr); + } + else +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + { + if (pstr->icase) + build_upper_buffer (pstr); + else if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + else + pstr->valid_len = pstr->len; + + pstr->cur_idx = 0; + return REG_NOERROR; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_peek_byte_case (const re_string_t *pstr, int idx) +{ + int ch, off; + + /* Handle the common (easiest) cases first. */ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_peek_byte (pstr, idx); + +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1 + && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) + return re_string_peek_byte (pstr, idx); +#endif + + off = pstr->cur_idx + idx; +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + off = pstr->offsets[off]; +#endif + + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + +#ifdef RE_ENABLE_I18N + /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I + this function returns CAPITAL LETTER I instead of first byte of + DOTLESS SMALL LETTER I. The latter would confuse the parser, + since peek_byte_case doesn't advance cur_idx in any way. */ + if (pstr->offsets_needed && !isascii (ch)) + return re_string_peek_byte (pstr, idx); +#endif + + return ch; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_fetch_byte_case (re_string_t *pstr) +{ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_fetch_byte (pstr); + +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + { + int off, ch; + + /* For tr_TR.UTF-8 [[:islower:]] there is + [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip + in that case the whole multi-byte character and return + the original letter. On the other side, with + [[: DOTLESS SMALL LETTER I return [[:I, as doing + anything else would complicate things too much. */ + + if (!re_string_first_byte (pstr, pstr->cur_idx)) + return re_string_fetch_byte (pstr); + + off = pstr->offsets[pstr->cur_idx]; + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + + if (! isascii (ch)) + return re_string_fetch_byte (pstr); + + re_string_skip_bytes (pstr, + re_string_char_size_at (pstr, pstr->cur_idx)); + return ch; + } +#endif + + return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; +} + +static void +internal_function +re_string_destruct (re_string_t *pstr) +{ +#ifdef RE_ENABLE_I18N + re_free (pstr->wcs); + re_free (pstr->offsets); +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + re_free (pstr->mbs); +} + +/* Return the context at IDX in INPUT. */ + +static unsigned int +internal_function +re_string_context_at (const re_string_t *input, int idx, int eflags) +{ + int c; + if (BE (idx < 0, 0)) + /* In this case, we use the value stored in input->tip_context, + since we can't know the character in input->mbs[-1] here. */ + return input->tip_context; + if (BE (idx == input->len, 0)) + return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF + : CONTEXT_NEWLINE | CONTEXT_ENDBUF); +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc; + int wc_idx = idx; + while(input->wcs[wc_idx] == WEOF) + { +#ifdef DEBUG + /* It must not happen. */ + assert (wc_idx >= 0); +#endif + --wc_idx; + if (wc_idx < 0) + return input->tip_context; + } + wc = input->wcs[wc_idx]; + if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) + return CONTEXT_WORD; + return (IS_WIDE_NEWLINE (wc) && input->newline_anchor + ? CONTEXT_NEWLINE : 0); + } + else +#endif + { + c = re_string_byte_at (input, idx); + if (bitset_contain (input->word_char, c)) + return CONTEXT_WORD; + return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; + } +} + +/* Functions for set operation. */ + +static reg_errcode_t +internal_function +re_node_set_alloc (re_node_set *set, int size) +{ + /* + * ADR: valgrind says size can be 0, which then doesn't + * free the block of size 0. Harumph. This seems + * to work ok, though. + */ + if (size == 0) + { + memset(set, 0, sizeof(*set)); + return REG_NOERROR; + } + set->alloc = size; + set->nelem = 0; + set->elems = re_malloc (int, size); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_1 (re_node_set *set, int elem) +{ + set->alloc = 1; + set->nelem = 1; + set->elems = re_malloc (int, 1); + if (BE (set->elems == NULL, 0)) + { + set->alloc = set->nelem = 0; + return REG_ESPACE; + } + set->elems[0] = elem; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_2 (re_node_set *set, int elem1, int elem2) +{ + set->alloc = 2; + set->elems = re_malloc (int, 2); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + if (elem1 == elem2) + { + set->nelem = 1; + set->elems[0] = elem1; + } + else + { + set->nelem = 2; + if (elem1 < elem2) + { + set->elems[0] = elem1; + set->elems[1] = elem2; + } + else + { + set->elems[0] = elem2; + set->elems[1] = elem1; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_copy (re_node_set *dest, const re_node_set *src) +{ + dest->nelem = src->nelem; + if (src->nelem > 0) + { + dest->alloc = dest->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + { + dest->alloc = dest->nelem = 0; + return REG_ESPACE; + } + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + } + else + re_node_set_init_empty (dest); + return REG_NOERROR; +} + +/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. + Note: We assume dest->elems is NULL, when dest->alloc is 0. */ + +static reg_errcode_t +internal_function +re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, is, id, delta, sbase; + if (src1->nelem == 0 || src2->nelem == 0) + return REG_NOERROR; + + /* We need dest->nelem + 2 * elems_in_intersection; this is a + conservative estimate. */ + if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) + { + int new_alloc = src1->nelem + src2->nelem + dest->alloc; + int *new_elems = re_realloc (dest->elems, int, new_alloc); + if (BE (new_elems == NULL, 0)) + return REG_ESPACE; + dest->elems = new_elems; + dest->alloc = new_alloc; + } + + /* Find the items in the intersection of SRC1 and SRC2, and copy + into the top of DEST those that are not already in DEST itself. */ + sbase = dest->nelem + src1->nelem + src2->nelem; + i1 = src1->nelem - 1; + i2 = src2->nelem - 1; + id = dest->nelem - 1; + for (;;) + { + if (src1->elems[i1] == src2->elems[i2]) + { + /* Try to find the item in DEST. Maybe we could binary search? */ + while (id >= 0 && dest->elems[id] > src1->elems[i1]) + --id; + + if (id < 0 || dest->elems[id] != src1->elems[i1]) + dest->elems[--sbase] = src1->elems[i1]; + + if (--i1 < 0 || --i2 < 0) + break; + } + + /* Lower the highest of the two items. */ + else if (src1->elems[i1] < src2->elems[i2]) + { + if (--i2 < 0) + break; + } + else + { + if (--i1 < 0) + break; + } + } + + id = dest->nelem - 1; + is = dest->nelem + src1->nelem + src2->nelem - 1; + delta = is - sbase + 1; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place; this is more or + less the same loop that is in re_node_set_merge. */ + dest->nelem += delta; + if (delta > 0 && id >= 0) + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + break; + } + } + + /* Copy remaining SRC elements. */ + memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int)); + + return REG_NOERROR; +} + +/* Calculate the union set of the sets SRC1 and SRC2. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ + +static reg_errcode_t +internal_function +re_node_set_init_union (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, id; + if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) + { + dest->alloc = src1->nelem + src2->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + return REG_ESPACE; + } + else + { + if (src1 != NULL && src1->nelem > 0) + return re_node_set_init_copy (dest, src1); + else if (src2 != NULL && src2->nelem > 0) + return re_node_set_init_copy (dest, src2); + else + re_node_set_init_empty (dest); + return REG_NOERROR; + } + for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) + { + if (src1->elems[i1] > src2->elems[i2]) + { + dest->elems[id++] = src2->elems[i2++]; + continue; + } + if (src1->elems[i1] == src2->elems[i2]) + ++i2; + dest->elems[id++] = src1->elems[i1++]; + } + if (i1 < src1->nelem) + { + memcpy (dest->elems + id, src1->elems + i1, + (src1->nelem - i1) * sizeof (int)); + id += src1->nelem - i1; + } + else if (i2 < src2->nelem) + { + memcpy (dest->elems + id, src2->elems + i2, + (src2->nelem - i2) * sizeof (int)); + id += src2->nelem - i2; + } + dest->nelem = id; + return REG_NOERROR; +} + +/* Calculate the union set of the sets DEST and SRC. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ + +static reg_errcode_t +internal_function +re_node_set_merge (re_node_set *dest, const re_node_set *src) +{ + int is, id, sbase, delta; + if (src == NULL || src->nelem == 0) + return REG_NOERROR; + if (dest->alloc < 2 * src->nelem + dest->nelem) + { + int new_alloc = 2 * (src->nelem + dest->alloc); + int *new_buffer = re_realloc (dest->elems, int, new_alloc); + if (BE (new_buffer == NULL, 0)) + return REG_ESPACE; + dest->elems = new_buffer; + dest->alloc = new_alloc; + } + + if (BE (dest->nelem == 0, 0)) + { + dest->nelem = src->nelem; + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + return REG_NOERROR; + } + + /* Copy into the top of DEST the items of SRC that are not + found in DEST. Maybe we could binary search in DEST? */ + for (sbase = dest->nelem + 2 * src->nelem, + is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; ) + { + if (dest->elems[id] == src->elems[is]) + is--, id--; + else if (dest->elems[id] < src->elems[is]) + dest->elems[--sbase] = src->elems[is--]; + else /* if (dest->elems[id] > src->elems[is]) */ + --id; + } + + if (is >= 0) + { + /* If DEST is exhausted, the remaining items of SRC must be unique. */ + sbase -= is + 1; + memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int)); + } + + id = dest->nelem - 1; + is = dest->nelem + 2 * src->nelem - 1; + delta = is - sbase + 1; + if (delta == 0) + return REG_NOERROR; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place. */ + dest->nelem += delta; + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + { + /* Copy remaining SRC elements. */ + memcpy (dest->elems, dest->elems + sbase, + delta * sizeof (int)); + break; + } + } + } + + return REG_NOERROR; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have ELEM. + return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert (re_node_set *set, int elem) +{ + int idx; + /* In case the set is empty. */ + if (set->alloc == 0) + { + if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) + return 1; + else + return -1; + } + + if (BE (set->nelem, 0) == 0) + { + /* We already guaranteed above that set->alloc != 0. */ + set->elems[0] = elem; + ++set->nelem; + return 1; + } + + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = set->alloc * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Move the elements which follows the new element. Test the + first element separately to skip a check in the inner loop. */ + if (elem < set->elems[0]) + { + idx = 0; + for (idx = set->nelem; idx > 0; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + else + { + for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + + /* Insert the new element. */ + set->elems[idx] = elem; + ++set->nelem; + return 1; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have any element greater than or equal to ELEM. + Return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert_last (re_node_set *set, int elem) +{ + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = (set->alloc + 1) * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Insert the new element. */ + set->elems[set->nelem++] = elem; + return 1; +} + +/* Compare two node sets SET1 and SET2. + return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_compare (const re_node_set *set1, const re_node_set *set2) +{ + int i; + if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) + return 0; + for (i = set1->nelem ; --i >= 0 ; ) + if (set1->elems[i] != set2->elems[i]) + return 0; + return 1; +} + +/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_contains (const re_node_set *set, int elem) +{ + unsigned int idx, right, mid; + if (set->nelem <= 0) + return 0; + + /* Binary search the element. */ + idx = 0; + right = set->nelem - 1; + while (idx < right) + { + mid = (idx + right) / 2; + if (set->elems[mid] < elem) + idx = mid + 1; + else + right = mid; + } + return set->elems[idx] == elem ? idx + 1 : 0; +} + +static void +internal_function +re_node_set_remove_at (re_node_set *set, int idx) +{ + if (idx < 0 || idx >= set->nelem) + return; + --set->nelem; + for (; idx < set->nelem; idx++) + set->elems[idx] = set->elems[idx + 1]; +} + + +/* Add the token TOKEN to dfa->nodes, and return the index of the token. + Or return -1, if an error will be occured. */ + +static int +internal_function +re_dfa_add_node (re_dfa_t *dfa, re_token_t token) +{ + if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) + { + size_t new_nodes_alloc = dfa->nodes_alloc * 2; + int *new_nexts, *new_indices; + re_node_set *new_edests, *new_eclosures; + re_token_t *new_nodes; + + /* Avoid overflows in realloc. */ + const size_t max_object_size = MAX (sizeof (re_token_t), + MAX (sizeof (re_node_set), + sizeof (int))); + if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0)) + return -1; + + new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); + if (BE (new_nodes == NULL, 0)) + return -1; + dfa->nodes = new_nodes; + new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); + new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); + new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); + new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); + if (BE (new_nexts == NULL || new_indices == NULL + || new_edests == NULL || new_eclosures == NULL, 0)) + return -1; + dfa->nexts = new_nexts; + dfa->org_indices = new_indices; + dfa->edests = new_edests; + dfa->eclosures = new_eclosures; + dfa->nodes_alloc = new_nodes_alloc; + } + dfa->nodes[dfa->nodes_len] = token; + dfa->nodes[dfa->nodes_len].constraint = 0; +#ifdef RE_ENABLE_I18N + dfa->nodes[dfa->nodes_len].accept_mb = + (token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET; +#endif + dfa->nexts[dfa->nodes_len] = -1; + re_node_set_init_empty (dfa->edests + dfa->nodes_len); + re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); + return dfa->nodes_len++; +} + +static inline unsigned int +internal_function +calc_state_hash (const re_node_set *nodes, unsigned int context) +{ + unsigned int hash = nodes->nelem + context; + int i; + for (i = 0 ; i < nodes->nelem ; i++) + hash += nodes->elems[i]; + return hash; +} + +/* Search for the state whose node_set is equivalent to NODES. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. */ + +static re_dfastate_t * +internal_function +re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, + const re_node_set *nodes) +{ + unsigned int hash; + re_dfastate_t *new_state; + struct re_state_table_entry *spot; + int i; + if (BE (nodes->nelem == 0, 0)) + { + *err = REG_NOERROR; + return NULL; + } + hash = calc_state_hash (nodes, 0); + spot = dfa->state_table + (hash & dfa->state_hash_mask); + + for (i = 0 ; i < spot->num ; i++) + { + re_dfastate_t *state = spot->array[i]; + if (hash != state->hash) + continue; + if (re_node_set_compare (&state->nodes, nodes)) + return state; + } + + /* There are no appropriate state in the dfa, create the new one. */ + new_state = create_ci_newstate (dfa, nodes, hash); + if (BE (new_state == NULL, 0)) + *err = REG_ESPACE; + + return new_state; +} + +/* Search for the state whose node_set is equivalent to NODES and + whose context is equivalent to CONTEXT. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. */ + +static re_dfastate_t * +internal_function +re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, + const re_node_set *nodes, unsigned int context) +{ + unsigned int hash; + re_dfastate_t *new_state; + struct re_state_table_entry *spot; + int i; + if (nodes->nelem == 0) + { + *err = REG_NOERROR; + return NULL; + } + hash = calc_state_hash (nodes, context); + spot = dfa->state_table + (hash & dfa->state_hash_mask); + + for (i = 0 ; i < spot->num ; i++) + { + re_dfastate_t *state = spot->array[i]; + if (state->hash == hash + && state->context == context + && re_node_set_compare (state->entrance_nodes, nodes)) + return state; + } + /* There are no appropriate state in `dfa', create the new one. */ + new_state = create_cd_newstate (dfa, nodes, context, hash); + if (BE (new_state == NULL, 0)) + *err = REG_ESPACE; + + return new_state; +} + +/* Finish initialization of the new state NEWSTATE, and using its hash value + HASH put in the appropriate bucket of DFA's state table. Return value + indicates the error code if failed. */ + +static reg_errcode_t +register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, + unsigned int hash) +{ + struct re_state_table_entry *spot; + reg_errcode_t err; + int i; + + newstate->hash = hash; + err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + for (i = 0; i < newstate->nodes.nelem; i++) + { + int elem = newstate->nodes.elems[i]; + if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) + if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0) + return REG_ESPACE; + } + + spot = dfa->state_table + (hash & dfa->state_hash_mask); + if (BE (spot->alloc <= spot->num, 0)) + { + int new_alloc = 2 * spot->num + 2; + re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *, + new_alloc); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + spot->array = new_array; + spot->alloc = new_alloc; + } + spot->array[spot->num++] = newstate; + return REG_NOERROR; +} + +static void +free_state (re_dfastate_t *state) +{ + re_node_set_free (&state->non_eps_nodes); + re_node_set_free (&state->inveclosure); + if (state->entrance_nodes != &state->nodes) + { + re_node_set_free (state->entrance_nodes); + re_free (state->entrance_nodes); + } + re_node_set_free (&state->nodes); + re_free (state->word_trtable); + re_free (state->trtable); + re_free (state); +} + +/* Create the new state which is independ of contexts. + Return the new state if succeeded, otherwise return NULL. */ + +static re_dfastate_t * +internal_function +create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int hash) +{ + int i; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->entrance_nodes = &newstate->nodes; + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + if (type == CHARACTER && !node->constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + else if (type == ANCHOR || node->constraint) + newstate->has_constraint = 1; + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} + +/* Create the new state which is depend on the context CONTEXT. + Return the new state if succeeded, otherwise return NULL. */ + +static re_dfastate_t * +internal_function +create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int context, unsigned int hash) +{ + int i, nctx_nodes = 0; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->context = context; + newstate->entrance_nodes = &newstate->nodes; + + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + if (type == CHARACTER && !constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + + if (constraint) + { + if (newstate->entrance_nodes == &newstate->nodes) + { + newstate->entrance_nodes = re_malloc (re_node_set, 1); + if (BE (newstate->entrance_nodes == NULL, 0)) + { + free_state (newstate); + return NULL; + } + if (re_node_set_init_copy (newstate->entrance_nodes, nodes) + != REG_NOERROR) + return NULL; + nctx_nodes = 0; + newstate->has_constraint = 1; + } + + if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) + { + re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); + ++nctx_nodes; + } + } + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} diff --git a/deps/regex/regex_internal.h b/deps/regex/regex_internal.h new file mode 100644 index 0000000..53ccebe --- /dev/null +++ b/deps/regex/regex_internal.h @@ -0,0 +1,819 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002-2005, 2007, 2008, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_INTERNAL_H +#define _REGEX_INTERNAL_H 1 + +#include +#include +#include +#include +#include + +#ifndef UNUSED +# ifdef __GNUC__ +# define UNUSED __attribute__((unused)) +# else +# define UNUSED +# endif +#endif + +#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +# include +#endif +#if defined HAVE_LOCALE_H || defined _LIBC +# include +#endif +#if defined HAVE_WCHAR_H || defined _LIBC +# include +#endif /* HAVE_WCHAR_H || _LIBC */ +#if defined HAVE_WCTYPE_H || defined _LIBC +# include +#endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include +#endif /* HAVE_STDBOOL_H || _LIBC */ +#if !defined(ZOS_USS) +#if defined HAVE_STDINT_H || defined _LIBC +# include +#endif /* HAVE_STDINT_H || _LIBC */ +#endif /* !ZOS_USS */ +#if defined _LIBC +# include +#else +# define __libc_lock_define(CLASS,NAME) +# define __libc_lock_init(NAME) do { } while (0) +# define __libc_lock_lock(NAME) do { } while (0) +# define __libc_lock_unlock(NAME) do { } while (0) +#endif + +#ifndef GAWK +/* In case that the system doesn't have isblank(). */ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif +#else /* GAWK */ +/* + * This is a mess. On glibc systems you have to define + * a magic constant to get isblank() out of , since it's + * a C99 function. To heck with all that and borrow a page from + * dfa.c's book. + */ + +static int +is_blank (int c) +{ + return (c == ' ' || c == '\t'); +} +#endif /* GAWK */ + +#ifdef _LIBC +# ifndef _RE_DEFINE_LOCALE_FUNCTIONS +# define _RE_DEFINE_LOCALE_FUNCTIONS 1 +# include +# include +# include +# endif +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifdef _LIBC +# undef gettext +# define gettext(msgid) \ + INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) +# endif +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +# define gettext_noop(String) String +#endif + +/* For loser systems without the definition. */ +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#ifndef NO_MBSUPPORT +#include "mbsupport.h" /* gawk */ +#endif +#ifndef MB_CUR_MAX +#define MB_CUR_MAX 1 +#endif + +#if (defined MBS_SUPPORT) || _LIBC +# define RE_ENABLE_I18N +#endif + +#if __GNUC__ >= 3 +# define BE(expr, val) __builtin_expect (expr, val) +#else +# define BE(expr, val) (expr) +# ifdef inline +# undef inline +# endif +# define inline +#endif + +/* Number of single byte character. */ +#define SBC_MAX 256 + +#define COLL_ELEM_LEN_MAX 8 + +/* The character which represents newline. */ +#define NEWLINE_CHAR '\n' +#define WIDE_NEWLINE_CHAR L'\n' + +/* Rename to standard API for using out of glibc. */ +#ifndef _LIBC +# ifdef __wctype +# undef __wctype +# endif +# define __wctype wctype +# ifdef __iswctype +# undef __iswctype +# endif +# define __iswctype iswctype +# define __btowc btowc +# define __mbrtowc mbrtowc +#undef __mempcpy /* GAWK */ +# define __mempcpy mempcpy +# define __wcrtomb wcrtomb +# define __regfree regfree +# define attribute_hidden +#endif /* not _LIBC */ + +#ifdef __GNUC__ +# define __attribute(arg) __attribute__ (arg) +#else +# define __attribute(arg) +#endif + +extern const char __re_error_msgid[] attribute_hidden; +extern const size_t __re_error_msgid_idx[] attribute_hidden; + +/* An integer used to represent a set of bits. It must be unsigned, + and must be at least as wide as unsigned int. */ +typedef unsigned long int bitset_word_t; +/* All bits set in a bitset_word_t. */ +#define BITSET_WORD_MAX ULONG_MAX +/* Number of bits in a bitset_word_t. Cast to int as most code use it + * like that for counting */ +#define BITSET_WORD_BITS ((int)(sizeof (bitset_word_t) * CHAR_BIT)) +/* Number of bitset_word_t in a bit_set. */ +#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) +typedef bitset_word_t bitset_t[BITSET_WORDS]; +typedef bitset_word_t *re_bitset_ptr_t; +typedef const bitset_word_t *re_const_bitset_ptr_t; + +#define bitset_set(set,i) \ + (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS) +#define bitset_clear(set,i) \ + (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_contain(set,i) \ + (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) +#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) +#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) + +#define PREV_WORD_CONSTRAINT 0x0001 +#define PREV_NOTWORD_CONSTRAINT 0x0002 +#define NEXT_WORD_CONSTRAINT 0x0004 +#define NEXT_NOTWORD_CONSTRAINT 0x0008 +#define PREV_NEWLINE_CONSTRAINT 0x0010 +#define NEXT_NEWLINE_CONSTRAINT 0x0020 +#define PREV_BEGBUF_CONSTRAINT 0x0040 +#define NEXT_ENDBUF_CONSTRAINT 0x0080 +#define WORD_DELIM_CONSTRAINT 0x0100 +#define NOT_WORD_DELIM_CONSTRAINT 0x0200 + +typedef enum +{ + INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + LINE_FIRST = PREV_NEWLINE_CONSTRAINT, + LINE_LAST = NEXT_NEWLINE_CONSTRAINT, + BUF_FIRST = PREV_BEGBUF_CONSTRAINT, + BUF_LAST = NEXT_ENDBUF_CONSTRAINT, + WORD_DELIM = WORD_DELIM_CONSTRAINT, + NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT +} re_context_type; + +typedef struct +{ + int alloc; + int nelem; + int *elems; +} re_node_set; + +typedef enum +{ + NON_TYPE = 0, + + /* Node type, These are used by token, node, tree. */ + CHARACTER = 1, + END_OF_RE = 2, + SIMPLE_BRACKET = 3, + OP_BACK_REF = 4, + OP_PERIOD = 5, +#ifdef RE_ENABLE_I18N + COMPLEX_BRACKET = 6, + OP_UTF8_PERIOD = 7, +#endif /* RE_ENABLE_I18N */ + + /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used + when the debugger shows values of this enum type. */ +#define EPSILON_BIT 8 + OP_OPEN_SUBEXP = EPSILON_BIT | 0, + OP_CLOSE_SUBEXP = EPSILON_BIT | 1, + OP_ALT = EPSILON_BIT | 2, + OP_DUP_ASTERISK = EPSILON_BIT | 3, + ANCHOR = EPSILON_BIT | 4, + + /* Tree type, these are used only by tree. */ + CONCAT = 16, + SUBEXP = 17, + + /* Token type, these are used only by token. */ + OP_DUP_PLUS = 18, + OP_DUP_QUESTION, + OP_OPEN_BRACKET, + OP_CLOSE_BRACKET, + OP_CHARSET_RANGE, + OP_OPEN_DUP_NUM, + OP_CLOSE_DUP_NUM, + OP_NON_MATCH_LIST, + OP_OPEN_COLL_ELEM, + OP_CLOSE_COLL_ELEM, + OP_OPEN_EQUIV_CLASS, + OP_CLOSE_EQUIV_CLASS, + OP_OPEN_CHAR_CLASS, + OP_CLOSE_CHAR_CLASS, + OP_WORD, + OP_NOTWORD, + OP_SPACE, + OP_NOTSPACE, + BACK_SLASH + +} re_token_type_t; + +#ifdef RE_ENABLE_I18N +typedef struct +{ + /* Multibyte characters. */ + wchar_t *mbchars; + + /* Collating symbols. */ +# ifdef _LIBC + int32_t *coll_syms; +# endif + + /* Equivalence classes. */ +# ifdef _LIBC + int32_t *equiv_classes; +# endif + + /* Range expressions. */ +# ifdef _LIBC + uint32_t *range_starts; + uint32_t *range_ends; +# else /* not _LIBC */ + wchar_t *range_starts; + wchar_t *range_ends; +# endif /* not _LIBC */ + + /* Character classes. */ + wctype_t *char_classes; + + /* If this character set is the non-matching list. */ + unsigned int non_match : 1; + + /* # of multibyte characters. */ + int nmbchars; + + /* # of collating symbols. */ + int ncoll_syms; + + /* # of equivalence classes. */ + int nequiv_classes; + + /* # of range expressions. */ + int nranges; + + /* # of character classes. */ + int nchar_classes; +} re_charset_t; +#endif /* RE_ENABLE_I18N */ + +typedef struct +{ + union + { + unsigned char c; /* for CHARACTER */ + re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; /* for COMPLEX_BRACKET */ +#endif /* RE_ENABLE_I18N */ + int idx; /* for BACK_REF */ + re_context_type ctx_type; /* for ANCHOR */ + } opr; +#if __GNUC__ >= 2 + re_token_type_t type : 8; +#else + re_token_type_t type; +#endif + unsigned int constraint : 10; /* context constraint */ + unsigned int duplicated : 1; + unsigned int opt_subexp : 1; +#ifdef RE_ENABLE_I18N + unsigned int accept_mb : 1; + /* These 2 bits can be moved into the union if needed (e.g. if running out + of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ + unsigned int mb_partial : 1; +#endif + unsigned int word_char : 1; +} re_token_t; + +#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) + +struct re_string_t +{ + /* Indicate the raw buffer which is the original string passed as an + argument of regexec(), re_search(), etc.. */ + const unsigned char *raw_mbs; + /* Store the multibyte string. In case of "case insensitive mode" like + REG_ICASE, upper cases of the string are stored, otherwise MBS points + the same address that RAW_MBS points. */ + unsigned char *mbs; +#ifdef RE_ENABLE_I18N + /* Store the wide character string which is corresponding to MBS. */ + wint_t *wcs; + int *offsets; + mbstate_t cur_state; +#endif + /* Index in RAW_MBS. Each character mbs[i] corresponds to + raw_mbs[raw_mbs_idx + i]. */ + int raw_mbs_idx; + /* The length of the valid characters in the buffers. */ + int valid_len; + /* The corresponding number of bytes in raw_mbs array. */ + int valid_raw_len; + /* The length of the buffers MBS and WCS. */ + int bufs_len; + /* The index in MBS, which is updated by re_string_fetch_byte. */ + int cur_idx; + /* length of RAW_MBS array. */ + int raw_len; + /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ + int len; + /* End of the buffer may be shorter than its length in the cases such + as re_match_2, re_search_2. Then, we use STOP for end of the buffer + instead of LEN. */ + int raw_stop; + /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ + int stop; + + /* The context of mbs[0]. We store the context independently, since + the context of mbs[0] may be different from raw_mbs[0], which is + the beginning of the input string. */ + unsigned int tip_context; + /* The translation passed as a part of an argument of re_compile_pattern. */ + RE_TRANSLATE_TYPE trans; + /* Copy of re_dfa_t's word_char. */ + re_const_bitset_ptr_t word_char; + /* 1 if REG_ICASE. */ + unsigned char icase; + unsigned char is_utf8; + unsigned char map_notascii; + unsigned char mbs_allocated; + unsigned char offsets_needed; + unsigned char newline_anchor; + unsigned char word_ops_used; + int mb_cur_max; +}; +typedef struct re_string_t re_string_t; + + +struct re_dfa_t; +typedef struct re_dfa_t re_dfa_t; + +#ifndef _LIBC +# ifdef __i386__ +# define internal_function __attribute ((regparm (3), stdcall)) +# else +# define internal_function +# endif +#endif + +#ifndef NOT_IN_libc +static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, + int new_buf_len) + internal_function; +# ifdef RE_ENABLE_I18N +static void build_wcs_buffer (re_string_t *pstr) internal_function; +static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr) + internal_function; +# endif /* RE_ENABLE_I18N */ +static void build_upper_buffer (re_string_t *pstr) internal_function; +static void re_string_translate_buffer (re_string_t *pstr) internal_function; +static unsigned int re_string_context_at (const re_string_t *input, int idx, + int eflags) + internal_function __attribute ((pure)); +#endif +#define re_string_peek_byte(pstr, offset) \ + ((pstr)->mbs[(pstr)->cur_idx + offset]) +#define re_string_fetch_byte(pstr) \ + ((pstr)->mbs[(pstr)->cur_idx++]) +#define re_string_first_byte(pstr, idx) \ + ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF) +#define re_string_is_single_byte_char(pstr, idx) \ + ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \ + || (pstr)->wcs[(idx) + 1] != WEOF)) +#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) +#define re_string_cur_idx(pstr) ((pstr)->cur_idx) +#define re_string_get_buffer(pstr) ((pstr)->mbs) +#define re_string_length(pstr) ((pstr)->len) +#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) +#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) +#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) + +#ifndef _LIBC +# if HAVE_ALLOCA +# if (_MSC_VER) +# include +# define __libc_use_alloca(n) 0 +# else +# include +/* The OS usually guarantees only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + allocate anything larger than 4096 bytes. Also care for the possibility + of a few compiler-allocated temporary stack slots. */ +# define __libc_use_alloca(n) ((n) < 4032) +# endif +# else +/* alloca is implemented with malloc, so just use malloc. */ +# define __libc_use_alloca(n) 0 +# endif +#endif + +#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) +/* SunOS 4.1.x realloc doesn't accept null pointers: pre-Standard C. Sigh. */ +#define re_realloc(p,t,n) ((p != NULL) ? (t *) realloc (p,(n)*sizeof(t)) : (t *) calloc(n,sizeof(t))) +#define re_free(p) free (p) + +struct bin_tree_t +{ + struct bin_tree_t *parent; + struct bin_tree_t *left; + struct bin_tree_t *right; + struct bin_tree_t *first; + struct bin_tree_t *next; + + re_token_t token; + + /* `node_idx' is the index in dfa->nodes, if `type' == 0. + Otherwise `type' indicate the type of this node. */ + int node_idx; +}; +typedef struct bin_tree_t bin_tree_t; + +#define BIN_TREE_STORAGE_SIZE \ + ((1024 - sizeof (void *)) / sizeof (bin_tree_t)) + +struct bin_tree_storage_t +{ + struct bin_tree_storage_t *next; + bin_tree_t data[BIN_TREE_STORAGE_SIZE]; +}; +typedef struct bin_tree_storage_t bin_tree_storage_t; + +#define CONTEXT_WORD 1 +#define CONTEXT_NEWLINE (CONTEXT_WORD << 1) +#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1) +#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1) + +#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD) +#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE) +#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF) +#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF) +#define IS_ORDINARY_CONTEXT(c) ((c) == 0) + +#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') +#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) +#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') +#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) + +#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ + ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\ + || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context))) + +#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \ + ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \ + || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context))) + +struct re_dfastate_t +{ + unsigned int hash; + re_node_set nodes; + re_node_set non_eps_nodes; + re_node_set inveclosure; + re_node_set *entrance_nodes; + struct re_dfastate_t **trtable, **word_trtable; + unsigned int context : 4; + unsigned int halt : 1; + /* If this state can accept `multi byte'. + Note that we refer to multibyte characters, and multi character + collating elements as `multi byte'. */ + unsigned int accept_mb : 1; + /* If this state has backreference node(s). */ + unsigned int has_backref : 1; + unsigned int has_constraint : 1; +}; +typedef struct re_dfastate_t re_dfastate_t; + +struct re_state_table_entry +{ + int num; + int alloc; + re_dfastate_t **array; +}; + +/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ + +typedef struct +{ + int next_idx; + int alloc; + re_dfastate_t **array; +} state_array_t; + +/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ + +typedef struct +{ + int node; + int str_idx; /* The position NODE match at. */ + state_array_t path; +} re_sub_match_last_t; + +/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. + And information about the node, whose type is OP_CLOSE_SUBEXP, + corresponding to NODE is stored in LASTS. */ + +typedef struct +{ + int str_idx; + int node; + state_array_t *path; + int alasts; /* Allocation size of LASTS. */ + int nlasts; /* The number of LASTS. */ + re_sub_match_last_t **lasts; +} re_sub_match_top_t; + +struct re_backref_cache_entry +{ + int node; + int str_idx; + int subexp_from; + int subexp_to; + char more; + char unused; + unsigned short int eps_reachable_subexps_map; +}; + +typedef struct +{ + /* The string object corresponding to the input string. */ + re_string_t input; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + const re_dfa_t *const dfa; +#else + const re_dfa_t *dfa; +#endif + /* EFLAGS of the argument of regexec. */ + int eflags; + /* Where the matching ends. */ + int match_last; + int last_node; + /* The state log used by the matcher. */ + re_dfastate_t **state_log; + int state_log_top; + /* Back reference cache. */ + int nbkref_ents; + int abkref_ents; + struct re_backref_cache_entry *bkref_ents; + int max_mb_elem_len; + int nsub_tops; + int asub_tops; + re_sub_match_top_t **sub_tops; +} re_match_context_t; + +typedef struct +{ + re_dfastate_t **sifted_states; + re_dfastate_t **limited_states; + int last_node; + int last_str_idx; + re_node_set limits; +} re_sift_context_t; + +struct re_fail_stack_ent_t +{ + int idx; + int node; + regmatch_t *regs; + re_node_set eps_via_nodes; +}; + +struct re_fail_stack_t +{ + int num; + int alloc; + struct re_fail_stack_ent_t *stack; +}; + +struct re_dfa_t +{ + re_token_t *nodes; + size_t nodes_alloc; + size_t nodes_len; + int *nexts; + int *org_indices; + re_node_set *edests; + re_node_set *eclosures; + re_node_set *inveclosures; + struct re_state_table_entry *state_table; + re_dfastate_t *init_state; + re_dfastate_t *init_state_word; + re_dfastate_t *init_state_nl; + re_dfastate_t *init_state_begbuf; + bin_tree_t *str_tree; + bin_tree_storage_t *str_tree_storage; + re_bitset_ptr_t sb_char; + int str_tree_storage_idx; + + /* number of subexpressions `re_nsub' is in regex_t. */ + unsigned int state_hash_mask; + int init_node; + int nbackref; /* The number of backreference in this dfa. */ + + /* Bitmap expressing which backreference is used. */ + bitset_word_t used_bkref_map; + bitset_word_t completed_bkref_map; + + unsigned int has_plural_match : 1; + /* If this dfa has "multibyte node", which is a backreference or + a node which can accept multibyte character or multi character + collating element. */ + unsigned int has_mb_node : 1; + unsigned int is_utf8 : 1; + unsigned int map_notascii : 1; + unsigned int word_ops_used : 1; + int mb_cur_max; + bitset_t word_char; + reg_syntax_t syntax; + int *subexp_map; +#ifdef DEBUG + char* re_str; +#endif +#if defined _LIBC + __libc_lock_define (, lock) +#endif +}; + +#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) +#define re_node_set_remove(set,id) \ + (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) +#define re_node_set_empty(p) ((p)->nelem = 0) +#define re_node_set_free(set) re_free ((set)->elems) + + +typedef enum +{ + SB_CHAR, + MB_CHAR, + EQUIV_CLASS, + COLL_SYM, + CHAR_CLASS +} bracket_elem_type; + +typedef struct +{ + bracket_elem_type type; + union + { + unsigned char ch; + unsigned char *name; + wchar_t wch; + } opr; +} bracket_elem_t; + + +/* Inline functions for bitset operation. */ +static inline void +bitset_not (bitset_t set) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + set[bitset_i] = ~set[bitset_i]; +} + +static inline void +bitset_merge (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] |= src[bitset_i]; +} + +static inline void +bitset_mask (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] &= src[bitset_i]; +} + +#ifdef RE_ENABLE_I18N +/* Inline functions for re_string. */ +static inline int +internal_function __attribute ((pure)) +re_string_char_size_at (const re_string_t *pstr, int idx) +{ + int byte_idx; + if (pstr->mb_cur_max == 1) + return 1; + for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx) + if (pstr->wcs[idx + byte_idx] != WEOF) + break; + return byte_idx; +} + +static inline wint_t +internal_function __attribute ((pure)) +re_string_wchar_at (const re_string_t *pstr, int idx) +{ + if (pstr->mb_cur_max == 1) + return (wint_t) pstr->mbs[idx]; + return (wint_t) pstr->wcs[idx]; +} + +# ifndef NOT_IN_libc +static int +internal_function __attribute ((pure)) +re_string_elem_size_at (const re_string_t *pstr, int idx) +{ +# ifdef _LIBC + const unsigned char *p, *extra; + const int32_t *table, *indirect; + int32_t tmp; +# include + uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + + if (nrules != 0) + { + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + p = pstr->mbs + idx; + tmp = findidx (&p); + return p - pstr->mbs - idx; + } + else +# endif /* _LIBC */ + return 1; +} +# endif +#endif /* RE_ENABLE_I18N */ + +#endif /* _REGEX_INTERNAL_H */ diff --git a/deps/regex/regexec.c b/deps/regex/regexec.c new file mode 100644 index 0000000..0a1602e --- /dev/null +++ b/deps/regex/regexec.c @@ -0,0 +1,4369 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002-2005, 2007, 2009, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, + int n) internal_function; +static void match_ctx_clean (re_match_context_t *mctx) internal_function; +static void match_ctx_free (re_match_context_t *cache) internal_function; +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, + int str_idx, int from, int to) + internal_function; +static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) + internal_function; +static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, + int str_idx) internal_function; +static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, + int node, int str_idx) + internal_function; +static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, + int last_str_idx) + internal_function; +static reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags); +static int re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, + int start, int range, struct re_registers *regs, + int stop, int ret_len); +static int re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, struct re_registers *regs, + int ret_len); +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + unsigned int nregs, int regs_allocated); +static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx); +static int check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) internal_function; +static int check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) + internal_function; +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, + int cur_idx, int nmatch) internal_function; +static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, + int str_idx, int dest_node, int nregs, + regmatch_t *regs, + re_node_set *eps_via_nodes) + internal_function; +static reg_errcode_t set_regs (const regex_t *preg, + const re_match_context_t *mctx, + size_t nmatch, regmatch_t *pmatch, + int fl_backtrack) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; + +#ifdef RE_ENABLE_I18N +static int sift_states_iter_mb (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, + re_node_set *dest_nodes) + internal_function; +static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates) + internal_function; +static int check_dst_limits (const re_match_context_t *mctx, + re_node_set *limits, + int dst_node, int dst_idx, int src_node, + int src_idx) internal_function; +static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, + int boundaries, int subexp_idx, + int from_node, int bkref_idx) + internal_function; +static int check_dst_limits_calc_pos (const re_match_context_t *mctx, + int limit, int subexp_idx, + int node, int str_idx, + int bkref_idx) internal_function; +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates, + re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, + int str_idx) internal_function; +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, int num) + internal_function; +static re_dfastate_t *find_recover_state (reg_errcode_t *err, + re_match_context_t *mctx) internal_function; +static re_dfastate_t *transit_state (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *state) internal_function; +static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *next_state) + internal_function; +static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, + re_node_set *cur_nodes, + int str_idx) internal_function; +#if 0 +static re_dfastate_t *transit_state_sb (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif +#ifdef RE_ENABLE_I18N +static reg_errcode_t transit_state_mb (re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, + const re_node_set *nodes) + internal_function; +static reg_errcode_t get_subexp (re_match_context_t *mctx, + int bkref_node, int bkref_str_idx) + internal_function; +static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, + const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, + int bkref_node, int bkref_str) + internal_function; +static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) internal_function; +static reg_errcode_t check_arrival (re_match_context_t *mctx, + state_array_t *path, int top_node, + int top_str, int last_node, int last_str, + int type) internal_function; +static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, + int str_idx, + re_node_set *cur_nodes, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, + re_node_set *cur_nodes, + int ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, + re_node_set *dst_nodes, + int target, int ex_subexp, + int type) internal_function; +static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, + re_node_set *cur_nodes, int cur_str, + int subexp_num, int type) + internal_function; +static int build_trtable (const re_dfa_t *dfa, + re_dfastate_t *state) internal_function; +#ifdef RE_ENABLE_I18N +static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int idx) + internal_function; +# ifdef _LIBC +static unsigned int find_collation_sequence_value (const unsigned char *mbs, + size_t name_len) + internal_function; +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ +static int group_nodes_into_DFAstates (const re_dfa_t *dfa, + const re_dfastate_t *state, + re_node_set *states_node, + bitset_t *states_ch) internal_function; +static int check_node_accept (const re_match_context_t *mctx, + const re_token_t *node, int idx) + internal_function; +static reg_errcode_t extend_buffers (re_match_context_t *mctx) + internal_function; + +/* Entry point for POSIX code. */ + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. */ + +int +regexec ( + const regex_t *__restrict preg, + const char *__restrict string, + size_t nmatch, + regmatch_t pmatch[], + int eflags) +{ + reg_errcode_t err; + int start, length; + + if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) + return REG_BADPAT; + + if (eflags & REG_STARTEND) + { + start = pmatch[0].rm_so; + length = pmatch[0].rm_eo; + } + else + { + start = 0; + length = strlen (string); + } + + __libc_lock_lock (dfa->lock); + if (preg->no_sub) + err = re_search_internal (preg, string, length, start, length - start, + length, 0, NULL, eflags); + else + err = re_search_internal (preg, string, length, start, length - start, + length, nmatch, pmatch, eflags); + __libc_lock_unlock (dfa->lock); + return err != REG_NOERROR; +} + +#ifdef _LIBC +# include +versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); + +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +__typeof__ (__regexec) __compat_regexec; + +int +attribute_compat_text_section +__compat_regexec (const regex_t *__restrict preg, + const char *__restrict string, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ + return regexec (preg, string, nmatch, pmatch, + eflags & (REG_NOTBOL | REG_NOTEOL)); +} +compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); +# endif +#endif + +/* Entry points for GNU code. */ + +/* re_match, re_search, re_match_2, re_search_2 + + The former two functions operate on STRING with length LENGTH, + while the later two operate on concatenation of STRING1 and STRING2 + with lengths LENGTH1 and LENGTH2, respectively. + + re_match() matches the compiled pattern in BUFP against the string, + starting at index START. + + re_search() first tries matching at index START, then it tries to match + starting from index START + 1, and so on. The last start position tried + is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same + way as re_match().) + + The parameter STOP of re_{match,search}_2 specifies that no match exceeding + the first STOP characters of the concatenation of the strings should be + concerned. + + If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match + and all groups is stroed in REGS. (For the "_2" variants, the offsets are + computed relative to the concatenation, not relative to the individual + strings.) + + On success, re_match* functions return the length of the match, re_search* + return the position of the start of the match. Return value -1 means no + match was found and -2 indicates an internal error. */ + +int +re_match (struct re_pattern_buffer *bufp, + const char *string, + int length, + int start, + struct re_registers *regs) +{ + return re_search_stub (bufp, string, length, start, 0, length, regs, 1); +} +#ifdef _LIBC +weak_alias (__re_match, re_match) +#endif + +int +re_search (struct re_pattern_buffer *bufp, + const char *string, + int length, int start, int range, + struct re_registers *regs) +{ + return re_search_stub (bufp, string, length, start, range, length, regs, 0); +} +#ifdef _LIBC +weak_alias (__re_search, re_search) +#endif + +int +re_match_2 (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, int start, + struct re_registers *regs, int stop) +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, 0, regs, stop, 1); +} +#ifdef _LIBC +weak_alias (__re_match_2, re_match_2) +#endif + +int +re_search_2 (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, int start, + int range, struct re_registers *regs, int stop) +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, range, regs, stop, 0); +} +#ifdef _LIBC +weak_alias (__re_search_2, re_search_2) +#endif + +static int +re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, int start, + int range, struct re_registers *regs, + int stop, int ret_len) +{ + const char *str; + int rval; + int len = length1 + length2; + int free_str = 0; + + if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) + return -2; + + /* Concatenate the strings. */ + if (length2 > 0) + if (length1 > 0) + { + char *s = re_malloc (char, len); + + if (BE (s == NULL, 0)) + return -2; + memcpy (s, string1, length1); + memcpy (s + length1, string2, length2); + str = s; + free_str = 1; + } + else + str = string2; + else + str = string1; + + rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len); + if (free_str) + re_free ((char *) str); + return rval; +} + +/* The parameters have the same meaning as those of re_search. + Additional parameters: + If RET_LEN is nonzero the length of the match is returned (re_match style); + otherwise the position of the match is returned. */ + +static int +re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, + struct re_registers *regs, int ret_len) +{ + reg_errcode_t result; + regmatch_t *pmatch; + int nregs, rval; + int eflags = 0; + + /* Check for out-of-range. */ + if (BE (start < 0 || start > length, 0)) + return -1; + if (BE (start + range > length, 0)) + range = length - start; + else if (BE (start + range < 0, 0)) + range = -start; + + __libc_lock_lock (dfa->lock); + + eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; + eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; + + /* Compile fastmap if we haven't yet. */ + if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) + re_compile_fastmap (bufp); + + if (BE (bufp->no_sub, 0)) + regs = NULL; + + /* We need at least 1 register. */ + if (regs == NULL) + nregs = 1; + else if (BE (bufp->regs_allocated == REGS_FIXED && + regs->num_regs < bufp->re_nsub + 1, 0)) + { + nregs = regs->num_regs; + if (BE (nregs < 1, 0)) + { + /* Nothing can be copied to regs. */ + regs = NULL; + nregs = 1; + } + } + else + nregs = bufp->re_nsub + 1; + pmatch = re_malloc (regmatch_t, nregs); + if (BE (pmatch == NULL, 0)) + { + rval = -2; + goto out; + } + + result = re_search_internal (bufp, string, length, start, range, stop, + nregs, pmatch, eflags); + + rval = 0; + + /* I hope we needn't fill ther regs with -1's when no match was found. */ + if (result != REG_NOERROR) + rval = -1; + else if (regs != NULL) + { + /* If caller wants register contents data back, copy them. */ + bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, + bufp->regs_allocated); + if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) + rval = -2; + } + + if (BE (rval == 0, 1)) + { + if (ret_len) + { + assert (pmatch[0].rm_so == start); + rval = pmatch[0].rm_eo - start; + } + else + rval = pmatch[0].rm_so; + } + re_free (pmatch); + out: + __libc_lock_unlock (dfa->lock); + return rval; +} + +static unsigned +re_copy_regs (struct re_registers *regs, + regmatch_t *pmatch, + unsigned int nregs, int regs_allocated) +{ + int rval = REGS_REALLOCATE; + unsigned int i; + unsigned int need_regs = nregs + 1; + /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + uses. */ + + /* Have the register data arrays been allocated? */ + if (regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. */ + regs->start = re_malloc (regoff_t, need_regs); + if (BE (regs->start == NULL, 0)) + return REGS_UNALLOCATED; + regs->end = re_malloc (regoff_t, need_regs); + if (BE (regs->end == NULL, 0)) + { + re_free (regs->start); + return REGS_UNALLOCATED; + } + regs->num_regs = need_regs; + } + else if (regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. */ + if (BE (need_regs > regs->num_regs, 0)) + { + regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); + regoff_t *new_end; + if (BE (new_start == NULL, 0)) + return REGS_UNALLOCATED; + new_end = re_realloc (regs->end, regoff_t, need_regs); + if (BE (new_end == NULL, 0)) + { + re_free (new_start); + return REGS_UNALLOCATED; + } + regs->start = new_start; + regs->end = new_end; + regs->num_regs = need_regs; + } + } + else + { + assert (regs_allocated == REGS_FIXED); + /* This function may not be called with REGS_FIXED and nregs too big. */ + assert (regs->num_regs >= nregs); + rval = REGS_FIXED; + } + + /* Copy the regs. */ + for (i = 0; i < nregs; ++i) + { + regs->start[i] = pmatch[i].rm_so; + regs->end[i] = pmatch[i].rm_eo; + } + for ( ; i < regs->num_regs; ++i) + regs->start[i] = regs->end[i] = -1; + + return rval; +} + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (struct re_pattern_buffer *bufp, + struct re_registers *regs, + unsigned num_regs, + regoff_t *starts, + regoff_t *ends) +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t *) 0; + } +} +#ifdef _LIBC +weak_alias (__re_set_registers, re_set_registers) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC +int +# ifdef _LIBC +weak_function +# endif +re_exec (s) + const char *s; +{ + return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); +} +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. */ + +/* Searches for a compiled pattern PREG in the string STRING, whose + length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same + mingings with regexec. START, and RANGE have the same meanings + with re_search. + Return REG_NOERROR if we find a match, and REG_NOMATCH if not, + otherwise return the error code. + Note: We assume front end functions already check ranges. + (START + RANGE >= 0 && START + RANGE <= LENGTH) */ + +static reg_errcode_t +re_search_internal (const regex_t *preg, + const char *string, + int length, int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags) +{ + reg_errcode_t err; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int left_lim, right_lim, incr; + int fl_longest_match, match_first, match_kind, match_last = -1; + unsigned int extra_nmatch; + int sb, ch; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + re_match_context_t mctx = { .dfa = dfa }; +#else + re_match_context_t mctx; +#endif + char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate + && range && !preg->can_be_null) ? preg->fastmap : NULL; + RE_TRANSLATE_TYPE t = preg->translate; + +#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) + memset (&mctx, '\0', sizeof (re_match_context_t)); + mctx.dfa = dfa; +#endif + + extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; + nmatch -= extra_nmatch; + + /* Check if the DFA haven't been compiled. */ + if (BE (preg->used == 0 || dfa->init_state == NULL + || dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return REG_NOMATCH; + +#ifdef DEBUG + /* We assume front-end functions already check them. */ + assert (start + range >= 0 && start + range <= length); +#endif + + /* If initial states with non-begbuf contexts have no elements, + the regex must be anchored. If preg->newline_anchor is set, + we'll never use init_state_nl, so do not check it. */ + if (dfa->init_state->nodes.nelem == 0 + && dfa->init_state_word->nodes.nelem == 0 + && (dfa->init_state_nl->nodes.nelem == 0 + || !preg->newline_anchor)) + { + if (start != 0 && start + range != 0) + return REG_NOMATCH; + start = range = 0; + } + + /* We must check the longest matching, if nmatch > 0. */ + fl_longest_match = (nmatch != 0 || dfa->nbackref); + + err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, + preg->translate, preg->syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + mctx.input.stop = stop; + mctx.input.raw_stop = stop; + mctx.input.newline_anchor = preg->newline_anchor; + + err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* We will log all the DFA states through which the dfa pass, + if nmatch > 1, or this dfa has "multibyte node", which is a + back-reference or a node which can accept multibyte character or + multi character collating element. */ + if (nmatch > 1 || dfa->has_mb_node) + { + /* Avoid overflow. */ + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= (size_t)mctx.input.bufs_len, 0)) + { + err = REG_ESPACE; + goto free_return; + } + + mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); + if (BE (mctx.state_log == NULL, 0)) + { + err = REG_ESPACE; + goto free_return; + } + } + else + mctx.state_log = NULL; + + match_first = start; + mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF; + + /* Check incrementally whether of not the input string match. */ + incr = (range < 0) ? -1 : 1; + left_lim = (range < 0) ? start + range : start; + right_lim = (range < 0) ? start : start + range; + sb = dfa->mb_cur_max == 1; + match_kind = + (fastmap + ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) + | (range >= 0 ? 2 : 0) + | (t != NULL ? 1 : 0)) + : 8); + + for (;; match_first += incr) + { + err = REG_NOMATCH; + if (match_first < left_lim || right_lim < match_first) + goto free_return; + + /* Advance as rapidly as possible through the string, until we + find a plausible place to start matching. This may be done + with varying efficiency, so there are various possibilities: + only the most common of them are specialized, in order to + save on code size. We use a switch statement for speed. */ + switch (match_kind) + { + case 8: + /* No fastmap. */ + break; + + case 7: + /* Fastmap with single-byte translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[t[(unsigned char) string[match_first]]]) + ++match_first; + goto forward_match_found_start_or_reached_end; + + case 6: + /* Fastmap without translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[(unsigned char) string[match_first]]) + ++match_first; + + forward_match_found_start_or_reached_end: + if (BE (match_first == right_lim, 0)) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (!fastmap[t ? t[ch] : ch]) + goto free_return; + } + break; + + case 4: + case 5: + /* Fastmap without multi-byte translation, match backwards. */ + while (match_first >= left_lim) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (fastmap[t ? t[ch] : ch]) + break; + --match_first; + } + if (match_first < left_lim) + goto free_return; + break; + + default: + /* In this case, we can't determine easily the current byte, + since it might be a component byte of a multibyte + character. Then we use the constructed buffer instead. */ + for (;;) + { + /* If MATCH_FIRST is out of the valid range, reconstruct the + buffers. */ + unsigned int offset = match_first - mctx.input.raw_mbs_idx; + if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) + { + err = re_string_reconstruct (&mctx.input, match_first, + eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + offset = match_first - mctx.input.raw_mbs_idx; + } + /* If MATCH_FIRST is out of the buffer, leave it as '\0'. + Note that MATCH_FIRST must not be smaller than 0. */ + ch = (match_first >= length + ? 0 : re_string_byte_at (&mctx.input, offset)); + if (fastmap[ch]) + break; + match_first += incr; + if (match_first < left_lim || match_first > right_lim) + { + err = REG_NOMATCH; + goto free_return; + } + } + break; + } + + /* Reconstruct the buffers so that the matcher can assume that + the matching starts from the beginning of the buffer. */ + err = re_string_reconstruct (&mctx.input, match_first, eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + +#ifdef RE_ENABLE_I18N + /* Don't consider this char as a possible match start if it part, + yet isn't the head, of a multibyte character. */ + if (!sb && !re_string_first_byte (&mctx.input, 0)) + continue; +#endif + + /* It seems to be appropriate one, then use the matcher. */ + /* We assume that the matching starts from 0. */ + mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; + match_last = check_matching (&mctx, fl_longest_match, + range >= 0 ? &match_first : NULL); + if (match_last != -1) + { + if (BE (match_last == -2, 0)) + { + err = REG_ESPACE; + goto free_return; + } + else + { + mctx.match_last = match_last; + if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) + { + re_dfastate_t *pstate = mctx.state_log[match_last]; + mctx.last_node = check_halt_state_context (&mctx, pstate, + match_last); + } + if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) + || dfa->nbackref) + { + err = prune_impossible_nodes (&mctx); + if (err == REG_NOERROR) + break; + if (BE (err != REG_NOMATCH, 0)) + goto free_return; + match_last = -1; + } + else + break; /* We found a match. */ + } + } + + match_ctx_clean (&mctx); + } + +#ifdef DEBUG + assert (match_last != -1); + assert (err == REG_NOERROR); +#endif + + /* Set pmatch[] if we need. */ + if (nmatch > 0) + { + unsigned int reg_idx; + + /* Initialize registers. */ + for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) + pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; + + /* Set the points where matching start/end. */ + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = mctx.match_last; + + if (!preg->no_sub && nmatch > 1) + { + err = set_regs (preg, &mctx, nmatch, pmatch, + dfa->has_plural_match && dfa->nbackref > 0); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* At last, add the offset to the each registers, since we slided + the buffers so that we could assume that the matching starts + from 0. */ + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so != -1) + { +#ifdef RE_ENABLE_I18N + if (BE (mctx.input.offsets_needed != 0, 0)) + { + pmatch[reg_idx].rm_so = + (pmatch[reg_idx].rm_so == mctx.input.valid_len + ? mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_so]); + pmatch[reg_idx].rm_eo = + (pmatch[reg_idx].rm_eo == mctx.input.valid_len + ? mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_eo]); + } +#else + assert (mctx.input.offsets_needed == 0); +#endif + pmatch[reg_idx].rm_so += match_first; + pmatch[reg_idx].rm_eo += match_first; + } + for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) + { + pmatch[nmatch + reg_idx].rm_so = -1; + pmatch[nmatch + reg_idx].rm_eo = -1; + } + + if (dfa->subexp_map) + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != (int)reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } + } + + free_return: + re_free (mctx.state_log); + if (dfa->nbackref) + match_ctx_free (&mctx); + re_string_destruct (&mctx.input); + return err; +} + +static reg_errcode_t +prune_impossible_nodes (re_match_context_t *mctx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int halt_node, match_last; + reg_errcode_t ret; + re_dfastate_t **sifted_states; + re_dfastate_t **lim_states = NULL; + re_sift_context_t sctx; +#ifdef DEBUG + assert (mctx->state_log != NULL); +#endif + match_last = mctx->match_last; + halt_node = mctx->last_node; + + /* Avoid overflow. */ + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= (size_t)match_last, 0)) + return REG_ESPACE; + + sifted_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (sifted_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + if (dfa->nbackref) + { + lim_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (lim_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + while (1) + { + memset (lim_states, '\0', + sizeof (re_dfastate_t *) * (match_last + 1)); + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, + match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] != NULL || lim_states[0] != NULL) + break; + do + { + --match_last; + if (match_last < 0) + { + ret = REG_NOMATCH; + goto free_return; + } + } while (mctx->state_log[match_last] == NULL + || !mctx->state_log[match_last]->halt); + halt_node = check_halt_state_context (mctx, + mctx->state_log[match_last], + match_last); + } + ret = merge_state_array (dfa, sifted_states, lim_states, + match_last + 1); + re_free (lim_states); + lim_states = NULL; + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + } + else + { + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] == NULL) + { + ret = REG_NOMATCH; + goto free_return; + } + } + re_free (mctx->state_log); + mctx->state_log = sifted_states; + sifted_states = NULL; + mctx->last_node = halt_node; + mctx->match_last = match_last; + ret = REG_NOERROR; + free_return: + re_free (sifted_states); + re_free (lim_states); + return ret; +} + +/* Acquire an initial state and return it. + We must select appropriate initial state depending on the context, + since initial states may have constraints like "\<", "^", etc.. */ + +static inline re_dfastate_t * +__attribute ((always_inline)) internal_function +acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, + int idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + if (dfa->init_state->has_constraint) + { + unsigned int context; + context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return dfa->init_state_word; + else if (IS_ORDINARY_CONTEXT (context)) + return dfa->init_state; + else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_begbuf; + else if (IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_nl; + else if (IS_BEGBUF_CONTEXT (context)) + { + /* It is relatively rare case, then calculate on demand. */ + return re_acquire_state_context (err, dfa, + dfa->init_state->entrance_nodes, + context); + } + else + /* Must not happen? */ + return dfa->init_state; + } + else + return dfa->init_state; +} + +/* Check whether the regular expression match input string INPUT or not, + and return the index where the matching end, return -1 if not match, + or return -2 in case of an error. + FL_LONGEST_MATCH means we want the POSIX longest matching. + If P_MATCH_FIRST is not NULL, and the match fails, it is set to the + next place where we may want to try matching. + Note that the matcher assume that the maching starts from the current + index of the buffer. */ + +static int +internal_function +check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int match = 0; + int match_last = -1; + int cur_str_idx = re_string_cur_idx (&mctx->input); + re_dfastate_t *cur_state; + int at_init_state = p_match_first != NULL; + int next_start_idx = cur_str_idx; + + err = REG_NOERROR; + cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); + /* An initial state must not be NULL (invalid). */ + if (BE (cur_state == NULL, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + + if (mctx->state_log != NULL) + { + mctx->state_log[cur_str_idx] = cur_state; + + /* Check OP_OPEN_SUBEXP in the initial state in case that we use them + later. E.g. Processing back references. */ + if (BE (dfa->nbackref, 0)) + { + at_init_state = 0; + err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (cur_state->has_backref) + { + err = transit_state_bkref (mctx, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + + /* If the RE accepts NULL string. */ + if (BE (cur_state->halt, 0)) + { + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, cur_str_idx)) + { + if (!fl_longest_match) + return cur_str_idx; + else + { + match_last = cur_str_idx; + match = 1; + } + } + } + + while (!re_string_eoi (&mctx->input)) + { + re_dfastate_t *old_state = cur_state; + int next_char_idx = re_string_cur_idx (&mctx->input) + 1; + + if (BE (next_char_idx >= mctx->input.bufs_len, 0) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + } + + cur_state = transit_state (&err, mctx, cur_state); + if (mctx->state_log != NULL) + cur_state = merge_state_with_log (&err, mctx, cur_state); + + if (cur_state == NULL) + { + /* Reached the invalid state or an error. Try to recover a valid + state using the state log, if available and if we have not + already found a valid (even if not the longest) match. */ + if (BE (err != REG_NOERROR, 0)) + return -2; + + if (mctx->state_log == NULL + || (match && !fl_longest_match) + || (cur_state = find_recover_state (&err, mctx)) == NULL) + break; + } + + if (BE (at_init_state, 0)) + { + if (old_state == cur_state) + next_start_idx = next_char_idx; + else + at_init_state = 0; + } + + if (cur_state->halt) + { + /* Reached a halt state. + Check the halt state can satisfy the current context. */ + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, + re_string_cur_idx (&mctx->input))) + { + /* We found an appropriate halt state. */ + match_last = re_string_cur_idx (&mctx->input); + match = 1; + + /* We found a match, do not modify match_first below. */ + p_match_first = NULL; + if (!fl_longest_match) + break; + } + } + } + + if (p_match_first) + *p_match_first += next_start_idx; + + return match_last; +} + +/* Check NODE match the current context. */ + +static int +internal_function +check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) +{ + re_token_type_t type = dfa->nodes[node].type; + unsigned int constraint = dfa->nodes[node].constraint; + if (type != END_OF_RE) + return 0; + if (!constraint) + return 1; + if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) + return 0; + return 1; +} + +/* Check the halt state STATE match the current context. + Return 0 if not match, if the node, STATE has, is a halt node and + match the context, return the node. */ + +static int +internal_function +check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) +{ + int i; + unsigned int context; +#ifdef DEBUG + assert (state->halt); +#endif + context = re_string_context_at (&mctx->input, idx, mctx->eflags); + for (i = 0; i < state->nodes.nelem; ++i) + if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) + return state->nodes.elems[i]; + return 0; +} + +/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA + corresponding to the DFA). + Return the destination node, and update EPS_VIA_NODES, return -1 in case + of errors. */ + +static int +internal_function +proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, + int *pidx, int node, re_node_set *eps_via_nodes, + struct re_fail_stack_t *fs) +{ + const re_dfa_t *const dfa = mctx->dfa; + int i, err; + if (IS_EPSILON_NODE (dfa->nodes[node].type)) + { + re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; + re_node_set *edests = &dfa->edests[node]; + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + /* Pick up a valid destination, or return -1 if none is found. */ + for (dest_node = -1, i = 0; i < edests->nelem; ++i) + { + int candidate = edests->elems[i]; + if (!re_node_set_contains (cur_nodes, candidate)) + continue; + if (dest_node == -1) + dest_node = candidate; + + else + { + /* In order to avoid infinite loop like "(a*)*", return the second + epsilon-transition if the first was already considered. */ + if (re_node_set_contains (eps_via_nodes, dest_node)) + return candidate; + + /* Otherwise, push the second epsilon-transition on the fail stack. */ + else if (fs != NULL + && push_fail_stack (fs, *pidx, candidate, nregs, regs, + eps_via_nodes)) + return -2; + + /* We know we are going to exit. */ + break; + } + } + return dest_node; + } + else + { + int naccepted = 0; + re_token_type_t type = dfa->nodes[node].type; + +#ifdef RE_ENABLE_I18N + if (dfa->nodes[node].accept_mb) + naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); + else +#endif /* RE_ENABLE_I18N */ + if (type == OP_BACK_REF) + { + int subexp_idx = dfa->nodes[node].opr.idx + 1; + naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; + if (fs != NULL) + { + if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) + return -1; + else if (naccepted) + { + char *buf = (char *) re_string_get_buffer (&mctx->input); + if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, + naccepted) != 0) + return -1; + } + } + + if (naccepted == 0) + { + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + dest_node = dfa->edests[node].elems[0]; + if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node)) + return dest_node; + } + } + + if (naccepted != 0 + || check_node_accept (mctx, dfa->nodes + node, *pidx)) + { + int dest_node = dfa->nexts[node]; + *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; + if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL + || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node))) + return -1; + re_node_set_empty (eps_via_nodes); + return dest_node; + } + } + return -1; +} + +static reg_errcode_t +internal_function +push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, + int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) +{ + reg_errcode_t err; + int num = fs->num++; + if (fs->num == fs->alloc) + { + struct re_fail_stack_ent_t *new_array; + new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) + * fs->alloc * 2)); + if (new_array == NULL) + return REG_ESPACE; + fs->alloc *= 2; + fs->stack = new_array; + } + fs->stack[num].idx = str_idx; + fs->stack[num].node = dest_node; + fs->stack[num].regs = re_malloc (regmatch_t, nregs); + if (fs->stack[num].regs == NULL) + return REG_ESPACE; + memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); + err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); + return err; +} + +static int +internal_function +pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, + regmatch_t *regs, re_node_set *eps_via_nodes) +{ + int num = --fs->num; + assert (num >= 0); + *pidx = fs->stack[num].idx; + memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); + re_node_set_free (eps_via_nodes); + re_free (fs->stack[num].regs); + *eps_via_nodes = fs->stack[num].eps_via_nodes; + return fs->stack[num].node; +} + +/* Set the positions where the subexpressions are starts/ends to registers + PMATCH. + Note: We assume that pmatch[0] is already set, and + pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ + +static reg_errcode_t +internal_function +set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, + regmatch_t *pmatch, int fl_backtrack) +{ + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int idx, cur_node; + re_node_set eps_via_nodes; + struct re_fail_stack_t *fs; + struct re_fail_stack_t fs_body = { 0, 2, NULL }; + regmatch_t *prev_idx_match; + int prev_idx_match_malloced = 0; + +#ifdef DEBUG + assert (nmatch > 1); + assert (mctx->state_log != NULL); +#endif + if (fl_backtrack) + { + fs = &fs_body; + fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); + if (fs->stack == NULL) + return REG_ESPACE; + } + else + fs = NULL; + + cur_node = dfa->init_node; + re_node_set_init_empty (&eps_via_nodes); + +#ifdef HAVE_ALLOCA + if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) + prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); + else +#endif + { + prev_idx_match = re_malloc (regmatch_t, nmatch); + if (prev_idx_match == NULL) + { + free_fail_stack_return (fs); + return REG_ESPACE; + } + prev_idx_match_malloced = 1; + } + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + + for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) + { + update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); + + if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) + { + unsigned int reg_idx; + if (fs) + { + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) + break; + if (reg_idx == nmatch) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); + } + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + } + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOERROR; + } + } + + /* Proceed to next node. */ + cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, + &eps_via_nodes, fs); + + if (BE (cur_node < 0, 0)) + { + if (BE (cur_node == -2, 0)) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + free_fail_stack_return (fs); + return REG_ESPACE; + } + if (fs) + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOMATCH; + } + } + } + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); +} + +static reg_errcode_t +internal_function +free_fail_stack_return (struct re_fail_stack_t *fs) +{ + if (fs) + { + int fs_idx; + for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) + { + re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); + re_free (fs->stack[fs_idx].regs); + } + re_free (fs->stack); + } + return REG_NOERROR; +} + +static void +internal_function +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) +{ + int type = dfa->nodes[cur_node].type; + if (type == OP_OPEN_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + + /* We are at the first node of this sub expression. */ + if (reg_num < nmatch) + { + pmatch[reg_num].rm_so = cur_idx; + pmatch[reg_num].rm_eo = -1; + } + } + else if (type == OP_CLOSE_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + if (reg_num < nmatch) + { + /* We are at the last node of this sub expression. */ + if (pmatch[reg_num].rm_so < cur_idx) + { + pmatch[reg_num].rm_eo = cur_idx; + /* This is a non-empty match or we are not inside an optional + subexpression. Accept this right away. */ + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + } + else + { + if (dfa->nodes[cur_node].opt_subexp + && prev_idx_match[reg_num].rm_so != -1) + /* We transited through an empty match for an optional + subexpression, like (a?)*, and this is not the subexp's + first match. Copy back the old content of the registers + so that matches of an inner subexpression are undone as + well, like in ((a?))*. */ + memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); + else + /* We completed a subexpression, but it may be part of + an optional one, so do not update PREV_IDX_MATCH. */ + pmatch[reg_num].rm_eo = cur_idx; + } + } + } +} + +/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 + and sift the nodes in each states according to the following rules. + Updated state_log will be wrote to STATE_LOG. + + Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + 1. When STR_IDX == MATCH_LAST(the last index in the state_log): + If `a' isn't the LAST_NODE and `a' can't epsilon transit to + the LAST_NODE, we throw away the node `a'. + 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts + string `s' and transit to `b': + i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw + away the node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is + thrown away, we throw away the node `a'. + 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': + i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the + node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, + we throw away the node `a'. */ + +#define STATE_NODE_CONTAINS(state,node) \ + ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) + +static reg_errcode_t +internal_function +sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) +{ + reg_errcode_t err; + int null_cnt = 0; + int str_idx = sctx->last_str_idx; + re_node_set cur_dest; + +#ifdef DEBUG + assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); +#endif + + /* Build sifted state_log[str_idx]. It has the nodes which can epsilon + transit to the last_node and the last_node itself. */ + err = re_node_set_init_1 (&cur_dest, sctx->last_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* Then check each states in the state_log. */ + while (str_idx > 0) + { + /* Update counters. */ + null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; + if (null_cnt > mctx->max_mb_elem_len) + { + memset (sctx->sifted_states, '\0', + sizeof (re_dfastate_t *) * str_idx); + re_node_set_free (&cur_dest); + return REG_NOERROR; + } + re_node_set_empty (&cur_dest); + --str_idx; + + if (mctx->state_log[str_idx]) + { + err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* Add all the nodes which satisfy the following conditions: + - It can epsilon transit to a node in CUR_DEST. + - It is in CUR_SRC. + And update state_log. */ + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + err = REG_NOERROR; + free_return: + re_node_set_free (&cur_dest); + return err; +} + +static reg_errcode_t +internal_function +build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, re_node_set *cur_dest) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; + int i; + + /* Then build the next sifted state. + We build the next sifted state on `cur_dest', and update + `sifted_states[str_idx]' with `cur_dest'. + Note: + `cur_dest' is the sifted state from `state_log[str_idx + 1]'. + `cur_src' points the node_set of the old `state_log[str_idx]' + (with the epsilon nodes pre-filtered out). */ + for (i = 0; i < cur_src->nelem; i++) + { + int prev_node = cur_src->elems[i]; + int naccepted = 0; + int ret; + +#ifdef DEBUG + re_token_type_t type = dfa->nodes[prev_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[prev_node].accept_mb) + naccepted = sift_states_iter_mb (mctx, sctx, prev_node, + str_idx, sctx->last_str_idx); +#endif /* RE_ENABLE_I18N */ + + /* We don't check backreferences here. + See update_cur_sifted_state(). */ + if (!naccepted + && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) + && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], + dfa->nexts[prev_node])) + naccepted = 1; + + if (naccepted == 0) + continue; + + if (sctx->limits.nelem) + { + int to_idx = str_idx + naccepted; + if (check_dst_limits (mctx, &sctx->limits, + dfa->nexts[prev_node], to_idx, + prev_node, str_idx)) + continue; + } + ret = re_node_set_insert (cur_dest, prev_node); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + + return REG_NOERROR; +} + +/* Helper functions. */ + +static reg_errcode_t +internal_function +clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) +{ + int top = mctx->state_log_top; + + if (next_state_log_idx >= mctx->input.bufs_len + || (next_state_log_idx >= mctx->input.valid_len + && mctx->input.valid_len < mctx->input.len)) + { + reg_errcode_t err; + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (top < next_state_log_idx) + { + memset (mctx->state_log + top + 1, '\0', + sizeof (re_dfastate_t *) * (next_state_log_idx - top)); + mctx->state_log_top = next_state_log_idx; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, int num) +{ + int st_idx; + reg_errcode_t err; + for (st_idx = 0; st_idx < num; ++st_idx) + { + if (dst[st_idx] == NULL) + dst[st_idx] = src[st_idx]; + else if (src[st_idx] != NULL) + { + re_node_set merged_set; + err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, + &src[st_idx]->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); + re_node_set_free (&merged_set); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *dest_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + const re_node_set *candidates; + candidates = ((mctx->state_log[str_idx] == NULL) ? NULL + : &mctx->state_log[str_idx]->nodes); + + if (dest_nodes->nelem == 0) + sctx->sifted_states[str_idx] = NULL; + else + { + if (candidates) + { + /* At first, add the nodes which can epsilon transit to a node in + DEST_NODE. */ + err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Then, check the limitations in the current sift_context. */ + if (sctx->limits.nelem) + { + err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, + mctx->bkref_ents, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + + sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (candidates && mctx->state_log[str_idx]->has_backref) + { + err = sift_states_bkref (mctx, sctx, str_idx, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + reg_errcode_t err = REG_NOERROR; + int i; + + re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (!state->inveclosure.alloc) + { + err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + for (i = 0; i < dest_nodes->nelem; i++) + { + err = re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + } + } + return re_node_set_add_intersect (dest_nodes, candidates, + &state->inveclosure); +} + +static reg_errcode_t +internal_function +sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + int ecl_idx; + reg_errcode_t err; + re_node_set *inv_eclosure = dfa->inveclosures + node; + re_node_set except_nodes; + re_node_set_init_empty (&except_nodes); + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (cur_node == node) + continue; + if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) + { + int edst1 = dfa->edests[cur_node].elems[0]; + int edst2 = ((dfa->edests[cur_node].nelem > 1) + ? dfa->edests[cur_node].elems[1] : -1); + if ((!re_node_set_contains (inv_eclosure, edst1) + && re_node_set_contains (dest_nodes, edst1)) + || (edst2 > 0 + && !re_node_set_contains (inv_eclosure, edst2) + && re_node_set_contains (dest_nodes, edst2))) + { + err = re_node_set_add_intersect (&except_nodes, candidates, + dfa->inveclosures + cur_node); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&except_nodes); + return err; + } + } + } + } + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (!re_node_set_contains (&except_nodes, cur_node)) + { + int idx = re_node_set_contains (dest_nodes, cur_node) - 1; + re_node_set_remove_at (dest_nodes, idx); + } + } + re_node_set_free (&except_nodes); + return REG_NOERROR; +} + +static int +internal_function +check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, + int dst_node, int dst_idx, int src_node, int src_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int lim_idx, src_pos, dst_pos; + + int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); + int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = mctx->bkref_ents + limits->elems[lim_idx]; + subexp_idx = dfa->nodes[ent->node].opr.idx; + + dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, dst_node, dst_idx, + dst_bkref_idx); + src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, src_node, src_idx, + src_bkref_idx); + + /* In case of: + ( ) + ( ) + ( ) */ + if (src_pos == dst_pos) + continue; /* This is unrelated limitation. */ + else + return 1; + } + return 0; +} + +static int +internal_function +check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, + int subexp_idx, int from_node, int bkref_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *eclosures = dfa->eclosures + from_node; + int node_idx; + + /* Else, we are on the boundary: examine the nodes on the epsilon + closure. */ + for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) + { + int node = eclosures->elems[node_idx]; + switch (dfa->nodes[node].type) + { + case OP_BACK_REF: + if (bkref_idx != -1) + { + struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; + do + { + int dst, cpos; + + if (ent->node != node) + continue; + + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) + continue; + + /* Recurse trying to reach the OP_OPEN_SUBEXP and + OP_CLOSE_SUBEXP cases below. But, if the + destination node is the same node as the source + node, don't recurse because it would cause an + infinite loop: a regex that exhibits this behavior + is ()\1*\1* */ + dst = dfa->edests[node].elems[0]; + if (dst == from_node) + { + if (boundaries & 1) + return -1; + else /* if (boundaries & 2) */ + return 0; + } + + cpos = + check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + dst, bkref_idx); + if (cpos == -1 /* && (boundaries & 1) */) + return -1; + if (cpos == 0 && (boundaries & 2)) + return 0; + + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); + } + while (ent++->more); + } + break; + + case OP_OPEN_SUBEXP: + if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) + return -1; + break; + + case OP_CLOSE_SUBEXP: + if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) + return 0; + break; + + default: + break; + } + } + + return (boundaries & 2) ? 1 : 0; +} + +static int +internal_function +check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, + int subexp_idx, int from_node, int str_idx, + int bkref_idx) +{ + struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; + int boundaries; + + /* If we are outside the range of the subexpression, return -1 or 1. */ + if (str_idx < lim->subexp_from) + return -1; + + if (lim->subexp_to < str_idx) + return 1; + + /* If we are within the subexpression, return 0. */ + boundaries = (str_idx == lim->subexp_from); + boundaries |= (str_idx == lim->subexp_to) << 1; + if (boundaries == 0) + return 0; + + /* Else, examine epsilon closure. */ + return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + from_node, bkref_idx); +} + +/* Check the limitations of sub expressions LIMITS, and remove the nodes + which are against limitations from DEST_NODES. */ + +static reg_errcode_t +internal_function +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates, re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, int str_idx) +{ + reg_errcode_t err; + int node_idx, lim_idx; + + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = bkref_ents + limits->elems[lim_idx]; + + if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) + continue; /* This is unrelated limitation. */ + + subexp_idx = dfa->nodes[ent->node].opr.idx; + if (ent->subexp_to == str_idx) + { + int ops_node = -1; + int cls_node = -1; + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_OPEN_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + ops_node = node; + else if (type == OP_CLOSE_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + cls_node = node; + } + + /* Check the limitation of the open subexpression. */ + /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ + if (ops_node >= 0) + { + err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Check the limitation of the close subexpression. */ + if (cls_node >= 0) + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + if (!re_node_set_contains (dfa->inveclosures + node, + cls_node) + && !re_node_set_contains (dfa->eclosures + node, + cls_node)) + { + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + --node_idx; + } + } + } + else /* (ent->subexp_to != str_idx) */ + { + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) + { + if (subexp_idx != dfa->nodes[node].opr.idx) + continue; + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int node_idx, node; + re_sift_context_t local_sctx; + int first_idx = search_cur_bkref_entry (mctx, str_idx); + + if (first_idx == -1) + return REG_NOERROR; + + local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ + + for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) + { + int enabled_idx; + re_token_type_t type; + struct re_backref_cache_entry *entry; + node = candidates->elems[node_idx]; + type = dfa->nodes[node].type; + /* Avoid infinite loop for the REs like "()\1+". */ + if (node == sctx->last_node && str_idx == sctx->last_str_idx) + continue; + if (type != OP_BACK_REF) + continue; + + entry = mctx->bkref_ents + first_idx; + enabled_idx = first_idx; + do + { + int subexp_len; + int to_idx; + int dst_node; + int ret; + re_dfastate_t *cur_state; + + if (entry->node != node) + continue; + subexp_len = entry->subexp_to - entry->subexp_from; + to_idx = str_idx + subexp_len; + dst_node = (subexp_len ? dfa->nexts[node] + : dfa->edests[node].elems[0]); + + if (to_idx > sctx->last_str_idx + || sctx->sifted_states[to_idx] == NULL + || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) + || check_dst_limits (mctx, &sctx->limits, node, + str_idx, dst_node, to_idx)) + continue; + + if (local_sctx.sifted_states == NULL) + { + local_sctx = *sctx; + err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.last_node = node; + local_sctx.last_str_idx = str_idx; + ret = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (ret < 0, 0)) + { + err = REG_ESPACE; + goto free_return; + } + cur_state = local_sctx.sifted_states[str_idx]; + err = sift_states_backward (mctx, &local_sctx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + if (sctx->limited_states != NULL) + { + err = merge_state_array (dfa, sctx->limited_states, + local_sctx.sifted_states, + str_idx + 1); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.sifted_states[str_idx] = cur_state; + re_node_set_remove (&local_sctx.limits, enabled_idx); + + /* mctx->bkref_ents may have changed, reload the pointer. */ + entry = mctx->bkref_ents + enabled_idx; + } + while (enabled_idx++, entry++->more); + } + err = REG_NOERROR; + free_return: + if (local_sctx.sifted_states != NULL) + { + re_node_set_free (&local_sctx.limits); + } + + return err; +} + + +#ifdef RE_ENABLE_I18N +static int +internal_function +sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int naccepted; + /* Check the node can accept `multi byte'. */ + naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); + if (naccepted > 0 && str_idx + naccepted <= max_str_idx && + !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], + dfa->nexts[node_idx])) + /* The node can't accept the `multi byte', or the + destination was already thrown away, then the node + could't accept the current input `multi byte'. */ + naccepted = 0; + /* Otherwise, it is sure that the node could accept + `naccepted' bytes input. */ + return naccepted; +} +#endif /* RE_ENABLE_I18N */ + + +/* Functions for state transition. */ + +/* Return the next state to which the current state STATE will transit by + accepting the current input byte, and update STATE_LOG if necessary. + If STATE can accept a multibyte char/collating element/back reference + update the destination of STATE_LOG. */ + +static re_dfastate_t * +internal_function +transit_state (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + re_dfastate_t **trtable; + unsigned char ch; + +#ifdef RE_ENABLE_I18N + /* If the current state can accept multibyte. */ + if (BE (state->accept_mb, 0)) + { + *err = transit_state_mb (mctx, state); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } +#endif /* RE_ENABLE_I18N */ + + /* Then decide the next state with the single byte. */ +#if 0 + if (0) + /* don't use transition table */ + return transit_state_sb (err, mctx, state); +#endif + + /* Use transition table */ + ch = re_string_fetch_byte (&mctx->input); + for (;;) + { + trtable = state->trtable; + if (BE (trtable != NULL, 1)) + return trtable[ch]; + + trtable = state->word_trtable; + if (BE (trtable != NULL, 1)) + { + unsigned int context; + context + = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return trtable[ch + SBC_MAX]; + else + return trtable[ch]; + } + + if (!build_trtable (mctx->dfa, state)) + { + *err = REG_ESPACE; + return NULL; + } + + /* Retry, we now have a transition table. */ + } +} + +/* Update the state_log if we need */ +re_dfastate_t * +internal_function +merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *next_state) +{ + const re_dfa_t *const dfa = mctx->dfa; + int cur_idx = re_string_cur_idx (&mctx->input); + + if (cur_idx > mctx->state_log_top) + { + mctx->state_log[cur_idx] = next_state; + mctx->state_log_top = cur_idx; + } + else if (mctx->state_log[cur_idx] == 0) + { + mctx->state_log[cur_idx] = next_state; + } + else + { + re_dfastate_t *pstate; + unsigned int context; + re_node_set next_nodes, *log_nodes, *table_nodes = NULL; + /* If (state_log[cur_idx] != 0), it implies that cur_idx is + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ + pstate = mctx->state_log[cur_idx]; + log_nodes = pstate->entrance_nodes; + if (next_state != NULL) + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, + log_nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + else + next_nodes = *log_nodes; + /* Note: We already add the nodes of the initial state, + then we don't need to add them here. */ + + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + next_state = mctx->state_log[cur_idx] + = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + + if (table_nodes != NULL) + re_node_set_free (&next_nodes); + } + + if (BE (dfa->nbackref, 0) && next_state != NULL) + { + /* Check OP_OPEN_SUBEXP in the current state in case that we use them + later. We must check them here, since the back references in the + next state might use them. */ + *err = check_subexp_matching_top (mctx, &next_state->nodes, + cur_idx); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + + /* If the next state has back references. */ + if (next_state->has_backref) + { + *err = transit_state_bkref (mctx, &next_state->nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + next_state = mctx->state_log[cur_idx]; + } + } + + return next_state; +} + +/* Skip bytes in the input that correspond to part of a + multi-byte match, then look in the log for a state + from which to restart matching. */ +re_dfastate_t * +internal_function +find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) +{ + re_dfastate_t *cur_state; + do + { + int max = mctx->state_log_top; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + do + { + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); + } + while (mctx->state_log[cur_str_idx] == NULL); + + cur_state = merge_state_with_log (err, mctx, NULL); + } + while (*err == REG_NOERROR && cur_state == NULL); + return cur_state; +} + +/* Helper functions for transit_state. */ + +/* From the node set CUR_NODES, pick up the nodes whose types are + OP_OPEN_SUBEXP and which have corresponding back references in the regular + expression. And register them to use them later for evaluating the + correspoding back references. */ + +static reg_errcode_t +internal_function +check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, + int str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int node_idx; + reg_errcode_t err; + + /* TODO: This isn't efficient. + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) + { + int node = cur_nodes->elems[node_idx]; + if (dfa->nodes[node].type == OP_OPEN_SUBEXP + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS + && (dfa->used_bkref_map + & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) + { + err = match_ctx_add_subtop (mctx, node, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +#if 0 +/* Return the next state to which the current state STATE will transit by + accepting the current input byte. */ + +static re_dfastate_t * +transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + const re_dfa_t *const dfa = mctx->dfa; + re_node_set next_nodes; + re_dfastate_t *next_state; + int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); + unsigned int context; + + *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) + { + int cur_node = state->nodes.elems[node_cnt]; + if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) + { + *err = re_node_set_merge (&next_nodes, + dfa->eclosures + dfa->nexts[cur_node]); + if (BE (*err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return NULL; + } + } + } + context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); + next_state = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + + re_node_set_free (&next_nodes); + re_string_skip_bytes (&mctx->input, 1); + return next_state; +} +#endif + +#ifdef RE_ENABLE_I18N +static reg_errcode_t +internal_function +transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + + for (i = 0; i < pstate->nodes.nelem; ++i) + { + re_node_set dest_nodes, *new_nodes; + int cur_node_idx = pstate->nodes.elems[i]; + int naccepted, dest_idx; + unsigned int context; + re_dfastate_t *dest_state; + + if (!dfa->nodes[cur_node_idx].accept_mb) + continue; + + if (dfa->nodes[cur_node_idx].constraint) + { + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input), + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, + context)) + continue; + } + + /* How many bytes the node can accept? */ + naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, + re_string_cur_idx (&mctx->input)); + if (naccepted == 0) + continue; + + /* The node can accepts `naccepted' bytes. */ + dest_idx = re_string_cur_idx (&mctx->input) + naccepted; + mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted + : mctx->max_mb_elem_len); + err = clean_state_log_if_needed (mctx, dest_idx); + if (BE (err != REG_NOERROR, 0)) + return err; +#ifdef DEBUG + assert (dfa->nexts[cur_node_idx] != -1); +#endif + new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; + + dest_state = mctx->state_log[dest_idx]; + if (dest_state == NULL) + dest_nodes = *new_nodes; + else + { + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, new_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + context = re_string_context_at (&mctx->input, dest_idx - 1, + mctx->eflags); + mctx->state_log[dest_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + if (dest_state != NULL) + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} +#endif /* RE_ENABLE_I18N */ + +static reg_errcode_t +internal_function +transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + for (i = 0; i < nodes->nelem; ++i) + { + int dest_str_idx, prev_nelem, bkc_idx; + int node_idx = nodes->elems[i]; + unsigned int context; + const re_token_t *node = dfa->nodes + node_idx; + re_node_set *new_dest_nodes; + + /* Check whether `node' is a backreference or not. */ + if (node->type != OP_BACK_REF) + continue; + + if (node->constraint) + { + context = re_string_context_at (&mctx->input, cur_str_idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + continue; + } + + /* `node' is a backreference. + Check the substring which the substring matched. */ + bkc_idx = mctx->nbkref_ents; + err = get_subexp (mctx, node_idx, cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* And add the epsilon closures (which is `new_dest_nodes') of + the backreference to appropriate state_log. */ +#ifdef DEBUG + assert (dfa->nexts[node_idx] != -1); +#endif + for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) + { + int subexp_len; + re_dfastate_t *dest_state; + struct re_backref_cache_entry *bkref_ent; + bkref_ent = mctx->bkref_ents + bkc_idx; + if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) + continue; + subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; + new_dest_nodes = (subexp_len == 0 + ? dfa->eclosures + dfa->edests[node_idx].elems[0] + : dfa->eclosures + dfa->nexts[node_idx]); + dest_str_idx = (cur_str_idx + bkref_ent->subexp_to + - bkref_ent->subexp_from); + context = re_string_context_at (&mctx->input, dest_str_idx - 1, + mctx->eflags); + dest_state = mctx->state_log[dest_str_idx]; + prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 + : mctx->state_log[cur_str_idx]->nodes.nelem); + /* Add `new_dest_node' to state_log. */ + if (dest_state == NULL) + { + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, new_dest_nodes, + context); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + else + { + re_node_set dest_nodes; + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, + new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&dest_nodes); + goto free_return; + } + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + /* We need to check recursively if the backreference can epsilon + transit. */ + if (subexp_len == 0 + && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) + { + err = check_subexp_matching_top (mctx, new_dest_nodes, + cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + err = transit_state_bkref (mctx, new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + } + } + err = REG_NOERROR; + free_return: + return err; +} + +/* Enumerate all the candidates which the backreference BKREF_NODE can match + at BKREF_STR_IDX, and register them by match_ctx_add_entry(). + Note that we might collect inappropriate candidates here. + However, the cost of checking them strictly here is too high, then we + delay these checking for prune_impossible_nodes(). */ + +static reg_errcode_t +internal_function +get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int subexp_num, sub_top_idx; + const char *buf = (const char *) re_string_get_buffer (&mctx->input); + /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ + int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); + if (cache_idx != -1) + { + const struct re_backref_cache_entry *entry + = mctx->bkref_ents + cache_idx; + do + if (entry->node == bkref_node) + return REG_NOERROR; /* We already checked it. */ + while (entry++->more); + } + + subexp_num = dfa->nodes[bkref_node].opr.idx; + + /* For each sub expression */ + for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) + { + reg_errcode_t err; + re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; + re_sub_match_last_t *sub_last; + int sub_last_idx, sl_str, bkref_str_off; + + if (dfa->nodes[sub_top->node].opr.idx != subexp_num) + continue; /* It isn't related. */ + + sl_str = sub_top->str_idx; + bkref_str_off = bkref_str_idx; + /* At first, check the last node of sub expressions we already + evaluated. */ + for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) + { + int sl_str_diff; + sub_last = sub_top->lasts[sub_last_idx]; + sl_str_diff = sub_last->str_idx - sl_str; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_diff > 0) + { + if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) + { + /* Not enough chars for a successful match. */ + if (bkref_str_off + sl_str_diff > mctx->input.len) + break; + + err = clean_state_log_if_needed (mctx, + bkref_str_off + + sl_str_diff); + if (BE (err != REG_NOERROR, 0)) + return err; + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) + /* We don't need to search this sub expression any more. */ + break; + } + bkref_str_off += sl_str_diff; + sl_str += sl_str_diff; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + + /* Reload buf, since the preceding call might have reallocated + the buffer. */ + buf = (const char *) re_string_get_buffer (&mctx->input); + + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (sub_last_idx < sub_top->nlasts) + continue; + if (sub_last_idx > 0) + ++sl_str; + /* Then, search for the other last nodes of the sub expression. */ + for (; sl_str <= bkref_str_idx; ++sl_str) + { + int cls_node, sl_str_off; + const re_node_set *nodes; + sl_str_off = sl_str - sub_top->str_idx; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_off > 0) + { + if (BE (bkref_str_off >= mctx->input.valid_len, 0)) + { + /* If we are at the end of the input, we cannot match. */ + if (bkref_str_off >= mctx->input.len) + break; + + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (buf [bkref_str_off++] != buf[sl_str - 1]) + break; /* We don't need to search this sub expression + any more. */ + } + if (mctx->state_log[sl_str] == NULL) + continue; + /* Does this state have a ')' of the sub expression? */ + nodes = &mctx->state_log[sl_str]->nodes; + cls_node = find_subexp_node (dfa, nodes, subexp_num, + OP_CLOSE_SUBEXP); + if (cls_node == -1) + continue; /* No. */ + if (sub_top->path == NULL) + { + sub_top->path = calloc (sizeof (state_array_t), + sl_str - sub_top->str_idx + 1); + if (sub_top->path == NULL) + return REG_ESPACE; + } + /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node + in the current context? */ + err = check_arrival (mctx, sub_top->path, sub_top->node, + sub_top->str_idx, cls_node, sl_str, + OP_CLOSE_SUBEXP); + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); + if (BE (sub_last == NULL, 0)) + return REG_ESPACE; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + if (err == REG_NOMATCH) + continue; + } + } + return REG_NOERROR; +} + +/* Helper functions for get_subexp(). */ + +/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. + If it can arrive, register the sub expression expressed with SUB_TOP + and SUB_LAST. */ + +static reg_errcode_t +internal_function +get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, int bkref_node, int bkref_str) +{ + reg_errcode_t err; + int to_idx; + /* Can the subexpression arrive the back reference? */ + err = check_arrival (mctx, &sub_last->path, sub_last->node, + sub_last->str_idx, bkref_node, bkref_str, + OP_OPEN_SUBEXP); + if (err != REG_NOERROR) + return err; + err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, + sub_last->str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; + return clean_state_log_if_needed (mctx, to_idx); +} + +/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. + Search '(' if FL_OPEN, or search ')' otherwise. + TODO: This function isn't efficient... + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + +static int +internal_function +find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) +{ + int cls_idx; + for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) + { + int cls_node = nodes->elems[cls_idx]; + const re_token_t *node = dfa->nodes + cls_node; + if (node->type == type + && node->opr.idx == subexp_idx) + return cls_node; + } + return -1; +} + +/* Check whether the node TOP_NODE at TOP_STR can arrive to the node + LAST_NODE at LAST_STR. We record the path onto PATH since it will be + heavily reused. + Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ + +static reg_errcode_t +internal_function +check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, + int top_str, int last_node, int last_str, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + int subexp_num, backup_cur_idx, str_idx, null_cnt; + re_dfastate_t *cur_state = NULL; + re_node_set *cur_nodes, next_nodes; + re_dfastate_t **backup_state_log; + unsigned int context; + + subexp_num = dfa->nodes[top_node].opr.idx; + /* Extend the buffer if we need. */ + if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) + { + re_dfastate_t **new_array; + int old_alloc = path->alloc; + path->alloc += last_str + mctx->max_mb_elem_len + 1; + new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); + if (BE (new_array == NULL, 0)) + { + path->alloc = old_alloc; + return REG_ESPACE; + } + path->array = new_array; + memset (new_array + old_alloc, '\0', + sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); + } + + str_idx = path->next_idx ? path->next_idx : top_str; + + /* Temporary modify MCTX. */ + backup_state_log = mctx->state_log; + backup_cur_idx = mctx->input.cur_idx; + mctx->state_log = path->array; + mctx->input.cur_idx = str_idx; + + /* Setup initial node set. */ + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + if (str_idx == top_str) + { + err = re_node_set_init_1 (&next_nodes, top_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + else + { + cur_state = mctx->state_log[str_idx]; + if (cur_state && cur_state->has_backref) + { + err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + re_node_set_init_empty (&next_nodes); + } + if (str_idx == top_str || (cur_state && cur_state->has_backref)) + { + if (next_nodes.nelem) + { + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + } + + for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) + { + re_node_set_empty (&next_nodes); + if (mctx->state_log[str_idx + 1]) + { + err = re_node_set_merge (&next_nodes, + &mctx->state_log[str_idx + 1]->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + if (cur_state) + { + err = check_arrival_add_next_nodes (mctx, str_idx, + &cur_state->non_eps_nodes, + &next_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + ++str_idx; + if (next_nodes.nelem) + { + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + null_cnt = cur_state == NULL ? null_cnt + 1 : 0; + } + re_node_set_free (&next_nodes); + cur_nodes = (mctx->state_log[last_str] == NULL ? NULL + : &mctx->state_log[last_str]->nodes); + path->next_idx = str_idx; + + /* Fix MCTX. */ + mctx->state_log = backup_state_log; + mctx->input.cur_idx = backup_cur_idx; + + /* Then check the current node set has the node LAST_NODE. */ + if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) + return REG_NOERROR; + + return REG_NOMATCH; +} + +/* Helper functions for check_arrival. */ + +/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them + to NEXT_NODES. + TODO: This function is similar to the functions transit_state*(), + however this function has many additional works. + Can't we unify them? */ + +static reg_errcode_t +internal_function +check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, + re_node_set *cur_nodes, re_node_set *next_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + int result; + int cur_idx; +#ifdef RE_ENABLE_I18N + reg_errcode_t err = REG_NOERROR; +#endif + re_node_set union_set; + re_node_set_init_empty (&union_set); + for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) + { + int naccepted = 0; + int cur_node = cur_nodes->elems[cur_idx]; +#ifdef DEBUG + re_token_type_t type = dfa->nodes[cur_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[cur_node].accept_mb) + { + naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, + str_idx); + if (naccepted > 1) + { + re_dfastate_t *dest_state; + int next_node = dfa->nexts[cur_node]; + int next_idx = str_idx + naccepted; + dest_state = mctx->state_log[next_idx]; + re_node_set_empty (&union_set); + if (dest_state) + { + err = re_node_set_merge (&union_set, &dest_state->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + result = re_node_set_insert (&union_set, next_node); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + mctx->state_log[next_idx] = re_acquire_state (&err, dfa, + &union_set); + if (BE (mctx->state_log[next_idx] == NULL + && err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + } +#endif /* RE_ENABLE_I18N */ + if (naccepted + || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) + { + result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + } + } + re_node_set_free (&union_set); + return REG_NOERROR; +} + +/* For all the nodes in CUR_NODES, add the epsilon closures of them to + CUR_NODES, however exclude the nodes which are: + - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN. + - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. +*/ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, + int ex_subexp, int type) +{ + reg_errcode_t err; + int idx, outside_node; + re_node_set new_nodes; +#ifdef DEBUG + assert (cur_nodes->nelem); +#endif + err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* Create a new node set NEW_NODES with the nodes which are epsilon + closures of the node in CUR_NODES. */ + + for (idx = 0; idx < cur_nodes->nelem; ++idx) + { + int cur_node = cur_nodes->elems[idx]; + const re_node_set *eclosure = dfa->eclosures + cur_node; + outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); + if (outside_node == -1) + { + /* There are no problematic nodes, just merge them. */ + err = re_node_set_merge (&new_nodes, eclosure); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + else + { + /* There are problematic nodes, re-calculate incrementally. */ + err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + } + re_node_set_free (cur_nodes); + *cur_nodes = new_nodes; + return REG_NOERROR; +} + +/* Helper function for check_arrival_expand_ecl. + Check incrementally the epsilon closure of TARGET, and if it isn't + problematic append it to DST_NODES. */ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, + int target, int ex_subexp, int type) +{ + int cur_node; + for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) + { + int err; + + if (dfa->nodes[cur_node].type == type + && dfa->nodes[cur_node].opr.idx == ex_subexp) + { + if (type == OP_CLOSE_SUBEXP) + { + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + } + break; + } + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + if (dfa->edests[cur_node].nelem == 0) + break; + if (dfa->edests[cur_node].nelem == 2) + { + err = check_arrival_expand_ecl_sub (dfa, dst_nodes, + dfa->edests[cur_node].elems[1], + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + return err; + } + cur_node = dfa->edests[cur_node].elems[0]; + } + return REG_NOERROR; +} + + +/* For all the back references in the current state, calculate the + destination of the back references by the appropriate entry + in MCTX->BKREF_ENTS. */ + +static reg_errcode_t +internal_function +expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, + int cur_str, int subexp_num, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); + struct re_backref_cache_entry *ent; + + if (cache_idx_start == -1) + return REG_NOERROR; + + restart: + ent = mctx->bkref_ents + cache_idx_start; + do + { + int to_idx, next_node; + + /* Is this entry ENT is appropriate? */ + if (!re_node_set_contains (cur_nodes, ent->node)) + continue; /* No. */ + + to_idx = cur_str + ent->subexp_to - ent->subexp_from; + /* Calculate the destination of the back reference, and append it + to MCTX->STATE_LOG. */ + if (to_idx == cur_str) + { + /* The backreference did epsilon transit, we must re-check all the + node in the current state. */ + re_node_set new_dests; + reg_errcode_t err2, err3; + next_node = dfa->edests[ent->node].elems[0]; + if (re_node_set_contains (cur_nodes, next_node)) + continue; + err = re_node_set_init_1 (&new_dests, next_node); + err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); + err3 = re_node_set_merge (cur_nodes, &new_dests); + re_node_set_free (&new_dests); + if (BE (err != REG_NOERROR || err2 != REG_NOERROR + || err3 != REG_NOERROR, 0)) + { + err = (err != REG_NOERROR ? err + : (err2 != REG_NOERROR ? err2 : err3)); + return err; + } + /* TODO: It is still inefficient... */ + goto restart; + } + else + { + re_node_set union_set; + next_node = dfa->nexts[ent->node]; + if (mctx->state_log[to_idx]) + { + int ret; + if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, + next_node)) + continue; + err = re_node_set_init_copy (&union_set, + &mctx->state_log[to_idx]->nodes); + ret = re_node_set_insert (&union_set, next_node); + if (BE (err != REG_NOERROR || ret < 0, 0)) + { + re_node_set_free (&union_set); + err = err != REG_NOERROR ? err : REG_ESPACE; + return err; + } + } + else + { + err = re_node_set_init_1 (&union_set, next_node); + if (BE (err != REG_NOERROR, 0)) + return err; + } + mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); + re_node_set_free (&union_set); + if (BE (mctx->state_log[to_idx] == NULL + && err != REG_NOERROR, 0)) + return err; + } + } + while (ent++->more); + return REG_NOERROR; +} + +/* Build transition table for the state. + Return 1 if succeeded, otherwise return NULL. */ + +static int +internal_function +build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) +{ + reg_errcode_t err; + int i, j, ch, need_word_trtable = 0; + bitset_word_t elem, mask; + bool dests_node_malloced = false; + bool dest_states_malloced = false; + int ndests; /* Number of the destination states from `state'. */ + re_dfastate_t **trtable; + re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; + re_node_set follows, *dests_node; + bitset_t *dests_ch; + bitset_t acceptable; + + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset_t dests_ch[SBC_MAX]; + } *dests_alloc; + + /* We build DFA states which corresponds to the destination nodes + from `state'. `dests_node[i]' represents the nodes which i-th + destination state contains, and `dests_ch[i]' represents the + characters which i-th destination state accepts. */ +#ifdef HAVE_ALLOCA + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); + else +#endif + { + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) + return 0; + dests_node_malloced = true; + } + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; + + /* Initialize transiton table. */ + state->word_trtable = state->trtable = NULL; + + /* At first, group all nodes belonging to `state' into several + destinations. */ + ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); + if (BE (ndests <= 0, 0)) + { + if (dests_node_malloced) + free (dests_alloc); + /* Return 0 in case of an error, 1 otherwise. */ + if (ndests == 0) + { + state->trtable = (re_dfastate_t **) + calloc (sizeof (re_dfastate_t *), SBC_MAX); + return 1; + } + return 0; + } + + err = re_node_set_alloc (&follows, ndests + 1); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + + /* Avoid arithmetic overflow in size calculation. */ + if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX) + / (3 * sizeof (re_dfastate_t *))) + < (size_t)ndests), + 0)) + goto out_free; + +#ifdef HAVE_ALLOCA + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX + + ndests * 3 * sizeof (re_dfastate_t *))) + dest_states = (re_dfastate_t **) + alloca (ndests * 3 * sizeof (re_dfastate_t *)); + else +#endif + { + dest_states = (re_dfastate_t **) + malloc (ndests * 3 * sizeof (re_dfastate_t *)); + if (BE (dest_states == NULL, 0)) + { +out_free: + if (dest_states_malloced) + free (dest_states); + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + if (dests_node_malloced) + free (dests_alloc); + return 0; + } + dest_states_malloced = true; + } + dest_states_word = dest_states + ndests; + dest_states_nl = dest_states_word + ndests; + bitset_empty (acceptable); + + /* Then build the states for all destinations. */ + for (i = 0; i < ndests; ++i) + { + int next_node; + re_node_set_empty (&follows); + /* Merge the follows of this destination states. */ + for (j = 0; j < dests_node[i].nelem; ++j) + { + next_node = dfa->nexts[dests_node[i].elems[j]]; + if (next_node != -1) + { + err = re_node_set_merge (&follows, dfa->eclosures + next_node); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + } + } + dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); + if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + /* If the new state has context constraint, + build appropriate states for these contexts. */ + if (dest_states[i]->has_constraint) + { + dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_WORD); + if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + + if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) + need_word_trtable = 1; + + dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_NEWLINE); + if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + } + else + { + dest_states_word[i] = dest_states[i]; + dest_states_nl[i] = dest_states[i]; + } + bitset_merge (acceptable, dests_ch[i]); + } + + if (!BE (need_word_trtable, 0)) + { + /* We don't care about whether the following character is a word + character, or we are in a single-byte character set so we can + discern by looking at the character code: allocate a + 256-entry transition table. */ + trtable = state->trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + if (dfa->word_char[i] & mask) + trtable[ch] = dest_states_word[j]; + else + trtable[ch] = dest_states[j]; + } + } + else + { + /* We care about whether the following character is a word + character, and we are in a multi-byte character set: discern + by looking at the character code: build two 256-entry + transition tables, one starting at trtable[0] and one + starting at trtable[SBC_MAX]. */ + trtable = state->word_trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + trtable[ch] = dest_states[j]; + trtable[ch + SBC_MAX] = dest_states_word[j]; + } + } + + /* new line */ + if (bitset_contain (acceptable, NEWLINE_CHAR)) + { + /* The current state accepts newline character. */ + for (j = 0; j < ndests; ++j) + if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) + { + /* k-th destination accepts newline character. */ + trtable[NEWLINE_CHAR] = dest_states_nl[j]; + if (need_word_trtable) + trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; + /* There must be only one destination which accepts + newline. See group_nodes_into_DFAstates. */ + break; + } + } + + if (dest_states_malloced) + free (dest_states); + + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + + if (dests_node_malloced) + free (dests_alloc); + + return 1; +} + +/* Group all nodes belonging to STATE into several destinations. + Then for all destinations, set the nodes belonging to the destination + to DESTS_NODE[i] and set the characters accepted by the destination + to DEST_CH[i]. This function return the number of destinations. */ + +static int +internal_function +group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, + re_node_set *dests_node, bitset_t *dests_ch) +{ + reg_errcode_t err; + int result; + int i, j, k; + int ndests; /* Number of the destinations from `state'. */ + bitset_t accepts; /* Characters a node can accept. */ + const re_node_set *cur_nodes = &state->nodes; + bitset_empty (accepts); + ndests = 0; + + /* For all the nodes belonging to `state', */ + for (i = 0; i < cur_nodes->nelem; ++i) + { + re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + /* Enumerate all single byte character this node can accept. */ + if (type == CHARACTER) + bitset_set (accepts, node->opr.c); + else if (type == SIMPLE_BRACKET) + { + bitset_merge (accepts, node->opr.sbcset); + } + else if (type == OP_PERIOD) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + bitset_merge (accepts, dfa->sb_char); + else +#endif + bitset_set_all (accepts); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#ifdef RE_ENABLE_I18N + else if (type == OP_UTF8_PERIOD) + { + memset (accepts, '\xff', sizeof (bitset_t) / 2); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#endif + else + continue; + + /* Check the `accepts' and sift the characters which are not + match it the context. */ + if (constraint) + { + if (constraint & NEXT_NEWLINE_CONSTRAINT) + { + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bitset_empty (accepts); + if (accepts_newline) + bitset_set (accepts, NEWLINE_CHAR); + else + continue; + } + if (constraint & NEXT_ENDBUF_CONSTRAINT) + { + bitset_empty (accepts); + continue; + } + + if (constraint & NEXT_WORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && !node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= dfa->word_char[j]); + if (!any_set) + continue; + } + if (constraint & NEXT_NOTWORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~dfa->word_char[j]); + if (!any_set) + continue; + } + } + + /* Then divide `accepts' into DFA states, or create a new + state. Above, we make sure that accepts is not empty. */ + for (j = 0; j < ndests; ++j) + { + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; + /* Flags, see below. */ + bitset_word_t has_intersec, not_subset, not_consumed; + + /* Optimization, skip if this state doesn't accept the character. */ + if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) + continue; + + /* Enumerate the intersection set of this state and `accepts'. */ + has_intersec = 0; + for (k = 0; k < BITSET_WORDS; ++k) + has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; + /* And skip if the intersection set is empty. */ + if (!has_intersec) + continue; + + /* Then check if this state is a subset of `accepts'. */ + not_subset = not_consumed = 0; + for (k = 0; k < BITSET_WORDS; ++k) + { + not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; + not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; + } + + /* If this state isn't a subset of `accepts', create a + new group state, which has the `remains'. */ + if (not_subset) + { + bitset_copy (dests_ch[ndests], remains); + bitset_copy (dests_ch[j], intersec); + err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + } + + /* Put the position in the current group. */ + result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (BE (result < 0, 0)) + goto error_return; + + /* If all characters are consumed, go to next node. */ + if (!not_consumed) + break; + } + /* Some characters remain, create a new group. */ + if (j == ndests) + { + bitset_copy (dests_ch[ndests], accepts); + err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + bitset_empty (accepts); + } + } + return ndests; + error_return: + for (j = 0; j < ndests; ++j) + re_node_set_free (dests_node + j); + return -1; +} + +#ifdef RE_ENABLE_I18N +/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. + Return the number of the bytes the node accepts. + STR_IDX is the current index of the input string. + + This function handles the nodes which can accept one character, or + one collating element like '.', '[a-z]', opposite to the other nodes + can only accept one byte. */ + +static int +internal_function +check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int str_idx) +{ + const re_token_t *node = dfa->nodes + node_idx; + int char_len, elem_len; + int i; + wint_t wc; + + if (BE (node->type == OP_UTF8_PERIOD, 0)) + { + unsigned char c = re_string_byte_at (input, str_idx), d; + if (BE (c < 0xc2, 1)) + return 0; + + if (str_idx + 2 > input->len) + return 0; + + d = re_string_byte_at (input, str_idx + 1); + if (c < 0xe0) + return (d < 0x80 || d > 0xbf) ? 0 : 2; + else if (c < 0xf0) + { + char_len = 3; + if (c == 0xe0 && d < 0xa0) + return 0; + } + else if (c < 0xf8) + { + char_len = 4; + if (c == 0xf0 && d < 0x90) + return 0; + } + else if (c < 0xfc) + { + char_len = 5; + if (c == 0xf8 && d < 0x88) + return 0; + } + else if (c < 0xfe) + { + char_len = 6; + if (c == 0xfc && d < 0x84) + return 0; + } + else + return 0; + + if (str_idx + char_len > input->len) + return 0; + + for (i = 1; i < char_len; ++i) + { + d = re_string_byte_at (input, str_idx + i); + if (d < 0x80 || d > 0xbf) + return 0; + } + return char_len; + } + + char_len = re_string_char_size_at (input, str_idx); + if (node->type == OP_PERIOD) + { + if (char_len <= 1) + return 0; + /* FIXME: I don't think this if is needed, as both '\n' + and '\0' are char_len == 1. */ + /* '.' accepts any one character except the following two cases. */ + if ((!(dfa->syntax & RE_DOT_NEWLINE) && + re_string_byte_at (input, str_idx) == '\n') || + ((dfa->syntax & RE_DOT_NOT_NULL) && + re_string_byte_at (input, str_idx) == '\0')) + return 0; + return char_len; + } + + elem_len = re_string_elem_size_at (input, str_idx); + wc = __btowc(*(input->mbs+str_idx)); + if (((elem_len <= 1 && char_len <= 1) || char_len == 0) && (wc != WEOF && wc < SBC_MAX)) + return 0; + + if (node->type == COMPLEX_BRACKET) + { + const re_charset_t *cset = node->opr.mbcset; +# ifdef _LIBC + const unsigned char *pin + = ((const unsigned char *) re_string_get_buffer (input) + str_idx); + int j; + uint32_t nrules; +# endif /* _LIBC */ + int match_len = 0; + wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) + ? re_string_wchar_at (input, str_idx) : 0); + + /* match with multibyte character? */ + for (i = 0; i < cset->nmbchars; ++i) + if (wc == cset->mbchars[i]) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + /* match with character_class? */ + for (i = 0; i < cset->nchar_classes; ++i) + { + wctype_t wt = cset->char_classes[i]; + if (__iswctype (wc, wt)) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + +# ifdef _LIBC + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + unsigned int in_collseq = 0; + const int32_t *table, *indirect; + const unsigned char *weights, *extra; + const char *collseqwc; + /* This #include defines a local function! */ +# include + + /* match with collating_symbol? */ + if (cset->ncoll_syms) + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + for (i = 0; i < cset->ncoll_syms; ++i) + { + const unsigned char *coll_sym = extra + cset->coll_syms[i]; + /* Compare the length of input collating element and + the length of current collating element. */ + if (*coll_sym != elem_len) + continue; + /* Compare each bytes. */ + for (j = 0; j < *coll_sym; j++) + if (pin[j] != coll_sym[1 + j]) + break; + if (j == *coll_sym) + { + /* Match if every bytes is equal. */ + match_len = j; + goto check_node_accept_bytes_match; + } + } + + if (cset->nranges) + { + if (elem_len <= char_len) + { + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + in_collseq = __collseq_table_lookup (collseqwc, wc); + } + else + in_collseq = find_collation_sequence_value (pin, elem_len); + } + /* match with range expression? */ + for (i = 0; i < cset->nranges; ++i) + if (cset->range_starts[i] <= in_collseq + && in_collseq <= cset->range_ends[i]) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + + /* match with equivalence_class? */ + if (cset->nequiv_classes) + { + const unsigned char *cp = pin; + table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); + int32_t idx = findidx (&cp); + if (idx > 0) + for (i = 0; i < cset->nequiv_classes; ++i) + { + int32_t equiv_class_idx = cset->equiv_classes[i]; + size_t weight_len = weights[idx & 0xffffff]; + if (weight_len == weights[equiv_class_idx & 0xffffff] + && (idx >> 24) == (equiv_class_idx >> 24)) + { + int cnt = 0; + + idx &= 0xffffff; + equiv_class_idx &= 0xffffff; + + while (cnt <= weight_len + && (weights[equiv_class_idx + 1 + cnt] + == weights[idx + 1 + cnt])) + ++cnt; + if (cnt > weight_len) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + } + } + } + } + else +# endif /* _LIBC */ + { + /* match with range expression? */ +#if __GNUC__ >= 2 + wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; +#else + wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + cmp_buf[2] = wc; +#endif + for (i = 0; i < cset->nranges; ++i) + { + cmp_buf[0] = cset->range_starts[i]; + cmp_buf[4] = cset->range_ends[i]; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + } + check_node_accept_bytes_match: + if (!cset->non_match) + return match_len; + else + { + if (match_len > 0) + return 0; + else + return (elem_len > char_len) ? elem_len : char_len; + } + } + return 0; +} + +# ifdef _LIBC +static unsigned int +internal_function +find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) +{ + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules == 0) + { + if (mbs_len == 1) + { + /* No valid character. Match it as a single byte character. */ + const unsigned char *collseq = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + return collseq[mbs[0]]; + } + return UINT_MAX; + } + else + { + int32_t idx; + const unsigned char *extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + int32_t extrasize = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; + + for (idx = 0; idx < extrasize;) + { + int mbs_cnt, found = 0; + int32_t elem_mbs_len; + /* Skip the name of collating element name. */ + idx = idx + extra[idx] + 1; + elem_mbs_len = extra[idx++]; + if (mbs_len == elem_mbs_len) + { + for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) + if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) + break; + if (mbs_cnt == elem_mbs_len) + /* Found the entry. */ + found = 1; + } + /* Skip the byte sequence of the collating element. */ + idx += elem_mbs_len; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the collation sequence value. */ + idx += sizeof (uint32_t); + /* Skip the wide char sequence of the collating element. */ + idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + /* If we found the entry, return the sequence value. */ + if (found) + return *(uint32_t *) (extra + idx); + /* Skip the collation sequence value. */ + idx += sizeof (uint32_t); + } + return UINT_MAX; + } +} +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ + +/* Check whether the node accepts the byte which is IDX-th + byte of the INPUT. */ + +static int +internal_function +check_node_accept (const re_match_context_t *mctx, const re_token_t *node, + int idx) +{ + unsigned char ch; + ch = re_string_byte_at (&mctx->input, idx); + switch (node->type) + { + case CHARACTER: + if (node->opr.c != ch) + return 0; + break; + + case SIMPLE_BRACKET: + if (!bitset_contain (node->opr.sbcset, ch)) + return 0; + break; + +#ifdef RE_ENABLE_I18N + case OP_UTF8_PERIOD: + if (ch >= 0x80) + return 0; + /* FALLTHROUGH */ +#endif + case OP_PERIOD: + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return 0; + break; + + default: + return 0; + } + + if (node->constraint) + { + /* The node has constraints. Check whether the current context + satisfies the constraints. */ + unsigned int context = re_string_context_at (&mctx->input, idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + return 0; + } + + return 1; +} + +/* Extend the buffers, if the buffers have run out. */ + +static reg_errcode_t +internal_function +extend_buffers (re_match_context_t *mctx) +{ + reg_errcode_t ret; + re_string_t *pstr = &mctx->input; + + /* Avoid overflow. */ + if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= (size_t)pstr->bufs_len, 0)) + return REG_ESPACE; + + /* Double the lengthes of the buffers. */ + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + if (mctx->state_log != NULL) + { + /* And double the length of state_log. */ + /* XXX We have no indication of the size of this buffer. If this + allocation fail we have no indication that the state_log array + does not have the right size. */ + re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len + 1); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->state_log = new_array; + } + + /* Then reconstruct the buffers. */ + if (pstr->icase) + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + } + return REG_NOERROR; +} + + +/* Functions for matching context. */ + +/* Initialize MCTX. */ + +static reg_errcode_t +internal_function +match_ctx_init (re_match_context_t *mctx, int eflags, int n) +{ + mctx->eflags = eflags; + mctx->match_last = -1; + if (n > 0) + { + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); + if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) + return REG_ESPACE; + } + /* Already zero-ed by the caller. + else + mctx->bkref_ents = NULL; + mctx->nbkref_ents = 0; + mctx->nsub_tops = 0; */ + mctx->abkref_ents = n; + mctx->max_mb_elem_len = 1; + mctx->asub_tops = n; + return REG_NOERROR; +} + +/* Clean the entries which depend on the current input in MCTX. + This function must be invoked when the matcher changes the start index + of the input, or changes the input string. */ + +static void +internal_function +match_ctx_clean (re_match_context_t *mctx) +{ + int st_idx; + for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) + { + int sl_idx; + re_sub_match_top_t *top = mctx->sub_tops[st_idx]; + for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) + { + re_sub_match_last_t *last = top->lasts[sl_idx]; + re_free (last->path.array); + re_free (last); + } + re_free (top->lasts); + if (top->path) + { + re_free (top->path->array); + re_free (top->path); + } + free (top); + } + + mctx->nsub_tops = 0; + mctx->nbkref_ents = 0; +} + +/* Free all the memory associated with MCTX. */ + +static void +internal_function +match_ctx_free (re_match_context_t *mctx) +{ + /* First, free all the memory associated with MCTX->SUB_TOPS. */ + match_ctx_clean (mctx); + re_free (mctx->sub_tops); + re_free (mctx->bkref_ents); +} + +/* Add a new backreference entry to MCTX. + Note that we assume that caller never call this function with duplicate + entry, and call with STR_IDX which isn't smaller than any existing entry. +*/ + +static reg_errcode_t +internal_function +match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, + int to) +{ + if (mctx->nbkref_ents >= mctx->abkref_ents) + { + struct re_backref_cache_entry* new_entry; + new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, + mctx->abkref_ents * 2); + if (BE (new_entry == NULL, 0)) + { + re_free (mctx->bkref_ents); + return REG_ESPACE; + } + mctx->bkref_ents = new_entry; + memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', + sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); + mctx->abkref_ents *= 2; + } + if (mctx->nbkref_ents > 0 + && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) + mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; + + mctx->bkref_ents[mctx->nbkref_ents].node = node; + mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; + mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; + mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; + + /* This is a cache that saves negative results of check_dst_limits_calc_pos. + If bit N is clear, means that this entry won't epsilon-transition to + an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If + it is set, check_dst_limits_calc_pos_1 will recurse and try to find one + such node. + + A backreference does not epsilon-transition unless it is empty, so set + to all zeros if FROM != TO. */ + mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map + = (from == to ? ~0 : 0); + + mctx->bkref_ents[mctx->nbkref_ents++].more = 0; + if (mctx->max_mb_elem_len < to - from) + mctx->max_mb_elem_len = to - from; + return REG_NOERROR; +} + +/* Search for the first entry which has the same str_idx, or -1 if none is + found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ + +static int +internal_function +search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) +{ + int left, right, mid, last; + last = right = mctx->nbkref_ents; + for (left = 0; left < right;) + { + mid = (left + right) / 2; + if (mctx->bkref_ents[mid].str_idx < str_idx) + left = mid + 1; + else + right = mid; + } + if (left < last && mctx->bkref_ents[left].str_idx == str_idx) + return left; + else + return -1; +} + +/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches + at STR_IDX. */ + +static reg_errcode_t +internal_function +match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) +{ +#ifdef DEBUG + assert (mctx->sub_tops != NULL); + assert (mctx->asub_tops > 0); +#endif + if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) + { + int new_asub_tops = mctx->asub_tops * 2; + re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, + re_sub_match_top_t *, + new_asub_tops); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops = new_array; + mctx->asub_tops = new_asub_tops; + } + mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); + if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops[mctx->nsub_tops]->node = node; + mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; + return REG_NOERROR; +} + +/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches + at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ + +static re_sub_match_last_t * +internal_function +match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) +{ + re_sub_match_last_t *new_entry; + if (BE (subtop->nlasts == subtop->alasts, 0)) + { + int new_alasts = 2 * subtop->alasts + 1; + re_sub_match_last_t **new_array = re_realloc (subtop->lasts, + re_sub_match_last_t *, + new_alasts); + if (BE (new_array == NULL, 0)) + return NULL; + subtop->lasts = new_array; + subtop->alasts = new_alasts; + } + new_entry = calloc (1, sizeof (re_sub_match_last_t)); + if (BE (new_entry != NULL, 1)) + { + subtop->lasts[subtop->nlasts] = new_entry; + new_entry->node = node; + new_entry->str_idx = str_idx; + ++subtop->nlasts; + } + return new_entry; +} + +static void +internal_function +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, int last_str_idx) +{ + sctx->sifted_states = sifted_sts; + sctx->limited_states = limited_sts; + sctx->last_node = last_node; + sctx->last_str_idx = last_str_idx; + re_node_set_init_empty (&sctx->limits); +} diff --git a/deps/winhttp/urlmon.h b/deps/winhttp/urlmon.h new file mode 100644 index 0000000..4143d50 --- /dev/null +++ b/deps/winhttp/urlmon.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#if defined(__MINGW_VERSION) || defined(__MINGW32_VERSION) + +#ifndef __CUSTOM_URLMON_H +#define __CUSTOM_URLMON_H + +typedef struct IInternetSecurityManager IInternetSecurityManager; + +typedef struct IInternetSecurityManagerVtbl +{ + HRESULT(STDMETHODCALLTYPE *QueryInterface)(IInternetSecurityManager *, REFIID, void **); + ULONG(STDMETHODCALLTYPE *AddRef)(IInternetSecurityManager *); + ULONG(STDMETHODCALLTYPE *Release)(IInternetSecurityManager *); + LPVOID SetSecuritySite; + LPVOID GetSecuritySite; + HRESULT(STDMETHODCALLTYPE *MapUrlToZone)(IInternetSecurityManager *, LPCWSTR, DWORD *, DWORD); + LPVOID GetSecurityId; + LPVOID ProcessUrlAction; + LPVOID QueryCustomPolicy; + LPVOID SetZoneMapping; + LPVOID GetZoneMappings; +} IInternetSecurityManagerVtbl; + +struct IInternetSecurityManager +{ + CONST_VTBL struct IInternetSecurityManagerVtbl *lpVtbl; +}; + +#define URLZONE_LOCAL_MACHINE 0 +#define URLZONE_INTRANET 1 +#define URLZONE_TRUSTED 2 + +#endif /* __CUSTOM_URLMON_H */ + +#else + +#include_next + +#endif diff --git a/deps/winhttp/winhttp.def b/deps/winhttp/winhttp.def new file mode 100644 index 0000000..eecce59 --- /dev/null +++ b/deps/winhttp/winhttp.def @@ -0,0 +1,29 @@ +LIBRARY WINHTTP +EXPORTS +WinHttpAddRequestHeaders@16 +WinHttpCheckPlatform@0 +WinHttpCloseHandle@4 +WinHttpConnect@16 +WinHttpCrackUrl@16 +WinHttpCreateUrl@16 +WinHttpDetectAutoProxyConfigUrl@8 +WinHttpGetDefaultProxyConfiguration@4 +WinHttpGetIEProxyConfigForCurrentUser@4 +WinHttpGetProxyForUrl@16 +WinHttpOpen@20 +WinHttpOpenRequest@28 +WinHttpQueryAuthSchemes@16 +WinHttpQueryDataAvailable@8 +WinHttpQueryHeaders@24 +WinHttpQueryOption@16 +WinHttpReadData@16 +WinHttpReceiveResponse@8 +WinHttpSendRequest@28 +WinHttpSetCredentials@24 +WinHttpSetDefaultProxyConfiguration@4 +WinHttpSetOption@16 +WinHttpSetStatusCallback@16 +WinHttpSetTimeouts@20 +WinHttpTimeFromSystemTime@8 +WinHttpTimeToSystemTime@8 +WinHttpWriteData@16 diff --git a/deps/winhttp/winhttp.h b/deps/winhttp/winhttp.h new file mode 100644 index 0000000..b7fef1c --- /dev/null +++ b/deps/winhttp/winhttp.h @@ -0,0 +1,594 @@ +/* + * Copyright (C) 2007 Francois Gouget + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#if defined(__MINGW_VERSION) || defined(__MINGW32_VERSION) + +#ifndef __WINE_WINHTTP_H +#define __WINE_WINHTTP_H + +#ifdef _WIN64 +#include +#else +#include +#endif + +#define WINHTTPAPI +#define BOOLAPI WINHTTPAPI BOOL WINAPI + + +typedef LPVOID HINTERNET; +typedef HINTERNET *LPHINTERNET; + +#define INTERNET_DEFAULT_PORT 0 +#define INTERNET_DEFAULT_HTTP_PORT 80 +#define INTERNET_DEFAULT_HTTPS_PORT 443 +typedef WORD INTERNET_PORT; +typedef INTERNET_PORT *LPINTERNET_PORT; + +#define INTERNET_SCHEME_HTTP 1 +#define INTERNET_SCHEME_HTTPS 2 +typedef int INTERNET_SCHEME, *LPINTERNET_SCHEME; + +#define ICU_ESCAPE 0x80000000 + +/* flags for WinHttpOpen */ +#define WINHTTP_FLAG_ASYNC 0x10000000 + +/* flags for WinHttpOpenRequest */ +#define WINHTTP_FLAG_ESCAPE_PERCENT 0x00000004 +#define WINHTTP_FLAG_NULL_CODEPAGE 0x00000008 +#define WINHTTP_FLAG_ESCAPE_DISABLE 0x00000040 +#define WINHTTP_FLAG_ESCAPE_DISABLE_QUERY 0x00000080 +#define WINHTTP_FLAG_BYPASS_PROXY_CACHE 0x00000100 +#define WINHTTP_FLAG_REFRESH WINHTTP_FLAG_BYPASS_PROXY_CACHE +#define WINHTTP_FLAG_SECURE 0x00800000 + +#define WINHTTP_ACCESS_TYPE_DEFAULT_PROXY 0 +#define WINHTTP_ACCESS_TYPE_NO_PROXY 1 +#define WINHTTP_ACCESS_TYPE_NAMED_PROXY 3 + +#define WINHTTP_NO_PROXY_NAME NULL +#define WINHTTP_NO_PROXY_BYPASS NULL + +#define WINHTTP_NO_REFERER NULL +#define WINHTTP_DEFAULT_ACCEPT_TYPES NULL + +#define WINHTTP_NO_ADDITIONAL_HEADERS NULL +#define WINHTTP_NO_REQUEST_DATA NULL + +#define WINHTTP_HEADER_NAME_BY_INDEX NULL +#define WINHTTP_NO_OUTPUT_BUFFER NULL +#define WINHTTP_NO_HEADER_INDEX NULL + +#define WINHTTP_ADDREQ_INDEX_MASK 0x0000FFFF +#define WINHTTP_ADDREQ_FLAGS_MASK 0xFFFF0000 +#define WINHTTP_ADDREQ_FLAG_ADD_IF_NEW 0x10000000 +#define WINHTTP_ADDREQ_FLAG_ADD 0x20000000 +#define WINHTTP_ADDREQ_FLAG_COALESCE_WITH_COMMA 0x40000000 +#define WINHTTP_ADDREQ_FLAG_COALESCE_WITH_SEMICOLON 0x01000000 +#define WINHTTP_ADDREQ_FLAG_COALESCE WINHTTP_ADDREQ_FLAG_COALESCE_WITH_COMMA +#define WINHTTP_ADDREQ_FLAG_REPLACE 0x80000000 + +#define WINHTTP_IGNORE_REQUEST_TOTAL_LENGTH 0 + +/* flags for WinHttp{Set/Query}Options */ +#define WINHTTP_FIRST_OPTION WINHTTP_OPTION_CALLBACK +#define WINHTTP_OPTION_CALLBACK 1 +#define WINHTTP_OPTION_RESOLVE_TIMEOUT 2 +#define WINHTTP_OPTION_CONNECT_TIMEOUT 3 +#define WINHTTP_OPTION_CONNECT_RETRIES 4 +#define WINHTTP_OPTION_SEND_TIMEOUT 5 +#define WINHTTP_OPTION_RECEIVE_TIMEOUT 6 +#define WINHTTP_OPTION_RECEIVE_RESPONSE_TIMEOUT 7 +#define WINHTTP_OPTION_HANDLE_TYPE 9 +#define WINHTTP_OPTION_READ_BUFFER_SIZE 12 +#define WINHTTP_OPTION_WRITE_BUFFER_SIZE 13 +#define WINHTTP_OPTION_PARENT_HANDLE 21 +#define WINHTTP_OPTION_EXTENDED_ERROR 24 +#define WINHTTP_OPTION_SECURITY_FLAGS 31 +#define WINHTTP_OPTION_SECURITY_CERTIFICATE_STRUCT 32 +#define WINHTTP_OPTION_URL 34 +#define WINHTTP_OPTION_SECURITY_KEY_BITNESS 36 +#define WINHTTP_OPTION_PROXY 38 +#define WINHTTP_OPTION_USER_AGENT 41 +#define WINHTTP_OPTION_CONTEXT_VALUE 45 +#define WINHTTP_OPTION_CLIENT_CERT_CONTEXT 47 +#define WINHTTP_OPTION_REQUEST_PRIORITY 58 +#define WINHTTP_OPTION_HTTP_VERSION 59 +#define WINHTTP_OPTION_DISABLE_FEATURE 63 +#define WINHTTP_OPTION_CODEPAGE 68 +#define WINHTTP_OPTION_MAX_CONNS_PER_SERVER 73 +#define WINHTTP_OPTION_MAX_CONNS_PER_1_0_SERVER 74 +#define WINHTTP_OPTION_AUTOLOGON_POLICY 77 +#define WINHTTP_OPTION_SERVER_CERT_CONTEXT 78 +#define WINHTTP_OPTION_ENABLE_FEATURE 79 +#define WINHTTP_OPTION_WORKER_THREAD_COUNT 80 +#define WINHTTP_OPTION_PASSPORT_COBRANDING_TEXT 81 +#define WINHTTP_OPTION_PASSPORT_COBRANDING_URL 82 +#define WINHTTP_OPTION_CONFIGURE_PASSPORT_AUTH 83 +#define WINHTTP_OPTION_SECURE_PROTOCOLS 84 +#define WINHTTP_OPTION_ENABLETRACING 85 +#define WINHTTP_OPTION_PASSPORT_SIGN_OUT 86 +#define WINHTTP_OPTION_PASSPORT_RETURN_URL 87 +#define WINHTTP_OPTION_REDIRECT_POLICY 88 +#define WINHTTP_OPTION_MAX_HTTP_AUTOMATIC_REDIRECTS 89 +#define WINHTTP_OPTION_MAX_HTTP_STATUS_CONTINUE 90 +#define WINHTTP_OPTION_MAX_RESPONSE_HEADER_SIZE 91 +#define WINHTTP_OPTION_MAX_RESPONSE_DRAIN_SIZE 92 +#define WINHTTP_OPTION_CONNECTION_INFO 93 +#define WINHTTP_OPTION_CLIENT_CERT_ISSUER_LIST 94 +#define WINHTTP_OPTION_SPN 96 +#define WINHTTP_OPTION_GLOBAL_PROXY_CREDS 97 +#define WINHTTP_OPTION_GLOBAL_SERVER_CREDS 98 +#define WINHTTP_OPTION_UNLOAD_NOTIFY_EVENT 99 +#define WINHTTP_OPTION_REJECT_USERPWD_IN_URL 100 +#define WINHTTP_OPTION_USE_GLOBAL_SERVER_CREDENTIALS 101 +#define WINHTTP_LAST_OPTION WINHTTP_OPTION_USE_GLOBAL_SERVER_CREDENTIALS +#define WINHTTP_OPTION_USERNAME 0x1000 +#define WINHTTP_OPTION_PASSWORD 0x1001 +#define WINHTTP_OPTION_PROXY_USERNAME 0x1002 +#define WINHTTP_OPTION_PROXY_PASSWORD 0x1003 + +#define WINHTTP_CONNS_PER_SERVER_UNLIMITED 0xFFFFFFFF + +#define WINHTTP_AUTOLOGON_SECURITY_LEVEL_MEDIUM 0 +#define WINHTTP_AUTOLOGON_SECURITY_LEVEL_LOW 1 +#define WINHTTP_AUTOLOGON_SECURITY_LEVEL_HIGH 2 +#define WINHTTP_AUTOLOGON_SECURITY_LEVEL_DEFAULT WINHTTP_AUTOLOGON_SECURITY_LEVEL_MEDIUM + +#define WINHTTP_OPTION_REDIRECT_POLICY_NEVER 0 +#define WINHTTP_OPTION_REDIRECT_POLICY_DISALLOW_HTTPS_TO_HTTP 1 +#define WINHTTP_OPTION_REDIRECT_POLICY_ALWAYS 2 +#define WINHTTP_OPTION_REDIRECT_POLICY_LAST WINHTTP_OPTION_REDIRECT_POLICY_ALWAYS +#define WINHTTP_OPTION_REDIRECT_POLICY_DEFAULT WINHTTP_OPTION_REDIRECT_POLICY_DISALLOW_HTTPS_TO_HTTP + +#define WINHTTP_DISABLE_PASSPORT_AUTH 0x00000000 +#define WINHTTP_ENABLE_PASSPORT_AUTH 0x10000000 +#define WINHTTP_DISABLE_PASSPORT_KEYRING 0x20000000 +#define WINHTTP_ENABLE_PASSPORT_KEYRING 0x40000000 + +#define WINHTTP_DISABLE_COOKIES 0x00000001 +#define WINHTTP_DISABLE_REDIRECTS 0x00000002 +#define WINHTTP_DISABLE_AUTHENTICATION 0x00000004 +#define WINHTTP_DISABLE_KEEP_ALIVE 0x00000008 +#define WINHTTP_ENABLE_SSL_REVOCATION 0x00000001 +#define WINHTTP_ENABLE_SSL_REVERT_IMPERSONATION 0x00000002 +#define WINHTTP_DISABLE_SPN_SERVER_PORT 0x00000000 +#define WINHTTP_ENABLE_SPN_SERVER_PORT 0x00000001 +#define WINHTTP_OPTION_SPN_MASK WINHTTP_ENABLE_SPN_SERVER_PORT + +/* Options for WinHttpOpenRequest */ +#define WINHTTP_NO_REFERER NULL +#define WINHTTP_DEFAULT_ACCEPT_TYPES NULL + +/* Options for WinHttpSendRequest */ +#define WINHTTP_NO_ADDITIONAL_HEADERS NULL +#define WINHTTP_NO_REQUEST_DATA NULL + +/* WinHTTP error codes */ +#define WINHTTP_ERROR_BASE 12000 +#define ERROR_WINHTTP_OUT_OF_HANDLES (WINHTTP_ERROR_BASE + 1) +#define ERROR_WINHTTP_TIMEOUT (WINHTTP_ERROR_BASE + 2) +#define ERROR_WINHTTP_INTERNAL_ERROR (WINHTTP_ERROR_BASE + 4) +#define ERROR_WINHTTP_INVALID_URL (WINHTTP_ERROR_BASE + 5) +#define ERROR_WINHTTP_UNRECOGNIZED_SCHEME (WINHTTP_ERROR_BASE + 6) +#define ERROR_WINHTTP_NAME_NOT_RESOLVED (WINHTTP_ERROR_BASE + 7) +#define ERROR_WINHTTP_INVALID_OPTION (WINHTTP_ERROR_BASE + 9) +#define ERROR_WINHTTP_OPTION_NOT_SETTABLE (WINHTTP_ERROR_BASE + 11) +#define ERROR_WINHTTP_SHUTDOWN (WINHTTP_ERROR_BASE + 12) +#define ERROR_WINHTTP_LOGIN_FAILURE (WINHTTP_ERROR_BASE + 15) +#define ERROR_WINHTTP_OPERATION_CANCELLED (WINHTTP_ERROR_BASE + 17) +#define ERROR_WINHTTP_INCORRECT_HANDLE_TYPE (WINHTTP_ERROR_BASE + 18) +#define ERROR_WINHTTP_INCORRECT_HANDLE_STATE (WINHTTP_ERROR_BASE + 19) +#define ERROR_WINHTTP_CANNOT_CONNECT (WINHTTP_ERROR_BASE + 29) +#define ERROR_WINHTTP_CONNECTION_ERROR (WINHTTP_ERROR_BASE + 30) +#define ERROR_WINHTTP_RESEND_REQUEST (WINHTTP_ERROR_BASE + 32) +#define ERROR_WINHTTP_SECURE_CERT_DATE_INVALID (WINHTTP_ERROR_BASE + 37) +#define ERROR_WINHTTP_SECURE_CERT_CN_INVALID (WINHTTP_ERROR_BASE + 38) +#define ERROR_WINHTTP_CLIENT_AUTH_CERT_NEEDED (WINHTTP_ERROR_BASE + 44) +#define ERROR_WINHTTP_SECURE_INVALID_CA (WINHTTP_ERROR_BASE + 45) +#define ERROR_WINHTTP_SECURE_CERT_REV_FAILED (WINHTTP_ERROR_BASE + 57) +#define ERROR_WINHTTP_CANNOT_CALL_BEFORE_OPEN (WINHTTP_ERROR_BASE + 100) +#define ERROR_WINHTTP_CANNOT_CALL_BEFORE_SEND (WINHTTP_ERROR_BASE + 101) +#define ERROR_WINHTTP_CANNOT_CALL_AFTER_SEND (WINHTTP_ERROR_BASE + 102) +#define ERROR_WINHTTP_CANNOT_CALL_AFTER_OPEN (WINHTTP_ERROR_BASE + 103) +#define ERROR_WINHTTP_HEADER_NOT_FOUND (WINHTTP_ERROR_BASE + 150) +#define ERROR_WINHTTP_INVALID_SERVER_RESPONSE (WINHTTP_ERROR_BASE + 152) +#define ERROR_WINHTTP_INVALID_HEADER (WINHTTP_ERROR_BASE + 153) +#define ERROR_WINHTTP_INVALID_QUERY_REQUEST (WINHTTP_ERROR_BASE + 154) +#define ERROR_WINHTTP_HEADER_ALREADY_EXISTS (WINHTTP_ERROR_BASE + 155) +#define ERROR_WINHTTP_REDIRECT_FAILED (WINHTTP_ERROR_BASE + 156) +#define ERROR_WINHTTP_SECURE_CHANNEL_ERROR (WINHTTP_ERROR_BASE + 157) +#define ERROR_WINHTTP_BAD_AUTO_PROXY_SCRIPT (WINHTTP_ERROR_BASE + 166) +#define ERROR_WINHTTP_UNABLE_TO_DOWNLOAD_SCRIPT (WINHTTP_ERROR_BASE + 167) +#define ERROR_WINHTTP_SECURE_INVALID_CERT (WINHTTP_ERROR_BASE + 169) +#define ERROR_WINHTTP_SECURE_CERT_REVOKED (WINHTTP_ERROR_BASE + 170) +#define ERROR_WINHTTP_NOT_INITIALIZED (WINHTTP_ERROR_BASE + 172) +#define ERROR_WINHTTP_SECURE_FAILURE (WINHTTP_ERROR_BASE + 175) +#define ERROR_WINHTTP_AUTO_PROXY_SERVICE_ERROR (WINHTTP_ERROR_BASE + 178) +#define ERROR_WINHTTP_SECURE_CERT_WRONG_USAGE (WINHTTP_ERROR_BASE + 179) +#define ERROR_WINHTTP_AUTODETECTION_FAILED (WINHTTP_ERROR_BASE + 180) +#define ERROR_WINHTTP_HEADER_COUNT_EXCEEDED (WINHTTP_ERROR_BASE + 181) +#define ERROR_WINHTTP_HEADER_SIZE_OVERFLOW (WINHTTP_ERROR_BASE + 182) +#define ERROR_WINHTTP_CHUNKED_ENCODING_HEADER_SIZE_OVERFLOW (WINHTTP_ERROR_BASE + 183) +#define ERROR_WINHTTP_RESPONSE_DRAIN_OVERFLOW (WINHTTP_ERROR_BASE + 184) +#define ERROR_WINHTTP_CLIENT_CERT_NO_PRIVATE_KEY (WINHTTP_ERROR_BASE + 185) +#define ERROR_WINHTTP_CLIENT_CERT_NO_ACCESS_PRIVATE_KEY (WINHTTP_ERROR_BASE + 186) +#define WINHTTP_ERROR_LAST (WINHTTP_ERROR_BASE + 186) + +/* WinHttp status codes */ +#define HTTP_STATUS_CONTINUE 100 +#define HTTP_STATUS_SWITCH_PROTOCOLS 101 +#define HTTP_STATUS_OK 200 +#define HTTP_STATUS_CREATED 201 +#define HTTP_STATUS_ACCEPTED 202 +#define HTTP_STATUS_PARTIAL 203 +#define HTTP_STATUS_NO_CONTENT 204 +#define HTTP_STATUS_RESET_CONTENT 205 +#define HTTP_STATUS_PARTIAL_CONTENT 206 +#define HTTP_STATUS_WEBDAV_MULTI_STATUS 207 +#define HTTP_STATUS_AMBIGUOUS 300 +#define HTTP_STATUS_MOVED 301 +#define HTTP_STATUS_REDIRECT 302 +#define HTTP_STATUS_REDIRECT_METHOD 303 +#define HTTP_STATUS_NOT_MODIFIED 304 +#define HTTP_STATUS_USE_PROXY 305 +#define HTTP_STATUS_REDIRECT_KEEP_VERB 307 +#define HTTP_STATUS_BAD_REQUEST 400 +#define HTTP_STATUS_DENIED 401 +#define HTTP_STATUS_PAYMENT_REQ 402 +#define HTTP_STATUS_FORBIDDEN 403 +#define HTTP_STATUS_NOT_FOUND 404 +#define HTTP_STATUS_BAD_METHOD 405 +#define HTTP_STATUS_NONE_ACCEPTABLE 406 +#define HTTP_STATUS_PROXY_AUTH_REQ 407 +#define HTTP_STATUS_REQUEST_TIMEOUT 408 +#define HTTP_STATUS_CONFLICT 409 +#define HTTP_STATUS_GONE 410 +#define HTTP_STATUS_LENGTH_REQUIRED 411 +#define HTTP_STATUS_PRECOND_FAILED 412 +#define HTTP_STATUS_REQUEST_TOO_LARGE 413 +#define HTTP_STATUS_URI_TOO_LONG 414 +#define HTTP_STATUS_UNSUPPORTED_MEDIA 415 +#define HTTP_STATUS_RETRY_WITH 449 +#define HTTP_STATUS_SERVER_ERROR 500 +#define HTTP_STATUS_NOT_SUPPORTED 501 +#define HTTP_STATUS_BAD_GATEWAY 502 +#define HTTP_STATUS_SERVICE_UNAVAIL 503 +#define HTTP_STATUS_GATEWAY_TIMEOUT 504 +#define HTTP_STATUS_VERSION_NOT_SUP 505 +#define HTTP_STATUS_FIRST HTTP_STATUS_CONTINUE +#define HTTP_STATUS_LAST HTTP_STATUS_VERSION_NOT_SUP + +#define SECURITY_FLAG_IGNORE_UNKNOWN_CA 0x00000100 +#define SECURITY_FLAG_IGNORE_CERT_DATE_INVALID 0x00002000 +#define SECURITY_FLAG_IGNORE_CERT_CN_INVALID 0x00001000 +#define SECURITY_FLAG_IGNORE_CERT_WRONG_USAGE 0x00000200 +#define SECURITY_FLAG_SECURE 0x00000001 +#define SECURITY_FLAG_STRENGTH_WEAK 0x10000000 +#define SECURITY_FLAG_STRENGTH_MEDIUM 0x40000000 +#define SECURITY_FLAG_STRENGTH_STRONG 0x20000000 + +#define ICU_NO_ENCODE 0x20000000 +#define ICU_DECODE 0x10000000 +#define ICU_NO_META 0x08000000 +#define ICU_ENCODE_SPACES_ONLY 0x04000000 +#define ICU_BROWSER_MODE 0x02000000 +#define ICU_ENCODE_PERCENT 0x00001000 + +/* Query flags */ +#define WINHTTP_QUERY_MIME_VERSION 0 +#define WINHTTP_QUERY_CONTENT_TYPE 1 +#define WINHTTP_QUERY_CONTENT_TRANSFER_ENCODING 2 +#define WINHTTP_QUERY_CONTENT_ID 3 +#define WINHTTP_QUERY_CONTENT_DESCRIPTION 4 +#define WINHTTP_QUERY_CONTENT_LENGTH 5 +#define WINHTTP_QUERY_CONTENT_LANGUAGE 6 +#define WINHTTP_QUERY_ALLOW 7 +#define WINHTTP_QUERY_PUBLIC 8 +#define WINHTTP_QUERY_DATE 9 +#define WINHTTP_QUERY_EXPIRES 10 +#define WINHTTP_QUERY_LAST_MODIFIED 11 +#define WINHTTP_QUERY_MESSAGE_ID 12 +#define WINHTTP_QUERY_URI 13 +#define WINHTTP_QUERY_DERIVED_FROM 14 +#define WINHTTP_QUERY_COST 15 +#define WINHTTP_QUERY_LINK 16 +#define WINHTTP_QUERY_PRAGMA 17 +#define WINHTTP_QUERY_VERSION 18 +#define WINHTTP_QUERY_STATUS_CODE 19 +#define WINHTTP_QUERY_STATUS_TEXT 20 +#define WINHTTP_QUERY_RAW_HEADERS 21 +#define WINHTTP_QUERY_RAW_HEADERS_CRLF 22 +#define WINHTTP_QUERY_CONNECTION 23 +#define WINHTTP_QUERY_ACCEPT 24 +#define WINHTTP_QUERY_ACCEPT_CHARSET 25 +#define WINHTTP_QUERY_ACCEPT_ENCODING 26 +#define WINHTTP_QUERY_ACCEPT_LANGUAGE 27 +#define WINHTTP_QUERY_AUTHORIZATION 28 +#define WINHTTP_QUERY_CONTENT_ENCODING 29 +#define WINHTTP_QUERY_FORWARDED 30 +#define WINHTTP_QUERY_FROM 31 +#define WINHTTP_QUERY_IF_MODIFIED_SINCE 32 +#define WINHTTP_QUERY_LOCATION 33 +#define WINHTTP_QUERY_ORIG_URI 34 +#define WINHTTP_QUERY_REFERER 35 +#define WINHTTP_QUERY_RETRY_AFTER 36 +#define WINHTTP_QUERY_SERVER 37 +#define WINHTTP_QUERY_TITLE 38 +#define WINHTTP_QUERY_USER_AGENT 39 +#define WINHTTP_QUERY_WWW_AUTHENTICATE 40 +#define WINHTTP_QUERY_PROXY_AUTHENTICATE 41 +#define WINHTTP_QUERY_ACCEPT_RANGES 42 +#define WINHTTP_QUERY_SET_COOKIE 43 +#define WINHTTP_QUERY_COOKIE 44 +#define WINHTTP_QUERY_REQUEST_METHOD 45 +#define WINHTTP_QUERY_REFRESH 46 +#define WINHTTP_QUERY_CONTENT_DISPOSITION 47 +#define WINHTTP_QUERY_AGE 48 +#define WINHTTP_QUERY_CACHE_CONTROL 49 +#define WINHTTP_QUERY_CONTENT_BASE 50 +#define WINHTTP_QUERY_CONTENT_LOCATION 51 +#define WINHTTP_QUERY_CONTENT_MD5 52 +#define WINHTTP_QUERY_CONTENT_RANGE 53 +#define WINHTTP_QUERY_ETAG 54 +#define WINHTTP_QUERY_HOST 55 +#define WINHTTP_QUERY_IF_MATCH 56 +#define WINHTTP_QUERY_IF_NONE_MATCH 57 +#define WINHTTP_QUERY_IF_RANGE 58 +#define WINHTTP_QUERY_IF_UNMODIFIED_SINCE 59 +#define WINHTTP_QUERY_MAX_FORWARDS 60 +#define WINHTTP_QUERY_PROXY_AUTHORIZATION 61 +#define WINHTTP_QUERY_RANGE 62 +#define WINHTTP_QUERY_TRANSFER_ENCODING 63 +#define WINHTTP_QUERY_UPGRADE 64 +#define WINHTTP_QUERY_VARY 65 +#define WINHTTP_QUERY_VIA 66 +#define WINHTTP_QUERY_WARNING 67 +#define WINHTTP_QUERY_EXPECT 68 +#define WINHTTP_QUERY_PROXY_CONNECTION 69 +#define WINHTTP_QUERY_UNLESS_MODIFIED_SINCE 70 +#define WINHTTP_QUERY_PROXY_SUPPORT 75 +#define WINHTTP_QUERY_AUTHENTICATION_INFO 76 +#define WINHTTP_QUERY_PASSPORT_URLS 77 +#define WINHTTP_QUERY_PASSPORT_CONFIG 78 +#define WINHTTP_QUERY_MAX 78 +#define WINHTTP_QUERY_CUSTOM 65535 +#define WINHTTP_QUERY_FLAG_REQUEST_HEADERS 0x80000000 +#define WINHTTP_QUERY_FLAG_SYSTEMTIME 0x40000000 +#define WINHTTP_QUERY_FLAG_NUMBER 0x20000000 + +/* Callback options */ +#define WINHTTP_CALLBACK_STATUS_RESOLVING_NAME 0x00000001 +#define WINHTTP_CALLBACK_STATUS_NAME_RESOLVED 0x00000002 +#define WINHTTP_CALLBACK_STATUS_CONNECTING_TO_SERVER 0x00000004 +#define WINHTTP_CALLBACK_STATUS_CONNECTED_TO_SERVER 0x00000008 +#define WINHTTP_CALLBACK_STATUS_SENDING_REQUEST 0x00000010 +#define WINHTTP_CALLBACK_STATUS_REQUEST_SENT 0x00000020 +#define WINHTTP_CALLBACK_STATUS_RECEIVING_RESPONSE 0x00000040 +#define WINHTTP_CALLBACK_STATUS_RESPONSE_RECEIVED 0x00000080 +#define WINHTTP_CALLBACK_STATUS_CLOSING_CONNECTION 0x00000100 +#define WINHTTP_CALLBACK_STATUS_CONNECTION_CLOSED 0x00000200 +#define WINHTTP_CALLBACK_STATUS_HANDLE_CREATED 0x00000400 +#define WINHTTP_CALLBACK_STATUS_HANDLE_CLOSING 0x00000800 +#define WINHTTP_CALLBACK_STATUS_DETECTING_PROXY 0x00001000 +#define WINHTTP_CALLBACK_STATUS_REDIRECT 0x00004000 +#define WINHTTP_CALLBACK_STATUS_INTERMEDIATE_RESPONSE 0x00008000 +#define WINHTTP_CALLBACK_STATUS_SECURE_FAILURE 0x00010000 +#define WINHTTP_CALLBACK_STATUS_HEADERS_AVAILABLE 0x00020000 +#define WINHTTP_CALLBACK_STATUS_DATA_AVAILABLE 0x00040000 +#define WINHTTP_CALLBACK_STATUS_READ_COMPLETE 0x00080000 +#define WINHTTP_CALLBACK_STATUS_WRITE_COMPLETE 0x00100000 +#define WINHTTP_CALLBACK_STATUS_REQUEST_ERROR 0x00200000 +#define WINHTTP_CALLBACK_STATUS_SENDREQUEST_COMPLETE 0x00400000 +#define WINHTTP_CALLBACK_FLAG_RESOLVE_NAME (WINHTTP_CALLBACK_STATUS_RESOLVING_NAME | WINHTTP_CALLBACK_STATUS_NAME_RESOLVED) +#define WINHTTP_CALLBACK_FLAG_CONNECT_TO_SERVER (WINHTTP_CALLBACK_STATUS_CONNECTING_TO_SERVER | WINHTTP_CALLBACK_STATUS_CONNECTED_TO_SERVER) +#define WINHTTP_CALLBACK_FLAG_SEND_REQUEST (WINHTTP_CALLBACK_STATUS_SENDING_REQUEST | WINHTTP_CALLBACK_STATUS_REQUEST_SENT) +#define WINHTTP_CALLBACK_FLAG_RECEIVE_RESPONSE (WINHTTP_CALLBACK_STATUS_RECEIVING_RESPONSE | WINHTTP_CALLBACK_STATUS_RESPONSE_RECEIVED) +#define WINHTTP_CALLBACK_FLAG_CLOSE_CONNECTION (WINHTTP_CALLBACK_STATUS_CLOSING_CONNECTION | WINHTTP_CALLBACK_STATUS_CONNECTION_CLOSED) +#define WINHTTP_CALLBACK_FLAG_HANDLES (WINHTTP_CALLBACK_STATUS_HANDLE_CREATED | WINHTTP_CALLBACK_STATUS_HANDLE_CLOSING) +#define WINHTTP_CALLBACK_FLAG_DETECTING_PROXY WINHTTP_CALLBACK_STATUS_DETECTING_PROXY +#define WINHTTP_CALLBACK_FLAG_REDIRECT WINHTTP_CALLBACK_STATUS_REDIRECT +#define WINHTTP_CALLBACK_FLAG_INTERMEDIATE_RESPONSE WINHTTP_CALLBACK_STATUS_INTERMEDIATE_RESPONSE +#define WINHTTP_CALLBACK_FLAG_SECURE_FAILURE WINHTTP_CALLBACK_STATUS_SECURE_FAILURE +#define WINHTTP_CALLBACK_FLAG_SENDREQUEST_COMPLETE WINHTTP_CALLBACK_STATUS_SENDREQUEST_COMPLETE +#define WINHTTP_CALLBACK_FLAG_HEADERS_AVAILABLE WINHTTP_CALLBACK_STATUS_HEADERS_AVAILABLE +#define WINHTTP_CALLBACK_FLAG_DATA_AVAILABLE WINHTTP_CALLBACK_STATUS_DATA_AVAILABLE +#define WINHTTP_CALLBACK_FLAG_READ_COMPLETE WINHTTP_CALLBACK_STATUS_READ_COMPLETE +#define WINHTTP_CALLBACK_FLAG_WRITE_COMPLETE WINHTTP_CALLBACK_STATUS_WRITE_COMPLETE +#define WINHTTP_CALLBACK_FLAG_REQUEST_ERROR WINHTTP_CALLBACK_STATUS_REQUEST_ERROR +#define WINHTTP_CALLBACK_FLAG_ALL_COMPLETIONS (WINHTTP_CALLBACK_STATUS_SENDREQUEST_COMPLETE | WINHTTP_CALLBACK_STATUS_HEADERS_AVAILABLE \ + | WINHTTP_CALLBACK_STATUS_DATA_AVAILABLE | WINHTTP_CALLBACK_STATUS_READ_COMPLETE \ + | WINHTTP_CALLBACK_STATUS_WRITE_COMPLETE | WINHTTP_CALLBACK_STATUS_REQUEST_ERROR) +#define WINHTTP_CALLBACK_FLAG_ALL_NOTIFICATIONS 0xffffffff +#define WINHTTP_INVALID_STATUS_CALLBACK ((WINHTTP_STATUS_CALLBACK)(-1)) + +#define API_RECEIVE_RESPONSE (1) +#define API_QUERY_DATA_AVAILABLE (2) +#define API_READ_DATA (3) +#define API_WRITE_DATA (4) +#define API_SEND_REQUEST (5) + +#define WINHTTP_HANDLE_TYPE_SESSION 1 +#define WINHTTP_HANDLE_TYPE_CONNECT 2 +#define WINHTTP_HANDLE_TYPE_REQUEST 3 + +#define WINHTTP_CALLBACK_STATUS_FLAG_CERT_REV_FAILED 0x00000001 +#define WINHTTP_CALLBACK_STATUS_FLAG_INVALID_CERT 0x00000002 +#define WINHTTP_CALLBACK_STATUS_FLAG_CERT_REVOKED 0x00000004 +#define WINHTTP_CALLBACK_STATUS_FLAG_INVALID_CA 0x00000008 +#define WINHTTP_CALLBACK_STATUS_FLAG_CERT_CN_INVALID 0x00000010 +#define WINHTTP_CALLBACK_STATUS_FLAG_CERT_DATE_INVALID 0x00000020 +#define WINHTTP_CALLBACK_STATUS_FLAG_CERT_WRONG_USAGE 0x00000040 +#define WINHTTP_CALLBACK_STATUS_FLAG_SECURITY_CHANNEL_ERROR 0x80000000 + +#define WINHTTP_FLAG_SECURE_PROTOCOL_SSL2 0x00000008 +#define WINHTTP_FLAG_SECURE_PROTOCOL_SSL3 0x00000020 +#define WINHTTP_FLAG_SECURE_PROTOCOL_TLS1 0x00000080 +#define WINHTTP_FLAG_SECURE_PROTOCOL_TLS1_1 0x00000200 +#define WINHTTP_FLAG_SECURE_PROTOCOL_TLS1_2 0x00000800 +#define WINHTTP_FLAG_SECURE_PROTOCOL_ALL (WINHTTP_FLAG_SECURE_PROTOCOL_SSL2 | WINHTTP_FLAG_SECURE_PROTOCOL_SSL3 | WINHTTP_FLAG_SECURE_PROTOCOL_TLS1) + +#define WINHTTP_AUTH_SCHEME_BASIC 0x00000001 +#define WINHTTP_AUTH_SCHEME_NTLM 0x00000002 +#define WINHTTP_AUTH_SCHEME_PASSPORT 0x00000004 +#define WINHTTP_AUTH_SCHEME_DIGEST 0x00000008 +#define WINHTTP_AUTH_SCHEME_NEGOTIATE 0x00000010 + +#define WINHTTP_AUTH_TARGET_SERVER 0x00000000 +#define WINHTTP_AUTH_TARGET_PROXY 0x00000001 + +#define WINHTTP_TIME_FORMAT_BUFSIZE 62 + +typedef struct +{ + DWORD dwStructSize; + LPWSTR lpszScheme; + DWORD dwSchemeLength; + INTERNET_SCHEME nScheme; + LPWSTR lpszHostName; + DWORD dwHostNameLength; + INTERNET_PORT nPort; + LPWSTR lpszUserName; + DWORD dwUserNameLength; + LPWSTR lpszPassword; + DWORD dwPasswordLength; + LPWSTR lpszUrlPath; + DWORD dwUrlPathLength; + LPWSTR lpszExtraInfo; + DWORD dwExtraInfoLength; +} URL_COMPONENTS, *LPURL_COMPONENTS; +typedef URL_COMPONENTS URL_COMPONENTSW; +typedef LPURL_COMPONENTS LPURL_COMPONENTSW; + +typedef struct +{ + DWORD_PTR dwResult; + DWORD dwError; +} WINHTTP_ASYNC_RESULT, *LPWINHTTP_ASYNC_RESULT; + +typedef struct +{ + FILETIME ftExpiry; + FILETIME ftStart; + LPWSTR lpszSubjectInfo; + LPWSTR lpszIssuerInfo; + LPWSTR lpszProtocolName; + LPWSTR lpszSignatureAlgName; + LPWSTR lpszEncryptionAlgName; + DWORD dwKeySize; +} WINHTTP_CERTIFICATE_INFO; + +typedef struct +{ + DWORD dwAccessType; + LPWSTR lpszProxy; + LPWSTR lpszProxyBypass; +} WINHTTP_PROXY_INFO, *LPWINHTTP_PROXY_INFO; +typedef WINHTTP_PROXY_INFO WINHTTP_PROXY_INFOW; +typedef LPWINHTTP_PROXY_INFO LPWINHTTP_PROXY_INFOW; + +typedef struct +{ + BOOL fAutoDetect; + LPWSTR lpszAutoConfigUrl; + LPWSTR lpszProxy; + LPWSTR lpszProxyBypass; +} WINHTTP_CURRENT_USER_IE_PROXY_CONFIG; + +typedef VOID (CALLBACK *WINHTTP_STATUS_CALLBACK)(HINTERNET,DWORD_PTR,DWORD,LPVOID,DWORD); + +#define WINHTTP_AUTO_DETECT_TYPE_DHCP 0x00000001 +#define WINHTTP_AUTO_DETECT_TYPE_DNS_A 0x00000002 + +#define WINHTTP_AUTOPROXY_AUTO_DETECT 0x00000001 +#define WINHTTP_AUTOPROXY_CONFIG_URL 0x00000002 +#define WINHTTP_AUTOPROXY_RUN_INPROCESS 0x00010000 +#define WINHTTP_AUTOPROXY_RUN_OUTPROCESS_ONLY 0x00020000 + +typedef struct +{ + DWORD dwFlags; + DWORD dwAutoDetectFlags; + LPCWSTR lpszAutoConfigUrl; + LPVOID lpvReserved; + DWORD dwReserved; + BOOL fAutoLogonIfChallenged; +} WINHTTP_AUTOPROXY_OPTIONS; + +typedef struct +{ + DWORD dwMajorVersion; + DWORD dwMinorVersion; +} HTTP_VERSION_INFO, *LPHTTP_VERSION_INFO; + +#ifdef _WS2DEF_ +typedef struct +{ + DWORD cbSize; + SOCKADDR_STORAGE LocalAddress; + SOCKADDR_STORAGE RemoteAddress; +} WINHTTP_CONNECTION_INFO; +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +BOOL WINAPI WinHttpAddRequestHeaders(HINTERNET,LPCWSTR,DWORD,DWORD); +BOOL WINAPI WinHttpDetectAutoProxyConfigUrl(DWORD,LPWSTR*); +BOOL WINAPI WinHttpCheckPlatform(void); +BOOL WINAPI WinHttpCloseHandle(HINTERNET); +HINTERNET WINAPI WinHttpConnect(HINTERNET,LPCWSTR,INTERNET_PORT,DWORD); +BOOL WINAPI WinHttpCrackUrl(LPCWSTR,DWORD,DWORD,LPURL_COMPONENTS); +BOOL WINAPI WinHttpCreateUrl(LPURL_COMPONENTS,DWORD,LPWSTR,LPDWORD); +BOOL WINAPI WinHttpGetDefaultProxyConfiguration(WINHTTP_PROXY_INFO*); +BOOL WINAPI WinHttpGetIEProxyConfigForCurrentUser(WINHTTP_CURRENT_USER_IE_PROXY_CONFIG*); +BOOL WINAPI WinHttpGetProxyForUrl(HINTERNET,LPCWSTR,WINHTTP_AUTOPROXY_OPTIONS*,WINHTTP_PROXY_INFO*); +HINTERNET WINAPI WinHttpOpen(LPCWSTR,DWORD,LPCWSTR,LPCWSTR,DWORD); +HINTERNET WINAPI WinHttpOpenRequest(HINTERNET,LPCWSTR,LPCWSTR,LPCWSTR,LPCWSTR,LPCWSTR*,DWORD); +BOOL WINAPI WinHttpQueryAuthParams(HINTERNET,DWORD,LPVOID*); +BOOL WINAPI WinHttpQueryAuthSchemes(HINTERNET,LPDWORD,LPDWORD,LPDWORD); +BOOL WINAPI WinHttpQueryDataAvailable(HINTERNET,LPDWORD); +BOOL WINAPI WinHttpQueryHeaders(HINTERNET,DWORD,LPCWSTR,LPVOID,LPDWORD,LPDWORD); +BOOL WINAPI WinHttpQueryOption(HINTERNET,DWORD,LPVOID,LPDWORD); +BOOL WINAPI WinHttpReadData(HINTERNET,LPVOID,DWORD,LPDWORD); +BOOL WINAPI WinHttpReceiveResponse(HINTERNET,LPVOID); +BOOL WINAPI WinHttpSendRequest(HINTERNET,LPCWSTR,DWORD,LPVOID,DWORD,DWORD,DWORD_PTR); +BOOL WINAPI WinHttpSetDefaultProxyConfiguration(WINHTTP_PROXY_INFO*); +BOOL WINAPI WinHttpSetCredentials(HINTERNET,DWORD,DWORD,LPCWSTR,LPCWSTR,LPVOID); +BOOL WINAPI WinHttpSetOption(HINTERNET,DWORD,LPVOID,DWORD); +WINHTTP_STATUS_CALLBACK WINAPI WinHttpSetStatusCallback(HINTERNET,WINHTTP_STATUS_CALLBACK,DWORD,DWORD_PTR); +BOOL WINAPI WinHttpSetTimeouts(HINTERNET,int,int,int,int); +BOOL WINAPI WinHttpTimeFromSystemTime(const SYSTEMTIME *,LPWSTR); +BOOL WINAPI WinHttpTimeToSystemTime(LPCWSTR,SYSTEMTIME*); +BOOL WINAPI WinHttpWriteData(HINTERNET,LPCVOID,DWORD,LPDWORD); + +#ifdef __cplusplus +} +#endif + +#include + +#endif /* __WINE_WINHTTP_H */ + +#else + +#include_next + +#endif diff --git a/deps/winhttp/winhttp64.def b/deps/winhttp/winhttp64.def new file mode 100644 index 0000000..bfad3a0 --- /dev/null +++ b/deps/winhttp/winhttp64.def @@ -0,0 +1,29 @@ +LIBRARY WINHTTP +EXPORTS +WinHttpAddRequestHeaders +WinHttpCheckPlatform +WinHttpCloseHandle +WinHttpConnect +WinHttpCrackUrl +WinHttpCreateUrl +WinHttpDetectAutoProxyConfigUrl +WinHttpGetDefaultProxyConfiguration +WinHttpGetIEProxyConfigForCurrentUser +WinHttpGetProxyForUrl +WinHttpOpen +WinHttpOpenRequest +WinHttpQueryAuthSchemes +WinHttpQueryDataAvailable +WinHttpQueryHeaders +WinHttpQueryOption +WinHttpReadData +WinHttpReceiveResponse +WinHttpSendRequest +WinHttpSetCredentials +WinHttpSetDefaultProxyConfiguration +WinHttpSetOption +WinHttpSetStatusCallback +WinHttpSetTimeouts +WinHttpTimeFromSystemTime +WinHttpTimeToSystemTime +WinHttpWriteData diff --git a/deps/zlib/adler32.c b/deps/zlib/adler32.c new file mode 100644 index 0000000..d0be438 --- /dev/null +++ b/deps/zlib/adler32.c @@ -0,0 +1,186 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" + +local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); + +#define BASE 65521U /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware -- + try it both ways to see which is faster */ +#ifdef NO_DIVIDE +/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 + (thank you to John Reiser for pointing this out) */ +# define CHOP(a) \ + do { \ + unsigned long tmp = a >> 16; \ + a &= 0xffffUL; \ + a += (tmp << 4) - tmp; \ + } while (0) +# define MOD28(a) \ + do { \ + CHOP(a); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD(a) \ + do { \ + CHOP(a); \ + MOD28(a); \ + } while (0) +# define MOD63(a) \ + do { /* this assumes a is not negative */ \ + z_off64_t tmp = a >> 32; \ + a &= 0xffffffffL; \ + a += (tmp << 8) - (tmp << 5) + tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD28(a) a %= BASE +# define MOD63(a) a %= BASE +#endif + +/* ========================================================================= */ +uLong ZEXPORT adler32_z(adler, buf, len) + uLong adler; + const Bytef *buf; + z_size_t len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD28(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + return adler32_z(adler, buf, len); +} + +/* ========================================================================= */ +local uLong adler32_combine_(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* for negative len, return invalid adler32 as a clue for debugging */ + if (len2 < 0) + return 0xffffffffUL; + + /* the derivation of this formula is left as an exercise for the reader */ + MOD63(len2); /* assumes len2 >= 0 */ + rem = (unsigned)len2; + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); + if (sum2 >= BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} + +uLong ZEXPORT adler32_combine64(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} diff --git a/deps/zlib/crc32.c b/deps/zlib/crc32.c new file mode 100644 index 0000000..9580440 --- /dev/null +++ b/deps/zlib/crc32.c @@ -0,0 +1,442 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Thanks to Rodney Brown for his contribution of faster + * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing + * tables for updating the shift register in one step with three exclusive-ors + * instead of four steps with four exclusive-ors. This results in about a + * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + */ + +/* @(#) $Id$ */ + +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + + DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. + */ + +#ifdef MAKECRCH +# include +# ifndef DYNAMIC_CRC_TABLE +# define DYNAMIC_CRC_TABLE +# endif /* !DYNAMIC_CRC_TABLE */ +#endif /* MAKECRCH */ + +#include "zutil.h" /* for STDC and FAR definitions */ + +/* Definitions for doing the crc four data bytes at a time. */ +#if !defined(NOBYFOUR) && defined(Z_U4) +# define BYFOUR +#endif +#ifdef BYFOUR + local unsigned long crc32_little OF((unsigned long, + const unsigned char FAR *, z_size_t)); + local unsigned long crc32_big OF((unsigned long, + const unsigned char FAR *, z_size_t)); +# define TBLS 8 +#else +# define TBLS 1 +#endif /* BYFOUR */ + +/* Local functions for crc concatenation */ +local unsigned long gf2_matrix_times OF((unsigned long *mat, + unsigned long vec)); +local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); +local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); + + +#ifdef DYNAMIC_CRC_TABLE + +local volatile int crc_table_empty = 1; +local z_crc_t FAR crc_table[TBLS][256]; +local void make_crc_table OF((void)); +#ifdef MAKECRCH + local void write_table OF((FILE *, const z_crc_t FAR *)); +#endif /* MAKECRCH */ +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. + + The first table is simply the CRC of all possible eight bit values. This is + all the information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. The remaining tables + allow for word-at-a-time CRC calculation for both big-endian and little- + endian machines, where a word is four bytes. +*/ +local void make_crc_table() +{ + z_crc_t c; + int n, k; + z_crc_t poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static volatile int first = 1; /* flag to limit concurrent making */ + static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* See if another task is already doing this (not thread-safe, but better + than nothing -- significantly reduces duration of vulnerability in + case the advice about DYNAMIC_CRC_TABLE is ignored) */ + if (first) { + first = 0; + + /* make exclusive-or pattern from polynomial (0xedb88320UL) */ + poly = 0; + for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) + poly |= (z_crc_t)1 << (31 - p[n]); + + /* generate a crc for every 8-bit value */ + for (n = 0; n < 256; n++) { + c = (z_crc_t)n; + for (k = 0; k < 8; k++) + c = c & 1 ? poly ^ (c >> 1) : c >> 1; + crc_table[0][n] = c; + } + +#ifdef BYFOUR + /* generate crc for each value followed by one, two, and three zeros, + and then the byte reversal of those as well as the first table */ + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + crc_table[4][n] = ZSWAP32(c); + for (k = 1; k < 4; k++) { + c = crc_table[0][c & 0xff] ^ (c >> 8); + crc_table[k][n] = c; + crc_table[k + 4][n] = ZSWAP32(c); + } + } +#endif /* BYFOUR */ + + crc_table_empty = 0; + } + else { /* not first */ + /* wait for the other guy to finish (not efficient, but rare) */ + while (crc_table_empty) + ; + } + +#ifdef MAKECRCH + /* write out CRC tables to crc32.h */ + { + FILE *out; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); + fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); + fprintf(out, "local const z_crc_t FAR "); + fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); + write_table(out, crc_table[0]); +# ifdef BYFOUR + fprintf(out, "#ifdef BYFOUR\n"); + for (k = 1; k < 8; k++) { + fprintf(out, " },\n {\n"); + write_table(out, crc_table[k]); + } + fprintf(out, "#endif\n"); +# endif /* BYFOUR */ + fprintf(out, " }\n};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH +local void write_table(out, table) + FILE *out; + const z_crc_t FAR *table; +{ + int n; + + for (n = 0; n < 256; n++) + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", + (unsigned long)(table[n]), + n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); +} +#endif /* MAKECRCH */ + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table(). + */ +#include "crc32.h" +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const z_crc_t FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + return (const z_crc_t FAR *)crc_table; +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + if (buf == Z_NULL) return 0UL; + +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + +#ifdef BYFOUR + if (sizeof(void *) == sizeof(ptrdiff_t)) { + z_crc_t endian; + + endian = 1; + if (*((unsigned char *)(&endian))) + return crc32_little(crc, buf, len); + else + return crc32_big(crc, buf, len); + } +#endif /* BYFOUR */ + crc = crc ^ 0xffffffffUL; + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + uInt len; +{ + return crc32_z(crc, buf, len); +} + +#ifdef BYFOUR + +/* + This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit + integer pointer type. This violates the strict aliasing rule, where a + compiler can assume, for optimization purposes, that two pointers to + fundamentally different types won't ever point to the same memory. This can + manifest as a problem only if one of the pointers is written to. This code + only reads from those pointers. So long as this code remains isolated in + this compilation unit, there won't be a problem. For this reason, this code + should not be copied and pasted into a compilation unit in which other code + writes to the buffer that is passed to these routines. + */ + +/* ========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +/* ========================================================================= */ +local unsigned long crc32_little(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = (z_crc_t)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + +/* ========================================================================= */ +#define DOBIG4 c ^= *buf4++; \ + c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] +#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + +/* ========================================================================= */ +local unsigned long crc32_big(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = ZSWAP32((z_crc_t)crc); + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; + while (len >= 32) { + DOBIG32; + len -= 32; + } + while (len >= 4) { + DOBIG4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + } while (--len); + c = ~c; + return (unsigned long)(ZSWAP32(c)); +} + +#endif /* BYFOUR */ + +#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ + +/* ========================================================================= */ +local unsigned long gf2_matrix_times(mat, vec) + unsigned long *mat; + unsigned long vec; +{ + unsigned long sum; + + sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* ========================================================================= */ +local void gf2_matrix_square(square, mat) + unsigned long *square; + unsigned long *mat; +{ + int n; + + for (n = 0; n < GF2_DIM; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* ========================================================================= */ +local uLong crc32_combine_(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + int n; + unsigned long row; + unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ + unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ + + /* degenerate case (also disallow negative lengths) */ + if (len2 <= 0) + return crc1; + + /* put operator for one zero bit in odd */ + odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ + row = 1; + for (n = 1; n < GF2_DIM; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do { + /* apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + if (len2 == 0) + break; + + /* another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} + +uLong ZEXPORT crc32_combine64(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} diff --git a/deps/zlib/crc32.h b/deps/zlib/crc32.h new file mode 100644 index 0000000..9e0c778 --- /dev/null +++ b/deps/zlib/crc32.h @@ -0,0 +1,441 @@ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +local const z_crc_t FAR crc_table[TBLS][256] = +{ + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +}; diff --git a/deps/zlib/deflate.c b/deps/zlib/deflate.c new file mode 100644 index 0000000..1ec7614 --- /dev/null +++ b/deps/zlib/deflate.c @@ -0,0 +1,2163 @@ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in http://tools.ietf.org/html/rfc1951 + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* @(#) $Id$ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. + */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. */ + +local int deflateStateCheck OF((z_streamp strm)); +local void slide_hash OF((deflate_state *s)); +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +#ifndef FASTEST +local block_state deflate_slow OF((deflate_state *s, int flush)); +#endif +local block_state deflate_rle OF((deflate_state *s, int flush)); +local block_state deflate_huff OF((deflate_state *s, int flush)); +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifdef ASMV +# pragma message("Assembler code may have bugs -- use at your own risk") + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif + +#ifdef ZLIB_DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +#ifdef FASTEST +local const config configuration_table[2] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ +#else +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ +#endif + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ +#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive input + * characters, so that a running hash key can be computed from the previous + * key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to INSERT_STRING are made with consecutive input + * characters and the first MIN_MATCH bytes of str are valid (except for + * the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. + */ +local void slide_hash(s) + deflate_state *s; +{ + unsigned n, m; + Posf *p; + uInt wsize = s->w_size; + + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + } while (--n); + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); +#endif +} + +/* ========================================================================= */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits. + */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; + windowBits = -windowBits; + } +#ifdef GZIP + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + s->status = INIT_STATE; /* to pass state test in deflateReset() */ + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = (uInt)windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = (uInt)memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->high_water = 0; /* nothing written to s->window yet */ + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + s->status = FINISH_STATE; + strm->msg = ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= + * Check for a valid deflate stream state. Return 0 if ok, 1 if not. + */ +local int deflateStateCheck (strm) + z_streamp strm; +{ + deflate_state *s; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + s = strm->state; + if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE && +#ifdef GZIP + s->status != GZIP_STATE && +#endif + s->status != EXTRA_STATE && + s->status != NAME_STATE && + s->status != COMMENT_STATE && + s->status != HCRC_STATE && + s->status != BUSY_STATE && + s->status != FINISH_STATE)) + return 1; + return 0; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt str, n; + int wrap; + unsigned avail; + z_const unsigned char *next; + + if (deflateStateCheck(strm) || dictionary == Z_NULL) + return Z_STREAM_ERROR; + s = strm->state; + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; + + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise */ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; + } + + /* insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); + } + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength) + z_streamp strm; + Bytef *dictionary; + uInt *dictLength; +{ + deflate_state *s; + uInt len; + + if (deflateStateCheck(strm)) + return Z_STREAM_ERROR; + s = strm->state; + len = s->strstart + s->lookahead; + if (len > s->w_size) + len = s->w_size; + if (dictionary != Z_NULL && len) + zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); + if (dictLength != Z_NULL) + *dictLength = len; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateResetKeep (strm) + z_streamp strm; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = +#ifdef GZIP + s->wrap == 2 ? GZIP_STATE : +#endif + s->wrap ? INIT_STATE : BUSY_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetHeader (strm, head) + z_streamp strm; + gz_headerp head; +{ + if (deflateStateCheck(strm) || strm->state->wrap != 2) + return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePending (strm, pending, bits) + unsigned *pending; + int *bits; + z_streamp strm; +{ + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + if (pending != Z_NULL) + *pending = strm->state->pending; + if (bits != Z_NULL) + *bits = strm->state->bi_valid; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePrime (strm, bits, value) + z_streamp strm; + int bits; + int value; +{ + deflate_state *s; + int put; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; + if (put > bits) + put = bits; + s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); + s->bi_valid += put; + _tr_flush_bits(s); + value >>= put; + bits -= put; + } while (bits); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state *s; + compress_func func; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if ((strategy != s->strategy || func != configuration_table[level].func) && + s->high_water) { + /* Flush the last buffer: */ + int err = deflate(strm, Z_BLOCK); + if (err == Z_STREAM_ERROR) + return err; + if (strm->avail_out == 0) + return Z_BUF_ERROR; + } + if (s->level != level) { + if (s->level == 0 && s->matches != 0) { + if (s->matches == 1) + slide_hash(s); + else + CLEAR_HASH(s); + s->matches = 0; + } + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) + z_streamp strm; + int good_length; + int max_lazy; + int nice_length; + int max_chain; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = (uInt)good_length; + s->max_lazy_match = (uInt)max_lazy; + s->nice_match = nice_length; + s->max_chain_length = (uInt)max_chain; + return Z_OK; +} + +/* ========================================================================= + * For the default windowBits of 15 and memLevel of 8, this function returns + * a close to exact, as well as small, upper bound on the compressed size. + * They are coded as constants here for a reason--if the #define's are + * changed, then this function needs to be changed as well. The return + * value for 15 and 8 only works for those exact settings. + * + * For any setting other than those defaults for windowBits and memLevel, + * the value returned is a conservative worst case for the maximum expansion + * resulting from using fixed blocks instead of stored blocks, which deflate + * can emit on compressed data for some combinations of the parameters. + * + * This function could be more sophisticated to provide closer upper bounds for + * every combination of windowBits and memLevel. But even the conservative + * upper bound of about 14% expansion does not seem onerous for output buffer + * allocation. + */ +uLong ZEXPORT deflateBound(strm, sourceLen) + z_streamp strm; + uLong sourceLen; +{ + deflate_state *s; + uLong complen, wraplen; + + /* conservative upper bound for compressed data */ + complen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; + + /* if can't get parameters, return conservative bound plus zlib wrapper */ + if (deflateStateCheck(strm)) + return complen + 6; + + /* compute wrapper length */ + s = strm->state; + switch (s->wrap) { + case 0: /* raw deflate */ + wraplen = 0; + break; + case 1: /* zlib wrapper */ + wraplen = 6 + (s->strstart ? 4 : 0); + break; +#ifdef GZIP + case 2: /* gzip wrapper */ + wraplen = 18; + if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ + Bytef *str; + if (s->gzhead->extra != Z_NULL) + wraplen += 2 + s->gzhead->extra_len; + str = s->gzhead->name; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + str = s->gzhead->comment; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + if (s->gzhead->hcrc) + wraplen += 2; + } + break; +#endif + default: /* for compiler happiness */ + wraplen = 6; + } + + /* if not default parameters, return conservative bound */ + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return complen + wraplen; + + /* default settings: return tight bound for that case */ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13 - 6 + wraplen; +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output, except for + * some deflate_stored() output, goes through this function so some + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). + */ +local void flush_pending(strm) + z_streamp strm; +{ + unsigned len; + deflate_state *s = strm->state; + + _tr_flush_bits(s); + len = s->pending; + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, s->pending_out, len); + strm->next_out += len; + s->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; + } +} + +/* =========================================================================== + * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1]. + */ +#define HCRC_UPDATE(beg) \ + do { \ + if (s->gzhead->hcrc && s->pending > (beg)) \ + strm->adler = crc32(strm->adler, s->pending_buf + (beg), \ + s->pending - (beg)); \ + } while (0) + +/* ========================================================================= */ +int ZEXPORT deflate (strm, flush) + z_streamp strm; + int flush; +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) { + return Z_STREAM_ERROR; + } + s = strm->state; + + if (strm->next_out == Z_NULL || + (strm->avail_in != 0 && strm->next_in == Z_NULL) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + old_flush = s->last_flush; + s->last_flush = flush; + + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUF_ERROR. + */ + } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Write the header */ + if (s->status == INIT_STATE) { + /* zlib header */ + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags; + + if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) + level_flags = 0; + else if (s->level < 6) + level_flags = 1; + else if (s->level == 6) + level_flags = 2; + else + level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = adler32(0L, Z_NULL, 0); + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } +#ifdef GZIP + if (s->status == GZIP_STATE) { + /* gzip header */ + strm->adler = crc32(0L, Z_NULL, 0); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); + if (s->gzhead == Z_NULL) { + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, OS_CODE); + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + else { + put_byte(s, (s->gzhead->text ? 1 : 0) + + (s->gzhead->hcrc ? 2 : 0) + + (s->gzhead->extra == Z_NULL ? 0 : 4) + + (s->gzhead->name == Z_NULL ? 0 : 8) + + (s->gzhead->comment == Z_NULL ? 0 : 16) + ); + put_byte(s, (Byte)(s->gzhead->time & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, s->gzhead->os & 0xff); + if (s->gzhead->extra != Z_NULL) { + put_byte(s, s->gzhead->extra_len & 0xff); + put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); + } + if (s->gzhead->hcrc) + strm->adler = crc32(strm->adler, s->pending_buf, + s->pending); + s->gzindex = 0; + s->status = EXTRA_STATE; + } + } + if (s->status == EXTRA_STATE) { + if (s->gzhead->extra != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex; + while (s->pending + left > s->pending_buf_size) { + uInt copy = s->pending_buf_size - s->pending; + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, copy); + s->pending = s->pending_buf_size; + HCRC_UPDATE(beg); + s->gzindex += copy; + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + left -= copy; + } + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, left); + s->pending += left; + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = NAME_STATE; + } + if (s->status == NAME_STATE) { + if (s->gzhead->name != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + } + val = s->gzhead->name[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = COMMENT_STATE; + } + if (s->status == COMMENT_STATE) { + if (s->gzhead->comment != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + } + val = s->gzhead->comment[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + } + s->status = HCRC_STATE; + } + if (s->status == HCRC_STATE) { + if (s->gzhead->hcrc) { + if (s->pending + 2 > s->pending_buf_size) { + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + strm->adler = crc32(0L, Z_NULL, 0); + } + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } +#endif + + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = s->level == 0 ? deflate_stored(s, flush) : + s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + s->strategy == Z_RLE ? deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). + */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + if (s->lookahead == 0) { + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + + if (flush != Z_FINISH) return Z_OK; + if (s->wrap <= 0) return Z_STREAM_END; + + /* Write the trailer */ +#ifdef GZIP + if (s->wrap == 2) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); + put_byte(s, (Byte)(strm->total_in & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); + } + else +#endif + { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; +} + +/* ========================================================================= */ +int ZEXPORT deflateEnd (strm) + z_streamp strm; +{ + int status; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + + status = strm->state->status; + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); + TRY_FREE(strm, strm->state->window); + + ZFREE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +} + +/* ========================================================================= + * Copy the source state to the destination state. + * To simplify the source, this is not supported for 16-bit MSDOS (which + * doesn't have enough memory anyway to duplicate compression states). + */ +int ZEXPORT deflateCopy (dest, source) + z_streamp dest; + z_streamp source; +{ +#ifdef MAXSEG_64K + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + ushf *overlay; + + + if (deflateStateCheck(source) || dest == Z_NULL) { + return Z_STREAM_ERROR; + } + + ss = source->state; + + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); + ds->strm = dest; + + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); + ds->pending_buf = (uchf *) overlay; + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + deflateEnd (dest); + return Z_MEM_ERROR; + } + /* following zmemcpy do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); + ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif /* MAXSEG_64K */ +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +local unsigned read_buf(strm, buf, size) + z_streamp strm; + Bytef *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + zmemcpy(buf, strm->next_in, len); + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, buf, len); + } +#ifdef GZIP + else if (strm->state->wrap == 2) { + strm->adler = crc32(strm->adler, buf, len); + } +#endif + strm->next_in += len; + strm->total_in += len; + + return len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +local void lm_init (s) + deflate_state *s; +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->insert = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +#ifndef FASTEST +#ifdef ASMV + match_init(); /* initialize the asm code */ +#endif +#endif +} + +#ifndef FASTEST +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +#ifndef ASMV +/* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = (int)s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan+best_len-1); +#else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. + */ + if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2. Note that the checks below + * for insufficient lookahead only occur occasionally for performance + * reasons. Therefore uninitialized memory will be accessed, and + * conditional jumps will be made that depend on those values. + * However the length of the match is limited to the lookahead, so + * the output of deflate is not affected by the uninitialized values. + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match+best_len-1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. + */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + return s->lookahead; +} +#endif /* ASMV */ + +#else /* FASTEST */ + +/* --------------------------------------------------------------------------- + * Optimized version for FASTEST only + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + Assert(cur_match < s->strstart, "no future"); + + match = s->window + cur_match; + + /* Return failure if the match length is less than 2: + */ + if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match += 2; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + + if (len < MIN_MATCH) return MIN_MATCH - 1; + + s->match_start = cur_match; + return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; +} + +#endif /* FASTEST */ + +#ifdef ZLIB_DEBUG + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +/* =========================================================================== + * Check that the match at match_start is indeed a match. + */ +local void check_match(s, start, match, length) + deflate_state *s; + IPos start, match; + int length; +{ + /* check that the match is indeed a match */ + if (zmemcmp(s->window + match, + s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif /* ZLIB_DEBUG */ + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. + * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(s) + deflate_state *s; +{ + unsigned n; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize+MAX_DIST(s)) { + + zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + slide_hash(s); + more += wsize; + } + if (s->strm->avail_in == 0) break; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead + s->insert >= MIN_MATCH) { + uInt str = s->strstart - s->insert; + s->ins_h = s->window[str]; + UPDATE_HASH(s, s->ins_h, s->window[str + 1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + while (s->insert) { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + s->insert--; + if (s->lookahead + s->insert < MIN_MATCH) + break; + } + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); + + /* If the WIN_INIT bytes after the end of the current data have never been + * written, then zero those bytes in order to avoid memory check reports of + * the use of uninitialized (or uninitialised as Julian writes) bytes by + * the longest match routines. Update the high water mark for the next + * time through here. WIN_INIT is set to MAX_MATCH since the longest match + * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. + */ + if (s->high_water < s->window_size) { + ulg curr = s->strstart + (ulg)(s->lookahead); + ulg init; + + if (s->high_water < curr) { + /* Previous high water mark below current data -- zero WIN_INIT + * bytes or up to end of window, whichever is less. + */ + init = s->window_size - curr; + if (init > WIN_INIT) + init = WIN_INIT; + zmemzero(s->window + curr, (unsigned)init); + s->high_water = curr + init; + } + else if (s->high_water < (ulg)curr + WIN_INIT) { + /* High water mark at or above current data, but below current data + * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up + * to end of window, whichever is less. + */ + init = (ulg)curr + WIN_INIT - s->high_water; + if (init > s->window_size - s->high_water) + init = s->window_size - s->high_water; + zmemzero(s->window + s->high_water, (unsigned)init); + s->high_water += init; + } + } + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "not enough room for search"); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, last) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (last)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, last) { \ + FLUSH_BLOCK_ONLY(s, last); \ + if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ +} + +/* Maximum stored block length in deflate format (not including header). */ +#define MAX_STORED 65535 + +/* Minimum of a and b. */ +#define MIN(a, b) ((a) > (b) ? (b) : (a)) + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * + * In case deflateParams() is used to later switch to a non-zero compression + * level, s->matches (otherwise unused when storing) keeps track of the number + * of hash table slides to perform. If s->matches is 1, then one hash table + * slide will be done when switching. If s->matches is 2, the maximum value + * allowed here, then the hash table will be cleared, since two or more slides + * is the same as a clear. + * + * deflate_stored() is written to minimize the number of times an input byte is + * copied. It is most efficient with large input and output buffers, which + * maximizes the opportunites to have a single copy from next_in to next_out. + */ +local block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Smallest worthy block size when not flushing or finishing. By default + * this is 32K. This can be as small as 507 bytes for memLevel == 1. For + * large input and output buffers, the stored block size will be larger. + */ + unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); + + /* Copy as many min_block or larger stored blocks directly to next_out as + * possible. If flushing, copy the remaining available input to next_out as + * stored blocks, if there is enough space. + */ + unsigned len, left, have, last = 0; + unsigned used = s->strm->avail_in; + do { + /* Set len to the maximum size block that we can copy directly with the + * available input data and output space. Set left to how much of that + * would be copied from what's left in the window. + */ + len = MAX_STORED; /* maximum deflate stored block length */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + if (s->strm->avail_out < have) /* need room for header */ + break; + /* maximum stored block length that will fit in avail_out: */ + have = s->strm->avail_out - have; + left = s->strstart - s->block_start; /* bytes left in window */ + if (len > (ulg)left + s->strm->avail_in) + len = left + s->strm->avail_in; /* limit len to the input */ + if (len > have) + len = have; /* limit len to the output */ + + /* If the stored block would be less than min_block in length, or if + * unable to copy all of the available input when flushing, then try + * copying to the window and the pending buffer instead. Also don't + * write an empty block when flushing -- deflate() does that. + */ + if (len < min_block && ((len == 0 && flush != Z_FINISH) || + flush == Z_NO_FLUSH || + len != left + s->strm->avail_in)) + break; + + /* Make a dummy stored block in pending to get the header bytes, + * including any pending bits. This also updates the debugging counts. + */ + last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; + _tr_stored_block(s, (char *)0, 0L, last); + + /* Replace the lengths in the dummy stored block with len. */ + s->pending_buf[s->pending - 4] = len; + s->pending_buf[s->pending - 3] = len >> 8; + s->pending_buf[s->pending - 2] = ~len; + s->pending_buf[s->pending - 1] = ~len >> 8; + + /* Write the stored block header bytes. */ + flush_pending(s->strm); + +#ifdef ZLIB_DEBUG + /* Update debugging counts for the data about to be copied. */ + s->compressed_len += len << 3; + s->bits_sent += len << 3; +#endif + + /* Copy uncompressed bytes from the window to next_out. */ + if (left) { + if (left > len) + left = len; + zmemcpy(s->strm->next_out, s->window + s->block_start, left); + s->strm->next_out += left; + s->strm->avail_out -= left; + s->strm->total_out += left; + s->block_start += left; + len -= left; + } + + /* Copy uncompressed bytes directly from next_in to next_out, updating + * the check value. + */ + if (len) { + read_buf(s->strm, s->strm->next_out, len); + s->strm->next_out += len; + s->strm->avail_out -= len; + s->strm->total_out += len; + } + } while (last == 0); + + /* Update the sliding window with the last s->w_size bytes of the copied + * data, or append all of the copied data to the existing window if less + * than s->w_size bytes were copied. Also update the number of bytes to + * insert in the hash tables, in the event that deflateParams() switches to + * a non-zero compression level. + */ + used -= s->strm->avail_in; /* number of input bytes directly copied */ + if (used) { + /* If any input was used, then no unused input remains in the window, + * therefore s->block_start == s->strstart. + */ + if (used >= s->w_size) { /* supplant the previous history */ + s->matches = 2; /* clear hash */ + zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); + s->strstart = s->w_size; + } + else { + if (s->window_size - s->strstart <= used) { + /* Slide the window down. */ + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + } + zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); + s->strstart += used; + } + s->block_start = s->strstart; + s->insert += MIN(used, s->w_size - s->insert); + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* If the last block was written to next_out, then done. */ + if (last) + return finish_done; + + /* If flushing and all input has been consumed, then done. */ + if (flush != Z_NO_FLUSH && flush != Z_FINISH && + s->strm->avail_in == 0 && (long)s->strstart == s->block_start) + return block_done; + + /* Fill the window with any remaining input. */ + have = s->window_size - s->strstart - 1; + if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { + /* Slide the window down. */ + s->block_start -= s->w_size; + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + have += s->w_size; /* more space now */ + } + if (have > s->strm->avail_in) + have = s->strm->avail_in; + if (have) { + read_buf(s->strm, s->window + s->strstart, have); + s->strstart += have; + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* There was not enough avail_out to write a complete worthy or flushed + * stored block to next_out. Write a stored block to pending instead, if we + * have enough input for a worthy block, or if flushing and there is enough + * room for the remaining input as a stored block in the pending buffer. + */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + /* maximum stored block length that will fit in pending: */ + have = MIN(s->pending_buf_size - have, MAX_STORED); + min_block = MIN(have, s->w_size); + left = s->strstart - s->block_start; + if (left >= min_block || + ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && + s->strm->avail_in == 0 && left <= have)) { + len = MIN(left, have); + last = flush == Z_FINISH && s->strm->avail_in == 0 && + len == left ? 1 : 0; + _tr_stored_block(s, (charf *)s->window + s->block_start, len, last); + s->block_start += len; + flush_pending(s->strm); + } + + /* We've done all we can with the available input and output. */ + return last ? finish_started : need_more; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +local block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +#ifndef FASTEST +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} +#endif /* FASTEST */ + +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) + */ +local block_state deflate_rle(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + uInt prev; /* byte at distance one to match */ + Bytef *scan, *strend; /* scan goes up to strend for length of run */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest run, plus one for the unrolled loop. + */ + if (s->lookahead <= MAX_MATCH) { + fill_window(s); + if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + s->match_length = 0; + if (s->lookahead >= MIN_MATCH && s->strstart > 0) { + scan = s->window + s->strstart - 1; + prev = *scan; + if (prev == *++scan && prev == *++scan && prev == *++scan) { + strend = s->window + s->strstart + MAX_MATCH; + do { + } while (prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + scan < strend); + s->match_length = MAX_MATCH - (uInt)(strend - scan); + if (s->match_length > s->lookahead) + s->match_length = s->lookahead; + } + Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan"); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, s->match_length); + + _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + s->strstart += s->match_length; + s->match_length = 0; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +/* =========================================================================== + * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. + * (It will be regenerated if this run of deflate switches away from Huffman.) + */ +local block_state deflate_huff(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we have a literal to write. */ + if (s->lookahead == 0) { + fill_window(s); + if (s->lookahead == 0) { + if (flush == Z_NO_FLUSH) + return need_more; + break; /* flush the current block */ + } + } + + /* Output a literal byte */ + s->match_length = 0; + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} diff --git a/deps/zlib/deflate.h b/deps/zlib/deflate.h new file mode 100644 index 0000000..23ecdd3 --- /dev/null +++ b/deps/zlib/deflate.h @@ -0,0 +1,349 @@ +/* deflate.h -- internal compression state + * Copyright (C) 1995-2016 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef DEFLATE_H +#define DEFLATE_H + +#include "zutil.h" + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. */ +#ifndef NO_GZIP +# define GZIP +#endif + +/* =========================================================================== + * Internal compression state. + */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define Buf_size 16 +/* size of bit buffer in bi_buf */ + +#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ +#ifdef GZIP +# define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ +#endif +#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ +#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ +#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ +#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ +#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ +#define FINISH_STATE 666 /* stream complete */ +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + const static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. + */ + +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + ulg pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + ulg gzindex; /* where in extra, name, or comment */ + Byte method; /* can only be DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + uInt insert; /* bytes at end of window left to insert */ + +#ifdef ZLIB_DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + + ulg high_water; + /* High water mark offset in window for initialized bytes -- bytes above + * this are set to zero in order to avoid memory check warnings when + * longest match routines access bytes past the input. This is then + * updated to the new high water mark. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. + */ + +#define WIN_INIT MAX_MATCH +/* Number of bytes after end of data in window to initialize in order to avoid + memory checker errors from longest match routines */ + + /* in trees.c */ +void ZLIB_INTERNAL _tr_init OF((deflate_state *s)); +int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); +void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. + */ + +#ifndef ZLIB_DEBUG +/* Inline versions of _tr_tally for speed: */ + +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch ZLIB_INTERNAL _length_code[]; + extern uch ZLIB_INTERNAL _dist_code[]; +#else + extern const uch ZLIB_INTERNAL _length_code[]; + extern const uch ZLIB_INTERNAL _dist_code[]; +#endif + +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->last_lit] = 0; \ + s->l_buf[s->last_lit++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ + s->d_buf[s->last_lit] = dist; \ + s->l_buf[s->last_lit++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +#endif /* DEFLATE_H */ diff --git a/deps/zlib/gzguts.h b/deps/zlib/gzguts.h new file mode 100644 index 0000000..990a4d2 --- /dev/null +++ b/deps/zlib/gzguts.h @@ -0,0 +1,218 @@ +/* gzguts.h -- zlib internal header definitions for gz* operations + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef _LARGEFILE64_SOURCE +# ifndef _LARGEFILE_SOURCE +# define _LARGEFILE_SOURCE 1 +# endif +# ifdef _FILE_OFFSET_BITS +# undef _FILE_OFFSET_BITS +# endif +#endif + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include +#include "zlib.h" +#ifdef STDC +# include +# include +# include +#endif + +#ifndef _POSIX_SOURCE +# define _POSIX_SOURCE +#endif +#include + +#ifdef _WIN32 +# include +#endif + +#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32) +# include +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) +# define WIDECHAR +#endif + +#ifdef WINAPI_FAMILY +# define open _open +# define read _read +# define write _write +# define close _close +#endif + +#ifdef NO_DEFLATE /* for compatibility with old definition */ +# define NO_GZCOMPRESS +#endif + +#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(__CYGWIN__) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#ifndef HAVE_VSNPRINTF +# ifdef MSDOS +/* vsnprintf may exist on some MS-DOS compilers (DJGPP?), + but for now we just assume it doesn't. */ +# define NO_vsnprintf +# endif +# ifdef __TURBOC__ +# define NO_vsnprintf +# endif +# ifdef WIN32 +/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ +# if !defined(vsnprintf) && !defined(NO_vsnprintf) +# if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 ) +# define vsnprintf _vsnprintf +# endif +# endif +# endif +# ifdef __SASC +# define NO_vsnprintf +# endif +# ifdef VMS +# define NO_vsnprintf +# endif +# ifdef __OS400__ +# define NO_vsnprintf +# endif +# ifdef __MVS__ +# define NO_vsnprintf +# endif +#endif + +/* unlike snprintf (which is required in C99), _snprintf does not guarantee + null termination of the result -- however this is only used in gzlib.c where + the result is assured to fit in the space provided */ +#if defined(_MSC_VER) && _MSC_VER < 1900 +# define snprintf _snprintf +#endif + +#ifndef local +# define local static +#endif +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ + +/* gz* functions always use library allocation functions */ +#ifndef STDC + extern voidp malloc OF((uInt size)); + extern void free OF((voidpf ptr)); +#endif + +/* get errno and strerror definition */ +#if defined UNDER_CE +# include +# define zstrerror() gz_strwinerror((DWORD)GetLastError()) +#else +# ifndef NO_STRERROR +# include +# define zstrerror() strerror(errno) +# else +# define zstrerror() "stdio error (consult errno)" +# endif +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); +#endif + +/* default memLevel */ +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif + +/* default i/o buffer size -- double this for output when reading (this and + twice this must be able to fit in an unsigned type) */ +#define GZBUFSIZE 8192 + +/* gzip modes, also provide a little integrity check on the passed structure */ +#define GZ_NONE 0 +#define GZ_READ 7247 +#define GZ_WRITE 31153 +#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ + +/* values for gz_state how */ +#define LOOK 0 /* look for a gzip header */ +#define COPY 1 /* copy input directly */ +#define GZIP 2 /* decompress a gzip stream */ + +/* internal gzip file state data structure */ +typedef struct { + /* exposed contents for gzgetc() macro */ + struct gzFile_s x; /* "x" for exposed */ + /* x.have: number of bytes available at x.next */ + /* x.next: next output data to deliver or write */ + /* x.pos: current position in uncompressed data */ + /* used for both reading and writing */ + int mode; /* see gzip modes above */ + int fd; /* file descriptor */ + char *path; /* path or fd for error messages */ + unsigned size; /* buffer size, zero if not allocated yet */ + unsigned want; /* requested buffer size, default is GZBUFSIZE */ + unsigned char *in; /* input buffer (double-sized when writing) */ + unsigned char *out; /* output buffer (double-sized when reading) */ + int direct; /* 0 if processing gzip, 1 if transparent */ + /* just for reading */ + int how; /* 0: get header, 1: copy, 2: decompress */ + z_off64_t start; /* where the gzip data started, for rewinding */ + int eof; /* true if end of input file reached */ + int past; /* true if read requested past end */ + /* just for writing */ + int level; /* compression level */ + int strategy; /* compression strategy */ + /* seek request */ + z_off64_t skip; /* amount to skip (already rewound if backwards) */ + int seek; /* true if seek request pending */ + /* error information */ + int err; /* error code */ + char *msg; /* error message */ + /* zlib inflate or deflate stream */ + z_stream strm; /* stream structure in-place (not a pointer) */ +} gz_state; +typedef gz_state FAR *gz_statep; + +/* shared functions */ +void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); +#if defined UNDER_CE +char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error)); +#endif + +/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t + value -- needed when comparing unsigned to z_off64_t, which is signed + (possible z_off64_t types off_t, off64_t, and long are all signed) */ +#ifdef INT_MAX +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) +#else +unsigned ZLIB_INTERNAL gz_intmax OF((void)); +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) +#endif diff --git a/deps/zlib/infback.c b/deps/zlib/infback.c new file mode 100644 index 0000000..59679ec --- /dev/null +++ b/deps/zlib/infback.c @@ -0,0 +1,640 @@ +/* infback.c -- inflate using a call-back interface + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + This code is largely copied from inflate.c. Normally either infback.o or + inflate.o would be linked into an application--not both. The interface + with inffast.c is retained so that optimized assembler-coded versions of + inflate_fast() can be used with either inflate.c or infback.c. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. + + windowBits is in the range 8..15, and window is a user-supplied + window and output buffer that is 2**windowBits bytes. + */ +int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) +z_streamp strm; +int windowBits; +unsigned char FAR *window; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL || + windowBits < 8 || windowBits > 15) + return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *)ZALLOC(strm, 1, + sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->dmax = 32768U; + state->wbits = (uInt)windowBits; + state->wsize = 1U << windowBits; + state->window = window; + state->wnext = 0; + state->whave = 0; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +/* Macros for inflateBack(): */ + +/* Load returned state from inflate_fast() */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Set state from registers for inflate_fast() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Assure that some input is available. If input is requested, but denied, + then return a Z_BUF_ERROR from inflateBack(). */ +#define PULL() \ + do { \ + if (have == 0) { \ + have = in(in_desc, &next); \ + if (have == 0) { \ + next = Z_NULL; \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflateBack() + with an error if there is no input available. */ +#define PULLBYTE() \ + do { \ + PULL(); \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflateBack() with + an error. */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Assure that some output space is available, by writing out the window + if it's full. If the write fails, return from inflateBack() with a + Z_BUF_ERROR. */ +#define ROOM() \ + do { \ + if (left == 0) { \ + put = state->window; \ + left = state->wsize; \ + state->whave = left; \ + if (out(out_desc, put, left)) { \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* + strm provides the memory allocation functions and window buffer on input, + and provides information on the unused input on return. For Z_DATA_ERROR + returns, strm will also provide an error message. + + in() and out() are the call-back input and output functions. When + inflateBack() needs more input, it calls in(). When inflateBack() has + filled the window with output, or when it completes with data in the + window, it calls out() to write out the data. The application must not + change the provided input until in() is called again or inflateBack() + returns. The application must not change the window/output buffer until + inflateBack() returns. + + in() and out() are called with a descriptor parameter provided in the + inflateBack() call. This parameter can be a structure that provides the + information required to do the read or write, as well as accumulated + information on the input and output such as totals and check values. + + in() should return zero on failure. out() should return non-zero on + failure. If either in() or out() fails, than inflateBack() returns a + Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it + was in() or out() that caused in the error. Otherwise, inflateBack() + returns Z_STREAM_END on success, Z_DATA_ERROR for an deflate format + error, or Z_MEM_ERROR if it could not allocate memory for the state. + inflateBack() can also return Z_STREAM_ERROR if the input parameters + are not correct, i.e. strm is Z_NULL or the state was not initialized. + */ +int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) +z_streamp strm; +in_func in; +void FAR *in_desc; +out_func out; +void FAR *out_desc; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* Reset the state */ + strm->msg = Z_NULL; + state->mode = TYPE; + state->last = 0; + state->whave = 0; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = state->window; + left = state->wsize; + + /* Inflate until end of block marked as last */ + for (;;) + switch (state->mode) { + case TYPE: + /* determine and dispatch block type */ + if (state->last) { + BYTEBITS(); + state->mode = DONE; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + + /* copy stored block from input to output */ + while (state->length != 0) { + copy = state->length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN; + + case LEN: + /* use inflate_fast() if we have enough input and output */ + if (have >= 6 && left >= 258) { + RESTORE(); + if (state->whave < state->wsize) + state->whave = state->wsize - left; + inflate_fast(strm, state->wsize); + LOAD(); + break; + } + + /* get a literal, length, or end-of-block code */ + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + state->length = (unsigned)here.val; + + /* process literal */ + if (here.op == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + ROOM(); + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + } + + /* process end of block */ + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + + /* invalid code */ + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + + /* get distance code */ + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + + /* get distance extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } + if (state->offset > state->wsize - (state->whave < state->wsize ? + left : 0)) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = state->wsize - state->offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - state->offset; + copy = left; + } + if (copy > state->length) copy = state->length; + state->length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (state->length != 0); + break; + + case DONE: + /* inflate stream terminated properly -- write leftover output */ + ret = Z_STREAM_END; + if (left < state->wsize) { + if (out(out_desc, state->window, state->wsize - left)) + ret = Z_BUF_ERROR; + } + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; + } + + /* Return unused input */ + inf_leave: + strm->next_in = next; + strm->avail_in = have; + return ret; +} + +int ZEXPORT inflateBackEnd(strm) +z_streamp strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} diff --git a/deps/zlib/inffast.c b/deps/zlib/inffast.c new file mode 100644 index 0000000..0dbd1db --- /dev/null +++ b/deps/zlib/inffast.c @@ -0,0 +1,323 @@ +/* inffast.c -- fast decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef ASMINF +# pragma message("Assembler code may have bugs -- use at your own risk") +#else + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. + */ +void ZLIB_INTERNAL inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code here; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in; + last = in + (strm->avail_in - 5); + out = strm->next_out; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + wnext = state->wnext; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = lcode[hold & lmask]; + dolen: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op == 0) { /* literal */ + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + *out++ = (unsigned char)(here.val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + Tracevv((stderr, "inflate: length %u\n", len)); + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = dcode[hold & dmask]; + dodist: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op & 16) { /* distance base */ + dist = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + Tracevv((stderr, "inflate: distance %u\n", dist)); + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (len <= op - whave) { + do { + *out++ = 0; + } while (--len); + continue; + } + len -= op - whave; + do { + *out++ = 0; + } while (--op > whave); + if (op == 0) { + from = out - dist; + do { + *out++ = *from++; + } while (--len); + continue; + } +#endif + } + from = window; + if (wnext == 0) { /* very common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (wnext < op) { /* wrap around window */ + from += wsize + wnext - op; + op -= wnext; + if (op < len) { /* some from end of window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = window; + if (wnext < len) { /* some from start of window */ + op = wnext; + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += wnext - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + else { + from = out - dist; /* copy direct from output */ + do { /* minimum length is three */ + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } while (len > 2); + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + here = dcode[here.val + (hold & ((1U << op) - 1))]; + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + here = lcode[here.val + (hold & ((1U << op) - 1))]; + goto dolen; + } + else if (op & 32) { /* end-of-block */ + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in; + strm->next_out = out; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; + return; +} + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and wnext == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersed it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ diff --git a/deps/zlib/inffast.h b/deps/zlib/inffast.h new file mode 100644 index 0000000..e5c1aa4 --- /dev/null +++ b/deps/zlib/inffast.h @@ -0,0 +1,11 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); diff --git a/deps/zlib/inffixed.h b/deps/zlib/inffixed.h new file mode 100644 index 0000000..d628327 --- /dev/null +++ b/deps/zlib/inffixed.h @@ -0,0 +1,94 @@ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. + */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff --git a/deps/zlib/inflate.c b/deps/zlib/inflate.c new file mode 100644 index 0000000..ac333e8 --- /dev/null +++ b/deps/zlib/inflate.c @@ -0,0 +1,1561 @@ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * Change history: + * + * 1.2.beta0 24 Nov 2002 + * - First version -- complete rewrite of inflate to simplify code, avoid + * creation of window when not needed, minimize use of window when it is + * needed, make inffast.c even faster, implement gzip decoding, and to + * improve code readability and style over the previous zlib inflate code + * + * 1.2.beta1 25 Nov 2002 + * - Use pointers for available input and output checking in inffast.c + * - Remove input and output counters in inffast.c + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 + * - Remove unnecessary second byte pull from length extra in inffast.c + * - Unroll direct copy to three copies per loop in inffast.c + * + * 1.2.beta2 4 Dec 2002 + * - Change external routine names to reduce potential conflicts + * - Correct filename to inffixed.h for fixed tables in inflate.c + * - Make hbuf[] unsigned char to match parameter type in inflate.c + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) + * to avoid negation problem on Alphas (64 bit) in inflate.c + * + * 1.2.beta3 22 Dec 2002 + * - Add comments on state->bits assertion in inffast.c + * - Add comments on op field in inftrees.h + * - Fix bug in reuse of allocated window after inflateReset() + * - Remove bit fields--back to byte structure for speed + * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths + * - Change post-increments to pre-increments in inflate_fast(), PPC biased? + * - Add compile time option, POSTINC, to use post-increments instead (Intel?) + * - Make MATCH copy in inflate() much faster for when inflate_fast() not used + * - Use local copies of stream next and avail values, as well as local bit + * buffer and bit count in inflate()--for speed when inflate_fast() not used + * + * 1.2.beta4 1 Jan 2003 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings + * - Move a comment on output buffer sizes from inffast.c to inflate.c + * - Add comments in inffast.c to introduce the inflate_fast() routine + * - Rearrange window copies in inflate_fast() for speed and simplification + * - Unroll last copy for window match in inflate_fast() + * - Use local copies of window variables in inflate_fast() for speed + * - Pull out common wnext == 0 case for speed in inflate_fast() + * - Make op and len in inflate_fast() unsigned for consistency + * - Add FAR to lcode and dcode declarations in inflate_fast() + * - Simplified bad distance check in inflate_fast() + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new + * source file infback.c to provide a call-back interface to inflate for + * programs like gzip and unzip -- uses window as output buffer to avoid + * window copying + * + * 1.2.beta5 1 Jan 2003 + * - Improved inflateBack() interface to allow the caller to provide initial + * input in strm. + * - Fixed stored blocks bug in inflateBack() + * + * 1.2.beta6 4 Jan 2003 + * - Added comments in inffast.c on effectiveness of POSTINC + * - Typecasting all around to reduce compiler warnings + * - Changed loops from while (1) or do {} while (1) to for (;;), again to + * make compilers happy + * - Changed type of window in inflateBackInit() to unsigned char * + * + * 1.2.beta7 27 Jan 2003 + * - Changed many types to unsigned or unsigned short to avoid warnings + * - Added inflateCopy() function + * + * 1.2.0 9 Mar 2003 + * - Changed inflateBack() interface to provide separate opaque descriptors + * for the in() and out() functions + * - Changed inflateBack() argument and in_func typedef to swap the length + * and buffer address return values for the input function + * - Check next_in and next_out for Z_NULL on entry to inflate() + * + * The history for versions after 1.2.0 are in ChangeLog in zlib distribution. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef MAKEFIXED +# ifndef BUILDFIXED +# define BUILDFIXED +# endif +#endif + +/* function prototypes */ +local int inflateStateCheck OF((z_streamp strm)); +local void fixedtables OF((struct inflate_state FAR *state)); +local int updatewindow OF((z_streamp strm, const unsigned char FAR *end, + unsigned copy)); +#ifdef BUILDFIXED + void makefixed OF((void)); +#endif +local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf, + unsigned len)); + +local int inflateStateCheck(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + state = (struct inflate_state FAR *)strm->state; + if (state == Z_NULL || state->strm != strm || + state->mode < HEAD || state->mode > SYNC) + return 1; + return 0; +} + +int ZEXPORT inflateResetKeep(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = Z_NULL; + if (state->wrap) /* to support ill-conceived Java test suite */ + strm->adler = state->wrap & 1; + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->dmax = 32768U; + state->head = Z_NULL; + state->hold = 0; + state->bits = 0; + state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; + Tracev((stderr, "inflate: reset\n")); + return Z_OK; +} + +int ZEXPORT inflateReset(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + state->wsize = 0; + state->whave = 0; + state->wnext = 0; + return inflateResetKeep(strm); +} + +int ZEXPORT inflateReset2(strm, windowBits) +z_streamp strm; +int windowBits; +{ + int wrap; + struct inflate_state FAR *state; + + /* get the state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + wrap = 0; + windowBits = -windowBits; + } + else { + wrap = (windowBits >> 4) + 5; +#ifdef GUNZIP + if (windowBits < 48) + windowBits &= 15; +#endif + } + + /* set number of window bits, free window if different */ + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { + ZFREE(strm, state->window); + state->window = Z_NULL; + } + + /* update state and reset the rest of it */ + state->wrap = wrap; + state->wbits = (unsigned)windowBits; + return inflateReset(strm); +} + +int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) +z_streamp strm; +int windowBits; +const char *version; +int stream_size; +{ + int ret; + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *) + ZALLOC(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->strm = strm; + state->window = Z_NULL; + state->mode = HEAD; /* to pass state test in inflateReset2() */ + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { + ZFREE(strm, state); + strm->state = Z_NULL; + } + return ret; +} + +int ZEXPORT inflateInit_(strm, version, stream_size) +z_streamp strm; +const char *version; +int stream_size; +{ + return inflateInit2_(strm, DEF_WBITS, version, stream_size); +} + +int ZEXPORT inflatePrime(strm, bits, value) +z_streamp strm; +int bits; +int value; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; + state->bits = 0; + return Z_OK; + } + if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += (unsigned)value << state->bits; + state->bits += (uInt)bits; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +#ifdef MAKEFIXED +#include + +/* + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes + those tables to stdout, which would be piped to inffixed.h. A small program + can simply call makefixed to do this: + + void makefixed(void); + + int main(void) + { + makefixed(); + return 0; + } + + Then that can be linked with zlib built with MAKEFIXED defined and run: + + a.out > inffixed.h + */ +void makefixed() +{ + unsigned low, size; + struct inflate_state state; + + fixedtables(&state); + puts(" /* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by makefixed()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 7) == 0) printf("\n "); + printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op, + state.lencode[low].bits, state.lencode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, + state.distcode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); +} +#endif /* MAKEFIXED */ + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. + + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. + */ +local int updatewindow(strm, end, copy) +z_streamp strm; +const Bytef *end; +unsigned copy; +{ + struct inflate_state FAR *state; + unsigned dist; + + state = (struct inflate_state FAR *)strm->state; + + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC(strm, 1U << state->wbits, + sizeof(unsigned char)); + if (state->window == Z_NULL) return 1; + } + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->wnext = 0; + state->whave = 0; + } + + /* copy state->wsize or less output bytes into the circular window */ + if (copy >= state->wsize) { + zmemcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->wnext; + if (dist > copy) dist = copy; + zmemcpy(state->window + state->wnext, end - copy, dist); + copy -= dist; + if (copy) { + zmemcpy(state->window, end - copy, copy); + state->wnext = copy; + state->whave = state->wsize; + } + else { + state->wnext += dist; + if (state->wnext == state->wsize) state->wnext = 0; + if (state->whave < state->wsize) state->whave += dist; + } + } + return 0; +} + +/* Macros for inflate(): */ + +/* check function to use adler32() for zlib or crc32() for gzip */ +#ifdef GUNZIP +# define UPDATE(check, buf, len) \ + (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) +#else +# define UPDATE(check, buf, len) adler32(check, buf, len) +#endif + +/* check macros for header crc */ +#ifdef GUNZIP +# define CRC2(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + check = crc32(check, hbuf, 2); \ + } while (0) + +# define CRC4(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + hbuf[2] = (unsigned char)((word) >> 16); \ + hbuf[3] = (unsigned char)((word) >> 24); \ + check = crc32(check, hbuf, 4); \ + } while (0) +#endif + +/* Load registers with state in inflate() for speed */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Restore state from registers in inflate() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflate() + if there is no input available. */ +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflate(). */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. + + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. + */ + +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (inflateStateCheck(strm) || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + if (state->wbits == 0) + state->wbits = 15; + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + break; + } + state->flags = 0; /* expect zlib header */ + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ +#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (state->wbits == 0) + state->wbits = len; + if (len > 15 || len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + Tracev((stderr, "inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = EXTRA; + case EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL) { + len = state->head->extra_len - state->length; + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if ((state->wrap & 4) && hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = ZSWAP32(hold); + INITBITS(); + state->mode = DICT; + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + case COPY_: + state->mode = COPY; + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (const code FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + case LEN_: + state->mode = LEN; + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + if (state->mode == TYPE) + state->back = -1; + break; + } + state->back = 0; + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + state->mode = LIT; + break; + } + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->back = -1; + state->mode = TYPE; + break; + } + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(here.op) & 15; + state->mode = LENEXT; + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->was = state->length; + state->mode = DIST; + case DIST: + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; + state->mode = DISTEXT; + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->whave) { + if (state->sane) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + Trace((stderr, "inflate.c too far\n")); + copy -= state->whave; + if (copy > state->length) copy = state->length; + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = 0; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; +#endif + } + if (copy > state->wnext) { + copy -= state->wnext; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->wnext - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if ((state->wrap & 4) && ( +#ifdef GUNZIP + state->flags ? hold : +#endif + ZSWAP32(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if (hold != (state->total & 0xffffffffUL)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength) +z_streamp strm; +Bytef *dictionary; +uInt *dictLength; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, + state->whave - state->wnext); + zmemcpy(dictionary + state->whave - state->wnext, + state->window, state->wnext); + } + if (dictLength != Z_NULL) + *dictLength = state->whave; + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) +z_streamp strm; +const Bytef *dictionary; +uInt dictLength; +{ + struct inflate_state FAR *state; + unsigned long dictid; + int ret; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary identifier */ + if (state->mode == DICT) { + dictid = adler32(0L, Z_NULL, 0); + dictid = adler32(dictid, dictionary, dictLength); + if (dictid != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); + if (ret) { + state->mode = MEM; + return Z_MEM_ERROR; + } + state->havedict = 1; + Tracev((stderr, "inflate: dictionary set\n")); + return Z_OK; +} + +int ZEXPORT inflateGetHeader(strm, head) +z_streamp strm; +gz_headerp head; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; + return Z_OK; +} + +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +local unsigned syncsearch(have, buf, len) +unsigned FAR *have; +const unsigned char FAR *buf; +unsigned len; +{ + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(strm) +z_streamp strm; +{ + unsigned len; /* number of bytes to look at or looked at */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold <<= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + in = strm->total_in; out = strm->total_out; + inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->mode = TYPE; + return Z_OK; +} + +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. + */ +int ZEXPORT inflateSyncPoint(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; +} + +int ZEXPORT inflateCopy(dest, source) +z_streamp dest; +z_streamp source; +{ + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; + + /* check input */ + if (inflateStateCheck(source) || dest == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; + + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + copy->strm = dest; + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; +} + +int ZEXPORT inflateUndermine(strm, subvert) +z_streamp strm; +int subvert; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + state->sane = !subvert; + return Z_OK; +#else + (void)subvert; + state->sane = 1; + return Z_DATA_ERROR; +#endif +} + +int ZEXPORT inflateValidate(strm, check) +z_streamp strm; +int check; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (check) + state->wrap |= 4; + else + state->wrap &= ~4; + return Z_OK; +} + +long ZEXPORT inflateMark(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) + return -(1L << 16); + state = (struct inflate_state FAR *)strm->state; + return (long)(((unsigned long)((long)state->back)) << 16) + + (state->mode == COPY ? state->length : + (state->mode == MATCH ? state->was - state->length : 0)); +} + +unsigned long ZEXPORT inflateCodesUsed(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) return (unsigned long)-1; + state = (struct inflate_state FAR *)strm->state; + return (unsigned long)(state->next - state->codes); +} diff --git a/deps/zlib/inflate.h b/deps/zlib/inflate.h new file mode 100644 index 0000000..a46cce6 --- /dev/null +++ b/deps/zlib/inflate.h @@ -0,0 +1,125 @@ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. */ +#ifndef NO_GZIP +# define GUNZIP +#endif + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD = 16180, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to BAD or MEM on error -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + (raw) -> TYPEDO + Read deflate blocks: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* State maintained between inflate() calls -- approximately 7K bytes, not + including the allocated sliding window, which is up to 32K bytes. */ +struct inflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip, + bit 2 true to validate check value */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags (0 if zlib) */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ +}; diff --git a/deps/zlib/inftrees.c b/deps/zlib/inftrees.c new file mode 100644 index 0000000..2ea08fc --- /dev/null +++ b/deps/zlib/inftrees.c @@ -0,0 +1,304 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#define MAXBITS 15 + +const char inflate_copyright[] = + " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code here; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + unsigned match; /* use base and extra for symbol >= match */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min < max; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. + */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + match = 20; + break; + case LENS: + base = lbase; + extra = lext; + match = 257; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + match = 0; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + here.bits = (unsigned char)(len - drop); + if (work[sym] + 1U < match) { + here.op = (unsigned char)0; + here.val = work[sym]; + } + else if (work[sym] >= match) { + here.op = (unsigned char)(extra[work[sym] - match]); + here.val = base[work[sym] - match]; + } + else { + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = here; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff --git a/deps/zlib/inftrees.h b/deps/zlib/inftrees.h new file mode 100644 index 0000000..baa53a0 --- /dev/null +++ b/deps/zlib/inftrees.h @@ -0,0 +1,62 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of the dynamic table. The maximum number of code structures is + 1444, which is the sum of 852 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribtution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns returns 852, and "enough 30 6 15" for distance codes returns 592. + The initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. */ +#define ENOUGH_LENS 852 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) + +/* Type of code to build for inflate_table() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); diff --git a/deps/zlib/trees.c b/deps/zlib/trees.c new file mode 100644 index 0000000..50cf4b4 --- /dev/null +++ b/deps/zlib/trees.c @@ -0,0 +1,1203 @@ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-2017 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. + * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. + */ + +/* @(#) $Id$ */ + +/* #define GEN_TREES_H */ + +#include "deflate.h" + +#ifdef ZLIB_DEBUG +# include +#endif + +/* =========================================================================== + * Constants + */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +local const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +/* =========================================================================== + * Local data. These are initialized only once. + */ + +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + +local ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +local ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +uch _length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +local int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +local int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +#else +# include "trees.h" +#endif /* GEN_TREES_H */ + +struct static_tree_desc_s { + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +local const static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +local const static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +local const static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file. + */ + +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); +local int detect_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned value, int length)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); + +#ifdef GEN_TREES_H +local void gen_trees_header OF((void)); +#endif + +#ifndef ZLIB_DEBUG +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* !ZLIB_DEBUG */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. + */ +#ifdef ZLIB_DEBUG +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (ush)value << s->bi_valid; + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= (ush)value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !ZLIB_DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = (int)value;\ + s->bi_buf |= (ush)val << s->bi_valid;\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* ZLIB_DEBUG */ + + +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. + */ +local void tr_static_init() +{ +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ +#ifdef NO_INIT_GLOBAL_POINTERS + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; +#endif + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1< dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; + +# ifdef GEN_TREES_H + gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ +} + +/* =========================================================================== + * Genererate the file trees.h describing the static trees. + */ +#ifdef GEN_TREES_H +# ifndef ZLIB_DEBUG +# include +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? ",\n" : ", ")) + +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } + + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, + "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* =========================================================================== + * Initialize the tree data structures for a new zlib stream. + */ +void ZLIB_INTERNAL _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. + */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. + */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (unsigned)(bits + xbits); + if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); + } + if (overflow == 0) return; + + Tracev((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + unsigned code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = (ush)code; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? + s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. + */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. + */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. + */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ + bi_windup(s); /* align on byte boundary */ + put_short(s, (ush)stored_len); + put_short(s, (ush)~stored_len); + zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + s->pending += stored_len; +#ifdef ZLIB_DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; + s->bits_sent += 2*16; + s->bits_sent += stored_len<<3; +#endif +} + +/* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) + */ +void ZLIB_INTERNAL _tr_flush_bits(s) + deflate_state *s; +{ + bi_flush(s); +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + */ +void ZLIB_INTERNAL _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef ZLIB_DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and write out the encoded block. + */ +void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is binary or text */ + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, last); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1)+last, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. + */ + init_block(s); + + if (last) { + bi_windup(s); +#ifdef ZLIB_DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*last)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. + */ +int ZLIB_INTERNAL _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + const ct_data *ltree; /* literal tree */ + const ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= (unsigned)base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, + "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); +} + +/* =========================================================================== + * Check if the data type is TEXT or BINARY, using the following algorithm: + * - TEXT if the two conditions below are satisfied: + * a) There are no non-portable control characters belonging to the + * "black list" (0..6, 14..25, 28..31). + * b) There is at least one printable character belonging to the + * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * - BINARY otherwise. + * - The following partially-portable control characters form a + * "gray list" that is ignored in this detection algorithm: + * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). + * IN assertion: the fields Freq of dyn_ltree are set. + */ +local int detect_data_type(s) + deflate_state *s; +{ + /* black_mask is the bit mask of black-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 + */ + unsigned long black_mask = 0xf3ffc07fUL; + int n; + + /* Check for non-textual ("black-listed") bytes. */ + for (n = 0; n <= 31; n++, black_mask >>= 1) + if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; + + /* Check for textual ("white-listed") bytes. */ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) + if (s->dyn_ltree[n].Freq != 0) + return Z_TEXT; + + /* There are no "black-listed" or "white-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} diff --git a/deps/zlib/trees.h b/deps/zlib/trees.h new file mode 100644 index 0000000..d35639d --- /dev/null +++ b/deps/zlib/trees.h @@ -0,0 +1,128 @@ +/* header created automatically with -DGEN_TREES_H */ + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + diff --git a/deps/zlib/zconf.h b/deps/zlib/zconf.h new file mode 100644 index 0000000..5e1d68a --- /dev/null +++ b/deps/zlib/zconf.h @@ -0,0 +1,534 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". + */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# define adler32_z z_adler32_z +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define crc32_z z_crc32_z +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateGetDictionary z_deflateGetDictionary +# define deflateInit z_deflateInit +# define deflateInit2 z_deflateInit2 +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzfread z_gzfread +# define gzfwrite z_gzfwrite +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzvprintf z_gzvprintf +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetDictionary z_inflateGetDictionary +# define inflateGetHeader z_inflateGetHeader +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateValidate z_inflateValidate +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# define uncompress2 z_uncompress2 +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +#ifdef Z_SOLO + typedef unsigned long z_size_t; +#else +# define z_longlong long long +# if defined(NO_SIZE_T) + typedef unsigned NO_SIZE_T z_size_t; +# elif defined(STDC) +# include + typedef size_t z_size_t; +# else + typedef unsigned long z_size_t; +# endif +# undef z_longlong +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#ifdef HAVE_STDARG_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include /* for wchar_t */ +# endif +#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H) +# define Z_HAVE_UNISTD_H +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE) +# include /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. */ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/deps/zlib/zlib.h b/deps/zlib/zlib.h new file mode 100644 index 0000000..f09cdaf --- /dev/null +++ b/deps/zlib/zlib.h @@ -0,0 +1,1912 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.11" +#define ZLIB_VERNUM 0x12b0 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 2 +#define ZLIB_VER_REVISION 11 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. +*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. + */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field for deflate() */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more ouput + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, and avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by the + caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similary, if dictLength is Z_NULL, then it is not set. + + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if any input has been consumed in a previous + deflate() call, then the input available so far is compressed with the old + level and strategy using deflate(strm, Z_BLOCK). There are three approaches + for the compression levels 0, 1..3, and 4..9 respectively. The new level + and strategy will take effect at the next call of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. + + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the the data compressed after deflateParams(). + + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm, + unsigned *pending, + int *bits)); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having being consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see + below), inflate() will not automatically decode concatenated gzip streams. + inflate() will return Z_STREAM_END at the end of the gzip stream. The state + would need to be reset to continue decoding a subsequent gzip stream. + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similary, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm, + int windowBits)); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm)); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above, or -65536 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, + z_const unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the default + behavior of inflate(), which expects a zlib header and trailer around the + deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero -- buf is ignored in that + case -- and inflateBack() will return a buffer error. inflateBack() will + call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. + out() should return zero on success, or non-zero on failure. If out() + returns non-zero, inflateBack() will return with an error. Neither in() nor + out() are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() is passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero. (in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options. + + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: ZLIB_DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. compress() is equivalent to compress2() with a level + parameter of Z_DEFAULT_COMPRESSION. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed data. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + +ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong *sourceLen)); +/* + Same as uncompress, except that sourceLen is a pointer, where the + length of the source is *sourceLen. On return, *sourceLen is the number of + source bytes consumed. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); + + Opens a gzip (.gz) file for reading or writing. The mode parameter is as + in fopen ("rb" or "wb") but can also include a compression level ("wb9") or + a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only + compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' + for fixed code compression as in "wb9F". (See the description of + deflateInit2 for more information about the strategy parameter.) 'T' will + request transparent writing or appending with no compression and not using + the gzip format. + + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen associates a gzFile with the file descriptor fd. File descriptors + are obtained from calls like open, dup, creat, pipe or fileno (if the file + has been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); +/* + Set the internal buffer size used by this library's functions. The + default buffer size is 8192 bytes. This function must be called after + gzopen() or gzdopen(), and before any other calls that read or write the + file. The buffer memory allocation is always deferred to the first read or + write. Three times that size in buffer space is allocated. A larger buffer + size of, for example, 64K or 128K bytes will noticeably increase the speed + of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. Previously provided + data is flushed before the parameter change. + + gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not + opened for writing, Z_ERRNO if there is an error writing the flushed data, + or Z_MEM_ERROR if there is a memory allocation error. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. If len is too large to fit in an int, + then nothing is read, -1 is returned, and the error state is set to + Z_STREAM_ERROR. +*/ + +ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems, + gzFile file)); +/* + Read up to nitems items of size size from file to buf, otherwise operating + as gzread() does. This duplicates the interface of stdio's fread(), with + size_t request and return types. If the library defines size_t, then + z_size_t is identical to size_t. If not, then z_size_t is an unsigned + integer type that can contain a pointer. + + gzfread() returns the number of full items read of size size, or zero if + the end of the file was reached and a full item could not be read, or if + there was an error. gzerror() must be consulted if zero is returned in + order to determine if there was an error. If the multiplication of size and + nitems overflows, i.e. the product does not fit in a z_size_t, then nothing + is read, zero is returned, and the error state is set to Z_STREAM_ERROR. + + In the event that the end of file is reached and only a partial item is + available at the end, i.e. the remaining uncompressed data length is not a + multiple of size, then the final partial item is nevetheless read into buf + and the end-of-file flag is set. The length of the partial item read is not + provided, but could be inferred from the result of gztell(). This behavior + is the same as the behavior of fread() implementations in common libraries, + but it prevents the direct use of gzfread() to read a concurrently written + file, reseting and retrying on end-of-file, when size is not 1. +*/ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes written or 0 in case of + error. +*/ + +ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size, + z_size_t nitems, gzFile file)); +/* + gzfwrite() writes nitems items of size size from buf to file, duplicating + the interface of stdio's fwrite(), with size_t request and return types. If + the library defines size_t, then z_size_t is identical to size_t. If not, + then z_size_t is an unsigned integer type that can contain a pointer. + + gzfwrite() returns the number of full items written of size size, or zero + if there was an error. If the multiplication of size and nitems overflows, + i.e. the product does not fit in a z_size_t, then nothing is written, zero + is returned, and the error state is set to Z_STREAM_ERROR. +*/ + +ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the arguments to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or a negative zlib error code in case + of error. The number of uncompressed bytes written is limited to 8191, or + one less than the buffer size given to gzbuffer(). The caller should assure + that this limit is not exceeded. If it is exceeded, then gzprintf() will + return an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf() + because the secure snprintf() or vsnprintf() functions were not available. + This can be determined using zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or a + newline character is read and transferred to buf, or an end-of-file + condition is encountered. If any characters are read or if len == 1, the + string is terminated with a null character. If no characters are read due + to an end-of-file or len < 1, then the buffer is left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read as the first character + on the next read. At least one character of push-back is allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. The parameter flush + is as in the deflate() function. The return value is the zlib error number + (see function gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); + + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); + + Returns the starting position for the next gzread or gzwrite on the given + compressed file. This position represents a number of bytes in the + uncompressed data stream, and is zero when starting, even if appending or + reading a gzip stream from the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file)); + + Returns the current offset in the file being read or written. This offset + includes the count of bytes that precede the gzip stream, for example when + appending or when using gzdopen() for reading. When reading, the offset + does not include as yet unused buffered input. This information can be used + for a progress indicator. On error, gzoffset() returns -1. +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns true (1) if the end-of-file indicator has been set while reading, + false (0) otherwise. Note that the end-of-file indicator is set only if the + read tried to go past the end of the input, but came up short. Therefore, + just like feof(), gzeof() may return false even if there is no more data to + read, in the event that the last read request was for the exact number of + bytes remaining in the input file. This will happen if the input file size + is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file and + deallocates the (de)compression state. Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. + + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r OF((gzFile file)); +ZEXTERN int ZEXPORT gzclose_w OF((gzFile file)); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the given + compressed file. errnum is set to zlib error number. If an error occurred + in the file system and not in the compression library, errnum is set to + Z_ERRNO and the application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is Z_NULL, this function returns the + required initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as adler32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is Z_NULL, this function returns the required + initial value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as crc32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. +*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#ifdef Z_PREFIX_SET +# define z_deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + +#endif /* !Z_SOLO */ + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp)); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void)); +ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int)); +ZEXTERN int ZEXPORT inflateValidate OF((z_streamp, int)); +ZEXTERN unsigned long ZEXPORT inflateCodesUsed OF ((z_streamp)); +ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); +ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); +#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, + const char *mode)); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file, + const char *format, + va_list va)); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff --git a/deps/zlib/zutil.c b/deps/zlib/zutil.c new file mode 100644 index 0000000..a76c6b0 --- /dev/null +++ b/deps/zlib/zutil.c @@ -0,0 +1,325 @@ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995-2017 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" +#ifndef Z_SOLO +# include "gzguts.h" +#endif + +z_const char * const z_errmsg[10] = { + (z_const char *)"need dictionary", /* Z_NEED_DICT 2 */ + (z_const char *)"stream end", /* Z_STREAM_END 1 */ + (z_const char *)"", /* Z_OK 0 */ + (z_const char *)"file error", /* Z_ERRNO (-1) */ + (z_const char *)"stream error", /* Z_STREAM_ERROR (-2) */ + (z_const char *)"data error", /* Z_DATA_ERROR (-3) */ + (z_const char *)"insufficient memory", /* Z_MEM_ERROR (-4) */ + (z_const char *)"buffer error", /* Z_BUF_ERROR (-5) */ + (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */ + (z_const char *)"" +}; + + +const char * ZEXPORT zlibVersion() +{ + return ZLIB_VERSION; +} + +uLong ZEXPORT zlibCompileFlags() +{ + uLong flags; + + flags = 0; + switch ((int)(sizeof(uInt))) { + case 2: break; + case 4: flags += 1; break; + case 8: flags += 2; break; + default: flags += 3; + } + switch ((int)(sizeof(uLong))) { + case 2: break; + case 4: flags += 1 << 2; break; + case 8: flags += 2 << 2; break; + default: flags += 3 << 2; + } + switch ((int)(sizeof(voidpf))) { + case 2: break; + case 4: flags += 1 << 4; break; + case 8: flags += 2 << 4; break; + default: flags += 3 << 4; + } + switch ((int)(sizeof(z_off_t))) { + case 2: break; + case 4: flags += 1 << 6; break; + case 8: flags += 2 << 6; break; + default: flags += 3 << 6; + } +#ifdef ZLIB_DEBUG + flags += 1 << 8; +#endif +#if defined(ASMV) || defined(ASMINF) + flags += 1 << 9; +#endif +#ifdef ZLIB_WINAPI + flags += 1 << 10; +#endif +#ifdef BUILDFIXED + flags += 1 << 12; +#endif +#ifdef DYNAMIC_CRC_TABLE + flags += 1 << 13; +#endif +#ifdef NO_GZCOMPRESS + flags += 1L << 16; +#endif +#ifdef NO_GZIP + flags += 1L << 17; +#endif +#ifdef PKZIP_BUG_WORKAROUND + flags += 1L << 20; +#endif +#ifdef FASTEST + flags += 1L << 21; +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifdef NO_vsnprintf + flags += 1L << 25; +# ifdef HAS_vsprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_vsnprintf_void + flags += 1L << 26; +# endif +# endif +#else + flags += 1L << 24; +# ifdef NO_snprintf + flags += 1L << 25; +# ifdef HAS_sprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_snprintf_void + flags += 1L << 26; +# endif +# endif +#endif + return flags; +} + +#ifdef ZLIB_DEBUG +#include +# ifndef verbose +# define verbose 0 +# endif +int ZLIB_INTERNAL z_verbose = verbose; + +void ZLIB_INTERNAL z_error (m) + char *m; +{ + fprintf(stderr, "%s\n", m); + exit(1); +} +#endif + +/* exported to allow conversion of error code to string for compress() and + * uncompress() + */ +const char * ZEXPORT zError(err) + int err; +{ + return ERR_MSG(err); +} + +#if defined(_WIN32_WCE) + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. + */ + int errno = 0; +#endif + +#ifndef HAVE_MEMCPY + +void ZLIB_INTERNAL zmemcpy(dest, source, len) + Bytef* dest; + const Bytef* source; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = *source++; /* ??? to be unrolled */ + } while (--len != 0); +} + +int ZLIB_INTERNAL zmemcmp(s1, s2, len) + const Bytef* s1; + const Bytef* s2; + uInt len; +{ + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; + } + return 0; +} + +void ZLIB_INTERNAL zmemzero(dest, len) + Bytef* dest; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = 0; /* ??? to be unrolled */ + } while (--len != 0); +} +#endif + +#ifndef Z_SOLO + +#ifdef SYS16BIT + +#ifdef __TURBOC__ +/* Turbo C in 16-bit mode */ + +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; + +typedef struct ptr_table_s { + voidpf org_ptr; + voidpf new_ptr; +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + voidpf buf; + ulg bsize = (ulg)items*size; + + (void)opaque; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized. + */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + int n; + + (void)opaque; + + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; + } + next_ptr--; + return; + } + Assert(0, "zcfree: ptr not found"); +} + +#endif /* __TURBOC__ */ + + +#ifdef M_I86 +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size) +{ + (void)opaque; + return _halloc((long)items, size); +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + (void)opaque; + _hfree(ptr); +} + +#endif /* M_I86 */ + +#endif /* SYS16BIT */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp malloc OF((uInt size)); +extern voidp calloc OF((uInt items, uInt size)); +extern void free OF((voidpf ptr)); +#endif + +voidpf ZLIB_INTERNAL zcalloc (opaque, items, size) + voidpf opaque; + unsigned items; + unsigned size; +{ + (void)opaque; + return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) : + (voidpf)calloc(items, size); +} + +void ZLIB_INTERNAL zcfree (opaque, ptr) + voidpf opaque; + voidpf ptr; +{ + (void)opaque; + free(ptr); +} + +#endif /* MY_ZCALLOC */ + +#endif /* !Z_SOLO */ diff --git a/deps/zlib/zutil.h b/deps/zlib/zutil.h new file mode 100644 index 0000000..b079ea6 --- /dev/null +++ b/deps/zlib/zutil.h @@ -0,0 +1,271 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef ZUTIL_H +#define ZUTIL_H + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include "zlib.h" + +#if defined(STDC) && !defined(Z_SOLO) +# if !(defined(_WIN32_WCE) && defined(_MSC_VER)) +# include +# endif +# include +# include +#endif + +#ifdef Z_SOLO + typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ +#endif + +#ifndef local +# define local static +#endif +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) +# define OS_CODE 0x00 +# ifndef Z_SOLO +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include +# endif +# else /* MSC or DJGPP */ +# include +# endif +# endif +#endif + +#ifdef AMIGA +# define OS_CODE 1 +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 2 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#ifdef __370__ +# if __TARGET_LIB__ < 0x20000000 +# define OS_CODE 4 +# elif __TARGET_LIB__ < 0x40000000 +# define OS_CODE 11 +# else +# define OS_CODE 8 +# endif +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 5 +#endif + +#ifdef OS2 +# define OS_CODE 6 +# if defined(M_I86) && !defined(Z_SOLO) +# include +# endif +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 7 +# ifndef Z_SOLO +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +# endif +#endif + +#ifdef __acorn +# define OS_CODE 13 +#endif + +#if defined(WIN32) && !defined(__CYGWIN__) +# define OS_CODE 10 +#endif + +#ifdef _BEOS_ +# define OS_CODE 16 +#endif + +#ifdef __TOS_OS400__ +# define OS_CODE 18 +#endif + +#ifdef __APPLE__ +# define OS_CODE 19 +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX +# if defined(_WIN32_WCE) +# define fdopen(fd,mode) NULL /* No fdopen() */ +# ifndef _PTRDIFF_T_DEFINED + typedef int ptrdiff_t; +# define _PTRDIFF_T_DEFINED +# endif +# else +# define fdopen(fd,type) _fdopen(fd,type) +# endif +#endif + +#if defined(__BORLANDC__) && !defined(MSDOS) + #pragma warn -8004 + #pragma warn -8008 + #pragma warn -8066 +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_WIN32) && \ + (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +#endif + + /* common defaults */ + +#ifndef OS_CODE +# define OS_CODE 3 /* assume Unix */ +#endif + +#ifndef F_OPEN +# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#if defined(pyr) || defined(Z_SOLO) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. + */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else + void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions */ +#ifdef ZLIB_DEBUG +# include + extern int ZLIB_INTERNAL z_verbose; + extern void ZLIB_INTERNAL z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + +#ifndef Z_SOLO + voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items, + unsigned size)); + void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr)); +#endif + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +/* Reverse the bytes in a 32-bit value */ +#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + +#endif /* ZUTIL_H */ diff --git a/examples/.gitignore b/examples/.gitignore new file mode 100644 index 0000000..0e49159 --- /dev/null +++ b/examples/.gitignore @@ -0,0 +1,15 @@ +general +showindex +diff +rev-list +blame +cat-file +init +log +rev-parse +remote +status +tag +for-each-ref +describe +*.dSYM diff --git a/examples/network/.gitignore b/examples/network/.gitignore new file mode 100644 index 0000000..1b48e66 --- /dev/null +++ b/examples/network/.gitignore @@ -0,0 +1 @@ +/git2 diff --git a/tests/resources/describe/.gitted/refs/tags/A b/tests/resources/describe/.gitted/refs/tags/A new file mode 100644 index 0000000..aced4fd --- /dev/null +++ b/tests/resources/describe/.gitted/refs/tags/A @@ -0,0 +1 @@ +aaddd4f14847e0e323924ec262c2343249a84f8b diff --git a/tests/resources/describe/.gitted/refs/tags/B b/tests/resources/describe/.gitted/refs/tags/B new file mode 100644 index 0000000..ab1a5e6 --- /dev/null +++ b/tests/resources/describe/.gitted/refs/tags/B @@ -0,0 +1 @@ +52912fbab0715dec53d43053966e78ad213ba359 diff --git a/tests/resources/describe/.gitted/refs/tags/D b/tests/resources/describe/.gitted/refs/tags/D new file mode 100644 index 0000000..90f4208 --- /dev/null +++ b/tests/resources/describe/.gitted/refs/tags/D @@ -0,0 +1 @@ +10bd08b099ecb79184c60183f5c94ca915f427ad diff --git a/tests/resources/describe/.gitted/refs/tags/R b/tests/resources/describe/.gitted/refs/tags/R new file mode 100644 index 0000000..ef04b7c --- /dev/null +++ b/tests/resources/describe/.gitted/refs/tags/R @@ -0,0 +1 @@ +680166b6cd31f76354fee2572618e6b0142d05e6 diff --git a/tests/resources/describe/.gitted/refs/tags/c b/tests/resources/describe/.gitted/refs/tags/c new file mode 100644 index 0000000..650d82f --- /dev/null +++ b/tests/resources/describe/.gitted/refs/tags/c @@ -0,0 +1 @@ +6126a5f9c57ebc81e64370ec3095184ad92dab1c diff --git a/tests/resources/describe/.gitted/refs/tags/e b/tests/resources/describe/.gitted/refs/tags/e new file mode 100644 index 0000000..5e88d6f --- /dev/null +++ b/tests/resources/describe/.gitted/refs/tags/e @@ -0,0 +1 @@ +1e016431ec7b22dd3e23f3e6f5f68f358f9227cf diff --git a/tests/resources/empty_bare.git/refs/tags/dummy-marker.txt b/tests/resources/empty_bare.git/refs/tags/dummy-marker.txt new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/resources/empty_bare.git/refs/tags/dummy-marker.txt diff --git a/tests/resources/empty_standard_repo/.gitted/refs/tags/dummy-marker.txt b/tests/resources/empty_standard_repo/.gitted/refs/tags/dummy-marker.txt new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/resources/empty_standard_repo/.gitted/refs/tags/dummy-marker.txt diff --git a/tests/resources/push_src/.gitted/refs/tags/tag-blob b/tests/resources/push_src/.gitted/refs/tags/tag-blob new file mode 100644 index 0000000..abfebf2 --- /dev/null +++ b/tests/resources/push_src/.gitted/refs/tags/tag-blob @@ -0,0 +1 @@ +b483ae7ba66decee9aee971f501221dea84b1498 diff --git a/tests/resources/push_src/.gitted/refs/tags/tag-commit b/tests/resources/push_src/.gitted/refs/tags/tag-commit new file mode 100644 index 0000000..c023b84 --- /dev/null +++ b/tests/resources/push_src/.gitted/refs/tags/tag-commit @@ -0,0 +1 @@ +805c54522e614f29f70d2413a0470247d8b424ac diff --git a/tests/resources/push_src/.gitted/refs/tags/tag-commit-two b/tests/resources/push_src/.gitted/refs/tags/tag-commit-two new file mode 100644 index 0000000..abb3650 --- /dev/null +++ b/tests/resources/push_src/.gitted/refs/tags/tag-commit-two @@ -0,0 +1 @@ +36f79b2846017d3761e0a02d0bccd573e0f90c57 diff --git a/tests/resources/push_src/.gitted/refs/tags/tag-lightweight b/tests/resources/push_src/.gitted/refs/tags/tag-lightweight new file mode 100644 index 0000000..711e466 --- /dev/null +++ b/tests/resources/push_src/.gitted/refs/tags/tag-lightweight @@ -0,0 +1 @@ +951bbbb90e2259a4c8950db78946784fb53fcbce diff --git a/tests/resources/push_src/.gitted/refs/tags/tag-tag b/tests/resources/push_src/.gitted/refs/tags/tag-tag new file mode 100644 index 0000000..d6cd748 --- /dev/null +++ b/tests/resources/push_src/.gitted/refs/tags/tag-tag @@ -0,0 +1 @@ +eea4f2705eeec2db3813f2430829afce99cd00b5 diff --git a/tests/resources/push_src/.gitted/refs/tags/tag-tree b/tests/resources/push_src/.gitted/refs/tags/tag-tree new file mode 100644 index 0000000..7a530d3 --- /dev/null +++ b/tests/resources/push_src/.gitted/refs/tags/tag-tree @@ -0,0 +1 @@ +ff83aa4c5e5d28e3bcba2f5c6e2adc61286a4e5e diff --git a/tests/resources/testrepo.git/refs/tags/annotated_tag_to_blob b/tests/resources/testrepo.git/refs/tags/annotated_tag_to_blob new file mode 100644 index 0000000..6c146d6 --- /dev/null +++ b/tests/resources/testrepo.git/refs/tags/annotated_tag_to_blob @@ -0,0 +1 @@ +521d87c1ec3aef9824daf6d96cc0ae3710766d91 diff --git a/tests/resources/testrepo.git/refs/tags/e90810b b/tests/resources/testrepo.git/refs/tags/e90810b new file mode 100644 index 0000000..584495d --- /dev/null +++ b/tests/resources/testrepo.git/refs/tags/e90810b @@ -0,0 +1 @@ +7b4384978d2493e851f9cca7858815fac9b10980 diff --git a/tests/resources/testrepo.git/refs/tags/hard_tag b/tests/resources/testrepo.git/refs/tags/hard_tag new file mode 100644 index 0000000..59ce656 --- /dev/null +++ b/tests/resources/testrepo.git/refs/tags/hard_tag @@ -0,0 +1 @@ +849a5e34a26815e821f865b8479f5815a47af0fe diff --git a/tests/resources/testrepo.git/refs/tags/point_to_blob b/tests/resources/testrepo.git/refs/tags/point_to_blob new file mode 100644 index 0000000..f874a3f --- /dev/null +++ b/tests/resources/testrepo.git/refs/tags/point_to_blob @@ -0,0 +1 @@ +1385f264afb75a56a5bec74243be9b367ba4ca08 diff --git a/tests/resources/testrepo.git/refs/tags/taggerless b/tests/resources/testrepo.git/refs/tags/taggerless new file mode 100644 index 0000000..f960c7d --- /dev/null +++ b/tests/resources/testrepo.git/refs/tags/taggerless @@ -0,0 +1 @@ +4a23e2e65ad4e31c4c9db7dc746650bfad082679 diff --git a/tests/resources/testrepo.git/refs/tags/test b/tests/resources/testrepo.git/refs/tags/test new file mode 100644 index 0000000..6ee952a --- /dev/null +++ b/tests/resources/testrepo.git/refs/tags/test @@ -0,0 +1 @@ +b25fa35b38051e4ae45d4222e795f9df2e43f1d1 diff --git a/tests/resources/testrepo.git/refs/tags/wrapped_tag b/tests/resources/testrepo.git/refs/tags/wrapped_tag new file mode 100644 index 0000000..59ce656 --- /dev/null +++ b/tests/resources/testrepo.git/refs/tags/wrapped_tag @@ -0,0 +1 @@ +849a5e34a26815e821f865b8479f5815a47af0fe diff --git a/tests/resources/testrepo/.gitted/refs/tags/e90810b b/tests/resources/testrepo/.gitted/refs/tags/e90810b new file mode 100644 index 0000000..584495d --- /dev/null +++ b/tests/resources/testrepo/.gitted/refs/tags/e90810b @@ -0,0 +1 @@ +7b4384978d2493e851f9cca7858815fac9b10980 diff --git a/tests/resources/testrepo/.gitted/refs/tags/foo/bar b/tests/resources/testrepo/.gitted/refs/tags/foo/bar new file mode 100644 index 0000000..6ee952a --- /dev/null +++ b/tests/resources/testrepo/.gitted/refs/tags/foo/bar @@ -0,0 +1 @@ +b25fa35b38051e4ae45d4222e795f9df2e43f1d1 diff --git a/tests/resources/testrepo/.gitted/refs/tags/foo/foo/bar b/tests/resources/testrepo/.gitted/refs/tags/foo/foo/bar new file mode 100644 index 0000000..6ee952a --- /dev/null +++ b/tests/resources/testrepo/.gitted/refs/tags/foo/foo/bar @@ -0,0 +1 @@ +b25fa35b38051e4ae45d4222e795f9df2e43f1d1 diff --git a/tests/resources/testrepo/.gitted/refs/tags/point_to_blob b/tests/resources/testrepo/.gitted/refs/tags/point_to_blob new file mode 100644 index 0000000..f874a3f --- /dev/null +++ b/tests/resources/testrepo/.gitted/refs/tags/point_to_blob @@ -0,0 +1 @@ +1385f264afb75a56a5bec74243be9b367ba4ca08 diff --git a/tests/resources/testrepo/.gitted/refs/tags/test b/tests/resources/testrepo/.gitted/refs/tags/test new file mode 100644 index 0000000..6ee952a --- /dev/null +++ b/tests/resources/testrepo/.gitted/refs/tags/test @@ -0,0 +1 @@ +b25fa35b38051e4ae45d4222e795f9df2e43f1d1