|
Packit |
54873f |
/*
|
|
Packit |
54873f |
* Copyright (c) 2007-2012, Novell Inc.
|
|
Packit |
54873f |
*
|
|
Packit |
54873f |
* This program is licensed under the BSD license, read LICENSE.BSD
|
|
Packit |
54873f |
* for further information
|
|
Packit |
54873f |
*/
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/*
|
|
Packit |
54873f |
* repopage.c
|
|
Packit |
54873f |
*
|
|
Packit |
54873f |
* Paging and compression functions for the vertical repository data.
|
|
Packit |
54873f |
* Vertical data is grouped by key, normal data is grouped by solvable.
|
|
Packit |
54873f |
* This makes searching for a string in vertical data fast as there's
|
|
Packit |
54873f |
* no need to skip over data if keys we're not interested in.
|
|
Packit |
54873f |
*
|
|
Packit |
54873f |
* The vertical data is split into pages, each page is compressed with a fast
|
|
Packit |
54873f |
* compression algorithm. These pages are read in on demand, not recently used
|
|
Packit |
54873f |
* pages automatically get dropped.
|
|
Packit |
54873f |
*/
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#define _XOPEN_SOURCE 500
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#include <sys/types.h>
|
|
Packit |
54873f |
#include <stdint.h>
|
|
Packit |
54873f |
#include <stdio.h>
|
|
Packit |
54873f |
#include <stdlib.h>
|
|
Packit |
54873f |
#include <string.h>
|
|
Packit |
54873f |
#include <unistd.h>
|
|
Packit |
54873f |
#include <assert.h>
|
|
Packit |
54873f |
#include <fcntl.h>
|
|
Packit |
54873f |
#include <time.h>
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#ifdef _WIN32
|
|
Packit |
54873f |
#include <windows.h>
|
|
Packit |
54873f |
#include <fileapi.h>
|
|
Packit |
54873f |
#include <io.h>
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#include "repo.h"
|
|
Packit |
54873f |
#include "repopage.h"
|
|
Packit |
54873f |
|
|
Packit |
54873f |
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#define BLOCK_SIZE (65536*1)
|
|
Packit |
54873f |
#if BLOCK_SIZE <= 65536
|
|
Packit |
54873f |
typedef uint16_t Ref;
|
|
Packit |
54873f |
#else
|
|
Packit |
54873f |
typedef uint32_t Ref;
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/*
|
|
Packit |
54873f |
The format is tailored for fast decompression (i.e. only byte based),
|
|
Packit |
54873f |
and skewed to ASCII content (highest bit often not set):
|
|
Packit |
54873f |
|
|
Packit |
54873f |
a 0LLLLLLL
|
|
Packit |
54873f |
- self-describing ASCII character hex L
|
|
Packit |
54873f |
b 100lllll <l+1 bytes>
|
|
Packit |
54873f |
- literal run of length l+1
|
|
Packit |
54873f |
c 101oolll <8o>
|
|
Packit |
54873f |
- back ref of length l+2, at offset -(o+1) (o < 1 << 10)
|
|
Packit |
54873f |
d 110lllll <8o>
|
|
Packit |
54873f |
- back ref of length l+2+8, at offset -(o+1) (o < 1 << 8)
|
|
Packit |
54873f |
e 1110llll <8o> <8o>
|
|
Packit |
54873f |
- back ref of length l+3, at offset -(o+1) (o < 1 << 16)
|
|
Packit |
54873f |
f1 1111llll <8l> <8o> <8o>
|
|
Packit |
54873f |
- back ref, length l+19 (l < 1<<12), offset -(o+1) (o < 1<<16)
|
|
Packit |
54873f |
f2 11110lll <8l> <8o> <8o>
|
|
Packit |
54873f |
- back ref, length l+19 (l < 1<<11), offset -(o+1) (o < 1<<16)
|
|
Packit |
54873f |
g 11111lll <8l> <8o> <8o> <8o>
|
|
Packit |
54873f |
- back ref, length l+5 (l < 1<<11), offset -(o+1) (o < 1<<24)
|
|
Packit |
54873f |
|
|
Packit |
54873f |
Generally for a literal of length L we need L+1 bytes, hence it is
|
|
Packit |
54873f |
better to encode also very short backrefs (2 chars) as backrefs if
|
|
Packit |
54873f |
their offset is small, as that only needs two bytes. Except if we
|
|
Packit |
54873f |
already have a literal run, in that case it's better to append there,
|
|
Packit |
54873f |
instead of breaking it for a backref. So given a potential backref
|
|
Packit |
54873f |
at offset O, length L the strategy is as follows:
|
|
Packit |
54873f |
|
|
Packit |
54873f |
L < 2 : encode as 1-literal
|
|
Packit |
54873f |
L == 2, O > 1024 : encode as 1-literal
|
|
Packit |
54873f |
L == 2, have already literals: encode as 1-literal
|
|
Packit |
54873f |
O = O - 1
|
|
Packit |
54873f |
L >= 2, L <= 9, O < 1024 : encode as c
|
|
Packit |
54873f |
L >= 10, L <= 41, O < 256 : encode as d
|
|
Packit |
54873f |
else we have either O >= 1024, or L >= 42:
|
|
Packit |
54873f |
L < 3 : encode as 1-literal
|
|
Packit |
54873f |
L >= 3, L <= 18, O < 65536 : encode as e
|
|
Packit |
54873f |
L >= 19, L <= 4095+18, O < 65536 : encode as f
|
|
Packit |
54873f |
else we have either L >= 4096+18 or O >= 65536.
|
|
Packit |
54873f |
O >= 65536: encode as 1-literal, too bad
|
|
Packit |
54873f |
(with the current block size this can't happen)
|
|
Packit |
54873f |
L >= 4096+18, so reduce to 4095+18 : encode as f
|
|
Packit |
54873f |
*/
|
|
Packit |
54873f |
|
|
Packit |
54873f |
|
|
Packit |
54873f |
static unsigned int
|
|
Packit |
54873f |
compress_buf(const unsigned char *in, unsigned int in_len,
|
|
Packit |
54873f |
unsigned char *out, unsigned int out_len)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int oo = 0; /* out-offset */
|
|
Packit |
54873f |
unsigned int io = 0; /* in-offset */
|
|
Packit |
54873f |
#define HS (65536)
|
|
Packit |
54873f |
Ref htab[HS];
|
|
Packit |
54873f |
Ref hnext[BLOCK_SIZE];
|
|
Packit |
54873f |
unsigned int litofs = 0;
|
|
Packit |
54873f |
memset(htab, -1, sizeof (htab));
|
|
Packit |
54873f |
memset(hnext, -1, sizeof (hnext));
|
|
Packit |
54873f |
while (io + 2 < in_len)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* Search for a match of the string starting at IN, we have at
|
|
Packit |
54873f |
least three characters. */
|
|
Packit |
54873f |
unsigned int hval = in[io] | in[io + 1] << 8 | in[io + 2] << 16;
|
|
Packit |
54873f |
unsigned int try, mlen, mofs, tries;
|
|
Packit |
54873f |
hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
|
|
Packit |
54873f |
hval = hval & (HS - 1);
|
|
Packit |
54873f |
try = htab[hval];
|
|
Packit |
54873f |
hnext[io] = htab[hval];
|
|
Packit |
54873f |
htab[hval] = io;
|
|
Packit |
54873f |
mlen = 0;
|
|
Packit |
54873f |
mofs = 0;
|
|
Packit |
54873f |
|
|
Packit |
54873f |
for (tries = 0; try != -1 && tries < 12; tries++)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (try < io
|
|
Packit |
54873f |
&& in[try] == in[io] && in[try + 1] == in[io + 1])
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
mlen = 2;
|
|
Packit |
54873f |
mofs = (io - try) - 1;
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
try = hnext[try];
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
for (; try != -1 && tries < 12; tries++)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* assert(mlen >= 2); */
|
|
Packit |
54873f |
/* assert(io + mlen < in_len); */
|
|
Packit |
54873f |
/* Try a match starting from [io] with the strings at [try].
|
|
Packit |
54873f |
That's only sensible if TRY actually is before IO (can happen
|
|
Packit |
54873f |
with uninit hash table). If we have a previous match already
|
|
Packit |
54873f |
we're only going to take the new one if it's longer, hence
|
|
Packit |
54873f |
check the potentially last character. */
|
|
Packit |
54873f |
if (try < io && in[try + mlen] == in[io + mlen])
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int this_len, this_ofs;
|
|
Packit |
54873f |
if (memcmp(in + try, in + io, mlen))
|
|
Packit |
54873f |
goto no_match;
|
|
Packit |
54873f |
this_len = mlen + 1;
|
|
Packit |
54873f |
/* Now try extending the match by more characters. */
|
|
Packit |
54873f |
for (;
|
|
Packit |
54873f |
io + this_len < in_len
|
|
Packit |
54873f |
&& in[try + this_len] == in[io + this_len]; this_len++)
|
|
Packit |
54873f |
;
|
|
Packit |
54873f |
#if 0
|
|
Packit |
54873f |
unsigned int testi;
|
|
Packit |
54873f |
for (testi = 0; testi < this_len; testi++)
|
|
Packit |
54873f |
assert(in[try + testi] == in[io + testi]);
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
this_ofs = (io - try) - 1;
|
|
Packit |
54873f |
/*if (this_ofs > 65535)
|
|
Packit |
54873f |
goto no_match; */
|
|
Packit |
54873f |
#if 0
|
|
Packit |
54873f |
assert(this_len >= 2);
|
|
Packit |
54873f |
assert(this_len >= mlen);
|
|
Packit |
54873f |
assert(this_len > mlen || (this_len == mlen && this_ofs > mofs));
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
mlen = this_len, mofs = this_ofs;
|
|
Packit |
54873f |
/* If our match extends up to the end of input, no next
|
|
Packit |
54873f |
match can become better. This is not just an
|
|
Packit |
54873f |
optimization, it establishes a loop invariant
|
|
Packit |
54873f |
(io + mlen < in_len). */
|
|
Packit |
54873f |
if (io + mlen >= in_len)
|
|
Packit |
54873f |
goto match_done;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
no_match:
|
|
Packit |
54873f |
try = hnext[try];
|
|
Packit |
54873f |
/*if (io - try - 1 >= 65536)
|
|
Packit |
54873f |
break;*/
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
match_done:
|
|
Packit |
54873f |
if (mlen)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/*fprintf(stderr, "%d %d\n", mlen, mofs);*/
|
|
Packit |
54873f |
if (mlen == 2 && (litofs || mofs >= 1024))
|
|
Packit |
54873f |
mlen = 0;
|
|
Packit |
54873f |
/*else if (mofs >= 65536)
|
|
Packit |
54873f |
mlen = 0;*/
|
|
Packit |
54873f |
else if (mofs >= 65536)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (mlen >= 2048 + 5)
|
|
Packit |
54873f |
mlen = 2047 + 5;
|
|
Packit |
54873f |
else if (mlen < 5)
|
|
Packit |
54873f |
mlen = 0;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else if (mlen < 3)
|
|
Packit |
54873f |
mlen = 0;
|
|
Packit |
54873f |
/*else if (mlen >= 4096 + 19)
|
|
Packit |
54873f |
mlen = 4095 + 19;*/
|
|
Packit |
54873f |
else if (mlen >= 2048 + 19)
|
|
Packit |
54873f |
mlen = 2047 + 19;
|
|
Packit |
54873f |
/* Skip this match if the next character would deliver a better one,
|
|
Packit |
54873f |
but only do this if we have the chance to really extend the
|
|
Packit |
54873f |
length (i.e. our current length isn't yet the (conservative)
|
|
Packit |
54873f |
maximum). */
|
|
Packit |
54873f |
if (mlen && mlen < (2048 + 5) && io + 3 < in_len)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int hval =
|
|
Packit |
54873f |
in[io + 1] | in[io + 2] << 8 | in[io + 3] << 16;
|
|
Packit |
54873f |
unsigned int try;
|
|
Packit |
54873f |
hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
|
|
Packit |
54873f |
hval = hval & (HS - 1);
|
|
Packit |
54873f |
try = htab[hval];
|
|
Packit |
54873f |
if (try < io + 1
|
|
Packit |
54873f |
&& in[try] == in[io + 1] && in[try + 1] == in[io + 2])
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int this_len;
|
|
Packit |
54873f |
this_len = 2;
|
|
Packit |
54873f |
for (;
|
|
Packit |
54873f |
io + 1 + this_len < in_len
|
|
Packit |
54873f |
&& in[try + this_len] == in[io + 1 + this_len];
|
|
Packit |
54873f |
this_len++)
|
|
Packit |
54873f |
;
|
|
Packit |
54873f |
if (this_len >= mlen)
|
|
Packit |
54873f |
mlen = 0;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
if (!mlen)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (!litofs)
|
|
Packit |
54873f |
litofs = io + 1;
|
|
Packit |
54873f |
io++;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (litofs)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned litlen;
|
|
Packit |
54873f |
litofs--;
|
|
Packit |
54873f |
litlen = io - litofs;
|
|
Packit |
54873f |
/* fprintf(stderr, "lit: %d\n", litlen); */
|
|
Packit |
54873f |
while (litlen)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int easy_sz;
|
|
Packit |
54873f |
/* Emit everything we can as self-describers. As soon as
|
|
Packit |
54873f |
we hit a byte we can't emit as such we're going to emit
|
|
Packit |
54873f |
a length descriptor anyway, so we can as well include
|
|
Packit |
54873f |
bytes < 0x80 which might follow afterwards in that run. */
|
|
Packit |
54873f |
for (easy_sz = 0;
|
|
Packit |
54873f |
easy_sz < litlen && in[litofs + easy_sz] < 0x80;
|
|
Packit |
54873f |
easy_sz++)
|
|
Packit |
54873f |
;
|
|
Packit |
54873f |
if (easy_sz)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (oo + easy_sz >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
memcpy(out + oo, in + litofs, easy_sz);
|
|
Packit |
54873f |
litofs += easy_sz;
|
|
Packit |
54873f |
oo += easy_sz;
|
|
Packit |
54873f |
litlen -= easy_sz;
|
|
Packit |
54873f |
if (!litlen)
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
if (litlen <= 32)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (oo + 1 + litlen >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
out[oo++] = 0x80 | (litlen - 1);
|
|
Packit |
54873f |
while (litlen--)
|
|
Packit |
54873f |
out[oo++] = in[litofs++];
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* Literal length > 32, so chunk it. */
|
|
Packit |
54873f |
if (oo + 1 + 32 >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
out[oo++] = 0x80 | 31;
|
|
Packit |
54873f |
memcpy(out + oo, in + litofs, 32);
|
|
Packit |
54873f |
oo += 32;
|
|
Packit |
54873f |
litofs += 32;
|
|
Packit |
54873f |
litlen -= 32;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
litofs = 0;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/* fprintf(stderr, "ref: %d @ %d\n", mlen, mofs); */
|
|
Packit |
54873f |
|
|
Packit |
54873f |
if (mlen >= 2 && mlen <= 9 && mofs < 1024)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (oo + 2 >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
out[oo++] = 0xa0 | ((mofs & 0x300) >> 5) | (mlen - 2);
|
|
Packit |
54873f |
out[oo++] = mofs & 0xff;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else if (mlen >= 10 && mlen <= 41 && mofs < 256)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (oo + 2 >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
out[oo++] = 0xc0 | (mlen - 10);
|
|
Packit |
54873f |
out[oo++] = mofs;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else if (mofs >= 65536)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
assert(mlen >= 5 && mlen < 2048 + 5);
|
|
Packit |
54873f |
if (oo + 5 >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
out[oo++] = 0xf8 | ((mlen - 5) >> 8);
|
|
Packit |
54873f |
out[oo++] = (mlen - 5) & 0xff;
|
|
Packit |
54873f |
out[oo++] = mofs & 0xff;
|
|
Packit |
54873f |
out[oo++] = (mofs >> 8) & 0xff;
|
|
Packit |
54873f |
out[oo++] = mofs >> 16;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else if (mlen >= 3 && mlen <= 18)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
assert(mofs < 65536);
|
|
Packit |
54873f |
if (oo + 3 >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
out[oo++] = 0xe0 | (mlen - 3);
|
|
Packit |
54873f |
out[oo++] = mofs & 0xff;
|
|
Packit |
54873f |
out[oo++] = mofs >> 8;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
assert(mlen >= 19 && mlen <= 4095 + 19 && mofs < 65536);
|
|
Packit |
54873f |
if (oo + 4 >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
out[oo++] = 0xf0 | ((mlen - 19) >> 8);
|
|
Packit |
54873f |
out[oo++] = (mlen - 19) & 0xff;
|
|
Packit |
54873f |
out[oo++] = mofs & 0xff;
|
|
Packit |
54873f |
out[oo++] = mofs >> 8;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
/* Insert the hashes for the compressed run [io..io+mlen-1].
|
|
Packit |
54873f |
For [io] we have it already done at the start of the loop.
|
|
Packit |
54873f |
So it's from [io+1..io+mlen-1], and we need three chars per
|
|
Packit |
54873f |
hash, so the accessed characters will be [io+1..io+mlen-1+2],
|
|
Packit |
54873f |
ergo io+mlen+1 < in_len. */
|
|
Packit |
54873f |
mlen--;
|
|
Packit |
54873f |
io++;
|
|
Packit |
54873f |
while (mlen--)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (io + 2 < in_len)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int hval =
|
|
Packit |
54873f |
in[io] | in[io + 1] << 8 | in[io + 2] << 16;
|
|
Packit |
54873f |
hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
|
|
Packit |
54873f |
hval = hval & (HS - 1);
|
|
Packit |
54873f |
hnext[io] = htab[hval];
|
|
Packit |
54873f |
htab[hval] = io;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
io++;
|
|
Packit |
54873f |
};
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
/* We might have some characters left. */
|
|
Packit |
54873f |
if (io < in_len && !litofs)
|
|
Packit |
54873f |
litofs = io + 1;
|
|
Packit |
54873f |
io = in_len;
|
|
Packit |
54873f |
if (litofs)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned litlen;
|
|
Packit |
54873f |
litofs--;
|
|
Packit |
54873f |
litlen = io - litofs;
|
|
Packit |
54873f |
/* fprintf(stderr, "lit: %d\n", litlen); */
|
|
Packit |
54873f |
while (litlen)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int easy_sz;
|
|
Packit |
54873f |
/* Emit everything we can as self-describers. As soon as we hit a
|
|
Packit |
54873f |
byte we can't emit as such we're going to emit a length
|
|
Packit |
54873f |
descriptor anyway, so we can as well include bytes < 0x80 which
|
|
Packit |
54873f |
might follow afterwards in that run. */
|
|
Packit |
54873f |
for (easy_sz = 0; easy_sz < litlen && in[litofs + easy_sz] < 0x80;
|
|
Packit |
54873f |
easy_sz++)
|
|
Packit |
54873f |
;
|
|
Packit |
54873f |
if (easy_sz)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (oo + easy_sz >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
memcpy(out + oo, in + litofs, easy_sz);
|
|
Packit |
54873f |
litofs += easy_sz;
|
|
Packit |
54873f |
oo += easy_sz;
|
|
Packit |
54873f |
litlen -= easy_sz;
|
|
Packit |
54873f |
if (!litlen)
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
if (litlen <= 32)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (oo + 1 + litlen >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
out[oo++] = 0x80 | (litlen - 1);
|
|
Packit |
54873f |
while (litlen--)
|
|
Packit |
54873f |
out[oo++] = in[litofs++];
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* Literal length > 32, so chunk it. */
|
|
Packit |
54873f |
if (oo + 1 + 32 >= out_len)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
out[oo++] = 0x80 | 31;
|
|
Packit |
54873f |
memcpy(out + oo, in + litofs, 32);
|
|
Packit |
54873f |
oo += 32;
|
|
Packit |
54873f |
litofs += 32;
|
|
Packit |
54873f |
litlen -= 32;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
return oo;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
static unsigned int
|
|
Packit |
54873f |
unchecked_decompress_buf(const unsigned char *in, unsigned int in_len,
|
|
Packit |
54873f |
unsigned char *out,
|
|
Packit |
54873f |
unsigned int out_len __attribute__((unused)))
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned char *orig_out = out;
|
|
Packit |
54873f |
const unsigned char *in_end = in + in_len;
|
|
Packit |
54873f |
while (in < in_end)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int first = *in++;
|
|
Packit |
54873f |
int o;
|
|
Packit |
54873f |
switch (first >> 4)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
default:
|
|
Packit |
54873f |
/* This default case can't happen, but GCCs VRP is not strong
|
|
Packit |
54873f |
enough to see this, so make this explicitely not fall to
|
|
Packit |
54873f |
the end of the switch, so that we don't have to initialize
|
|
Packit |
54873f |
o above. */
|
|
Packit |
54873f |
continue;
|
|
Packit |
54873f |
case 0: case 1:
|
|
Packit |
54873f |
case 2: case 3:
|
|
Packit |
54873f |
case 4: case 5:
|
|
Packit |
54873f |
case 6: case 7:
|
|
Packit |
54873f |
/* a 0LLLLLLL */
|
|
Packit |
54873f |
/* fprintf (stderr, "lit: 1\n"); */
|
|
Packit |
54873f |
*out++ = first;
|
|
Packit |
54873f |
continue;
|
|
Packit |
54873f |
case 8: case 9:
|
|
Packit |
54873f |
/* b 100lllll <l+1 bytes> */
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int l = first & 31;
|
|
Packit |
54873f |
/* fprintf (stderr, "lit: %d\n", l); */
|
|
Packit |
54873f |
do
|
|
Packit |
54873f |
*out++ = *in++;
|
|
Packit |
54873f |
while (l--);
|
|
Packit |
54873f |
continue;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
case 10: case 11:
|
|
Packit |
54873f |
/* c 101oolll <8o> */
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
o = first & (3 << 3);
|
|
Packit |
54873f |
o = (o << 5) | *in++;
|
|
Packit |
54873f |
first = (first & 7) + 2;
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
case 12: case 13:
|
|
Packit |
54873f |
/* d 110lllll <8o> */
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
o = *in++;
|
|
Packit |
54873f |
first = (first & 31) + 10;
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
case 14:
|
|
Packit |
54873f |
/* e 1110llll <8o> <8o> */
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
o = in[0] | (in[1] << 8);
|
|
Packit |
54873f |
in += 2;
|
|
Packit |
54873f |
first = first & 31;
|
|
Packit |
54873f |
first += 3;
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
case 15:
|
|
Packit |
54873f |
/* f1 1111llll <8o> <8o> <8l> */
|
|
Packit |
54873f |
/* f2 11110lll <8o> <8o> <8l> */
|
|
Packit |
54873f |
/* g 11111lll <8o> <8o> <8o> <8l> */
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
first = first & 15;
|
|
Packit |
54873f |
if (first >= 8)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
first = (((first - 8) << 8) | in[0]) + 5;
|
|
Packit |
54873f |
o = in[1] | (in[2] << 8) | (in[3] << 16);
|
|
Packit |
54873f |
in += 4;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
first = ((first << 8) | in[0]) + 19;
|
|
Packit |
54873f |
o = in[1] | (in[2] << 8);
|
|
Packit |
54873f |
in += 3;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
/* fprintf(stderr, "ref: %d @ %d\n", first, o); */
|
|
Packit |
54873f |
o++;
|
|
Packit |
54873f |
o = -o;
|
|
Packit |
54873f |
#if 0
|
|
Packit |
54873f |
/* We know that first will not be zero, and this loop structure is
|
|
Packit |
54873f |
better optimizable. */
|
|
Packit |
54873f |
do
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
*out = *(out - o);
|
|
Packit |
54873f |
out++;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
while (--first);
|
|
Packit |
54873f |
#else
|
|
Packit |
54873f |
switch (first)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
case 18: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 17: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 16: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 15: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 14: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 13: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 12: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 11: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 10: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 9: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 8: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 7: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 6: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 5: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 4: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 3: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 2: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 1: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 0: break;
|
|
Packit |
54873f |
default:
|
|
Packit |
54873f |
/* Duff duff :-) */
|
|
Packit |
54873f |
switch (first & 15)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
do
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
case 0: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 15: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 14: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 13: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 12: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 11: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 10: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 9: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 8: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 7: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 6: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 5: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 4: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 3: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 2: *out = *(out + o); out++;
|
|
Packit |
54873f |
case 1: *out = *(out + o); out++;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
while ((int)(first -= 16) > 0);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
return out - orig_out;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/**********************************************************************/
|
|
Packit |
54873f |
|
|
Packit |
54873f |
void repopagestore_init(Repopagestore *store)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
memset(store, 0, sizeof(*store));
|
|
Packit |
54873f |
store->pagefd = -1;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
void repopagestore_free(Repopagestore *store)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
store->blob_store = solv_free(store->blob_store);
|
|
Packit |
54873f |
store->file_pages = solv_free(store->file_pages);
|
|
Packit |
54873f |
store->mapped_at = solv_free(store->mapped_at);
|
|
Packit |
54873f |
store->mapped = solv_free(store->mapped);
|
|
Packit |
54873f |
if (store->pagefd != -1)
|
|
Packit |
54873f |
close(store->pagefd);
|
|
Packit |
54873f |
store->pagefd = -1;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/**********************************************************************/
|
|
Packit |
54873f |
|
|
Packit |
54873f |
unsigned char *
|
|
Packit |
54873f |
repopagestore_load_page_range(Repopagestore *store, unsigned int pstart, unsigned int pend)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* Make sure all pages from PSTART to PEND (inclusive) are loaded,
|
|
Packit |
54873f |
and are consecutive. Return a pointer to the mapping of PSTART. */
|
|
Packit |
54873f |
unsigned char buf[REPOPAGE_BLOBSIZE];
|
|
Packit |
54873f |
unsigned int i, best, pnum;
|
|
Packit |
54873f |
|
|
Packit |
54873f |
if (pstart == pend)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* Quick check in case the requested page is already mapped */
|
|
Packit |
54873f |
if (store->mapped_at[pstart] != -1)
|
|
Packit |
54873f |
return store->blob_store + store->mapped_at[pstart];
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* Quick check in case all pages are already mapped and consecutive. */
|
|
Packit |
54873f |
for (pnum = pstart; pnum <= pend; pnum++)
|
|
Packit |
54873f |
if (store->mapped_at[pnum] == -1
|
|
Packit |
54873f |
|| (pnum > pstart
|
|
Packit |
54873f |
&& store->mapped_at[pnum]
|
|
Packit |
54873f |
!= store->mapped_at[pnum-1] + REPOPAGE_BLOBSIZE))
|
|
Packit |
54873f |
break;
|
|
Packit |
54873f |
if (pnum > pend)
|
|
Packit |
54873f |
return store->blob_store + store->mapped_at[pstart];
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
if (store->pagefd == -1 || !store->file_pages)
|
|
Packit |
54873f |
return 0; /* no backing file */
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "PAGE: want %d pages starting at %d\n", pend - pstart + 1, pstart);
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/* Ensure that we can map the numbers of pages we need at all. */
|
|
Packit |
54873f |
if (pend - pstart + 1 > store->nmapped)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int oldcan = store->nmapped;
|
|
Packit |
54873f |
store->nmapped = pend - pstart + 1;
|
|
Packit |
54873f |
if (store->nmapped < 4)
|
|
Packit |
54873f |
store->nmapped = 4;
|
|
Packit |
54873f |
store->mapped = solv_realloc2(store->mapped, store->nmapped, sizeof(store->mapped[0]));
|
|
Packit |
54873f |
for (i = oldcan; i < store->nmapped; i++)
|
|
Packit |
54873f |
store->mapped[i] = -1;
|
|
Packit |
54873f |
store->blob_store = solv_realloc2(store->blob_store, store->nmapped, REPOPAGE_BLOBSIZE);
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "PAGE: can map %d pages\n", store->nmapped);
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
if (store->mapped_at[pstart] != -1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* assume forward search */
|
|
Packit |
54873f |
best = store->mapped_at[pstart] / REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
if (best + (pend - pstart) >= store->nmapped)
|
|
Packit |
54873f |
best = 0;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else if (store->mapped_at[pend] != -1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* assume backward search */
|
|
Packit |
54873f |
best = store->mapped_at[pend] / REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
if (best < pend - pstart)
|
|
Packit |
54873f |
best = store->nmapped - 1;
|
|
Packit |
54873f |
best -= pend - pstart;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* choose some "random" location to avoid thrashing */
|
|
Packit |
54873f |
best = (pstart + store->rr_counter++) % (store->nmapped - pend + pstart);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/* So we want to map our pages from [best] to [best+pend-pstart].
|
|
Packit |
54873f |
Use a very simple strategy, which doesn't make the best use of
|
|
Packit |
54873f |
our resources, but works. Throw away all pages in that range
|
|
Packit |
54873f |
(even ours) then copy around ours or read them in. */
|
|
Packit |
54873f |
for (i = best, pnum = pstart; pnum <= pend; i++, pnum++)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int pnum_mapped_at;
|
|
Packit |
54873f |
unsigned int oldpnum = store->mapped[i];
|
|
Packit |
54873f |
if (oldpnum != -1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (oldpnum == pnum)
|
|
Packit |
54873f |
continue; /* already have the correct page */
|
|
Packit |
54873f |
/* Evict this page. */
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "PAGE: evict page %d from %d\n", oldpnum, i);
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
store->mapped[i] = -1;
|
|
Packit |
54873f |
store->mapped_at[oldpnum] = -1;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
/* check if we can copy the correct content (before it gets evicted) */
|
|
Packit |
54873f |
pnum_mapped_at = store->mapped_at[pnum];
|
|
Packit |
54873f |
if (pnum_mapped_at != -1 && pnum_mapped_at != i * REPOPAGE_BLOBSIZE)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "PAGECOPY: %d from %d to %d\n", pnum, pnum_mapped_at / REPOPAGE_BLOBSIZE, i);
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE);
|
|
Packit |
54873f |
store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = -1;
|
|
Packit |
54873f |
store->mapped[i] = pnum;
|
|
Packit |
54873f |
store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/* Everything is free now. Read in or copy the pages we want. */
|
|
Packit |
54873f |
for (i = best, pnum = pstart; pnum <= pend; i++, pnum++)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
if (store->mapped_at[pnum] != -1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int pnum_mapped_at = store->mapped_at[pnum];
|
|
Packit |
54873f |
if (pnum_mapped_at != i * REPOPAGE_BLOBSIZE)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "PAGECOPY: %d from %d to %d\n", pnum, pnum_mapped_at / REPOPAGE_BLOBSIZE, i);
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
/* Still mapped somewhere else, so just copy it from there. */
|
|
Packit |
54873f |
memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE);
|
|
Packit |
54873f |
store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = -1;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
Attrblobpage *p = store->file_pages + pnum;
|
|
Packit |
54873f |
unsigned int in_len = p->page_size;
|
|
Packit |
54873f |
unsigned int compressed = in_len & 1;
|
|
Packit |
54873f |
in_len >>= 1;
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "PAGEIN: %d to %d", pnum, i);
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
#ifndef _WIN32
|
|
Packit |
54873f |
if (pread(store->pagefd, compressed ? buf : dest, in_len, store->file_offset + p->page_offset) != in_len)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
perror("mapping pread");
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
#else
|
|
Packit |
54873f |
DWORD read_len;
|
|
Packit |
54873f |
OVERLAPPED ovlp = {0};
|
|
Packit |
54873f |
ovlp.Offset = store->file_offset + p->page_offset;
|
|
Packit |
54873f |
if (!ReadFile((HANDLE) _get_osfhandle(store->pagefd), compressed ? buf : dest, in_len, &read_len, &ovlp) || read_len != in_len)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
perror("mapping ReadFile");
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
if (compressed)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int out_len;
|
|
Packit |
54873f |
out_len = unchecked_decompress_buf(buf, in_len, dest, REPOPAGE_BLOBSIZE);
|
|
Packit |
54873f |
if (out_len != REPOPAGE_BLOBSIZE && pnum < store->num_pages - 1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "can't decompress\n");
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, " (expand %d to %d)", in_len, out_len);
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "\n");
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
store->mapped[i] = pnum;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
return store->blob_store + best * REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
unsigned int
|
|
Packit |
54873f |
repopagestore_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, unsigned int max)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
return compress_buf(page, len, cpage, max);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#define SOLV_ERROR_EOF 3
|
|
Packit |
54873f |
#define SOLV_ERROR_CORRUPT 6
|
|
Packit |
54873f |
|
|
Packit |
54873f |
static inline unsigned int
|
|
Packit |
54873f |
read_u32(FILE *fp)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
int c, i;
|
|
Packit |
54873f |
unsigned int x = 0;
|
|
Packit |
54873f |
|
|
Packit |
54873f |
for (i = 0; i < 4; i++)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
c = getc(fp);
|
|
Packit |
54873f |
if (c == EOF)
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
x = (x << 8) | c;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
return x;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/* Try to either setup on-demand paging (using FP as backing
|
|
Packit |
54873f |
file), or in case that doesn't work (FP not seekable) slurps in
|
|
Packit |
54873f |
all pages and deactivates paging. */
|
|
Packit |
54873f |
int
|
|
Packit |
54873f |
repopagestore_read_or_setup_pages(Repopagestore *store, FILE *fp, unsigned int pagesz, unsigned int blobsz)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int npages;
|
|
Packit |
54873f |
unsigned int i;
|
|
Packit |
54873f |
unsigned int can_seek;
|
|
Packit |
54873f |
unsigned int cur_page_ofs;
|
|
Packit |
54873f |
unsigned char buf[REPOPAGE_BLOBSIZE];
|
|
Packit |
54873f |
|
|
Packit |
54873f |
if (pagesz != REPOPAGE_BLOBSIZE)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* We could handle this by slurping in everything. */
|
|
Packit |
54873f |
return SOLV_ERROR_CORRUPT;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
can_seek = 1;
|
|
Packit |
54873f |
if ((store->file_offset = ftell(fp)) < 0)
|
|
Packit |
54873f |
can_seek = 0;
|
|
Packit |
54873f |
clearerr(fp);
|
|
Packit |
54873f |
if (can_seek)
|
|
Packit |
54873f |
store->pagefd = dup(fileno(fp));
|
|
Packit |
54873f |
if (store->pagefd == -1)
|
|
Packit |
54873f |
can_seek = 0;
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
solv_setcloexec(store->pagefd, 1);
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "can %sseek\n", can_seek ? "" : "NOT ");
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
npages = (blobsz + REPOPAGE_BLOBSIZE - 1) / REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
|
|
Packit |
54873f |
store->num_pages = npages;
|
|
Packit |
54873f |
store->mapped_at = solv_malloc2(npages, sizeof(*store->mapped_at));
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/* If we can't seek on our input we have to slurp in everything.
|
|
Packit |
54873f |
* Otherwise set up file_pages containing offest/length of the
|
|
Packit |
54873f |
* pages */
|
|
Packit |
54873f |
if (can_seek)
|
|
Packit |
54873f |
store->file_pages = solv_malloc2(npages, sizeof(*store->file_pages));
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
store->blob_store = solv_malloc2(npages, REPOPAGE_BLOBSIZE);
|
|
Packit |
54873f |
cur_page_ofs = 0;
|
|
Packit |
54873f |
for (i = 0; i < npages; i++)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int in_len = read_u32(fp);
|
|
Packit |
54873f |
unsigned int compressed = in_len & 1;
|
|
Packit |
54873f |
in_len >>= 1;
|
|
Packit |
54873f |
#ifdef DEBUG_PAGING
|
|
Packit |
54873f |
fprintf(stderr, "page %d: len %d (%scompressed)\n",
|
|
Packit |
54873f |
i, in_len, compressed ? "" : "not ");
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
if (can_seek)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
Attrblobpage *p = store->file_pages + i;
|
|
Packit |
54873f |
cur_page_ofs += 4;
|
|
Packit |
54873f |
store->mapped_at[i] = -1; /* not mapped yet */
|
|
Packit |
54873f |
p->page_offset = cur_page_ofs;
|
|
Packit |
54873f |
p->page_size = in_len * 2 + compressed;
|
|
Packit |
54873f |
if (fseek(fp, in_len, SEEK_CUR) < 0)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
/* We can't fall back to non-seeking behaviour as we already
|
|
Packit |
54873f |
read over some data pages without storing them away. */
|
|
Packit |
54873f |
close(store->pagefd);
|
|
Packit |
54873f |
store->pagefd = -1;
|
|
Packit |
54873f |
return SOLV_ERROR_EOF;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
cur_page_ofs += in_len;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int out_len;
|
|
Packit |
54873f |
void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
store->mapped_at[i] = i * REPOPAGE_BLOBSIZE;
|
|
Packit |
54873f |
/* We can't seek, so suck everything in. */
|
|
Packit |
54873f |
if (fread(compressed ? buf : dest, in_len, 1, fp) != 1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
perror("fread");
|
|
Packit |
54873f |
return SOLV_ERROR_EOF;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
if (compressed)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
out_len = unchecked_decompress_buf(buf, in_len, dest, REPOPAGE_BLOBSIZE);
|
|
Packit |
54873f |
if (out_len != REPOPAGE_BLOBSIZE && i < npages - 1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
return SOLV_ERROR_CORRUPT;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
void
|
|
Packit |
54873f |
repopagestore_disable_paging(Repopagestore *store)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (store->num_pages)
|
|
Packit |
54873f |
repopagestore_load_page_range(store, 0, store->num_pages - 1);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#ifdef STANDALONE
|
|
Packit |
54873f |
|
|
Packit |
54873f |
static void
|
|
Packit |
54873f |
transfer_file(FILE * from, FILE * to, int compress)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned char inb[BLOCK_SIZE];
|
|
Packit |
54873f |
unsigned char outb[BLOCK_SIZE];
|
|
Packit |
54873f |
while (!feof (from) && !ferror (from))
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned int in_len, out_len;
|
|
Packit |
54873f |
if (compress)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
in_len = fread(inb, 1, BLOCK_SIZE, from);
|
|
Packit |
54873f |
if (in_len)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned char *b = outb;
|
|
Packit |
54873f |
out_len = compress_buf(inb, in_len, outb, sizeof (outb));
|
|
Packit |
54873f |
if (!out_len)
|
|
Packit |
54873f |
b = inb, out_len = in_len;
|
|
Packit |
54873f |
if (fwrite(&out_len, sizeof (out_len), 1, to) != 1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
perror("write size");
|
|
Packit |
54873f |
exit (1);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
if (fwrite(b, out_len, 1, to) != 1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
perror("write data");
|
|
Packit |
54873f |
exit (1);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (fread(&in_len, sizeof(in_len), 1, from) != 1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
if (feof(from))
|
|
Packit |
54873f |
return;
|
|
Packit |
54873f |
perror("can't read size");
|
|
Packit |
54873f |
exit(1);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
if (fread(inb, in_len, 1, from) != 1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
perror("can't read data");
|
|
Packit |
54873f |
exit(1);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
out_len =
|
|
Packit |
54873f |
unchecked_decompress_buf(inb, in_len, outb, sizeof(outb));
|
|
Packit |
54873f |
if (fwrite(outb, out_len, 1, to) != 1)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
perror("can't write output");
|
|
Packit |
54873f |
exit(1);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
/* Just for benchmarking purposes. */
|
|
Packit |
54873f |
static void
|
|
Packit |
54873f |
dumb_memcpy(void *dest, const void *src, unsigned int len)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
char *d = dest;
|
|
Packit |
54873f |
const char *s = src;
|
|
Packit |
54873f |
while (len--)
|
|
Packit |
54873f |
*d++ = *s++;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
static void
|
|
Packit |
54873f |
benchmark(FILE * from)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
unsigned char inb[BLOCK_SIZE];
|
|
Packit |
54873f |
unsigned char outb[BLOCK_SIZE];
|
|
Packit |
54873f |
unsigned int in_len = fread(inb, 1, BLOCK_SIZE, from);
|
|
Packit |
54873f |
unsigned int out_len;
|
|
Packit |
54873f |
if (!in_len)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
perror("can't read from input");
|
|
Packit |
54873f |
exit(1);
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
unsigned int calib_loop;
|
|
Packit |
54873f |
unsigned int per_loop;
|
|
Packit |
54873f |
unsigned int i, j;
|
|
Packit |
54873f |
clock_t start, end;
|
|
Packit |
54873f |
float seconds;
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#if 0
|
|
Packit |
54873f |
calib_loop = 1;
|
|
Packit |
54873f |
per_loop = 0;
|
|
Packit |
54873f |
start = clock();
|
|
Packit |
54873f |
while ((clock() - start) < CLOCKS_PER_SEC / 4)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
calib_loop *= 2;
|
|
Packit |
54873f |
for (i = 0; i < calib_loop; i++)
|
|
Packit |
54873f |
dumb_memcpy(outb, inb, in_len);
|
|
Packit |
54873f |
per_loop += calib_loop;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
fprintf(stderr, "memcpy:\nCalibrated to %d iterations per loop\n",
|
|
Packit |
54873f |
per_loop);
|
|
Packit |
54873f |
|
|
Packit |
54873f |
start = clock();
|
|
Packit |
54873f |
for (i = 0; i < 10; i++)
|
|
Packit |
54873f |
for (j = 0; j < per_loop; j++)
|
|
Packit |
54873f |
dumb_memcpy(outb, inb, in_len);
|
|
Packit |
54873f |
end = clock();
|
|
Packit |
54873f |
seconds = (end - start) / (float) CLOCKS_PER_SEC;
|
|
Packit |
54873f |
fprintf(stderr, "%.2f seconds == %.2f MB/s\n", seconds,
|
|
Packit |
54873f |
((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
|
|
Packit |
54873f |
calib_loop = 1;
|
|
Packit |
54873f |
per_loop = 0;
|
|
Packit |
54873f |
start = clock();
|
|
Packit |
54873f |
while ((clock() - start) < CLOCKS_PER_SEC / 4)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
calib_loop *= 2;
|
|
Packit |
54873f |
for (i = 0; i < calib_loop; i++)
|
|
Packit |
54873f |
compress_buf(inb, in_len, outb, sizeof(outb));
|
|
Packit |
54873f |
per_loop += calib_loop;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
fprintf(stderr, "compression:\nCalibrated to %d iterations per loop\n",
|
|
Packit |
54873f |
per_loop);
|
|
Packit |
54873f |
|
|
Packit |
54873f |
start = clock();
|
|
Packit |
54873f |
for (i = 0; i < 10; i++)
|
|
Packit |
54873f |
for (j = 0; j < per_loop; j++)
|
|
Packit |
54873f |
compress_buf(inb, in_len, outb, sizeof(outb));
|
|
Packit |
54873f |
end = clock();
|
|
Packit |
54873f |
seconds = (end - start) / (float) CLOCKS_PER_SEC;
|
|
Packit |
54873f |
fprintf(stderr, "%.2f seconds == %.2f MB/s\n", seconds,
|
|
Packit |
54873f |
((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
|
|
Packit |
54873f |
|
|
Packit |
54873f |
out_len = compress_buf(inb, in_len, outb, sizeof(outb));
|
|
Packit |
54873f |
|
|
Packit |
54873f |
calib_loop = 1;
|
|
Packit |
54873f |
per_loop = 0;
|
|
Packit |
54873f |
start = clock();
|
|
Packit |
54873f |
while ((clock() - start) < CLOCKS_PER_SEC / 4)
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
calib_loop *= 2;
|
|
Packit |
54873f |
for (i = 0; i < calib_loop; i++)
|
|
Packit |
54873f |
unchecked_decompress_buf(outb, out_len, inb, sizeof(inb));
|
|
Packit |
54873f |
per_loop += calib_loop;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
fprintf(stderr, "decompression:\nCalibrated to %d iterations per loop\n",
|
|
Packit |
54873f |
per_loop);
|
|
Packit |
54873f |
|
|
Packit |
54873f |
start = clock();
|
|
Packit |
54873f |
for (i = 0; i < 10; i++)
|
|
Packit |
54873f |
for (j = 0; j < per_loop; j++)
|
|
Packit |
54873f |
unchecked_decompress_buf(outb, out_len, inb, sizeof(inb));
|
|
Packit |
54873f |
end = clock();
|
|
Packit |
54873f |
seconds = (end - start) / (float) CLOCKS_PER_SEC;
|
|
Packit |
54873f |
fprintf(stderr, "%.2f seconds == %.2f MB/s\n", seconds,
|
|
Packit |
54873f |
((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
int
|
|
Packit |
54873f |
main(int argc, char *argv[])
|
|
Packit |
54873f |
{
|
|
Packit |
54873f |
int compress = 1;
|
|
Packit |
54873f |
if (argc > 1 && !strcmp(argv[1], "-d"))
|
|
Packit |
54873f |
compress = 0;
|
|
Packit |
54873f |
if (argc > 1 && !strcmp(argv[1], "-b"))
|
|
Packit |
54873f |
benchmark(stdin);
|
|
Packit |
54873f |
else
|
|
Packit |
54873f |
transfer_file(stdin, stdout, compress);
|
|
Packit |
54873f |
return 0;
|
|
Packit |
54873f |
}
|
|
Packit |
54873f |
|
|
Packit |
54873f |
#endif
|
|
Packit |
54873f |
|