core: Import bup's "rollsum" code, add a test case
For static deltas, one strategy that will be employed is to split each object into chunks, and only include changed chunks in the deltas.
This commit is contained in:
parent
8c2a3efadb
commit
844c5ea652
|
|
@ -87,6 +87,11 @@ test_varint_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS)
|
|||
test_varint_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS)
|
||||
testmeta_DATA += test-varint.test
|
||||
|
||||
insttest_PROGRAMS += test-rollsum
|
||||
test_rollsum_SOURCES = src/libostree/bupsplit.c tests/test-rollsum.c
|
||||
test_rollsum_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS)
|
||||
test_rollsum_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS)
|
||||
|
||||
if BUILDOPT_GJS
|
||||
insttest_SCRIPTS += tests/test-core.js \
|
||||
tests/test-sizes.js \
|
||||
|
|
|
|||
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* Copyright 2011 Avery Pennarun. All rights reserved.
|
||||
*
|
||||
* (This license applies to bupsplit.c and bupsplit.h only.)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY AVERY PENNARUN ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include "bupsplit.h"
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// According to librsync/rollsum.h:
|
||||
// "We should make this something other than zero to improve the
|
||||
// checksum algorithm: tridge suggests a prime number."
|
||||
// apenwarr: I unscientifically tried 0 and 7919, and they both ended up
|
||||
// slightly worse than the librsync value of 31 for my arbitrary test data.
|
||||
#define ROLLSUM_CHAR_OFFSET 31
|
||||
|
||||
typedef struct {
|
||||
unsigned s1, s2;
|
||||
uint8_t window[BUP_WINDOWSIZE];
|
||||
int wofs;
|
||||
} Rollsum;
|
||||
|
||||
|
||||
// These formulas are based on rollsum.h in the librsync project.
|
||||
static void rollsum_add(Rollsum *r, uint8_t drop, uint8_t add)
|
||||
{
|
||||
r->s1 += add - drop;
|
||||
r->s2 += r->s1 - (BUP_WINDOWSIZE * (drop + ROLLSUM_CHAR_OFFSET));
|
||||
}
|
||||
|
||||
|
||||
static void rollsum_init(Rollsum *r)
|
||||
{
|
||||
r->s1 = BUP_WINDOWSIZE * ROLLSUM_CHAR_OFFSET;
|
||||
r->s2 = BUP_WINDOWSIZE * (BUP_WINDOWSIZE-1) * ROLLSUM_CHAR_OFFSET;
|
||||
r->wofs = 0;
|
||||
memset(r->window, 0, BUP_WINDOWSIZE);
|
||||
}
|
||||
|
||||
|
||||
// For some reason, gcc 4.3 (at least) optimizes badly if find_ofs()
|
||||
// is static and rollsum_roll is an inline function. Let's use a macro
|
||||
// here instead to help out the optimizer.
|
||||
#define rollsum_roll(r, ch) do { \
|
||||
rollsum_add((r), (r)->window[(r)->wofs], (ch)); \
|
||||
(r)->window[(r)->wofs] = (ch); \
|
||||
(r)->wofs = ((r)->wofs + 1) % BUP_WINDOWSIZE; \
|
||||
} while (0)
|
||||
|
||||
|
||||
static uint32_t rollsum_digest(Rollsum *r)
|
||||
{
|
||||
return (r->s1 << 16) | (r->s2 & 0xffff);
|
||||
}
|
||||
|
||||
|
||||
static uint32_t rollsum_sum(uint8_t *buf, size_t ofs, size_t len)
|
||||
{
|
||||
size_t count;
|
||||
Rollsum r;
|
||||
rollsum_init(&r);
|
||||
for (count = ofs; count < len; count++)
|
||||
rollsum_roll(&r, buf[count]);
|
||||
return rollsum_digest(&r);
|
||||
}
|
||||
|
||||
|
||||
int bupsplit_find_ofs(const unsigned char *buf, int len, int *bits)
|
||||
{
|
||||
Rollsum r;
|
||||
int count;
|
||||
|
||||
rollsum_init(&r);
|
||||
for (count = 0; count < len; count++)
|
||||
{
|
||||
rollsum_roll(&r, buf[count]);
|
||||
if ((r.s2 & (BUP_BLOBSIZE-1)) == ((~0) & (BUP_BLOBSIZE-1)))
|
||||
{
|
||||
if (bits)
|
||||
{
|
||||
unsigned rsum = rollsum_digest(&r);
|
||||
*bits = BUP_BLOBBITS;
|
||||
rsum >>= BUP_BLOBBITS;
|
||||
for (*bits = BUP_BLOBBITS; (rsum >>= 1) & 1; (*bits)++)
|
||||
;
|
||||
}
|
||||
return count+1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#ifndef BUP_NO_SELFTEST
|
||||
#define BUP_SELFTEST_SIZE 100000
|
||||
|
||||
int bupsplit_selftest()
|
||||
{
|
||||
uint8_t *buf = malloc(BUP_SELFTEST_SIZE);
|
||||
uint32_t sum1a, sum1b, sum2a, sum2b, sum3a, sum3b;
|
||||
unsigned count;
|
||||
|
||||
srandom(1);
|
||||
for (count = 0; count < BUP_SELFTEST_SIZE; count++)
|
||||
buf[count] = random();
|
||||
|
||||
sum1a = rollsum_sum(buf, 0, BUP_SELFTEST_SIZE);
|
||||
sum1b = rollsum_sum(buf, 1, BUP_SELFTEST_SIZE);
|
||||
sum2a = rollsum_sum(buf, BUP_SELFTEST_SIZE - BUP_WINDOWSIZE*5/2,
|
||||
BUP_SELFTEST_SIZE - BUP_WINDOWSIZE);
|
||||
sum2b = rollsum_sum(buf, 0, BUP_SELFTEST_SIZE - BUP_WINDOWSIZE);
|
||||
sum3a = rollsum_sum(buf, 0, BUP_WINDOWSIZE+3);
|
||||
sum3b = rollsum_sum(buf, 3, BUP_WINDOWSIZE+3);
|
||||
|
||||
fprintf(stderr, "sum1a = 0x%08x\n", sum1a);
|
||||
fprintf(stderr, "sum1b = 0x%08x\n", sum1b);
|
||||
fprintf(stderr, "sum2a = 0x%08x\n", sum2a);
|
||||
fprintf(stderr, "sum2b = 0x%08x\n", sum2b);
|
||||
fprintf(stderr, "sum3a = 0x%08x\n", sum3a);
|
||||
fprintf(stderr, "sum3b = 0x%08x\n", sum3b);
|
||||
|
||||
free(buf);
|
||||
return sum1a!=sum1b || sum2a!=sum2b || sum3a!=sum3b;
|
||||
}
|
||||
|
||||
#endif // !BUP_NO_SELFTEST
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Copyright 2011 Avery Pennarun. All rights reserved.
|
||||
*
|
||||
* (This license applies to bupsplit.c and bupsplit.h only.)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY AVERY PENNARUN ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __BUPSPLIT_H
|
||||
#define __BUPSPLIT_H
|
||||
|
||||
#define BUP_BLOBBITS (13)
|
||||
#define BUP_BLOBSIZE (1<<BUP_BLOBBITS)
|
||||
#define BUP_WINDOWBITS (7)
|
||||
#define BUP_WINDOWSIZE (1<<(BUP_WINDOWBITS-1))
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int bupsplit_find_ofs(const unsigned char *buf, int len, int *bits);
|
||||
int bupsplit_selftest(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __BUPSPLIT_H */
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
|
||||
*
|
||||
* Copyright (C) 2013 Colin Walters <walters@verbum.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "libgsystem.h"
|
||||
|
||||
#include "bupsplit.h"
|
||||
|
||||
#define BLOB_MAX (8192*4)
|
||||
#define BLOB_READ_SIZE (1024*1024)
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
GCancellable *cancellable = NULL;
|
||||
GError *local_error = NULL;
|
||||
GError **error = &local_error;
|
||||
gs_unref_object GFile *path = NULL;
|
||||
GBytes *bytes = NULL;
|
||||
|
||||
g_setenv ("GIO_USE_VFS", "local", TRUE);
|
||||
|
||||
if (argc > 1)
|
||||
{
|
||||
const guint8 *start;
|
||||
gsize len;
|
||||
|
||||
path = g_file_new_for_path (argv[1]);
|
||||
bytes = gs_file_map_readonly (path, cancellable, error);
|
||||
if (!bytes)
|
||||
goto out;
|
||||
|
||||
start = g_bytes_get_data (bytes, &len);
|
||||
while (TRUE)
|
||||
{
|
||||
int offset, bits;
|
||||
offset = bupsplit_find_ofs (start, MIN(G_MAXINT32, len), &bits);
|
||||
if (offset == 0)
|
||||
break;
|
||||
if (offset > BLOB_MAX)
|
||||
offset = BLOB_MAX;
|
||||
g_print ("%" G_GUINT64_FORMAT "\n", (guint64)offset);
|
||||
start += offset;
|
||||
len -= offset;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
bupsplit_selftest ();
|
||||
}
|
||||
|
||||
out:
|
||||
g_clear_pointer (&bytes, g_bytes_unref);
|
||||
if (local_error)
|
||||
{
|
||||
g_printerr ("%s\n", local_error->message);
|
||||
g_error_free (local_error);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
Loading…
Reference in New Issue