deltas: Implement rollsums
This does an rsync-style prepared delta basically. On my test data, it shaves ~6MB of uncompressed data. Not a huge amount, but I expect this to be more useful for things like binaries which embed data, etc.
This commit is contained in:
parent
345754a564
commit
9aa7e30b38
|
|
@ -51,6 +51,8 @@ libostree_1_la_SOURCES = \
|
|||
src/libostree/ostree-lzma-decompressor.h \
|
||||
src/libostree/bupsplit.h \
|
||||
src/libostree/bupsplit.c \
|
||||
src/libostree/ostree-rollsum.h \
|
||||
src/libostree/ostree-rollsum.c \
|
||||
src/libostree/ostree-varint.h \
|
||||
src/libostree/ostree-varint.c \
|
||||
src/libostree/ostree-linuxfsutil.h \
|
||||
|
|
|
|||
|
|
@ -87,12 +87,6 @@ INSTALL_DATA_HOOKS += install-gpg-data-hook
|
|||
echo 'Output=TAP' >> $@.tmp; \
|
||||
mv $@.tmp $@)
|
||||
|
||||
%.test: tests/%.js Makefile
|
||||
$(AM_V_GEN) (echo '[Test]' > $@.tmp; \
|
||||
echo 'Exec=env TESTDATADIR=$(pkglibexecdir)/installed-tests $(pkglibexecdir)/installed-tests/$(notdir $<)' >> $@.tmp; \
|
||||
echo 'Type=session' >> $@.tmp; \
|
||||
mv $@.tmp $@)
|
||||
|
||||
if BUILDOPT_GJS
|
||||
insttest_SCRIPTS += tests/test-core.js \
|
||||
tests/test-sizes.js \
|
||||
|
|
@ -109,7 +103,7 @@ check_PROGRAMS = tests/test-rollsum tests/test-varint tests/test-ot-unix-utils
|
|||
tests_test_ot_unix_utils_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS)
|
||||
tests_test_ot_unix_utils_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS)
|
||||
|
||||
tests_test_rollsum_SOURCES = src/libostree/bupsplit.c tests/test-rollsum.c
|
||||
tests_test_rollsum_SOURCES = src/libostree/bupsplit.c src/libostree/ostree-rollsum.c tests/test-rollsum.c
|
||||
tests_test_rollsum_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS)
|
||||
tests_test_rollsum_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS)
|
||||
|
||||
|
|
|
|||
|
|
@ -21,18 +21,16 @@
|
|||
#include "config.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <zlib.h>
|
||||
#include <gio/gunixoutputstream.h>
|
||||
|
||||
#include "ostree-core-private.h"
|
||||
#include "ostree-repo-private.h"
|
||||
#include "ostree-lzma-compressor.h"
|
||||
#include "ostree-repo-static-delta-private.h"
|
||||
#include "ostree-diff.h"
|
||||
#include "ostree-rollsum.h"
|
||||
#include "otutil.h"
|
||||
#include "ostree-varint.h"
|
||||
#include "bupsplit.h"
|
||||
|
||||
#define ROLLSUM_BLOB_MAX (8192*4)
|
||||
|
||||
typedef struct {
|
||||
guint64 uncompressed_size;
|
||||
|
|
@ -260,6 +258,36 @@ splice_stream_to_payload (OstreeStaticDeltaPartBuilder *current_part,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
write_content_mode_xattrs (OstreeRepo *repo,
|
||||
OstreeStaticDeltaPartBuilder *current_part,
|
||||
GFileInfo *content_finfo,
|
||||
GVariant *content_xattrs,
|
||||
gsize *out_mode_offset,
|
||||
gsize *out_xattr_offset)
|
||||
{
|
||||
guint32 uid =
|
||||
g_file_info_get_attribute_uint32 (content_finfo, "unix::uid");
|
||||
guint32 gid =
|
||||
g_file_info_get_attribute_uint32 (content_finfo, "unix::gid");
|
||||
guint32 mode =
|
||||
g_file_info_get_attribute_uint32 (content_finfo, "unix::mode");
|
||||
gs_unref_variant GVariant *modev
|
||||
= g_variant_ref_sink (g_variant_new ("(uuu)",
|
||||
GUINT32_TO_BE (uid),
|
||||
GUINT32_TO_BE (gid),
|
||||
GUINT32_TO_BE (mode)));
|
||||
|
||||
*out_mode_offset = write_unique_variant_chunk (current_part,
|
||||
current_part->mode_set,
|
||||
current_part->modes,
|
||||
modev);
|
||||
*out_xattr_offset = write_unique_variant_chunk (current_part,
|
||||
current_part->xattr_set,
|
||||
current_part->xattrs,
|
||||
content_xattrs);
|
||||
}
|
||||
|
||||
static gboolean
|
||||
process_one_object (OstreeRepo *repo,
|
||||
OstreeStaticDeltaBuilder *builder,
|
||||
|
|
@ -327,26 +355,12 @@ process_one_object (OstreeRepo *repo,
|
|||
else
|
||||
{
|
||||
gsize mode_offset, xattr_offset, content_offset;
|
||||
guint32 uid =
|
||||
g_file_info_get_attribute_uint32 (content_finfo, "unix::uid");
|
||||
guint32 gid =
|
||||
g_file_info_get_attribute_uint32 (content_finfo, "unix::gid");
|
||||
guint32 mode =
|
||||
g_file_info_get_attribute_uint32 (content_finfo, "unix::mode");
|
||||
gs_unref_variant GVariant *modev
|
||||
= g_variant_ref_sink (g_variant_new ("(uuu)",
|
||||
GUINT32_TO_BE (uid),
|
||||
GUINT32_TO_BE (gid),
|
||||
GUINT32_TO_BE (mode)));
|
||||
guint32 mode;
|
||||
|
||||
mode_offset = write_unique_variant_chunk (current_part,
|
||||
current_part->mode_set,
|
||||
current_part->modes,
|
||||
modev);
|
||||
xattr_offset = write_unique_variant_chunk (current_part,
|
||||
current_part->xattr_set,
|
||||
current_part->xattrs,
|
||||
content_xattrs);
|
||||
mode = g_file_info_get_attribute_uint32 (content_finfo, "unix::mode");
|
||||
|
||||
write_content_mode_xattrs (repo, current_part, content_finfo, content_xattrs,
|
||||
&mode_offset, &xattr_offset);
|
||||
|
||||
if (S_ISLNK (mode))
|
||||
{
|
||||
|
|
@ -381,106 +395,75 @@ process_one_object (OstreeRepo *repo,
|
|||
return ret;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GPtrArray *keys;
|
||||
GHashTable *values;
|
||||
} OrderedRollsums;
|
||||
|
||||
static void
|
||||
ordered_rollsums_free (OrderedRollsums *ohash)
|
||||
{
|
||||
g_ptr_array_unref (ohash->keys);
|
||||
g_hash_table_unref (ohash->values);
|
||||
g_free (ohash);
|
||||
}
|
||||
|
||||
static gboolean
|
||||
rollsum_chunks_crc32 (GInputStream *istream,
|
||||
OrderedRollsums **out_rollsums,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
{
|
||||
gboolean ret = FALSE;
|
||||
gsize start = 0;
|
||||
gboolean rollsum_end = FALSE;
|
||||
OrderedRollsums *ret_rollsums = g_new0 (OrderedRollsums, 1);
|
||||
gs_unref_object GBufferedInputStream *bufinput =
|
||||
(GBufferedInputStream*) g_buffered_input_stream_new_sized (istream, ROLLSUM_BLOB_MAX);
|
||||
|
||||
ret_rollsums->keys = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
|
||||
ret_rollsums->values = g_hash_table_new (NULL, NULL);
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
gssize bytes_read;
|
||||
const guint8 *buf;
|
||||
gsize bufsize;
|
||||
int offset, bits;
|
||||
|
||||
bytes_read = g_buffered_input_stream_fill (bufinput, -1, cancellable, error);
|
||||
if (bytes_read == -1)
|
||||
goto out;
|
||||
if (bytes_read == 0)
|
||||
break;
|
||||
|
||||
buf = g_buffered_input_stream_peek_buffer (bufinput, &bufsize);
|
||||
|
||||
if (!rollsum_end)
|
||||
{
|
||||
offset = bupsplit_find_ofs (buf, MIN(G_MAXINT32, bufsize), &bits);
|
||||
if (offset == 0)
|
||||
{
|
||||
rollsum_end = TRUE;
|
||||
offset = MIN(ROLLSUM_BLOB_MAX, bufsize);
|
||||
}
|
||||
else if (offset > ROLLSUM_BLOB_MAX)
|
||||
offset = ROLLSUM_BLOB_MAX;
|
||||
}
|
||||
else
|
||||
offset = MIN(ROLLSUM_BLOB_MAX, bufsize);
|
||||
|
||||
if (!g_input_stream_skip ((GInputStream*)bufinput, bufsize, cancellable, error))
|
||||
goto out;
|
||||
|
||||
/* Use zlib's crc32 */
|
||||
{ guint32 crc = crc32 (0L, NULL, 0);
|
||||
GVariant *val;
|
||||
|
||||
crc = crc32 (crc, buf, offset);
|
||||
|
||||
val = g_variant_ref_sink (g_variant_new ("(utt)", crc, (guint64) start, (guint64)offset));
|
||||
g_ptr_array_add (ret_rollsums->keys, val);
|
||||
g_hash_table_insert (ret_rollsums->values, GUINT_TO_POINTER (crc), val);
|
||||
}
|
||||
|
||||
start += offset;
|
||||
}
|
||||
|
||||
ret = TRUE;
|
||||
gs_transfer_out_value (out_rollsums, &ret_rollsums);
|
||||
out:
|
||||
if (ret_rollsums)
|
||||
ordered_rollsums_free (ret_rollsums);
|
||||
return ret;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
char *from_checksum;
|
||||
OrderedRollsums *from_rollsums;
|
||||
OrderedRollsums *to_rollsums;
|
||||
guint match_ratio;
|
||||
guint64 match_size;
|
||||
OstreeRollsumMatches *matches;
|
||||
GBytes *tmp_to;
|
||||
} ContentRollsum;
|
||||
|
||||
static void
|
||||
content_rollsums_free (ContentRollsum *rollsum)
|
||||
{
|
||||
g_free (rollsum->from_checksum);
|
||||
ordered_rollsums_free (rollsum->from_rollsums);
|
||||
ordered_rollsums_free (rollsum->to_rollsums);
|
||||
_ostree_rollsum_matches_free (rollsum->matches);
|
||||
g_bytes_unref (rollsum->tmp_to);
|
||||
g_free (rollsum);
|
||||
}
|
||||
|
||||
/* Load a content object, uncompressing it to an unlinked tmpfile
|
||||
that's mmap()'d and suitable for seeking.
|
||||
*/
|
||||
static gboolean
|
||||
get_unpacked_unlinked_content (OstreeRepo *repo,
|
||||
const char *checksum,
|
||||
GBytes **out_content,
|
||||
GFileInfo **out_finfo,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
{
|
||||
gboolean ret = FALSE;
|
||||
gs_free char *tmpname = g_strdup ("tmpostree-deltaobj-XXXXXX");
|
||||
gs_fd_close int fd = -1;
|
||||
gs_unref_bytes GBytes *ret_content = NULL;
|
||||
gs_unref_object GInputStream *istream = NULL;
|
||||
gs_unref_object GFileInfo *ret_finfo = NULL;
|
||||
gs_unref_object GOutputStream *out = NULL;
|
||||
|
||||
fd = g_mkstemp (tmpname);
|
||||
if (fd == -1)
|
||||
{
|
||||
gs_set_error_from_errno (error, errno);
|
||||
goto out;
|
||||
}
|
||||
/* Doesn't need a name */
|
||||
(void) unlink (tmpname);
|
||||
|
||||
if (!ostree_repo_load_file (repo, checksum, &istream, &ret_finfo, NULL,
|
||||
cancellable, error))
|
||||
goto out;
|
||||
|
||||
if (g_file_info_get_file_type (ret_finfo) != G_FILE_TYPE_REGULAR)
|
||||
{
|
||||
ret = TRUE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out = g_unix_output_stream_new (fd, FALSE);
|
||||
if (g_output_stream_splice (out, istream, G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET,
|
||||
cancellable, error) < 0)
|
||||
goto out;
|
||||
|
||||
{ GMappedFile *mfile = g_mapped_file_new_from_fd (fd, FALSE, error);
|
||||
ret_content = g_mapped_file_get_bytes (mfile);
|
||||
g_mapped_file_unref (mfile);
|
||||
}
|
||||
|
||||
ret = TRUE;
|
||||
gs_transfer_out_value (out_content, &ret_content);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
try_content_rollsum (OstreeRepo *repo,
|
||||
const char *from,
|
||||
|
|
@ -490,62 +473,37 @@ try_content_rollsum (OstreeRepo *repo,
|
|||
GError **error)
|
||||
{
|
||||
gboolean ret = FALSE;
|
||||
OrderedRollsums *from_rollsum = NULL;
|
||||
OrderedRollsums *to_rollsum = NULL;
|
||||
gs_unref_object GInputStream *from_istream = NULL;
|
||||
gs_unref_hashtable GHashTable *from_rollsum = NULL;
|
||||
gs_unref_hashtable GHashTable *to_rollsum = NULL;
|
||||
gs_unref_bytes GBytes *tmp_from = NULL;
|
||||
gs_unref_bytes GBytes *tmp_to = NULL;
|
||||
gs_unref_object GFileInfo *from_finfo = NULL;
|
||||
gs_unref_object GInputStream *to_istream = NULL;
|
||||
gs_unref_object GFileInfo *to_finfo = NULL;
|
||||
OstreeRollsumMatches *matches;
|
||||
ContentRollsum *ret_rollsum = NULL;
|
||||
guint total = 0;
|
||||
guint matches = 0;
|
||||
guint match_ratio = 0;
|
||||
guint64 match_size = 0;
|
||||
gpointer hkey, hvalue;
|
||||
GHashTableIter hiter;
|
||||
|
||||
*out_rollsum = NULL;
|
||||
|
||||
if (!ostree_repo_load_file (repo, from, &from_istream, &from_finfo, NULL,
|
||||
/* Load the content objects, splice them to uncompressed temporary files that
|
||||
* we can just mmap() and seek around in conveniently.
|
||||
*/
|
||||
if (!get_unpacked_unlinked_content (repo, from, &tmp_from, &from_finfo,
|
||||
cancellable, error))
|
||||
goto out;
|
||||
if (!ostree_repo_load_file (repo, to, &to_istream, &to_finfo, NULL,
|
||||
if (!get_unpacked_unlinked_content (repo, to, &tmp_to, &to_finfo,
|
||||
cancellable, error))
|
||||
goto out;
|
||||
|
||||
/* Only try to rollsum regular files obviously */
|
||||
if (!(g_file_info_get_file_type (from_finfo) == G_FILE_TYPE_REGULAR
|
||||
&& g_file_info_get_file_type (to_finfo) == G_FILE_TYPE_REGULAR))
|
||||
if (!(tmp_from && tmp_to))
|
||||
{
|
||||
ret = TRUE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
g_assert (from_istream && to_istream);
|
||||
matches = _ostree_compute_rollsum_matches (tmp_from, tmp_to);
|
||||
|
||||
if (!rollsum_chunks_crc32 (from_istream, &from_rollsum, cancellable, error))
|
||||
goto out;
|
||||
if (!rollsum_chunks_crc32 (to_istream, &to_rollsum, cancellable, error))
|
||||
goto out;
|
||||
|
||||
g_clear_object (&from_istream);
|
||||
g_clear_object (&to_istream);
|
||||
|
||||
g_hash_table_iter_init (&hiter, to_rollsum->values);
|
||||
while (g_hash_table_iter_next (&hiter, &hkey, &hvalue))
|
||||
{
|
||||
GVariant *chunk = hvalue;
|
||||
if (g_hash_table_contains (from_rollsum->values, hkey))
|
||||
{
|
||||
guint64 offset;
|
||||
g_variant_get (chunk, "(utt)", NULL, NULL, &offset);
|
||||
matches++;
|
||||
match_size += offset;
|
||||
}
|
||||
total++;
|
||||
}
|
||||
|
||||
match_ratio = (matches*100)/total;
|
||||
{ guint match_ratio = (matches->bufmatches*100)/matches->total;
|
||||
|
||||
/* Only proceed if the file contains (arbitrary) more than 25% of
|
||||
* the previous chunks.
|
||||
|
|
@ -555,20 +513,163 @@ try_content_rollsum (OstreeRepo *repo,
|
|||
ret = TRUE;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
g_printerr ("rollsum for %s; crcs=%u bufs=%u total=%u matchsize=%llu\n",
|
||||
to, matches->crcmatches,
|
||||
matches->bufmatches,
|
||||
matches->total, (unsigned long long)matches->match_size);
|
||||
|
||||
ret_rollsum = g_new0 (ContentRollsum, 1);
|
||||
ret_rollsum->match_ratio = match_ratio;
|
||||
ret_rollsum->match_size = match_size;
|
||||
ret_rollsum->from_checksum = g_strdup (from);
|
||||
ret_rollsum->from_rollsums = from_rollsum; from_rollsum = NULL;
|
||||
ret_rollsum->to_rollsums = to_rollsum; to_rollsum = NULL;
|
||||
ret_rollsum->matches = matches; matches = NULL;
|
||||
ret_rollsum->tmp_to = tmp_to; tmp_to = NULL;
|
||||
|
||||
ret = TRUE;
|
||||
gs_transfer_out_value (out_rollsum, &ret_rollsum);
|
||||
out:
|
||||
if (matches)
|
||||
_ostree_rollsum_matches_free (matches);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
append_payload_chunk_and_write (OstreeStaticDeltaPartBuilder *current_part,
|
||||
const guint8 *buf,
|
||||
guint64 offset)
|
||||
{
|
||||
guint64 payload_start;
|
||||
|
||||
payload_start = current_part->payload->len;
|
||||
g_string_append_len (current_part->payload, (char*)buf, offset);
|
||||
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_WRITE);
|
||||
_ostree_write_varuint64 (current_part->operations, offset);
|
||||
_ostree_write_varuint64 (current_part->operations, payload_start);
|
||||
}
|
||||
|
||||
static gboolean
|
||||
process_one_rollsum (OstreeRepo *repo,
|
||||
OstreeStaticDeltaBuilder *builder,
|
||||
OstreeStaticDeltaPartBuilder **current_part_val,
|
||||
const char *to_checksum,
|
||||
ContentRollsum *rollsum,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
{
|
||||
gboolean ret = FALSE;
|
||||
guint64 content_size;
|
||||
gs_unref_object GInputStream *content_stream = NULL;
|
||||
gs_unref_object GFileInfo *content_finfo = NULL;
|
||||
gs_unref_variant GVariant *content_xattrs = NULL;
|
||||
OstreeStaticDeltaPartBuilder *current_part = *current_part_val;
|
||||
const guint8 *tmp_to_buf;
|
||||
gsize tmp_to_len;
|
||||
|
||||
/* Check to see if this delta has gone over maximum size */
|
||||
if (current_part->objects->len > 0 &&
|
||||
current_part->payload->len > builder->max_chunk_size_bytes)
|
||||
{
|
||||
*current_part_val = current_part = allocate_part (builder);
|
||||
}
|
||||
|
||||
tmp_to_buf = g_bytes_get_data (rollsum->tmp_to, &tmp_to_len);
|
||||
|
||||
if (!ostree_repo_load_file (repo, to_checksum, &content_stream,
|
||||
&content_finfo, &content_xattrs,
|
||||
cancellable, error))
|
||||
goto out;
|
||||
content_size = g_file_info_get_size (content_finfo);
|
||||
g_assert_cmpint (tmp_to_len, ==, content_size);
|
||||
|
||||
current_part->uncompressed_size += content_size;
|
||||
|
||||
g_ptr_array_add (current_part->objects, ostree_object_name_serialize (to_checksum, OSTREE_OBJECT_TYPE_FILE));
|
||||
|
||||
{ gsize mode_offset, xattr_offset, from_csum_offset;
|
||||
gboolean reading_payload = TRUE;
|
||||
guchar source_csum[32];
|
||||
guint i;
|
||||
|
||||
write_content_mode_xattrs (repo, current_part, content_finfo, content_xattrs,
|
||||
&mode_offset, &xattr_offset);
|
||||
|
||||
/* Write the origin checksum */
|
||||
ostree_checksum_inplace_to_bytes (rollsum->from_checksum, source_csum);
|
||||
from_csum_offset = current_part->payload->len;
|
||||
g_string_append_len (current_part->payload, (char*)source_csum, sizeof (source_csum));
|
||||
|
||||
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_OPEN);
|
||||
_ostree_write_varuint64 (current_part->operations, mode_offset);
|
||||
_ostree_write_varuint64 (current_part->operations, xattr_offset);
|
||||
_ostree_write_varuint64 (current_part->operations, content_size);
|
||||
|
||||
{ guint64 writing_offset = 0;
|
||||
guint64 offset = 0, to_start = 0, from_start = 0;
|
||||
GPtrArray *matchlist = rollsum->matches->matches;
|
||||
|
||||
g_assert (matchlist->len > 0);
|
||||
for (i = 0; i < matchlist->len; i++)
|
||||
{
|
||||
GVariant *match = matchlist->pdata[i];
|
||||
guint32 crc;
|
||||
guint64 prefix;
|
||||
|
||||
g_variant_get (match, "(uttt)", &crc, &offset, &to_start, &from_start);
|
||||
|
||||
prefix = to_start - writing_offset;
|
||||
|
||||
if (prefix > 0)
|
||||
{
|
||||
if (!reading_payload)
|
||||
{
|
||||
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);
|
||||
reading_payload = TRUE;
|
||||
}
|
||||
|
||||
g_assert_cmpint (writing_offset + prefix, <=, tmp_to_len);
|
||||
append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, prefix);
|
||||
writing_offset += prefix;
|
||||
}
|
||||
|
||||
if (reading_payload)
|
||||
{
|
||||
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE);
|
||||
_ostree_write_varuint64 (current_part->operations, from_csum_offset);
|
||||
reading_payload = FALSE;
|
||||
}
|
||||
|
||||
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_WRITE);
|
||||
_ostree_write_varuint64 (current_part->operations, offset);
|
||||
_ostree_write_varuint64 (current_part->operations, from_start);
|
||||
writing_offset += offset;
|
||||
}
|
||||
|
||||
if (!reading_payload)
|
||||
{
|
||||
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);
|
||||
reading_payload = TRUE;
|
||||
}
|
||||
|
||||
{ guint64 remainder = tmp_to_len - writing_offset;
|
||||
if (remainder > 0)
|
||||
append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, remainder);
|
||||
writing_offset += remainder;
|
||||
g_assert_cmpint (writing_offset, ==, tmp_to_len);
|
||||
}
|
||||
|
||||
g_assert_cmpint (writing_offset, ==, content_size);
|
||||
}
|
||||
|
||||
|
||||
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_CLOSE);
|
||||
}
|
||||
|
||||
ret = TRUE;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static gboolean
|
||||
generate_delta_lowlatency (OstreeRepo *repo,
|
||||
const char *from,
|
||||
|
|
@ -694,46 +795,13 @@ generate_delta_lowlatency (OstreeRepo *repo,
|
|||
continue;
|
||||
|
||||
g_hash_table_insert (rollsum_optimized_content_objects, g_strdup (to_checksum), rollsum);
|
||||
builder->rollsum_size += rollsum->match_size;
|
||||
builder->rollsum_size += rollsum->matches->match_size;
|
||||
}
|
||||
|
||||
g_printerr ("rollsum for %u/%u modified\n",
|
||||
g_hash_table_size (rollsum_optimized_content_objects),
|
||||
g_hash_table_size (modified_content_objects));
|
||||
|
||||
/* Scan for large objects, so we can fall back to plain HTTP-based
|
||||
* fetch. In the future this should come after an rsync-style
|
||||
* rolling delta check for modified files.
|
||||
*/
|
||||
g_hash_table_iter_init (&hashiter, new_reachable_content);
|
||||
while (g_hash_table_iter_next (&hashiter, &key, &value))
|
||||
{
|
||||
GVariant *serialized_key = key;
|
||||
const char *checksum;
|
||||
OstreeObjectType objtype;
|
||||
guint64 uncompressed_size;
|
||||
gboolean fallback = FALSE;
|
||||
|
||||
ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
|
||||
|
||||
if (!ostree_repo_load_object_stream (repo, objtype, checksum,
|
||||
NULL, &uncompressed_size,
|
||||
cancellable, error))
|
||||
goto out;
|
||||
if (uncompressed_size > builder->min_fallback_size_bytes)
|
||||
fallback = TRUE;
|
||||
|
||||
if (fallback)
|
||||
{
|
||||
gs_free char *size = g_format_size (uncompressed_size);
|
||||
g_printerr ("fallback for %s (%s)\n",
|
||||
ostree_object_to_string (checksum, objtype), size);
|
||||
g_ptr_array_add (builder->fallback_objects,
|
||||
g_variant_ref (serialized_key));
|
||||
g_hash_table_iter_remove (&hashiter);
|
||||
}
|
||||
}
|
||||
|
||||
current_part = allocate_part (builder);
|
||||
|
||||
/* Pack the metadata first */
|
||||
|
|
@ -752,7 +820,57 @@ generate_delta_lowlatency (OstreeRepo *repo,
|
|||
goto out;
|
||||
}
|
||||
|
||||
/* Now content */
|
||||
/* Now do rollsummed objects */
|
||||
|
||||
g_hash_table_iter_init (&hashiter, rollsum_optimized_content_objects);
|
||||
while (g_hash_table_iter_next (&hashiter, &key, &value))
|
||||
{
|
||||
const char *checksum = key;
|
||||
ContentRollsum *rollsum = value;
|
||||
|
||||
if (!process_one_rollsum (repo, builder, ¤t_part,
|
||||
checksum, rollsum,
|
||||
cancellable, error))
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Scan for large objects, so we can fall back to plain HTTP-based
|
||||
* fetch.
|
||||
*/
|
||||
g_hash_table_iter_init (&hashiter, new_reachable_content);
|
||||
while (g_hash_table_iter_next (&hashiter, &key, &value))
|
||||
{
|
||||
GVariant *serialized_key = key;
|
||||
const char *checksum;
|
||||
OstreeObjectType objtype;
|
||||
guint64 uncompressed_size;
|
||||
gboolean fallback = FALSE;
|
||||
|
||||
ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
|
||||
|
||||
/* Skip content objects we rollsum'd */
|
||||
if (g_hash_table_contains (rollsum_optimized_content_objects, checksum))
|
||||
continue;
|
||||
|
||||
if (!ostree_repo_load_object_stream (repo, objtype, checksum,
|
||||
NULL, &uncompressed_size,
|
||||
cancellable, error))
|
||||
goto out;
|
||||
if (uncompressed_size > builder->min_fallback_size_bytes)
|
||||
fallback = TRUE;
|
||||
|
||||
if (fallback)
|
||||
{
|
||||
gs_free char *size = g_format_size (uncompressed_size);
|
||||
g_printerr ("fallback for %s (%s)\n",
|
||||
ostree_object_to_string (checksum, objtype), size);
|
||||
g_ptr_array_add (builder->fallback_objects,
|
||||
g_variant_ref (serialized_key));
|
||||
g_hash_table_iter_remove (&hashiter);
|
||||
}
|
||||
}
|
||||
|
||||
/* Now non-rollsummed content */
|
||||
g_hash_table_iter_init (&hashiter, new_reachable_content);
|
||||
while (g_hash_table_iter_next (&hashiter, &key, &value))
|
||||
{
|
||||
|
|
@ -762,6 +880,10 @@ generate_delta_lowlatency (OstreeRepo *repo,
|
|||
|
||||
ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
|
||||
|
||||
/* Skip content objects we rollsum'd */
|
||||
if (g_hash_table_contains (rollsum_optimized_content_objects, checksum))
|
||||
continue;
|
||||
|
||||
if (!process_one_object (repo, builder, ¤t_part,
|
||||
checksum, objtype,
|
||||
cancellable, error))
|
||||
|
|
|
|||
|
|
@ -135,7 +135,8 @@ typedef enum {
|
|||
OSTREE_STATIC_DELTA_OP_OPEN_SPLICE_AND_CLOSE = 'S',
|
||||
OSTREE_STATIC_DELTA_OP_OPEN = 'o',
|
||||
OSTREE_STATIC_DELTA_OP_WRITE = 'w',
|
||||
OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE = 'R',
|
||||
OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE = 'r',
|
||||
OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE = 'R',
|
||||
OSTREE_STATIC_DELTA_OP_CLOSE = 'c'
|
||||
} OstreeStaticDeltaOpCode;
|
||||
|
||||
|
|
|
|||
|
|
@ -98,6 +98,7 @@ OPPROTO(open_splice_and_close)
|
|||
OPPROTO(open)
|
||||
OPPROTO(write)
|
||||
OPPROTO(set_read_source)
|
||||
OPPROTO(unset_read_source)
|
||||
OPPROTO(close)
|
||||
#undef OPPROTO
|
||||
|
||||
|
|
@ -250,6 +251,10 @@ _ostree_static_delta_part_execute_raw (OstreeRepo *repo,
|
|||
if (!dispatch_set_read_source (repo, state, cancellable, error))
|
||||
goto out;
|
||||
break;
|
||||
case OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE:
|
||||
if (!dispatch_unset_read_source (repo, state, cancellable, error))
|
||||
goto out;
|
||||
break;
|
||||
case OSTREE_STATIC_DELTA_OP_CLOSE:
|
||||
if (!dispatch_close (repo, state, cancellable, error))
|
||||
goto out;
|
||||
|
|
@ -519,7 +524,6 @@ dispatch_open_splice_and_close (OstreeRepo *repo,
|
|||
{
|
||||
guint64 content_offset;
|
||||
guint64 objlen;
|
||||
guint64 content_size;
|
||||
gsize bytes_written;
|
||||
gs_unref_object GInputStream *object_input = NULL;
|
||||
gs_unref_object GInputStream *memin = NULL;
|
||||
|
|
@ -527,11 +531,11 @@ dispatch_open_splice_and_close (OstreeRepo *repo,
|
|||
if (!do_content_open_generic (repo, state, cancellable, error))
|
||||
goto out;
|
||||
|
||||
if (!read_varuint64 (state, &content_size, error))
|
||||
if (!read_varuint64 (state, &state->content_size, error))
|
||||
goto out;
|
||||
if (!read_varuint64 (state, &content_offset, error))
|
||||
goto out;
|
||||
if (!validate_ofs (state, content_offset, content_size, error))
|
||||
if (!validate_ofs (state, content_offset, state->content_size, error))
|
||||
goto out;
|
||||
|
||||
/* Fast path for regular files to bare repositories */
|
||||
|
|
@ -551,7 +555,7 @@ dispatch_open_splice_and_close (OstreeRepo *repo,
|
|||
{
|
||||
if (!g_output_stream_write_all (state->content_out,
|
||||
state->payload_data + content_offset,
|
||||
content_size,
|
||||
state->content_size,
|
||||
&bytes_written,
|
||||
cancellable, error))
|
||||
goto out;
|
||||
|
|
@ -567,14 +571,14 @@ dispatch_open_splice_and_close (OstreeRepo *repo,
|
|||
if (S_ISLNK (state->mode))
|
||||
{
|
||||
gs_free char *nulterminated_target =
|
||||
g_strndup ((char*)state->payload_data + content_offset, content_size);
|
||||
g_strndup ((char*)state->payload_data + content_offset, state->content_size);
|
||||
g_file_info_set_symlink_target (finfo, nulterminated_target);
|
||||
}
|
||||
else
|
||||
{
|
||||
g_assert (S_ISREG (state->mode));
|
||||
g_file_info_set_size (finfo, content_size);
|
||||
memin = g_memory_input_stream_new_from_data (state->payload_data + content_offset, content_size, NULL);
|
||||
g_file_info_set_size (finfo, state->content_size);
|
||||
memin = g_memory_input_stream_new_from_data (state->payload_data + content_offset, state->content_size, NULL);
|
||||
}
|
||||
|
||||
if (!ostree_raw_file_to_content_stream (memin, finfo, state->xattrs,
|
||||
|
|
@ -621,6 +625,9 @@ dispatch_open (OstreeRepo *repo,
|
|||
if (!do_content_open_generic (repo, state, cancellable, error))
|
||||
goto out;
|
||||
|
||||
if (!read_varuint64 (state, &state->content_size, error))
|
||||
goto out;
|
||||
|
||||
if (!_ostree_repo_open_trusted_content_bare (repo, state->checksum,
|
||||
state->content_size,
|
||||
&state->barecommitstate,
|
||||
|
|
@ -651,8 +658,6 @@ dispatch_write (OstreeRepo *repo,
|
|||
goto out;
|
||||
if (!read_varuint64 (state, &content_offset, error))
|
||||
goto out;
|
||||
if (!validate_ofs (state, content_offset, content_size, error))
|
||||
goto out;
|
||||
|
||||
if (!state->have_obj)
|
||||
{
|
||||
|
|
@ -695,6 +700,9 @@ dispatch_write (OstreeRepo *repo,
|
|||
}
|
||||
else
|
||||
{
|
||||
if (!validate_ofs (state, content_offset, content_size, error))
|
||||
goto out;
|
||||
|
||||
if (!g_output_stream_write_all (state->content_out,
|
||||
state->payload_data + content_offset,
|
||||
content_size,
|
||||
|
|
@ -745,6 +753,29 @@ dispatch_set_read_source (OstreeRepo *repo,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
dispatch_unset_read_source (OstreeRepo *repo,
|
||||
StaticDeltaExecutionState *state,
|
||||
GCancellable *cancellable,
|
||||
GError **error)
|
||||
{
|
||||
gboolean ret = FALSE;
|
||||
|
||||
if (state->read_source_fd)
|
||||
{
|
||||
(void) close (state->read_source_fd);
|
||||
state->read_source_fd = -1;
|
||||
}
|
||||
|
||||
g_clear_pointer (&state->read_source_object, g_free);
|
||||
|
||||
ret = TRUE;
|
||||
/* out: */
|
||||
if (!ret)
|
||||
g_prefix_error (error, "opcode unset-read-source: ");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
dispatch_close (OstreeRepo *repo,
|
||||
StaticDeltaExecutionState *state,
|
||||
|
|
@ -765,14 +796,10 @@ dispatch_close (OstreeRepo *repo,
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (state->read_source_fd)
|
||||
{
|
||||
(void) close (state->read_source_fd);
|
||||
state->read_source_fd = -1;
|
||||
}
|
||||
if (!dispatch_unset_read_source (repo, state, cancellable, error))
|
||||
goto out;
|
||||
|
||||
g_clear_pointer (&state->xattrs, g_variant_unref);
|
||||
g_clear_pointer (&state->read_source_object, g_free);
|
||||
g_clear_object (&state->content_out);
|
||||
|
||||
state->checksum_index++;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,201 @@
|
|||
/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
|
||||
*
|
||||
* Copyright (C) 2015 Colin Walters <walters@verbum.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#include "ostree-rollsum.h"
|
||||
#include "libgsystem.h"
|
||||
#include "bupsplit.h"
|
||||
|
||||
#define ROLLSUM_BLOB_MAX (8192*4)
|
||||
|
||||
static GHashTable *
|
||||
rollsum_chunks_crc32 (GBytes *bytes)
|
||||
{
|
||||
gsize start = 0;
|
||||
gboolean rollsum_end = FALSE;
|
||||
GHashTable *ret_rollsums = NULL;
|
||||
const guint8 *buf;
|
||||
gsize buflen;
|
||||
gsize remaining;
|
||||
|
||||
ret_rollsums = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)g_ptr_array_unref);
|
||||
|
||||
buf = g_bytes_get_data (bytes, &buflen);
|
||||
|
||||
remaining = buflen;
|
||||
while (remaining > 0)
|
||||
{
|
||||
int offset, bits;
|
||||
|
||||
if (!rollsum_end)
|
||||
{
|
||||
offset = bupsplit_find_ofs (buf + start, MIN(G_MAXINT32, remaining), &bits);
|
||||
if (offset == 0)
|
||||
{
|
||||
rollsum_end = TRUE;
|
||||
offset = MIN(ROLLSUM_BLOB_MAX, remaining);
|
||||
}
|
||||
else if (offset > ROLLSUM_BLOB_MAX)
|
||||
offset = ROLLSUM_BLOB_MAX;
|
||||
}
|
||||
else
|
||||
offset = MIN(ROLLSUM_BLOB_MAX, remaining);
|
||||
|
||||
/* Use zlib's crc32 */
|
||||
{ guint32 crc = crc32 (0L, NULL, 0);
|
||||
GVariant *val;
|
||||
GPtrArray *matches;
|
||||
|
||||
crc = crc32 (crc, buf, offset);
|
||||
|
||||
val = g_variant_ref_sink (g_variant_new ("(utt)", crc, (guint64) start, (guint64)offset));
|
||||
matches = g_hash_table_lookup (ret_rollsums, GUINT_TO_POINTER (crc));
|
||||
if (!matches)
|
||||
{
|
||||
matches = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
|
||||
g_hash_table_insert (ret_rollsums, GUINT_TO_POINTER (crc), matches);
|
||||
}
|
||||
g_ptr_array_add (matches, val);
|
||||
}
|
||||
|
||||
start += offset;
|
||||
remaining -= offset;
|
||||
}
|
||||
|
||||
return ret_rollsums;
|
||||
}
|
||||
|
||||
static gint
|
||||
compare_matches (const void *app,
|
||||
const void *bpp)
|
||||
{
|
||||
GVariant **avpp = (GVariant**)app;
|
||||
GVariant *a = *avpp;
|
||||
GVariant **bvpp = (GVariant**)bpp;
|
||||
GVariant *b = *bvpp;
|
||||
guint64 a_start, b_start;
|
||||
|
||||
g_variant_get_child (a, 2, "t", &a_start);
|
||||
g_variant_get_child (b, 2, "t", &b_start);
|
||||
|
||||
g_assert_cmpint (a_start, !=, b_start);
|
||||
|
||||
if (a_start < b_start)
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
OstreeRollsumMatches *
|
||||
_ostree_compute_rollsum_matches (GBytes *from,
|
||||
GBytes *to)
|
||||
{
|
||||
OstreeRollsumMatches *ret_rollsum = NULL;
|
||||
gs_unref_hashtable GHashTable *from_rollsum = NULL;
|
||||
gs_unref_hashtable GHashTable *to_rollsum = NULL;
|
||||
gs_unref_ptrarray GPtrArray *matches = NULL;
|
||||
const guint8 *from_buf;
|
||||
gsize from_len;
|
||||
const guint8 *to_buf;
|
||||
gsize to_len;
|
||||
gpointer hkey, hvalue;
|
||||
GHashTableIter hiter;
|
||||
|
||||
ret_rollsum = g_new0 (OstreeRollsumMatches, 1);
|
||||
|
||||
matches = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
|
||||
|
||||
from_buf = g_bytes_get_data (from, &from_len);
|
||||
to_buf = g_bytes_get_data (to, &to_len);
|
||||
|
||||
from_rollsum = rollsum_chunks_crc32 (from);
|
||||
to_rollsum = rollsum_chunks_crc32 (to);
|
||||
|
||||
g_hash_table_iter_init (&hiter, to_rollsum);
|
||||
while (g_hash_table_iter_next (&hiter, &hkey, &hvalue))
|
||||
{
|
||||
GPtrArray *to_chunks = hvalue;
|
||||
GPtrArray *from_chunks;
|
||||
|
||||
from_chunks = g_hash_table_lookup (from_rollsum, hkey);
|
||||
if (from_chunks != NULL)
|
||||
{
|
||||
guint i;
|
||||
|
||||
ret_rollsum->crcmatches++;
|
||||
|
||||
for (i = 0; i < to_chunks->len; i++)
|
||||
{
|
||||
GVariant *to_chunk = to_chunks->pdata[i];
|
||||
guint64 to_start, to_offset;
|
||||
guint32 tocrc;
|
||||
guint j;
|
||||
|
||||
g_variant_get (to_chunk, "(utt)", &tocrc, &to_start, &to_offset);
|
||||
|
||||
for (j = 0; j < from_chunks->len; j++)
|
||||
{
|
||||
GVariant *from_chunk = from_chunks->pdata[j];
|
||||
guint32 fromcrc;
|
||||
guint64 from_start, from_offset;
|
||||
|
||||
g_variant_get (from_chunk, "(utt)", &fromcrc, &from_start, &from_offset);
|
||||
|
||||
g_assert (fromcrc == tocrc);
|
||||
g_assert (to_offset == from_offset);
|
||||
|
||||
/* Rsync uses a cryptographic checksum, but let's be
|
||||
* very conservative here and just memcmp.
|
||||
*/
|
||||
if (memcmp (from_buf + from_start, to_buf + to_start, to_offset) == 0)
|
||||
{
|
||||
GVariant *match = g_variant_new ("(uttt)", fromcrc, to_offset, to_start, from_start);
|
||||
ret_rollsum->bufmatches++;
|
||||
ret_rollsum->match_size += to_offset;
|
||||
g_ptr_array_add (matches, g_variant_ref_sink (match));
|
||||
break; /* Don't need any more matches */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret_rollsum->total += to_chunks->len;
|
||||
}
|
||||
|
||||
g_ptr_array_sort (matches, compare_matches);
|
||||
|
||||
ret_rollsum->from_rollsums = from_rollsum; from_rollsum = NULL;
|
||||
ret_rollsum->to_rollsums = to_rollsum; to_rollsum = NULL;
|
||||
ret_rollsum->matches = matches; matches = NULL;
|
||||
|
||||
return ret_rollsum;
|
||||
}
|
||||
|
||||
void
|
||||
_ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum)
|
||||
{
|
||||
g_hash_table_unref (rollsum->to_rollsums);
|
||||
g_hash_table_unref (rollsum->from_rollsums);
|
||||
g_ptr_array_unref (rollsum->matches);
|
||||
g_free (rollsum);
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
|
||||
*
|
||||
* Copyright (C) 2015 Colin Walters <walters@verbum.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gio/gio.h>
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
typedef struct {
|
||||
GHashTable *from_rollsums;
|
||||
GHashTable *to_rollsums;
|
||||
guint crcmatches;
|
||||
guint bufmatches;
|
||||
guint total;
|
||||
guint64 match_size;
|
||||
GPtrArray *matches;
|
||||
} OstreeRollsumMatches;
|
||||
|
||||
OstreeRollsumMatches *
|
||||
_ostree_compute_rollsum_matches (GBytes *from,
|
||||
GBytes *to);
|
||||
|
||||
void _ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum);
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
|
|
@ -20,138 +20,51 @@
|
|||
|
||||
#include "config.h"
|
||||
|
||||
#include "ostree-rollsum.h"
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libgsystem.h"
|
||||
|
||||
#include "bupsplit.h"
|
||||
|
||||
#define BLOB_MAX (8192*4)
|
||||
|
||||
static GPtrArray *
|
||||
rollsum_checksums_for_data (GBytes *bytes)
|
||||
{
|
||||
const guint8 *start;
|
||||
gsize len;
|
||||
gboolean rollsum_end = FALSE;
|
||||
GPtrArray *ret = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
|
||||
|
||||
start = g_bytes_get_data (bytes, &len);
|
||||
while (len > 0)
|
||||
{
|
||||
int offset, bits;
|
||||
if (!rollsum_end)
|
||||
{
|
||||
offset = bupsplit_find_ofs (start, MIN(G_MAXINT32, len), &bits);
|
||||
if (offset == 0)
|
||||
{
|
||||
rollsum_end = TRUE;
|
||||
offset = MIN(BLOB_MAX, len);
|
||||
}
|
||||
else if (offset > BLOB_MAX)
|
||||
offset = BLOB_MAX;
|
||||
}
|
||||
else
|
||||
offset = MIN(BLOB_MAX, len);
|
||||
|
||||
{
|
||||
gs_free char *blobcsum =
|
||||
g_compute_checksum_for_data (G_CHECKSUM_SHA256,
|
||||
start, offset);
|
||||
g_ptr_array_add (ret, g_variant_ref_sink (g_variant_new ("(st)",
|
||||
blobcsum, (guint64)offset)));
|
||||
}
|
||||
start += offset;
|
||||
len -= offset;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
print_rollsums (GPtrArray *rollsums)
|
||||
{
|
||||
guint i;
|
||||
for (i = 0; i < rollsums->len; i++)
|
||||
{
|
||||
GVariant *sum = rollsums->pdata[i];
|
||||
const char *csum;
|
||||
guint64 val;
|
||||
g_variant_get (sum, "(&st)", &csum, &val);
|
||||
g_print ("chunk %s %" G_GUINT64_FORMAT "\n", csum, val);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
GCancellable *cancellable = NULL;
|
||||
GError *local_error = NULL;
|
||||
GError **error = &local_error;
|
||||
gs_unref_object GFile *path = NULL;
|
||||
GBytes *bytes = NULL;
|
||||
GBytes *from_bytes = NULL;
|
||||
GBytes *to_bytes = NULL;
|
||||
const char *from_path;
|
||||
const char *to_path;
|
||||
OstreeRollsumMatches *matches;
|
||||
GMappedFile *mfile;
|
||||
|
||||
g_setenv ("GIO_USE_VFS", "local", TRUE);
|
||||
|
||||
if (argc == 2)
|
||||
{
|
||||
gs_unref_ptrarray GPtrArray *rollsums = NULL;
|
||||
if (argc < 3)
|
||||
exit (1);
|
||||
|
||||
path = g_file_new_for_path (argv[1]);
|
||||
bytes = gs_file_map_readonly (path, cancellable, error);
|
||||
if (!bytes)
|
||||
from_path = argv[1];
|
||||
to_path = argv[2];
|
||||
|
||||
mfile = g_mapped_file_new (from_path, FALSE, error);
|
||||
if (!mfile)
|
||||
goto out;
|
||||
|
||||
rollsums = rollsum_checksums_for_data (bytes);
|
||||
print_rollsums (rollsums);
|
||||
}
|
||||
else if (argc > 2)
|
||||
{
|
||||
guint i;
|
||||
gs_unref_hashtable GHashTable *sums = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
|
||||
guint64 input_size = 0;
|
||||
guint64 rollsum_size = 0;
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
guint j;
|
||||
gs_unref_ptrarray GPtrArray *rollsums = NULL;
|
||||
guint64 this_rollsum_size = 0;
|
||||
|
||||
path = g_file_new_for_path (argv[i]);
|
||||
bytes = gs_file_map_readonly (path, cancellable, error);
|
||||
if (!bytes)
|
||||
from_bytes = g_mapped_file_get_bytes (mfile);
|
||||
g_mapped_file_unref (mfile);
|
||||
mfile = g_mapped_file_new (to_path, FALSE, error);
|
||||
if (!mfile)
|
||||
goto out;
|
||||
to_bytes = g_mapped_file_get_bytes (mfile);
|
||||
g_mapped_file_unref (mfile);
|
||||
|
||||
input_size += g_bytes_get_size (bytes);
|
||||
matches = _ostree_compute_rollsum_matches (from_bytes, to_bytes);
|
||||
|
||||
g_print ("input: %s size: %" G_GUINT64_FORMAT "\n", argv[i], g_bytes_get_size (bytes));
|
||||
|
||||
rollsums = rollsum_checksums_for_data (bytes);
|
||||
print_rollsums (rollsums);
|
||||
for (j = 0; j < rollsums->len; j++)
|
||||
{
|
||||
GVariant *sum = rollsums->pdata[j];
|
||||
const char *csum;
|
||||
guint64 ofs;
|
||||
g_variant_get (sum, "(&st)", &csum, &ofs);
|
||||
if (!g_hash_table_contains (sums, csum))
|
||||
{
|
||||
g_hash_table_add (sums, g_strdup (csum));
|
||||
rollsum_size += ofs;
|
||||
}
|
||||
this_rollsum_size += ofs;
|
||||
}
|
||||
g_print ("input: rollsum size: %" G_GUINT64_FORMAT "\n", this_rollsum_size);
|
||||
}
|
||||
g_print ("rollsum total:%u input:%" G_GUINT64_FORMAT " output: %" G_GUINT64_FORMAT " speedup:%f\n",
|
||||
g_hash_table_size (sums), input_size, rollsum_size,
|
||||
(((double)(input_size+1)) / ((double) rollsum_size + 1)));
|
||||
}
|
||||
else
|
||||
{
|
||||
bupsplit_selftest ();
|
||||
}
|
||||
g_printerr ("rollsum crcs=%u bufs=%u total=%u matchsize=%llu\n",
|
||||
matches->crcmatches,
|
||||
matches->bufmatches,
|
||||
matches->total, (unsigned long long)matches->match_size);
|
||||
|
||||
out:
|
||||
g_clear_pointer (&bytes, g_bytes_unref);
|
||||
if (local_error)
|
||||
{
|
||||
g_printerr ("%s\n", local_error->message);
|
||||
|
|
|
|||
Loading…
Reference in New Issue