Add bsdiff support to deltas

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano 2015-02-23 10:35:17 +01:00 committed by Colin Walters
parent a705d9cf29
commit 3f3bb8e37d
4 changed files with 304 additions and 15 deletions

View File

@ -21,7 +21,9 @@
#include "config.h" #include "config.h"
#include <string.h> #include <string.h>
#include <stdlib.h>
#include <gio/gunixoutputstream.h> #include <gio/gunixoutputstream.h>
#include <gio/gmemoryoutputstream.h>
#include "ostree-core-private.h" #include "ostree-core-private.h"
#include "ostree-repo-private.h" #include "ostree-repo-private.h"
@ -31,6 +33,7 @@
#include "ostree-rollsum.h" #include "ostree-rollsum.h"
#include "otutil.h" #include "otutil.h"
#include "ostree-varint.h" #include "ostree-varint.h"
#include "bsdiff/bsdiff.h"
#define CONTENT_SIZE_SIMILARITY_THRESHOLD_PERCENT (30) #define CONTENT_SIZE_SIMILARITY_THRESHOLD_PERCENT (30)
@ -397,9 +400,16 @@ process_one_object (OstreeRepo *repo,
return ret; return ret;
} }
/* A pair of content objects selected for bsdiff encoding.  Instances are
   created by try_content_bsdiff() and released by content_bsdiffs_free(). */
typedef struct {
  /* Checksum of the source object; a private g_strdup() copy. */
  char   *from_checksum;
  /* Unpacked content of the source object. */
  GBytes *tmp_from;
  /* Unpacked content of the target object. */
  GBytes *tmp_to;
} ContentBsdiff;
typedef struct { typedef struct {
char *from_checksum; char *from_checksum;
OstreeRollsumMatches *matches; OstreeRollsumMatches *matches;
GBytes *tmp_from;
GBytes *tmp_to; GBytes *tmp_to;
} ContentRollsum; } ContentRollsum;
@ -408,10 +418,20 @@ content_rollsums_free (ContentRollsum *rollsum)
{ {
g_free (rollsum->from_checksum); g_free (rollsum->from_checksum);
_ostree_rollsum_matches_free (rollsum->matches); _ostree_rollsum_matches_free (rollsum->matches);
g_bytes_unref (rollsum->tmp_from);
g_bytes_unref (rollsum->tmp_to); g_bytes_unref (rollsum->tmp_to);
g_free (rollsum); g_free (rollsum);
} }
/* Destroy a ContentBsdiff: drop the references held on both content
   buffers, free the copied source checksum, and finally free the record
   itself.  Installed as the value destroy-notify of the bsdiff hash table. */
static void
content_bsdiffs_free (ContentBsdiff *bsdiff)
{
  /* The three members are independent, so release order does not matter. */
  g_bytes_unref (bsdiff->tmp_to);
  g_bytes_unref (bsdiff->tmp_from);
  g_free (bsdiff->from_checksum);

  g_free (bsdiff);
}
/* Load a content object, uncompressing it to an unlinked tmpfile /* Load a content object, uncompressing it to an unlinked tmpfile
that's mmap()'d and suitable for seeking. that's mmap()'d and suitable for seeking.
*/ */
@ -466,6 +486,47 @@ get_unpacked_unlinked_content (OstreeRepo *repo,
return ret; return ret;
} }
/* Decide whether the content object @to should be encoded as a bsdiff
 * against @from and, if so, capture both unpacked contents.
 *
 * @repo: repository holding both objects
 * @from: checksum of the source content object
 * @to: checksum of the target content object
 * @out_bsdiff: (out): set to a newly allocated ContentBsdiff owned by the
 *   caller (release with content_bsdiffs_free()), or to NULL when the pair
 *   was deliberately skipped
 * @cancellable: passed through to get_unpacked_unlinked_content()
 * @error: passed through to get_unpacked_unlinked_content()
 *
 * Returns: TRUE on success, including the "skipped" case where
 *   *@out_bsdiff is NULL; FALSE only when loading one of the two objects
 *   failed.
 */
static gboolean
try_content_bsdiff (OstreeRepo                       *repo,
                    const char                       *from,
                    const char                       *to,
                    ContentBsdiff                   **out_bsdiff,
                    GCancellable                     *cancellable,
                    GError                          **error)
{
  gboolean ret = FALSE;
  gs_unref_bytes GBytes *tmp_from = NULL;
  gs_unref_bytes GBytes *tmp_to = NULL;
  gs_unref_object GFileInfo *from_finfo = NULL;
  gs_unref_object GFileInfo *to_finfo = NULL;
  ContentBsdiff *ret_bsdiff = NULL;

  *out_bsdiff = NULL;

  if (!get_unpacked_unlinked_content (repo, from, &tmp_from, &from_finfo,
                                      cancellable, error))
    goto out;

  if (!get_unpacked_unlinked_content (repo, to, &tmp_to, &to_finfo,
                                      cancellable, error))
    goto out;

  /* TODO: make this option configurable.  */
  if (g_bytes_get_size (tmp_to) + g_bytes_get_size (tmp_from) > (200 * (1 << 20)))
    {
      /* Too large to diff.  This is a policy decision, not a failure:
         report success with *out_bsdiff left NULL so the caller falls
         back to another encoding instead of aborting with no GError set. */
      ret = TRUE;
      goto out;
    }

  ret_bsdiff = g_new0 (ContentBsdiff, 1);
  ret_bsdiff->from_checksum = g_strdup (from);
  /* Hand both buffers over to the record; clearing the locals keeps the
     automatic cleanup from dropping the references we just transferred. */
  ret_bsdiff->tmp_from = tmp_from; tmp_from = NULL;
  ret_bsdiff->tmp_to = tmp_to; tmp_to = NULL;

  ret = TRUE;
  gs_transfer_out_value (out_bsdiff, &ret_bsdiff);
 out:
  return ret;
}
static gboolean static gboolean
try_content_rollsum (OstreeRepo *repo, try_content_rollsum (OstreeRepo *repo,
const char *from, const char *from,
@ -525,6 +586,7 @@ try_content_rollsum (OstreeRepo *repo,
ret_rollsum = g_new0 (ContentRollsum, 1); ret_rollsum = g_new0 (ContentRollsum, 1);
ret_rollsum->from_checksum = g_strdup (from); ret_rollsum->from_checksum = g_strdup (from);
ret_rollsum->matches = matches; matches = NULL; ret_rollsum->matches = matches; matches = NULL;
ret_rollsum->tmp_from = tmp_from; tmp_from = NULL;
ret_rollsum->tmp_to = tmp_to; tmp_to = NULL; ret_rollsum->tmp_to = tmp_to; tmp_to = NULL;
ret = TRUE; ret = TRUE;
@ -535,6 +597,27 @@ try_content_rollsum (OstreeRepo *repo,
return ret; return ret;
} }
/* Context handed to bzdiff_write() through bsdiff_stream.opaque. */
struct bzdiff_opaque_s
{
  /* Stream that receives the generated patch bytes. */
  GOutputStream  *out;
  /* Cancellable forwarded to the stream write call. */
  GCancellable   *cancellable;
  /* Error location forwarded to the stream write call. */
  GError        **error;
};
static int
bzdiff_write (struct bsdiff_stream* stream, const void* buffer, int size)
{
struct bzdiff_opaque_s *op = stream->opaque;
if (!g_output_stream_write (op->out,
buffer,
size,
op->cancellable,
op->error))
return -1;
return 0;
}
static void static void
append_payload_chunk_and_write (OstreeStaticDeltaPartBuilder *current_part, append_payload_chunk_and_write (OstreeStaticDeltaPartBuilder *current_part,
const guint8 *buf, const guint8 *buf,
@ -572,7 +655,7 @@ process_one_rollsum (OstreeRepo *repo,
current_part->payload->len > builder->max_chunk_size_bytes) current_part->payload->len > builder->max_chunk_size_bytes)
{ {
*current_part_val = current_part = allocate_part (builder); *current_part_val = current_part = allocate_part (builder);
} }
tmp_to_buf = g_bytes_get_data (rollsum->tmp_to, &tmp_to_len); tmp_to_buf = g_bytes_get_data (rollsum->tmp_to, &tmp_to_len);
@ -615,7 +698,7 @@ process_one_rollsum (OstreeRepo *repo,
GVariant *match = matchlist->pdata[i]; GVariant *match = matchlist->pdata[i];
guint32 crc; guint32 crc;
guint64 prefix; guint64 prefix;
g_variant_get (match, "(uttt)", &crc, &offset, &to_start, &from_start); g_variant_get (match, "(uttt)", &crc, &offset, &to_start, &from_start);
prefix = to_start - writing_offset; prefix = to_start - writing_offset;
@ -627,7 +710,7 @@ process_one_rollsum (OstreeRepo *repo,
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE); g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);
reading_payload = TRUE; reading_payload = TRUE;
} }
g_assert_cmpint (writing_offset + prefix, <=, tmp_to_len); g_assert_cmpint (writing_offset + prefix, <=, tmp_to_len);
append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, prefix); append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, prefix);
writing_offset += prefix; writing_offset += prefix;
@ -651,7 +734,7 @@ process_one_rollsum (OstreeRepo *repo,
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE); g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);
reading_payload = TRUE; reading_payload = TRUE;
} }
{ guint64 remainder = tmp_to_len - writing_offset; { guint64 remainder = tmp_to_len - writing_offset;
if (remainder > 0) if (remainder > 0)
append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, remainder); append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, remainder);
@ -671,6 +754,100 @@ process_one_rollsum (OstreeRepo *repo,
return ret; return ret;
} }
/* Append to the current delta part the operations and payload needed to
 * rebuild the file object @to_checksum from a bsdiff patch against
 * @bsdiff_content->from_checksum.
 *
 * The emitted operation sequence is: SET_READ_SOURCE, OPEN, BSPATCH,
 * CLOSE, UNSET_READ_SOURCE.  The payload receives the 32-byte source
 * checksum followed (after the mode/xattr data) by the bsdiff patch.
 *
 * @repo: repository the target object is loaded from
 * @builder: delta builder; consulted for the maximum part size and used to
 *   allocate a new part when the current one is over that size
 * @current_part_val: (inout): part being filled; updated if a new part is
 *   allocated
 * @to_checksum: checksum of the target file object
 * @bsdiff_content: source checksum plus unpacked source/target contents
 * @cancellable: forwarded to object loading and patch writing
 * @error: error location
 *
 * Returns: TRUE on success, FALSE with @error set on failure.
 */
static gboolean
process_one_bsdiff (OstreeRepo                       *repo,
                    OstreeStaticDeltaBuilder         *builder,
                    OstreeStaticDeltaPartBuilder    **current_part_val,
                    const char                       *to_checksum,
                    ContentBsdiff                    *bsdiff_content,
                    GCancellable                     *cancellable,
                    GError                          **error)
{
  gboolean ret = FALSE;
  guint64 content_size;
  gs_unref_object GInputStream *content_stream = NULL;
  gs_unref_object GFileInfo *content_finfo = NULL;
  gs_unref_variant GVariant *content_xattrs = NULL;
  OstreeStaticDeltaPartBuilder *current_part = *current_part_val;
  const guint8 *tmp_to_buf;
  gsize tmp_to_len;
  const guint8 *tmp_from_buf;
  gsize tmp_from_len;

  /* Check to see if this delta has gone over maximum size */
  if (current_part->objects->len > 0 &&
      current_part->payload->len > builder->max_chunk_size_bytes)
    {
      *current_part_val = current_part = allocate_part (builder);
    }

  tmp_to_buf = g_bytes_get_data (bsdiff_content->tmp_to, &tmp_to_len);
  tmp_from_buf = g_bytes_get_data (bsdiff_content->tmp_from, &tmp_from_len);

  if (!ostree_repo_load_file (repo, to_checksum, &content_stream,
                              &content_finfo, &content_xattrs,
                              cancellable, error))
    goto out;
  content_size = g_file_info_get_size (content_finfo);
  /* The unpacked target buffer must match the size recorded for the object. */
  g_assert_cmpint (tmp_to_len, ==, content_size);

  current_part->uncompressed_size += content_size;

  g_ptr_array_add (current_part->objects, ostree_object_name_serialize (to_checksum, OSTREE_OBJECT_TYPE_FILE));

  { gsize mode_offset, xattr_offset;
    guchar source_csum[32];

    write_content_mode_xattrs (repo, current_part, content_finfo, content_xattrs,
                               &mode_offset, &xattr_offset);

    /* Write the origin checksum */
    ostree_checksum_inplace_to_bytes (bsdiff_content->from_checksum, source_csum);

    /* The operand of SET_READ_SOURCE is the payload offset at which the
       source checksum is stored, so record it before appending. */
    g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE);
    _ostree_write_varuint64 (current_part->operations, current_part->payload->len);
    g_string_append_len (current_part->payload, (char*)source_csum, sizeof (source_csum));

    g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_OPEN);
    _ostree_write_varuint64 (current_part->operations, mode_offset);
    _ostree_write_varuint64 (current_part->operations, xattr_offset);
    _ostree_write_varuint64 (current_part->operations, content_size);

    {
      struct bsdiff_stream stream;
      struct bzdiff_opaque_s op;
      const gchar *payload;
      gsize payload_size;
      gs_unref_object GOutputStream *out = g_memory_output_stream_new_resizable ();

      stream.malloc = malloc;
      stream.free = free;
      stream.write = bzdiff_write;
      op.out = out;
      op.cancellable = cancellable;
      op.error = error;
      stream.opaque = &op;

      if (bsdiff (tmp_from_buf, tmp_from_len, tmp_to_buf, tmp_to_len, &stream) < 0)
        {
          /* bsdiff() only reports a status code.  Unless the write
             callback already stored an error, set one here so that we
             never return FALSE with @error unset. */
          if (error != NULL && *error == NULL)
            g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
                         "bsdiff generation failed");
          goto out;
        }

      payload = g_memory_output_stream_get_data (G_MEMORY_OUTPUT_STREAM (out));
      payload_size = g_memory_output_stream_get_data_size (G_MEMORY_OUTPUT_STREAM (out));

      /* BSPATCH operands: payload offset of the patch, then its length. */
      g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_BSPATCH);
      _ostree_write_varuint64 (current_part->operations, current_part->payload->len);
      _ostree_write_varuint64 (current_part->operations, payload_size);

      g_string_append_len (current_part->payload, payload, (gssize) payload_size);
    }
    g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_CLOSE);
  }

  g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);

  ret = TRUE;
 out:
  return ret;
}
static gboolean static gboolean
generate_delta_lowlatency (OstreeRepo *repo, generate_delta_lowlatency (OstreeRepo *repo,
const char *from, const char *from,
@ -695,6 +872,7 @@ generate_delta_lowlatency (OstreeRepo *repo,
gs_unref_hashtable GHashTable *new_reachable_symlink_content = NULL; gs_unref_hashtable GHashTable *new_reachable_symlink_content = NULL;
gs_unref_hashtable GHashTable *modified_regfile_content = NULL; gs_unref_hashtable GHashTable *modified_regfile_content = NULL;
gs_unref_hashtable GHashTable *rollsum_optimized_content_objects = NULL; gs_unref_hashtable GHashTable *rollsum_optimized_content_objects = NULL;
gs_unref_hashtable GHashTable *bsdiff_optimized_content_objects = NULL;
gs_unref_hashtable GHashTable *content_object_to_size = NULL; gs_unref_hashtable GHashTable *content_object_to_size = NULL;
if (from != NULL) if (from != NULL)
@ -787,22 +965,35 @@ generate_delta_lowlatency (OstreeRepo *repo,
g_free, g_free,
(GDestroyNotify) content_rollsums_free); (GDestroyNotify) content_rollsums_free);
bsdiff_optimized_content_objects = g_hash_table_new_full (g_str_hash, g_str_equal,
g_free,
(GDestroyNotify) content_bsdiffs_free);
g_hash_table_iter_init (&hashiter, modified_regfile_content); g_hash_table_iter_init (&hashiter, modified_regfile_content);
while (g_hash_table_iter_next (&hashiter, &key, &value)) while (g_hash_table_iter_next (&hashiter, &key, &value))
{ {
const char *to_checksum = key; const char *to_checksum = key;
const char *from_checksum = value; const char *from_checksum = value;
ContentRollsum *rollsum; ContentRollsum *rollsum;
ContentBsdiff *bsdiff;
if (!try_content_rollsum (repo, from_checksum, to_checksum, if (!try_content_rollsum (repo, from_checksum, to_checksum,
&rollsum, cancellable, error)) &rollsum, cancellable, error))
goto out; goto out;
if (!rollsum) if (rollsum)
continue; {
g_hash_table_insert (rollsum_optimized_content_objects, g_strdup (to_checksum), rollsum);
builder->rollsum_size += rollsum->matches->match_size;
continue;
}
g_hash_table_insert (rollsum_optimized_content_objects, g_strdup (to_checksum), rollsum); if (!try_content_bsdiff (repo, from_checksum, to_checksum,
builder->rollsum_size += rollsum->matches->match_size; &bsdiff, cancellable, error))
goto out;
if (bsdiff)
g_hash_table_insert (bsdiff_optimized_content_objects, g_strdup (to_checksum), bsdiff);
} }
g_printerr ("rollsum for %u/%u modified\n", g_printerr ("rollsum for %u/%u modified\n",
@ -836,7 +1027,21 @@ generate_delta_lowlatency (OstreeRepo *repo,
ContentRollsum *rollsum = value; ContentRollsum *rollsum = value;
if (!process_one_rollsum (repo, builder, &current_part, if (!process_one_rollsum (repo, builder, &current_part,
checksum, rollsum, checksum, rollsum,
cancellable, error))
goto out;
}
/* Now do bsdiff'ed objects */
g_hash_table_iter_init (&hashiter, bsdiff_optimized_content_objects);
while (g_hash_table_iter_next (&hashiter, &key, &value))
{
const char *checksum = key;
ContentBsdiff *bsdiff = value;
if (!process_one_bsdiff (repo, builder, &current_part,
checksum, bsdiff,
cancellable, error)) cancellable, error))
goto out; goto out;
} }
@ -851,8 +1056,9 @@ generate_delta_lowlatency (OstreeRepo *repo,
guint64 uncompressed_size; guint64 uncompressed_size;
gboolean fallback = FALSE; gboolean fallback = FALSE;
/* Skip content objects we rollsum'd */ /* Skip content objects we rollsum'd or bsdiff'ed */
if (g_hash_table_contains (rollsum_optimized_content_objects, checksum)) if (g_hash_table_contains (rollsum_optimized_content_objects, checksum) ||
g_hash_table_contains (bsdiff_optimized_content_objects, checksum))
continue; continue;
if (!ostree_repo_load_object_stream (repo, OSTREE_OBJECT_TYPE_FILE, checksum, if (!ostree_repo_load_object_stream (repo, OSTREE_OBJECT_TYPE_FILE, checksum,
@ -872,14 +1078,15 @@ generate_delta_lowlatency (OstreeRepo *repo,
} }
} }
/* Now non-rollsummed regular file content */ /* Now non-rollsummed or bsdiff'ed regular file content */
g_hash_table_iter_init (&hashiter, new_reachable_regfile_content); g_hash_table_iter_init (&hashiter, new_reachable_regfile_content);
while (g_hash_table_iter_next (&hashiter, &key, &value)) while (g_hash_table_iter_next (&hashiter, &key, &value))
{ {
const char *checksum = key; const char *checksum = key;
/* Skip content objects we rollsum'd */ /* Skip content objects we rollsum'd */
if (g_hash_table_contains (rollsum_optimized_content_objects, checksum)) if (g_hash_table_contains (rollsum_optimized_content_objects, checksum) ||
g_hash_table_contains (bsdiff_optimized_content_objects, checksum))
continue; continue;
if (!process_one_object (repo, builder, &current_part, if (!process_one_object (repo, builder, &current_part,

View File

@ -137,7 +137,8 @@ typedef enum {
OSTREE_STATIC_DELTA_OP_WRITE = 'w', OSTREE_STATIC_DELTA_OP_WRITE = 'w',
OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE = 'r', OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE = 'r',
OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE = 'R', OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE = 'R',
OSTREE_STATIC_DELTA_OP_CLOSE = 'c' OSTREE_STATIC_DELTA_OP_CLOSE = 'c',
OSTREE_STATIC_DELTA_OP_BSPATCH = 'B'
} OstreeStaticDeltaOpCode; } OstreeStaticDeltaOpCode;
gboolean gboolean

View File

@ -33,6 +33,7 @@
#include "ostree-lzma-decompressor.h" #include "ostree-lzma-decompressor.h"
#include "otutil.h" #include "otutil.h"
#include "ostree-varint.h" #include "ostree-varint.h"
#include "bsdiff/bspatch.h"
/* This should really always be true, but hey, let's just assert it */ /* This should really always be true, but hey, let's just assert it */
G_STATIC_ASSERT (sizeof (guint) >= sizeof (guint32)); G_STATIC_ASSERT (sizeof (guint) >= sizeof (guint32));
@ -100,6 +101,7 @@ OPPROTO(write)
OPPROTO(set_read_source) OPPROTO(set_read_source)
OPPROTO(unset_read_source) OPPROTO(unset_read_source)
OPPROTO(close) OPPROTO(close)
OPPROTO(bspatch)
#undef OPPROTO #undef OPPROTO
static gboolean static gboolean
@ -259,6 +261,10 @@ _ostree_static_delta_part_execute_raw (OstreeRepo *repo,
if (!dispatch_close (repo, state, cancellable, error)) if (!dispatch_close (repo, state, cancellable, error))
goto out; goto out;
break; break;
case OSTREE_STATIC_DELTA_OP_BSPATCH:
if (!dispatch_bspatch (repo, state, cancellable, error))
goto out;
break;
default: default:
g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT, g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
"Unknown opcode %u at offset %u", opcode, n_executed); "Unknown opcode %u at offset %u", opcode, n_executed);
@ -486,6 +492,82 @@ do_content_open_generic (OstreeRepo *repo,
return ret; return ret;
} }
/* Read cursor handed to bspatch_read() through bspatch_stream.opaque. */
struct bzpatch_opaque_s
{
  /* Execution state whose payload_data/payload_size hold the patch bytes. */
  StaticDeltaExecutionState *state;
  /* Position in the payload of the next byte to hand out. */
  guint64 offset;
  /* Number of patch bytes that may still be read. */
  guint64 length;
};
static int
bspatch_read (const struct bspatch_stream* stream, void* buffer, int length)
{
struct bzpatch_opaque_s *opaque = stream->opaque;
g_assert (length <= opaque->length);
g_assert (opaque->offset + length <= opaque->state->payload_size);
memcpy (buffer, opaque->state->payload_data + opaque->offset, length);
opaque->offset += length;
opaque->length -= length;
return 0;
}
/* Execute a BSPATCH operation: read the patch's payload offset and length
 * from the operation stream, load the whole read source into memory, apply
 * the patch to produce state->content_size bytes, and write them to
 * state->content_out.
 *
 * @repo: repository the delta is being applied to (unused here)
 * @state: delta execution state (operation cursor, payload, read source
 *   fd, output stream and expected content size)
 * @cancellable: forwarded to the stream operations
 * @error: error location
 *
 * Returns: TRUE on success, FALSE with @error set on failure.
 */
static gboolean
dispatch_bspatch (OstreeRepo                 *repo,
                  StaticDeltaExecutionState  *state,
                  GCancellable               *cancellable,
                  GError                    **error)
{
  gboolean ret = FALSE;
  guint64 offset, length;
  gs_unref_object GInputStream *in_stream = NULL;
  gs_unref_object GOutputStream *out_mem_stream = NULL;
  gs_free guchar *buf = NULL;
  struct bspatch_stream stream;
  struct bzpatch_opaque_s opaque;
  gsize bytes_written;

  if (!read_varuint64 (state, &offset, error))
    goto out;
  if (!read_varuint64 (state, &length, error))
    goto out;

  /* Both operands come from the delta itself; reject a range that does not
     lie inside the payload (compared so that offset + length cannot wrap). */
  if (offset > state->payload_size || length > state->payload_size - offset)
    {
      g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
                   "Invalid bspatch payload range: offset %" G_GUINT64_FORMAT
                   " length %" G_GUINT64_FORMAT,
                   offset, length);
      goto out;
    }

  buf = g_malloc0 (state->content_size);

  /* FALSE: the stream must not close the fd, which stays owned by @state. */
  in_stream = g_unix_input_stream_new (state->read_source_fd, FALSE);

  out_mem_stream = g_memory_output_stream_new_resizable ();

  /* g_output_stream_splice() returns the number of bytes copied, or -1 on
     error.  (The previous "!splice (...) < 0" test could never be true.) */
  if (g_output_stream_splice (out_mem_stream, in_stream, G_OUTPUT_STREAM_SPLICE_NONE,
                              cancellable, error) < 0)
    goto out;

  opaque.state = state;
  opaque.offset = offset;
  opaque.length = length;
  stream.read = bspatch_read;
  stream.opaque = &opaque;
  if (bspatch (g_memory_output_stream_get_data (G_MEMORY_OUTPUT_STREAM (out_mem_stream)),
               g_memory_output_stream_get_data_size (G_MEMORY_OUTPUT_STREAM (out_mem_stream)),
               buf,
               state->content_size,
               &stream) < 0)
    {
      /* bspatch() only reports a status code; make sure the caller gets a
         GError along with the FALSE return. */
      g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
                   "bsdiff patch failed");
      goto out;
    }

  if (!g_output_stream_write_all (state->content_out,
                                  buf,
                                  state->content_size,
                                  &bytes_written,
                                  cancellable, error))
    goto out;

  g_assert (bytes_written == state->content_size);

  ret = TRUE;
 out:
  return ret;
}
static gboolean static gboolean
dispatch_open_splice_and_close (OstreeRepo *repo, dispatch_open_splice_and_close (OstreeRepo *repo,
StaticDeltaExecutionState *state, StaticDeltaExecutionState *state,

View File

@ -41,4 +41,3 @@ _ostree_compute_rollsum_matches (GBytes *from,
void _ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum); void _ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum);
G_END_DECLS G_END_DECLS