diff --git a/Makefile-libostree.am b/Makefile-libostree.am index 9f596420..f77a36a7 100644 --- a/Makefile-libostree.am +++ b/Makefile-libostree.am @@ -51,6 +51,8 @@ libostree_1_la_SOURCES = \ src/libostree/ostree-lzma-decompressor.h \ src/libostree/bupsplit.h \ src/libostree/bupsplit.c \ + src/libostree/ostree-rollsum.h \ + src/libostree/ostree-rollsum.c \ src/libostree/ostree-varint.h \ src/libostree/ostree-varint.c \ src/libostree/ostree-linuxfsutil.h \ diff --git a/Makefile-tests.am b/Makefile-tests.am index 8571fa1c..42d1be7d 100644 --- a/Makefile-tests.am +++ b/Makefile-tests.am @@ -87,12 +87,6 @@ INSTALL_DATA_HOOKS += install-gpg-data-hook echo 'Output=TAP' >> $@.tmp; \ mv $@.tmp $@) -%.test: tests/%.js Makefile - $(AM_V_GEN) (echo '[Test]' > $@.tmp; \ - echo 'Exec=env TESTDATADIR=$(pkglibexecdir)/installed-tests $(pkglibexecdir)/installed-tests/$(notdir $<)' >> $@.tmp; \ - echo 'Type=session' >> $@.tmp; \ - mv $@.tmp $@) - if BUILDOPT_GJS insttest_SCRIPTS += tests/test-core.js \ tests/test-sizes.js \ @@ -109,7 +103,7 @@ check_PROGRAMS = tests/test-rollsum tests/test-varint tests/test-ot-unix-utils tests_test_ot_unix_utils_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS) tests_test_ot_unix_utils_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS) -tests_test_rollsum_SOURCES = src/libostree/bupsplit.c tests/test-rollsum.c +tests_test_rollsum_SOURCES = src/libostree/bupsplit.c src/libostree/ostree-rollsum.c tests/test-rollsum.c tests_test_rollsum_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS) tests_test_rollsum_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS) diff --git a/src/libostree/ostree-repo-static-delta-compilation.c b/src/libostree/ostree-repo-static-delta-compilation.c index ad995d0e..594b432f 100644 --- a/src/libostree/ostree-repo-static-delta-compilation.c +++ b/src/libostree/ostree-repo-static-delta-compilation.c @@ -21,18 +21,16 @@ #include "config.h" #include -#include +#include #include "ostree-core-private.h" #include "ostree-repo-private.h" #include "ostree-lzma-compressor.h" #include "ostree-repo-static-delta-private.h" #include "ostree-diff.h" +#include "ostree-rollsum.h" #include "otutil.h" #include "ostree-varint.h" -#include "bupsplit.h" - -#define ROLLSUM_BLOB_MAX (8192*4) typedef struct { guint64 uncompressed_size; @@ -260,6 +258,36 @@ splice_stream_to_payload (OstreeStaticDeltaPartBuilder *current_part, return ret; } +static void +write_content_mode_xattrs (OstreeRepo *repo, + OstreeStaticDeltaPartBuilder *current_part, + GFileInfo *content_finfo, + GVariant *content_xattrs, + gsize *out_mode_offset, + gsize *out_xattr_offset) +{ + guint32 uid = + g_file_info_get_attribute_uint32 (content_finfo, "unix::uid"); + guint32 gid = + g_file_info_get_attribute_uint32 (content_finfo, "unix::gid"); + guint32 mode = + g_file_info_get_attribute_uint32 (content_finfo, "unix::mode"); + gs_unref_variant GVariant *modev + = g_variant_ref_sink (g_variant_new ("(uuu)", + GUINT32_TO_BE (uid), + GUINT32_TO_BE (gid), + GUINT32_TO_BE (mode))); + + *out_mode_offset = write_unique_variant_chunk (current_part, + current_part->mode_set, + current_part->modes, + modev); + *out_xattr_offset = write_unique_variant_chunk (current_part, + current_part->xattr_set, + current_part->xattrs, + content_xattrs); +} + static gboolean process_one_object (OstreeRepo *repo, OstreeStaticDeltaBuilder *builder, @@ -327,26 +355,12 @@ process_one_object (OstreeRepo *repo, else { gsize mode_offset, xattr_offset, content_offset; - guint32 uid = - g_file_info_get_attribute_uint32 (content_finfo, "unix::uid"); - guint32 gid = - g_file_info_get_attribute_uint32 (content_finfo, "unix::gid"); - guint32 mode = - g_file_info_get_attribute_uint32 (content_finfo, "unix::mode"); - gs_unref_variant GVariant *modev - = g_variant_ref_sink (g_variant_new ("(uuu)", - GUINT32_TO_BE (uid), - GUINT32_TO_BE (gid), - GUINT32_TO_BE (mode))); + guint32 mode; - mode_offset = write_unique_variant_chunk (current_part, - current_part->mode_set, - current_part->modes, - modev); - xattr_offset = write_unique_variant_chunk (current_part, - current_part->xattr_set, - current_part->xattrs, - content_xattrs); + mode = g_file_info_get_attribute_uint32 (content_finfo, "unix::mode"); + + write_content_mode_xattrs (repo, current_part, content_finfo, content_xattrs, + &mode_offset, &xattr_offset); if (S_ISLNK (mode)) { @@ -381,106 +395,75 @@ process_one_object (OstreeRepo *repo, return ret; } -typedef struct { - GPtrArray *keys; - GHashTable *values; -} OrderedRollsums; - -static void -ordered_rollsums_free (OrderedRollsums *ohash) -{ - g_ptr_array_unref (ohash->keys); - g_hash_table_unref (ohash->values); - g_free (ohash); -} - -static gboolean -rollsum_chunks_crc32 (GInputStream *istream, - OrderedRollsums **out_rollsums, - GCancellable *cancellable, - GError **error) -{ - gboolean ret = FALSE; - gsize start = 0; - gboolean rollsum_end = FALSE; - OrderedRollsums *ret_rollsums = g_new0 (OrderedRollsums, 1); - gs_unref_object GBufferedInputStream *bufinput = - (GBufferedInputStream*) g_buffered_input_stream_new_sized (istream, ROLLSUM_BLOB_MAX); - - ret_rollsums->keys = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref); - ret_rollsums->values = g_hash_table_new (NULL, NULL); - - while (TRUE) - { - gssize bytes_read; - const guint8 *buf; - gsize bufsize; - int offset, bits; - - bytes_read = g_buffered_input_stream_fill (bufinput, -1, cancellable, error); - if (bytes_read == -1) - goto out; - if (bytes_read == 0) - break; - - buf = g_buffered_input_stream_peek_buffer (bufinput, &bufsize); - - if (!rollsum_end) - { - offset = bupsplit_find_ofs (buf, MIN(G_MAXINT32, bufsize), &bits); - if (offset == 0) - { - rollsum_end = TRUE; - offset = MIN(ROLLSUM_BLOB_MAX, bufsize); - } - else if (offset > ROLLSUM_BLOB_MAX) - offset = ROLLSUM_BLOB_MAX; - } - else - offset = MIN(ROLLSUM_BLOB_MAX, bufsize); - - if (!g_input_stream_skip ((GInputStream*)bufinput, bufsize, cancellable, error)) - goto out; - - /* Use zlib's crc32 */ - { guint32 crc = crc32 (0L, NULL, 0); - GVariant *val; - - crc = crc32 (crc, buf, offset); - - val = g_variant_ref_sink (g_variant_new ("(utt)", crc, (guint64) start, (guint64)offset)); - g_ptr_array_add (ret_rollsums->keys, val); - g_hash_table_insert (ret_rollsums->values, GUINT_TO_POINTER (crc), val); - } - - start += offset; - } - - ret = TRUE; - gs_transfer_out_value (out_rollsums, &ret_rollsums); - out: - if (ret_rollsums) - ordered_rollsums_free (ret_rollsums); - return ret; -} - typedef struct { char *from_checksum; - OrderedRollsums *from_rollsums; - OrderedRollsums *to_rollsums; - guint match_ratio; - guint64 match_size; + OstreeRollsumMatches *matches; + GBytes *tmp_to; } ContentRollsum; static void content_rollsums_free (ContentRollsum *rollsum) { g_free (rollsum->from_checksum); - ordered_rollsums_free (rollsum->from_rollsums); - ordered_rollsums_free (rollsum->to_rollsums); + _ostree_rollsum_matches_free (rollsum->matches); + g_bytes_unref (rollsum->tmp_to); g_free (rollsum); } +/* Load a content object, uncompressing it to an unlinked tmpfile + that's mmap()'d and suitable for seeking. + */ +static gboolean +get_unpacked_unlinked_content (OstreeRepo *repo, + const char *checksum, + GBytes **out_content, + GFileInfo **out_finfo, + GCancellable *cancellable, + GError **error) +{ + gboolean ret = FALSE; + gs_free char *tmpname = g_strdup ("tmpostree-deltaobj-XXXXXX"); + gs_fd_close int fd = -1; + gs_unref_bytes GBytes *ret_content = NULL; + gs_unref_object GInputStream *istream = NULL; + gs_unref_object GFileInfo *ret_finfo = NULL; + gs_unref_object GOutputStream *out = NULL; + + fd = g_mkstemp (tmpname); + if (fd == -1) + { + gs_set_error_from_errno (error, errno); + goto out; + } + /* Doesn't need a name */ + (void) unlink (tmpname); + + if (!ostree_repo_load_file (repo, checksum, &istream, &ret_finfo, NULL, + cancellable, error)) + goto out; + + if (g_file_info_get_file_type (ret_finfo) != G_FILE_TYPE_REGULAR) + { + ret = TRUE; + goto out; + } + + out = g_unix_output_stream_new (fd, FALSE); + if (g_output_stream_splice (out, istream, G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET, + cancellable, error) < 0) + goto out; + + { GMappedFile *mfile = g_mapped_file_new_from_fd (fd, FALSE, error); + ret_content = g_mapped_file_get_bytes (mfile); + g_mapped_file_unref (mfile); + } + + ret = TRUE; + gs_transfer_out_value (out_content, &ret_content); + out: + return ret; +} + static gboolean try_content_rollsum (OstreeRepo *repo, const char *from, @@ -490,85 +473,203 @@ try_content_rollsum (OstreeRepo *repo, GError **error) { gboolean ret = FALSE; - OrderedRollsums *from_rollsum = NULL; - OrderedRollsums *to_rollsum = NULL; - gs_unref_object GInputStream *from_istream = NULL; + gs_unref_hashtable GHashTable *from_rollsum = NULL; + gs_unref_hashtable GHashTable *to_rollsum = NULL; + gs_unref_bytes GBytes *tmp_from = NULL; + gs_unref_bytes GBytes *tmp_to = NULL; gs_unref_object GFileInfo *from_finfo = NULL; - gs_unref_object GInputStream *to_istream = NULL; gs_unref_object GFileInfo *to_finfo = NULL; + OstreeRollsumMatches *matches; ContentRollsum *ret_rollsum = NULL; - guint total = 0; - guint matches = 0; - guint match_ratio = 0; - guint64 match_size = 0; - gpointer hkey, hvalue; - GHashTableIter hiter; *out_rollsum = NULL; - if (!ostree_repo_load_file (repo, from, &from_istream, &from_finfo, NULL, - cancellable, error)) + /* Load the content objects, splice them to uncompressed temporary files that + * we can just mmap() and seek around in conveniently. + */ + if (!get_unpacked_unlinked_content (repo, from, &tmp_from, &from_finfo, + cancellable, error)) goto out; - if (!ostree_repo_load_file (repo, to, &to_istream, &to_finfo, NULL, - cancellable, error)) + if (!get_unpacked_unlinked_content (repo, to, &tmp_to, &to_finfo, + cancellable, error)) goto out; /* Only try to rollsum regular files obviously */ - if (!(g_file_info_get_file_type (from_finfo) == G_FILE_TYPE_REGULAR - && g_file_info_get_file_type (to_finfo) == G_FILE_TYPE_REGULAR)) + if (!(tmp_from && tmp_to)) { ret = TRUE; goto out; } - g_assert (from_istream && to_istream); + matches = _ostree_compute_rollsum_matches (tmp_from, tmp_to); - if (!rollsum_chunks_crc32 (from_istream, &from_rollsum, cancellable, error)) - goto out; - if (!rollsum_chunks_crc32 (to_istream, &to_rollsum, cancellable, error)) - goto out; + { guint match_ratio = (matches->bufmatches*100)/matches->total; - g_clear_object (&from_istream); - g_clear_object (&to_istream); + /* Only proceed if the file contains (arbitrary) more than 25% of + * the previous chunks. + */ + if (match_ratio < 25) + { + ret = TRUE; + goto out; + } + } - g_hash_table_iter_init (&hiter, to_rollsum->values); - while (g_hash_table_iter_next (&hiter, &hkey, &hvalue)) - { - GVariant *chunk = hvalue; - if (g_hash_table_contains (from_rollsum->values, hkey)) - { - guint64 offset; - g_variant_get (chunk, "(utt)", NULL, NULL, &offset); - matches++; - match_size += offset; - } - total++; - } - - match_ratio = (matches*100)/total; - - /* Only proceed if the file contains (arbitrary) more than 25% of - * the previous chunks. - */ - if (match_ratio < 25) - { - ret = TRUE; - goto out; - } + g_printerr ("rollsum for %s; crcs=%u bufs=%u total=%u matchsize=%llu\n", + to, matches->crcmatches, + matches->bufmatches, + matches->total, (unsigned long long)matches->match_size); ret_rollsum = g_new0 (ContentRollsum, 1); - ret_rollsum->match_ratio = match_ratio; - ret_rollsum->match_size = match_size; ret_rollsum->from_checksum = g_strdup (from); - ret_rollsum->from_rollsums = from_rollsum; from_rollsum = NULL; - ret_rollsum->to_rollsums = to_rollsum; to_rollsum = NULL; + ret_rollsum->matches = matches; matches = NULL; + ret_rollsum->tmp_to = tmp_to; tmp_to = NULL; ret = TRUE; gs_transfer_out_value (out_rollsum, &ret_rollsum); out: + if (matches) + _ostree_rollsum_matches_free (matches); return ret; } +static void +append_payload_chunk_and_write (OstreeStaticDeltaPartBuilder *current_part, + const guint8 *buf, + guint64 offset) +{ + guint64 payload_start; + + payload_start = current_part->payload->len; + g_string_append_len (current_part->payload, (char*)buf, offset); + g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_WRITE); + _ostree_write_varuint64 (current_part->operations, offset); + _ostree_write_varuint64 (current_part->operations, payload_start); +} + +static gboolean +process_one_rollsum (OstreeRepo *repo, + OstreeStaticDeltaBuilder *builder, + OstreeStaticDeltaPartBuilder **current_part_val, + const char *to_checksum, + ContentRollsum *rollsum, + GCancellable *cancellable, + GError **error) +{ + gboolean ret = FALSE; + guint64 content_size; + gs_unref_object GInputStream *content_stream = NULL; + gs_unref_object GFileInfo *content_finfo = NULL; + gs_unref_variant GVariant *content_xattrs = NULL; + OstreeStaticDeltaPartBuilder *current_part = *current_part_val; + const guint8 *tmp_to_buf; + gsize tmp_to_len; + + /* Check to see if this delta has gone over maximum size */ + if (current_part->objects->len > 0 && + current_part->payload->len > builder->max_chunk_size_bytes) + { + *current_part_val = current_part = allocate_part (builder); + } + + tmp_to_buf = g_bytes_get_data (rollsum->tmp_to, &tmp_to_len); + + if (!ostree_repo_load_file (repo, to_checksum, &content_stream, + &content_finfo, &content_xattrs, + cancellable, error)) + goto out; + content_size = g_file_info_get_size (content_finfo); + g_assert_cmpint (tmp_to_len, ==, content_size); + + current_part->uncompressed_size += content_size; + + g_ptr_array_add (current_part->objects, ostree_object_name_serialize (to_checksum, OSTREE_OBJECT_TYPE_FILE)); + + { gsize mode_offset, xattr_offset, from_csum_offset; + gboolean reading_payload = TRUE; + guchar source_csum[32]; + guint i; + + write_content_mode_xattrs (repo, current_part, content_finfo, content_xattrs, + &mode_offset, &xattr_offset); + + /* Write the origin checksum */ + ostree_checksum_inplace_to_bytes (rollsum->from_checksum, source_csum); + from_csum_offset = current_part->payload->len; + g_string_append_len (current_part->payload, (char*)source_csum, sizeof (source_csum)); + + g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_OPEN); + _ostree_write_varuint64 (current_part->operations, mode_offset); + _ostree_write_varuint64 (current_part->operations, xattr_offset); + _ostree_write_varuint64 (current_part->operations, content_size); + + { guint64 writing_offset = 0; + guint64 offset = 0, to_start = 0, from_start = 0; + GPtrArray *matchlist = rollsum->matches->matches; + + g_assert (matchlist->len > 0); + for (i = 0; i < matchlist->len; i++) + { + GVariant *match = matchlist->pdata[i]; + guint32 crc; + guint64 prefix; + + g_variant_get (match, "(uttt)", &crc, &offset, &to_start, &from_start); + + prefix = to_start - writing_offset; + + if (prefix > 0) + { + if (!reading_payload) + { + g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE); + reading_payload = TRUE; + } + + g_assert_cmpint (writing_offset + prefix, <=, tmp_to_len); + append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, prefix); + writing_offset += prefix; + } + + if (reading_payload) + { + g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE); + _ostree_write_varuint64 (current_part->operations, from_csum_offset); + reading_payload = FALSE; + } + + g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_WRITE); + _ostree_write_varuint64 (current_part->operations, offset); + _ostree_write_varuint64 (current_part->operations, from_start); + writing_offset += offset; + } + + if (!reading_payload) + { + g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE); + reading_payload = TRUE; + } + + { guint64 remainder = tmp_to_len - writing_offset; + if (remainder > 0) + append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, remainder); + writing_offset += remainder; + g_assert_cmpint (writing_offset, ==, tmp_to_len); + } + + g_assert_cmpint (writing_offset, ==, content_size); + } + + + g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_CLOSE); + } + + ret = TRUE; + out: + return ret; +} + + static gboolean generate_delta_lowlatency (OstreeRepo *repo, const char *from, @@ -694,46 +795,13 @@ generate_delta_lowlatency (OstreeRepo *repo, continue; g_hash_table_insert (rollsum_optimized_content_objects, g_strdup (to_checksum), rollsum); - builder->rollsum_size += rollsum->match_size; + builder->rollsum_size += rollsum->matches->match_size; } g_printerr ("rollsum for %u/%u modified\n", g_hash_table_size (rollsum_optimized_content_objects), g_hash_table_size (modified_content_objects)); - /* Scan for large objects, so we can fall back to plain HTTP-based - * fetch. In the future this should come after an rsync-style - * rolling delta check for modified files. - */ - g_hash_table_iter_init (&hashiter, new_reachable_content); - while (g_hash_table_iter_next (&hashiter, &key, &value)) - { - GVariant *serialized_key = key; - const char *checksum; - OstreeObjectType objtype; - guint64 uncompressed_size; - gboolean fallback = FALSE; - - ostree_object_name_deserialize (serialized_key, &checksum, &objtype); - - if (!ostree_repo_load_object_stream (repo, objtype, checksum, - NULL, &uncompressed_size, - cancellable, error)) - goto out; - if (uncompressed_size > builder->min_fallback_size_bytes) - fallback = TRUE; - - if (fallback) - { - gs_free char *size = g_format_size (uncompressed_size); - g_printerr ("fallback for %s (%s)\n", - ostree_object_to_string (checksum, objtype), size); - g_ptr_array_add (builder->fallback_objects, - g_variant_ref (serialized_key)); - g_hash_table_iter_remove (&hashiter); - } - } - current_part = allocate_part (builder); /* Pack the metadata first */ @@ -752,7 +820,57 @@ generate_delta_lowlatency (OstreeRepo *repo, goto out; } - /* Now content */ + /* Now do rollsummed objects */ + + g_hash_table_iter_init (&hashiter, rollsum_optimized_content_objects); + while (g_hash_table_iter_next (&hashiter, &key, &value)) + { + const char *checksum = key; + ContentRollsum *rollsum = value; + + if (!process_one_rollsum (repo, builder, ¤t_part, + checksum, rollsum, + cancellable, error)) + goto out; + } + + /* Scan for large objects, so we can fall back to plain HTTP-based + * fetch. + */ + g_hash_table_iter_init (&hashiter, new_reachable_content); + while (g_hash_table_iter_next (&hashiter, &key, &value)) + { + GVariant *serialized_key = key; + const char *checksum; + OstreeObjectType objtype; + guint64 uncompressed_size; + gboolean fallback = FALSE; + + ostree_object_name_deserialize (serialized_key, &checksum, &objtype); + + /* Skip content objects we rollsum'd */ + if (g_hash_table_contains (rollsum_optimized_content_objects, checksum)) + continue; + + if (!ostree_repo_load_object_stream (repo, objtype, checksum, + NULL, &uncompressed_size, + cancellable, error)) + goto out; + if (uncompressed_size > builder->min_fallback_size_bytes) + fallback = TRUE; + + if (fallback) + { + gs_free char *size = g_format_size (uncompressed_size); + g_printerr ("fallback for %s (%s)\n", + ostree_object_to_string (checksum, objtype), size); + g_ptr_array_add (builder->fallback_objects, + g_variant_ref (serialized_key)); + g_hash_table_iter_remove (&hashiter); + } + } + + /* Now non-rollsummed content */ g_hash_table_iter_init (&hashiter, new_reachable_content); while (g_hash_table_iter_next (&hashiter, &key, &value)) { @@ -762,6 +880,10 @@ generate_delta_lowlatency (OstreeRepo *repo, ostree_object_name_deserialize (serialized_key, &checksum, &objtype); + /* Skip content objects we rollsum'd */ + if (g_hash_table_contains (rollsum_optimized_content_objects, checksum)) + continue; + if (!process_one_object (repo, builder, ¤t_part, checksum, objtype, cancellable, error)) diff --git a/src/libostree/ostree-repo-static-delta-private.h b/src/libostree/ostree-repo-static-delta-private.h index 09d4898a..2478f167 100644 --- a/src/libostree/ostree-repo-static-delta-private.h +++ b/src/libostree/ostree-repo-static-delta-private.h @@ -135,7 +135,8 @@ typedef enum { OSTREE_STATIC_DELTA_OP_OPEN_SPLICE_AND_CLOSE = 'S', OSTREE_STATIC_DELTA_OP_OPEN = 'o', OSTREE_STATIC_DELTA_OP_WRITE = 'w', - OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE = 'R', + OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE = 'r', + OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE = 'R', OSTREE_STATIC_DELTA_OP_CLOSE = 'c' } OstreeStaticDeltaOpCode; diff --git a/src/libostree/ostree-repo-static-delta-processing.c b/src/libostree/ostree-repo-static-delta-processing.c index 1cb66de4..b268fb2c 100644 --- a/src/libostree/ostree-repo-static-delta-processing.c +++ b/src/libostree/ostree-repo-static-delta-processing.c @@ -98,6 +98,7 @@ OPPROTO(open_splice_and_close) OPPROTO(open) OPPROTO(write) OPPROTO(set_read_source) +OPPROTO(unset_read_source) OPPROTO(close) #undef OPPROTO @@ -250,6 +251,10 @@ _ostree_static_delta_part_execute_raw (OstreeRepo *repo, if (!dispatch_set_read_source (repo, state, cancellable, error)) goto out; break; + case OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE: + if (!dispatch_unset_read_source (repo, state, cancellable, error)) + goto out; + break; case OSTREE_STATIC_DELTA_OP_CLOSE: if (!dispatch_close (repo, state, cancellable, error)) goto out; @@ -491,7 +496,7 @@ dispatch_open_splice_and_close (OstreeRepo *repo, if (!open_output_target (state, cancellable, error)) goto out; - + if (OSTREE_OBJECT_TYPE_IS_META (state->output_objtype)) { gs_unref_variant GVariant *metadata = NULL; @@ -519,7 +524,6 @@ dispatch_open_splice_and_close (OstreeRepo *repo, { guint64 content_offset; guint64 objlen; - guint64 content_size; gsize bytes_written; gs_unref_object GInputStream *object_input = NULL; gs_unref_object GInputStream *memin = NULL; @@ -527,11 +531,11 @@ dispatch_open_splice_and_close (OstreeRepo *repo, if (!do_content_open_generic (repo, state, cancellable, error)) goto out; - if (!read_varuint64 (state, &content_size, error)) + if (!read_varuint64 (state, &state->content_size, error)) goto out; if (!read_varuint64 (state, &content_offset, error)) goto out; - if (!validate_ofs (state, content_offset, content_size, error)) + if (!validate_ofs (state, content_offset, state->content_size, error)) goto out; /* Fast path for regular files to bare repositories */ @@ -551,7 +555,7 @@ dispatch_open_splice_and_close (OstreeRepo *repo, { if (!g_output_stream_write_all (state->content_out, state->payload_data + content_offset, - content_size, + state->content_size, &bytes_written, cancellable, error)) goto out; @@ -567,14 +571,14 @@ dispatch_open_splice_and_close (OstreeRepo *repo, if (S_ISLNK (state->mode)) { gs_free char *nulterminated_target = - g_strndup ((char*)state->payload_data + content_offset, content_size); + g_strndup ((char*)state->payload_data + content_offset, state->content_size); g_file_info_set_symlink_target (finfo, nulterminated_target); } else { g_assert (S_ISREG (state->mode)); - g_file_info_set_size (finfo, content_size); - memin = g_memory_input_stream_new_from_data (state->payload_data + content_offset, content_size, NULL); + g_file_info_set_size (finfo, state->content_size); + memin = g_memory_input_stream_new_from_data (state->payload_data + content_offset, state->content_size, NULL); } if (!ostree_raw_file_to_content_stream (memin, finfo, state->xattrs, @@ -621,6 +625,9 @@ dispatch_open (OstreeRepo *repo, if (!do_content_open_generic (repo, state, cancellable, error)) goto out; + if (!read_varuint64 (state, &state->content_size, error)) + goto out; + if (!_ostree_repo_open_trusted_content_bare (repo, state->checksum, state->content_size, &state->barecommitstate, @@ -651,8 +658,6 @@ dispatch_write (OstreeRepo *repo, goto out; if (!read_varuint64 (state, &content_offset, error)) goto out; - if (!validate_ofs (state, content_offset, content_size, error)) - goto out; if (!state->have_obj) { @@ -695,6 +700,9 @@ dispatch_write (OstreeRepo *repo, } else { + if (!validate_ofs (state, content_offset, content_size, error)) + goto out; + if (!g_output_stream_write_all (state->content_out, state->payload_data + content_offset, content_size, @@ -745,6 +753,29 @@ dispatch_set_read_source (OstreeRepo *repo, return ret; } +static gboolean +dispatch_unset_read_source (OstreeRepo *repo, + StaticDeltaExecutionState *state, + GCancellable *cancellable, + GError **error) +{ + gboolean ret = FALSE; + + if (state->read_source_fd) + { + (void) close (state->read_source_fd); + state->read_source_fd = -1; + } + + g_clear_pointer (&state->read_source_object, g_free); + + ret = TRUE; + /* out: */ + if (!ret) + g_prefix_error (error, "opcode unset-read-source: "); + return ret; +} + static gboolean dispatch_close (OstreeRepo *repo, StaticDeltaExecutionState *state, @@ -765,14 +796,10 @@ dispatch_close (OstreeRepo *repo, goto out; } - if (state->read_source_fd) - { - (void) close (state->read_source_fd); - state->read_source_fd = -1; - } + if (!dispatch_unset_read_source (repo, state, cancellable, error)) + goto out; g_clear_pointer (&state->xattrs, g_variant_unref); - g_clear_pointer (&state->read_source_object, g_free); g_clear_object (&state->content_out); state->checksum_index++; diff --git a/src/libostree/ostree-rollsum.c b/src/libostree/ostree-rollsum.c new file mode 100644 index 00000000..5a57e1c3 --- /dev/null +++ b/src/libostree/ostree-rollsum.c @@ -0,0 +1,201 @@ +/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- + * + * Copyright (C) 2015 Colin Walters + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include "config.h" + +#include +#include + +#include "ostree-rollsum.h" +#include "libgsystem.h" +#include "bupsplit.h" + +#define ROLLSUM_BLOB_MAX (8192*4) + +static GHashTable * +rollsum_chunks_crc32 (GBytes *bytes) +{ + gsize start = 0; + gboolean rollsum_end = FALSE; + GHashTable *ret_rollsums = NULL; + const guint8 *buf; + gsize buflen; + gsize remaining; + + ret_rollsums = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)g_ptr_array_unref); + + buf = g_bytes_get_data (bytes, &buflen); + + remaining = buflen; + while (remaining > 0) + { + int offset, bits; + + if (!rollsum_end) + { + offset = bupsplit_find_ofs (buf + start, MIN(G_MAXINT32, remaining), &bits); + if (offset == 0) + { + rollsum_end = TRUE; + offset = MIN(ROLLSUM_BLOB_MAX, remaining); + } + else if (offset > ROLLSUM_BLOB_MAX) + offset = ROLLSUM_BLOB_MAX; + } + else + offset = MIN(ROLLSUM_BLOB_MAX, remaining); + + /* Use zlib's crc32 */ + { guint32 crc = crc32 (0L, NULL, 0); + GVariant *val; + GPtrArray *matches; + + crc = crc32 (crc, buf, offset); + + val = g_variant_ref_sink (g_variant_new ("(utt)", crc, (guint64) start, (guint64)offset)); + matches = g_hash_table_lookup (ret_rollsums, GUINT_TO_POINTER (crc)); + if (!matches) + { + matches = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref); + g_hash_table_insert (ret_rollsums, GUINT_TO_POINTER (crc), matches); + } + g_ptr_array_add (matches, val); + } + + start += offset; + remaining -= offset; + } + + return ret_rollsums; +} + +static gint +compare_matches (const void *app, + const void *bpp) +{ + GVariant **avpp = (GVariant**)app; + GVariant *a = *avpp; + GVariant **bvpp = (GVariant**)bpp; + GVariant *b = *bvpp; + guint64 a_start, b_start; + + g_variant_get_child (a, 2, "t", &a_start); + g_variant_get_child (b, 2, "t", &b_start); + + g_assert_cmpint (a_start, !=, b_start); + + if (a_start < b_start) + return -1; + return 1; +} + +OstreeRollsumMatches * +_ostree_compute_rollsum_matches (GBytes *from, + GBytes *to) +{ + OstreeRollsumMatches *ret_rollsum = NULL; + gs_unref_hashtable GHashTable *from_rollsum = NULL; + gs_unref_hashtable GHashTable *to_rollsum = NULL; + gs_unref_ptrarray GPtrArray *matches = NULL; + const guint8 *from_buf; + gsize from_len; + const guint8 *to_buf; + gsize to_len; + gpointer hkey, hvalue; + GHashTableIter hiter; + + ret_rollsum = g_new0 (OstreeRollsumMatches, 1); + + matches = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref); + + from_buf = g_bytes_get_data (from, &from_len); + to_buf = g_bytes_get_data (to, &to_len); + + from_rollsum = rollsum_chunks_crc32 (from); + to_rollsum = rollsum_chunks_crc32 (to); + + g_hash_table_iter_init (&hiter, to_rollsum); + while (g_hash_table_iter_next (&hiter, &hkey, &hvalue)) + { + GPtrArray *to_chunks = hvalue; + GPtrArray *from_chunks; + + from_chunks = g_hash_table_lookup (from_rollsum, hkey); + if (from_chunks != NULL) + { + guint i; + + ret_rollsum->crcmatches++; + + for (i = 0; i < to_chunks->len; i++) + { + GVariant *to_chunk = to_chunks->pdata[i]; + guint64 to_start, to_offset; + guint32 tocrc; + guint j; + + g_variant_get (to_chunk, "(utt)", &tocrc, &to_start, &to_offset); + + for (j = 0; j < from_chunks->len; j++) + { + GVariant *from_chunk = from_chunks->pdata[j]; + guint32 fromcrc; + guint64 from_start, from_offset; + + g_variant_get (from_chunk, "(utt)", &fromcrc, &from_start, &from_offset); + + g_assert (fromcrc == tocrc); + g_assert (to_offset == from_offset); + + /* Rsync uses a cryptographic checksum, but let's be + * very conservative here and just memcmp. + */ + if (memcmp (from_buf + from_start, to_buf + to_start, to_offset) == 0) + { + GVariant *match = g_variant_new ("(uttt)", fromcrc, to_offset, to_start, from_start); + ret_rollsum->bufmatches++; + ret_rollsum->match_size += to_offset; + g_ptr_array_add (matches, g_variant_ref_sink (match)); + break; /* Don't need any more matches */ + } + } + } + } + + ret_rollsum->total += to_chunks->len; + } + + g_ptr_array_sort (matches, compare_matches); + + ret_rollsum->from_rollsums = from_rollsum; from_rollsum = NULL; + ret_rollsum->to_rollsums = to_rollsum; to_rollsum = NULL; + ret_rollsum->matches = matches; matches = NULL; + + return ret_rollsum; +} + +void +_ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum) +{ + g_hash_table_unref (rollsum->to_rollsums); + g_hash_table_unref (rollsum->from_rollsums); + g_ptr_array_unref (rollsum->matches); + g_free (rollsum); +} diff --git a/src/libostree/ostree-rollsum.h b/src/libostree/ostree-rollsum.h new file mode 100644 index 00000000..37003d8f --- /dev/null +++ b/src/libostree/ostree-rollsum.h @@ -0,0 +1,44 @@ +/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- + * + * Copyright (C) 2015 Colin Walters + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#pragma once + +#include + +G_BEGIN_DECLS + +typedef struct { + GHashTable *from_rollsums; + GHashTable *to_rollsums; + guint crcmatches; + guint bufmatches; + guint total; + guint64 match_size; + GPtrArray *matches; +} OstreeRollsumMatches; + +OstreeRollsumMatches * +_ostree_compute_rollsum_matches (GBytes *from, + GBytes *to); + +void _ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum); + +G_END_DECLS + diff --git a/tests/test-rollsum.c b/tests/test-rollsum.c index 1b9174cc..e79c8956 100644 --- a/tests/test-rollsum.c +++ b/tests/test-rollsum.c @@ -20,138 +20,51 @@ #include "config.h" +#include "ostree-rollsum.h" +#include +#include + #include "libgsystem.h" -#include "bupsplit.h" - -#define BLOB_MAX (8192*4) - -static GPtrArray * -rollsum_checksums_for_data (GBytes *bytes) -{ - const guint8 *start; - gsize len; - gboolean rollsum_end = FALSE; - GPtrArray *ret = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref); - - start = g_bytes_get_data (bytes, &len); - while (len > 0) - { - int offset, bits; - if (!rollsum_end) - { - offset = bupsplit_find_ofs (start, MIN(G_MAXINT32, len), &bits); - if (offset == 0) - { - rollsum_end = TRUE; - offset = MIN(BLOB_MAX, len); - } - else if (offset > BLOB_MAX) - offset = BLOB_MAX; - } - else - offset = MIN(BLOB_MAX, len); - - { - gs_free char *blobcsum = - g_compute_checksum_for_data (G_CHECKSUM_SHA256, - start, offset); - g_ptr_array_add (ret, g_variant_ref_sink (g_variant_new ("(st)", - blobcsum, (guint64)offset))); - } - start += offset; - len -= offset; - } - return ret; -} - -static void -print_rollsums (GPtrArray *rollsums) -{ - guint i; - for (i = 0; i < rollsums->len; i++) - { - GVariant *sum = rollsums->pdata[i]; - const char *csum; - guint64 val; - g_variant_get (sum, "(&st)", &csum, &val); - g_print ("chunk %s %" G_GUINT64_FORMAT "\n", csum, val); - } -} - int main (int argc, char **argv) { - GCancellable *cancellable = NULL; GError *local_error = NULL; GError **error = &local_error; - gs_unref_object GFile *path = NULL; - GBytes *bytes = NULL; + GBytes *from_bytes = NULL; + GBytes *to_bytes = NULL; + const char *from_path; + const char *to_path; + OstreeRollsumMatches *matches; + GMappedFile *mfile; g_setenv ("GIO_USE_VFS", "local", TRUE); - if (argc == 2) - { - gs_unref_ptrarray GPtrArray *rollsums = NULL; + if (argc < 3) + exit (1); - path = g_file_new_for_path (argv[1]); - bytes = gs_file_map_readonly (path, cancellable, error); - if (!bytes) - goto out; + from_path = argv[1]; + to_path = argv[2]; - rollsums = rollsum_checksums_for_data (bytes); - print_rollsums (rollsums); - } - else if (argc > 2) - { - guint i; - gs_unref_hashtable GHashTable *sums = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL); - guint64 input_size = 0; - guint64 rollsum_size = 0; + mfile = g_mapped_file_new (from_path, FALSE, error); + if (!mfile) + goto out; + from_bytes = g_mapped_file_get_bytes (mfile); + g_mapped_file_unref (mfile); + mfile = g_mapped_file_new (to_path, FALSE, error); + if (!mfile) + goto out; + to_bytes = g_mapped_file_get_bytes (mfile); + g_mapped_file_unref (mfile); - for (i = 1; i < argc; i++) - { - guint j; - gs_unref_ptrarray GPtrArray *rollsums = NULL; - guint64 this_rollsum_size = 0; + matches = _ostree_compute_rollsum_matches (from_bytes, to_bytes); - path = g_file_new_for_path (argv[i]); - bytes = gs_file_map_readonly (path, cancellable, error); - if (!bytes) - goto out; - - input_size += g_bytes_get_size (bytes); - - g_print ("input: %s size: %" G_GUINT64_FORMAT "\n", argv[i], g_bytes_get_size (bytes)); - - rollsums = rollsum_checksums_for_data (bytes); - print_rollsums (rollsums); - for (j = 0; j < rollsums->len; j++) - { - GVariant *sum = rollsums->pdata[j]; - const char *csum; - guint64 ofs; - g_variant_get (sum, "(&st)", &csum, &ofs); - if (!g_hash_table_contains (sums, csum)) - { - g_hash_table_add (sums, g_strdup (csum)); - rollsum_size += ofs; - } - this_rollsum_size += ofs; - } - g_print ("input: rollsum size: %" G_GUINT64_FORMAT "\n", this_rollsum_size); - } - g_print ("rollsum total:%u input:%" G_GUINT64_FORMAT " output: %" G_GUINT64_FORMAT " speedup:%f\n", - g_hash_table_size (sums), input_size, rollsum_size, - (((double)(input_size+1)) / ((double) rollsum_size + 1))); - } - else - { - bupsplit_selftest (); - } + g_printerr ("rollsum crcs=%u bufs=%u total=%u matchsize=%llu\n", + matches->crcmatches, + matches->bufmatches, + matches->total, (unsigned long long)matches->match_size); out: - g_clear_pointer (&bytes, g_bytes_unref); if (local_error) { g_printerr ("%s\n", local_error->message);