deltas: Compute rollsum targets
This commit is contained in:
parent
d749932f6b
commit
3df8be0d92
|
|
@ -49,6 +49,8 @@ libostree_1_la_SOURCES = \
|
||||||
src/libostree/ostree-lzma-compressor.h \
|
src/libostree/ostree-lzma-compressor.h \
|
||||||
src/libostree/ostree-lzma-decompressor.c \
|
src/libostree/ostree-lzma-decompressor.c \
|
||||||
src/libostree/ostree-lzma-decompressor.h \
|
src/libostree/ostree-lzma-decompressor.h \
|
||||||
|
src/libostree/bupsplit.h \
|
||||||
|
src/libostree/bupsplit.c \
|
||||||
src/libostree/ostree-varint.h \
|
src/libostree/ostree-varint.h \
|
||||||
src/libostree/ostree-varint.c \
|
src/libostree/ostree-varint.c \
|
||||||
src/libostree/ostree-linuxfsutil.h \
|
src/libostree/ostree-linuxfsutil.h \
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <zlib.h>
|
||||||
|
|
||||||
#include "ostree-core-private.h"
|
#include "ostree-core-private.h"
|
||||||
#include "ostree-repo-private.h"
|
#include "ostree-repo-private.h"
|
||||||
|
|
@ -379,6 +380,185 @@ process_one_object (OstreeRepo *repo,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
GPtrArray *keys;
|
||||||
|
GHashTable *values;
|
||||||
|
} OrderedRollsums;
|
||||||
|
|
||||||
|
static void
|
||||||
|
ordered_rollsums_free (OrderedRollsums *ohash)
|
||||||
|
{
|
||||||
|
g_ptr_array_unref (ohash->keys);
|
||||||
|
g_hash_table_unref (ohash->values);
|
||||||
|
g_free (ohash);
|
||||||
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
rollsum_chunks_crc32 (GInputStream *istream,
|
||||||
|
OrderedRollsums **out_rollsums,
|
||||||
|
GCancellable *cancellable,
|
||||||
|
GError **error)
|
||||||
|
{
|
||||||
|
gboolean ret = FALSE;
|
||||||
|
gsize start = 0;
|
||||||
|
gboolean rollsum_end = FALSE;
|
||||||
|
OrderedRollsums *ret_rollsums = g_new0 (OrderedRollsums, 1);
|
||||||
|
gs_unref_object GBufferedInputStream *bufinput =
|
||||||
|
(GBufferedInputStream*) g_buffered_input_stream_new_sized (istream, ROLLSUM_BLOB_MAX);
|
||||||
|
|
||||||
|
ret_rollsums->keys = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
|
||||||
|
ret_rollsums->values = g_hash_table_new (NULL, NULL);
|
||||||
|
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
gssize bytes_read;
|
||||||
|
const guint8 *buf;
|
||||||
|
gsize bufsize;
|
||||||
|
int offset, bits;
|
||||||
|
|
||||||
|
bytes_read = g_buffered_input_stream_fill (bufinput, -1, cancellable, error);
|
||||||
|
if (bytes_read == -1)
|
||||||
|
goto out;
|
||||||
|
if (bytes_read == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
buf = g_buffered_input_stream_peek_buffer (bufinput, &bufsize);
|
||||||
|
|
||||||
|
if (!rollsum_end)
|
||||||
|
{
|
||||||
|
offset = bupsplit_find_ofs (buf, MIN(G_MAXINT32, bufsize), &bits);
|
||||||
|
if (offset == 0)
|
||||||
|
{
|
||||||
|
rollsum_end = TRUE;
|
||||||
|
offset = MIN(ROLLSUM_BLOB_MAX, bufsize);
|
||||||
|
}
|
||||||
|
else if (offset > ROLLSUM_BLOB_MAX)
|
||||||
|
offset = ROLLSUM_BLOB_MAX;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
offset = MIN(ROLLSUM_BLOB_MAX, bufsize);
|
||||||
|
|
||||||
|
if (!g_input_stream_skip ((GInputStream*)bufinput, bufsize, cancellable, error))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* Use zlib's crc32 */
|
||||||
|
{ guint32 crc = crc32 (0L, NULL, 0);
|
||||||
|
GVariant *val;
|
||||||
|
|
||||||
|
crc = crc32 (crc, buf, offset);
|
||||||
|
|
||||||
|
val = g_variant_ref_sink (g_variant_new ("(utt)", crc, (guint64) start, (guint64)offset));
|
||||||
|
g_ptr_array_add (ret_rollsums->keys, val);
|
||||||
|
g_hash_table_insert (ret_rollsums->values, GUINT_TO_POINTER (crc), val);
|
||||||
|
}
|
||||||
|
|
||||||
|
start += offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = TRUE;
|
||||||
|
gs_transfer_out_value (out_rollsums, &ret_rollsums);
|
||||||
|
out:
|
||||||
|
if (ret_rollsums)
|
||||||
|
ordered_rollsums_free (ret_rollsums);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char *from_checksum;
|
||||||
|
OrderedRollsums *from_rollsums;
|
||||||
|
OrderedRollsums *to_rollsums;
|
||||||
|
guint match_ratio;
|
||||||
|
} ContentRollsum;
|
||||||
|
|
||||||
|
static void
|
||||||
|
content_rollsums_free (ContentRollsum *rollsum)
|
||||||
|
{
|
||||||
|
g_free (rollsum->from_checksum);
|
||||||
|
ordered_rollsums_free (rollsum->from_rollsums);
|
||||||
|
ordered_rollsums_free (rollsum->to_rollsums);
|
||||||
|
g_free (rollsum);
|
||||||
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
try_content_rollsum (OstreeRepo *repo,
|
||||||
|
const char *from,
|
||||||
|
const char *to,
|
||||||
|
ContentRollsum **out_rollsum,
|
||||||
|
GCancellable *cancellable,
|
||||||
|
GError **error)
|
||||||
|
{
|
||||||
|
gboolean ret = FALSE;
|
||||||
|
OrderedRollsums *from_rollsum = NULL;
|
||||||
|
OrderedRollsums *to_rollsum = NULL;
|
||||||
|
gs_unref_object GInputStream *from_istream = NULL;
|
||||||
|
gs_unref_object GFileInfo *from_finfo = NULL;
|
||||||
|
gs_unref_object GInputStream *to_istream = NULL;
|
||||||
|
gs_unref_object GFileInfo *to_finfo = NULL;
|
||||||
|
ContentRollsum *ret_rollsum = NULL;
|
||||||
|
guint total = 0;
|
||||||
|
guint matches = 0;
|
||||||
|
guint match_ratio = 0;
|
||||||
|
gpointer hkey, hvalue;
|
||||||
|
GHashTableIter hiter;
|
||||||
|
|
||||||
|
*out_rollsum = NULL;
|
||||||
|
|
||||||
|
if (!ostree_repo_load_file (repo, from, &from_istream, &from_finfo, NULL,
|
||||||
|
cancellable, error))
|
||||||
|
goto out;
|
||||||
|
if (!ostree_repo_load_file (repo, to, &to_istream, &to_finfo, NULL,
|
||||||
|
cancellable, error))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* Only try to rollsum regular files obviously */
|
||||||
|
if (!(g_file_info_get_file_type (from_finfo) == G_FILE_TYPE_REGULAR
|
||||||
|
&& g_file_info_get_file_type (to_finfo) == G_FILE_TYPE_REGULAR))
|
||||||
|
{
|
||||||
|
ret = TRUE;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_assert (from_istream && to_istream);
|
||||||
|
|
||||||
|
if (!rollsum_chunks_crc32 (from_istream, &from_rollsum, cancellable, error))
|
||||||
|
goto out;
|
||||||
|
if (!rollsum_chunks_crc32 (to_istream, &to_rollsum, cancellable, error))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
g_clear_object (&from_istream);
|
||||||
|
g_clear_object (&to_istream);
|
||||||
|
|
||||||
|
g_hash_table_iter_init (&hiter, to_rollsum->values);
|
||||||
|
while (g_hash_table_iter_next (&hiter, &hkey, &hvalue))
|
||||||
|
{
|
||||||
|
if (g_hash_table_contains (from_rollsum->values, hkey))
|
||||||
|
matches++;
|
||||||
|
total++;
|
||||||
|
}
|
||||||
|
|
||||||
|
match_ratio = (matches*100)/total;
|
||||||
|
|
||||||
|
/* Only proceed if the file contains (arbitrary) more than 25% of
|
||||||
|
* the previous chunks.
|
||||||
|
*/
|
||||||
|
if (match_ratio < 25)
|
||||||
|
{
|
||||||
|
ret = TRUE;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret_rollsum = g_new0 (ContentRollsum, 1);
|
||||||
|
ret_rollsum->match_ratio = match_ratio;
|
||||||
|
ret_rollsum->from_checksum = g_strdup (from);
|
||||||
|
ret_rollsum->from_rollsums = from_rollsum; from_rollsum = NULL;
|
||||||
|
ret_rollsum->to_rollsums = to_rollsum; to_rollsum = NULL;
|
||||||
|
|
||||||
|
ret = TRUE;
|
||||||
|
gs_transfer_out_value (out_rollsum, &ret_rollsum);
|
||||||
|
out:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
generate_delta_lowlatency (OstreeRepo *repo,
|
generate_delta_lowlatency (OstreeRepo *repo,
|
||||||
const char *from,
|
const char *from,
|
||||||
|
|
@ -402,6 +582,7 @@ generate_delta_lowlatency (OstreeRepo *repo,
|
||||||
gs_unref_hashtable GHashTable *new_reachable_metadata = NULL;
|
gs_unref_hashtable GHashTable *new_reachable_metadata = NULL;
|
||||||
gs_unref_hashtable GHashTable *new_reachable_content = NULL;
|
gs_unref_hashtable GHashTable *new_reachable_content = NULL;
|
||||||
gs_unref_hashtable GHashTable *modified_content_objects = NULL;
|
gs_unref_hashtable GHashTable *modified_content_objects = NULL;
|
||||||
|
gs_unref_hashtable GHashTable *rollsum_optimized_content_objects = NULL;
|
||||||
gs_unref_hashtable GHashTable *content_object_to_size = NULL;
|
gs_unref_hashtable GHashTable *content_object_to_size = NULL;
|
||||||
|
|
||||||
if (from != NULL)
|
if (from != NULL)
|
||||||
|
|
@ -424,15 +605,21 @@ generate_delta_lowlatency (OstreeRepo *repo,
|
||||||
cancellable, error))
|
cancellable, error))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
modified_content_objects = g_hash_table_new_full (ostree_hash_object_name, g_variant_equal,
|
modified_content_objects = g_hash_table_new_full (g_str_hash, g_str_equal,
|
||||||
NULL,
|
g_free, g_free);
|
||||||
(GDestroyNotify) g_variant_unref);
|
|
||||||
for (i = 0; i < modified->len; i++)
|
for (i = 0; i < modified->len; i++)
|
||||||
{
|
{
|
||||||
OstreeDiffItem *diffitem = modified->pdata[i];
|
OstreeDiffItem *diffitem = modified->pdata[i];
|
||||||
GVariant *objname = ostree_object_name_serialize (diffitem->target_checksum,
|
/* Theoretically, a target file could replace multiple source
|
||||||
OSTREE_OBJECT_TYPE_FILE);
|
* files. That could happen if say a project changed from having
|
||||||
g_hash_table_add (modified_content_objects, objname);
|
* multiple binaries to one binary.
|
||||||
|
*
|
||||||
|
* In that case, we have last one wins behavior. For ELF rollsum
|
||||||
|
* tends to be useless unless there's a large static data blob.
|
||||||
|
*/
|
||||||
|
g_hash_table_replace (modified_content_objects,
|
||||||
|
g_strdup (diffitem->target_checksum),
|
||||||
|
g_strdup (diffitem->src_checksum));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (from)
|
if (from)
|
||||||
|
|
@ -478,6 +665,31 @@ generate_delta_lowlatency (OstreeRepo *repo,
|
||||||
g_hash_table_remove (new_reachable_metadata,
|
g_hash_table_remove (new_reachable_metadata,
|
||||||
ostree_object_name_serialize (to, OSTREE_OBJECT_TYPE_COMMIT));
|
ostree_object_name_serialize (to, OSTREE_OBJECT_TYPE_COMMIT));
|
||||||
|
|
||||||
|
rollsum_optimized_content_objects = g_hash_table_new_full (g_str_hash, g_str_equal,
|
||||||
|
g_free,
|
||||||
|
(GDestroyNotify) content_rollsums_free);
|
||||||
|
|
||||||
|
g_hash_table_iter_init (&hashiter, modified_content_objects);
|
||||||
|
while (g_hash_table_iter_next (&hashiter, &key, &value))
|
||||||
|
{
|
||||||
|
const char *to_checksum = key;
|
||||||
|
const char *from_checksum = value;
|
||||||
|
ContentRollsum *rollsum;
|
||||||
|
|
||||||
|
if (!try_content_rollsum (repo, from_checksum, to_checksum,
|
||||||
|
&rollsum, cancellable, error))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
if (!rollsum)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
g_hash_table_insert (rollsum_optimized_content_objects, g_strdup (to_checksum), rollsum);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_printerr ("rollsum for %u/%u modified\n",
|
||||||
|
g_hash_table_size (rollsum_optimized_content_objects),
|
||||||
|
g_hash_table_size (modified_content_objects));
|
||||||
|
|
||||||
/* Scan for large objects, so we can fall back to plain HTTP-based
|
/* Scan for large objects, so we can fall back to plain HTTP-based
|
||||||
* fetch. In the future this should come after an rsync-style
|
* fetch. In the future this should come after an rsync-style
|
||||||
* rolling delta check for modified files.
|
* rolling delta check for modified files.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue