diff options
| author | Junio C Hamano <gitster@pobox.com> | 2025-04-16 13:54:20 -0700 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-04-16 13:54:21 -0700 |
| commit | a271b05066a1fd2c3a62508d9908d6c5df14a1cb (patch) | |
| tree | 57e55fd6b395f8e5da4e23e1344c55d0d38ed80c | |
| parent | 9bdd7ecf7ec90433fc1803bf5d608d08857b3b49 (diff) | |
| parent | 8002e8ee1829f0c727aa2f7d9c18ad706cb63565 (diff) | |
| download | git-a271b05066a1fd2c3a62508d9908d6c5df14a1cb.tar.gz | |
Merge branch 'ps/cat-file-filter-batch'
"git cat-file --batch" and friends learned to allow "--filter=" to
omit certain objects, just like the transport layer does.
* ps/cat-file-filter-batch:
builtin/cat-file: use bitmaps to efficiently filter by object type
builtin/cat-file: deduplicate logic to iterate over all objects
pack-bitmap: introduce function to check whether a pack is bitmapped
pack-bitmap: add function to iterate over filtered bitmapped objects
pack-bitmap: allow passing payloads to `show_reachable_fn()`
builtin/cat-file: support "object:type=" objects filter
builtin/cat-file: support "blob:limit=" objects filter
builtin/cat-file: support "blob:none" objects filter
builtin/cat-file: wire up an option to filter objects
builtin/cat-file: introduce function to report object status
builtin/cat-file: rename variable that tracks usage
| -rw-r--r-- | Documentation/git-cat-file.adoc | 26 | ||||
| -rw-r--r-- | builtin/cat-file.c | 256 | ||||
| -rw-r--r-- | builtin/pack-objects.c | 3 | ||||
| -rw-r--r-- | builtin/rev-list.c | 3 | ||||
| -rw-r--r-- | pack-bitmap.c | 81 | ||||
| -rw-r--r-- | pack-bitmap.h | 22 | ||||
| -rw-r--r-- | reachable.c | 3 | ||||
| -rwxr-xr-x | t/t1006-cat-file.sh | 99 |
8 files changed, 411 insertions, 82 deletions
diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index 30359f5dbd..fc4b92f104 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -81,6 +81,25 @@ OPTIONS end-of-line conversion, etc). In this case, `<object>` has to be of the form `<tree-ish>:<path>`, or `:<path>`. +--filter=<filter-spec>:: +--no-filter:: + Omit objects from the list of printed objects. This can only be used in + combination with one of the batched modes. Excluded objects that have + been explicitly requested via any of the batch modes that read objects + via standard input (`--batch`, `--batch-check`) will be reported as + "excluded". Excluded objects in `--batch-all-objects` mode will not be + printed at all. The '<filter-spec>' may be one of the following: ++ +The form '--filter=blob:none' omits all blobs. ++ +The form '--filter=blob:limit=<n>[kmg]' omits blobs of size at least n +bytes or units. n may be zero. The suffixes k, m, and g can be used to name +units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same as +'blob:limit=1024'. ++ +The form '--filter=object:type=(tag|commit|tree|blob)' omits all objects which +are not of the requested type. + --path=<path>:: For use with `--textconv` or `--filters`, to allow specifying an object name and a path separately, e.g. 
when it is difficult to figure out @@ -340,6 +359,13 @@ the repository, then `cat-file` will ignore any custom format and print: <object> SP missing LF ------------ +If a name is specified on stdin that is filtered out via `--filter=`, +then `cat-file` will ignore any custom format and print: + +------------ +<object> SP excluded LF +------------ + If a name is specified that might refer to more than one object (an ambiguous short sha), then `cat-file` will ignore any custom format and print: ------------ diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b13561cf73..ead7554a57 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -15,11 +15,13 @@ #include "gettext.h" #include "hex.h" #include "ident.h" +#include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" #include "streaming.h" #include "oid-array.h" #include "packfile.h" +#include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "object-store-ll.h" @@ -35,6 +37,7 @@ enum batch_mode { }; struct batch_options { + struct list_objects_filter_options objects_filter; int enabled; int follow_symlinks; enum batch_mode batch_mode; @@ -455,6 +458,16 @@ static void print_default_format(struct strbuf *scratch, struct expand_data *dat (uintmax_t)data->size, opt->output_delim); } +static void report_object_status(struct batch_options *opt, + const char *obj_name, + const struct object_id *oid, + const char *status) +{ + printf("%s %s%c", obj_name ? 
obj_name : oid_to_hex(oid), + status, opt->output_delim); + fflush(stdout); +} + /* * If "pack" is non-NULL, then "offset" is the byte offset within the pack from * which the object may be accessed (though note that we may also rely on @@ -470,8 +483,13 @@ static void batch_object_write(const char *obj_name, if (!data->skip_object_info) { int ret; - if (use_mailmap) + if (use_mailmap || + opt->objects_filter.choice == LOFC_BLOB_NONE || + opt->objects_filter.choice == LOFC_BLOB_LIMIT || + opt->objects_filter.choice == LOFC_OBJECT_TYPE) data->info.typep = &data->type; + if (opt->objects_filter.choice == LOFC_BLOB_LIMIT) + data->info.sizep = &data->size; if (pack) ret = packed_object_info(the_repository, pack, offset, @@ -481,12 +499,42 @@ static void batch_object_write(const char *obj_name, &data->oid, &data->info, OBJECT_INFO_LOOKUP_REPLACE); if (ret < 0) { - printf("%s missing%c", - obj_name ? obj_name : oid_to_hex(&data->oid), opt->output_delim); - fflush(stdout); + report_object_status(opt, obj_name, &data->oid, "missing"); return; } + switch (opt->objects_filter.choice) { + case LOFC_DISABLED: + break; + case LOFC_BLOB_NONE: + if (data->type == OBJ_BLOB) { + if (!opt->all_objects) + report_object_status(opt, obj_name, + &data->oid, "excluded"); + return; + } + break; + case LOFC_BLOB_LIMIT: + if (data->type == OBJ_BLOB && + data->size >= opt->objects_filter.blob_limit_value) { + if (!opt->all_objects) + report_object_status(opt, obj_name, + &data->oid, "excluded"); + return; + } + break; + case LOFC_OBJECT_TYPE: + if (data->type != opt->objects_filter.object_type) { + if (!opt->all_objects) + report_object_status(opt, obj_name, + &data->oid, "excluded"); + return; + } + break; + default: + BUG("unsupported objects filter"); + } + if (use_mailmap && (data->type == OBJ_COMMIT || data->type == OBJ_TAG)) { size_t s = data->size; char *buf = NULL; @@ -535,10 +583,10 @@ static void batch_one_object(const char *obj_name, if (result != FOUND) { switch (result) { case 
MISSING_OBJECT: - printf("%s missing%c", obj_name, opt->output_delim); + report_object_status(opt, obj_name, &data->oid, "missing"); break; case SHORT_NAME_AMBIGUOUS: - printf("%s ambiguous%c", obj_name, opt->output_delim); + report_object_status(opt, obj_name, &data->oid, "ambiguous"); break; case DANGLING_SYMLINK: printf("dangling %"PRIuMAX"%c%s%c", @@ -595,25 +643,18 @@ static int batch_object_cb(const struct object_id *oid, void *vdata) return 0; } -static int collect_loose_object(const struct object_id *oid, - const char *path UNUSED, - void *data) -{ - oid_array_append(data, oid); - return 0; -} - -static int collect_packed_object(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data) +static int collect_object(const struct object_id *oid, + struct packed_git *pack UNUSED, + off_t offset UNUSED, + void *data) { oid_array_append(data, oid); return 0; } static int batch_unordered_object(const struct object_id *oid, - struct packed_git *pack, off_t offset, + struct packed_git *pack, + off_t offset, void *vdata) { struct object_cb_data *data = vdata; @@ -627,23 +668,6 @@ static int batch_unordered_object(const struct object_id *oid, return 0; } -static int batch_unordered_loose(const struct object_id *oid, - const char *path UNUSED, - void *data) -{ - return batch_unordered_object(oid, NULL, 0, data); -} - -static int batch_unordered_packed(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data) -{ - return batch_unordered_object(oid, pack, - nth_packed_object_offset(pack, pos), - data); -} - typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *, struct strbuf *, struct expand_data *); @@ -776,6 +800,76 @@ static void batch_objects_command(struct batch_options *opt, #define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)" +typedef int (*for_each_object_fn)(const struct object_id *oid, struct packed_git *pack, + off_t offset, void *data); + +struct 
for_each_object_payload { + for_each_object_fn callback; + void *payload; +}; + +static int batch_one_object_loose(const struct object_id *oid, + const char *path UNUSED, + void *_payload) +{ + struct for_each_object_payload *payload = _payload; + return payload->callback(oid, NULL, 0, payload->payload); +} + +static int batch_one_object_packed(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *_payload) +{ + struct for_each_object_payload *payload = _payload; + return payload->callback(oid, pack, nth_packed_object_offset(pack, pos), + payload->payload); +} + +static int batch_one_object_bitmapped(const struct object_id *oid, + enum object_type type UNUSED, + int flags UNUSED, + uint32_t hash UNUSED, + struct packed_git *pack, + off_t offset, + void *_payload) +{ + struct for_each_object_payload *payload = _payload; + return payload->callback(oid, pack, offset, payload->payload); +} + +static void batch_each_object(struct batch_options *opt, + for_each_object_fn callback, + unsigned flags, + void *_payload) +{ + struct for_each_object_payload payload = { + .callback = callback, + .payload = _payload, + }; + struct bitmap_index *bitmap = prepare_bitmap_git(the_repository); + + for_each_loose_object(batch_one_object_loose, &payload, 0); + + if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, + batch_one_object_bitmapped, &payload)) { + struct packed_git *pack; + + for (pack = get_all_packs(the_repository); pack; pack = pack->next) { + if (bitmap_index_contains_pack(bitmap, pack) || + open_pack_index(pack)) + continue; + for_each_object_in_pack(pack, batch_one_object_packed, + &payload, flags); + } + } else { + for_each_packed_object(the_repository, batch_one_object_packed, + &payload, flags); + } + + free_bitmap_index(bitmap); +} + static int batch_objects(struct batch_options *opt) { struct strbuf input = STRBUF_INIT; @@ -812,7 +906,8 @@ static int batch_objects(struct batch_options *opt) struct object_cb_data cb; 
struct object_info empty = OBJECT_INFO_INIT; - if (!memcmp(&data.info, &empty, sizeof(empty))) + if (!memcmp(&data.info, &empty, sizeof(empty)) && + opt->objects_filter.choice == LOFC_DISABLED) data.skip_object_info = 1; if (repo_has_promisor_remote(the_repository)) @@ -829,18 +924,14 @@ static int batch_objects(struct batch_options *opt) cb.seen = &seen; - for_each_loose_object(batch_unordered_loose, &cb, 0); - for_each_packed_object(the_repository, batch_unordered_packed, - &cb, FOR_EACH_OBJECT_PACK_ORDER); + batch_each_object(opt, batch_unordered_object, + FOR_EACH_OBJECT_PACK_ORDER, &cb); oidset_clear(&seen); } else { struct oid_array sa = OID_ARRAY_INIT; - for_each_loose_object(collect_loose_object, &sa, 0); - for_each_packed_object(the_repository, collect_packed_object, - &sa, 0); - + batch_each_object(opt, collect_object, 0, &sa); oid_array_for_each_unique(&sa, batch_object_cb, &cb); oid_array_clear(&sa); @@ -936,12 +1027,15 @@ int cmd_cat_file(int argc, int opt_cw = 0; int opt_epts = 0; const char *exp_type = NULL, *obj_name = NULL; - struct batch_options batch = {0}; + struct batch_options batch = { + .objects_filter = LIST_OBJECTS_FILTER_INIT, + }; int unknown_type = 0; int input_nul_terminated = 0; int nul_terminated = 0; + int ret; - const char * const usage[] = { + const char * const builtin_catfile_usage[] = { N_("git cat-file <type> <object>"), N_("git cat-file (-e | -p) <object>"), N_("git cat-file (-t | -s) [--allow-unknown-type] <object>"), @@ -1000,6 +1094,7 @@ int cmd_cat_file(int argc, N_("run filters on object's content"), 'w'), OPT_STRING(0, "path", &force_path, N_("blob|tree"), N_("use a <path> for (--textconv | --filters); Not with 'batch'")), + OPT_PARSE_LIST_OBJECTS_FILTER(&batch.objects_filter), OPT_END() }; @@ -1007,13 +1102,27 @@ int cmd_cat_file(int argc, batch.buffer_output = -1; - argc = parse_options(argc, argv, prefix, options, usage, 0); + argc = parse_options(argc, argv, prefix, options, builtin_catfile_usage, 0); opt_cw = (opt 
== 'c' || opt == 'w'); opt_epts = (opt == 'e' || opt == 'p' || opt == 't' || opt == 's'); if (use_mailmap) read_mailmap(&mailmap); + switch (batch.objects_filter.choice) { + case LOFC_DISABLED: + break; + case LOFC_BLOB_NONE: + case LOFC_BLOB_LIMIT: + case LOFC_OBJECT_TYPE: + if (!batch.enabled) + usage(_("objects filter only supported in batch mode")); + break; + default: + usagef(_("objects filter not supported: '%s'"), + list_object_filter_config_name(batch.objects_filter.choice)); + } + /* --batch-all-objects? */ if (opt == 'b') batch.all_objects = 1; @@ -1021,7 +1130,7 @@ int cmd_cat_file(int argc, /* Option compatibility */ if (force_path && !opt_cw) usage_msg_optf(_("'%s=<%s>' needs '%s' or '%s'"), - usage, options, + builtin_catfile_usage, options, "--path", _("path|tree-ish"), "--filters", "--textconv"); @@ -1029,20 +1138,20 @@ int cmd_cat_file(int argc, if (batch.enabled) ; else if (batch.follow_symlinks) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "--follow-symlinks"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "--follow-symlinks"); else if (batch.buffer_output >= 0) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "--buffer"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "--buffer"); else if (batch.all_objects) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "--batch-all-objects"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "--batch-all-objects"); else if (input_nul_terminated) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "-z"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "-z"); else if (nul_terminated) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "-Z"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "-Z"); batch.input_delim = batch.output_delim = 
'\n'; if (input_nul_terminated) @@ -1063,33 +1172,37 @@ int cmd_cat_file(int argc, batch.transform_mode = opt; else if (opt && opt != 'b') usage_msg_optf(_("'-%c' is incompatible with batch mode"), - usage, options, opt); + builtin_catfile_usage, options, opt); else if (argc) - usage_msg_opt(_("batch modes take no arguments"), usage, - options); + usage_msg_opt(_("batch modes take no arguments"), + builtin_catfile_usage, options); - return batch_objects(&batch); + ret = batch_objects(&batch); + goto out; } if (opt) { if (!argc && opt == 'c') usage_msg_optf(_("<rev> required with '%s'"), - usage, options, "--textconv"); + builtin_catfile_usage, options, + "--textconv"); else if (!argc && opt == 'w') usage_msg_optf(_("<rev> required with '%s'"), - usage, options, "--filters"); + builtin_catfile_usage, options, + "--filters"); else if (!argc && opt_epts) usage_msg_optf(_("<object> required with '-%c'"), - usage, options, opt); + builtin_catfile_usage, options, opt); else if (argc == 1) obj_name = argv[0]; else - usage_msg_opt(_("too many arguments"), usage, options); + usage_msg_opt(_("too many arguments"), builtin_catfile_usage, + options); } else if (!argc) { - usage_with_options(usage, options); + usage_with_options(builtin_catfile_usage, options); } else if (argc != 2) { usage_msg_optf(_("only two arguments allowed in <type> <object> mode, not %d"), - usage, options, argc); + builtin_catfile_usage, options, argc); } else if (argc) { exp_type = argv[0]; obj_name = argv[1]; @@ -1097,5 +1210,10 @@ int cmd_cat_file(int argc, if (unknown_type && opt != 't' && opt != 's') die("git cat-file --allow-unknown-type: use with -s or -t"); - return cat_one_file(opt, exp_type, obj_name, unknown_type); + + ret = cat_one_file(opt, exp_type, obj_name, unknown_type); + +out: + list_objects_filter_release(&batch.objects_filter); + return ret; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 4764aa1b8c..3973267e9e 100644 --- a/builtin/pack-objects.c +++ 
b/builtin/pack-objects.c @@ -1820,7 +1820,8 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, static int add_object_entry_from_bitmap(const struct object_id *oid, enum object_type type, int flags UNUSED, uint32_t name_hash, - struct packed_git *pack, off_t offset) + struct packed_git *pack, off_t offset, + void *payload UNUSED) { display_progress(progress_state, ++nr_seen); diff --git a/builtin/rev-list.c b/builtin/rev-list.c index e6ee3f82ee..4a84f18f9e 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -461,7 +461,8 @@ static int show_object_fast( int exclude UNUSED, uint32_t name_hash UNUSED, struct packed_git *found_pack UNUSED, - off_t found_offset UNUSED) + off_t found_offset UNUSED, + void *payload UNUSED) { fprintf(stdout, "%s\n", oid_to_hex(oid)); return 1; diff --git a/pack-bitmap.c b/pack-bitmap.c index aa67cb40ab..5299f49d59 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -745,6 +745,21 @@ struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx) return NULL; } +int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack) +{ + for (; bitmap; bitmap = bitmap->base) { + if (bitmap_is_midx(bitmap)) { + for (size_t i = 0; i < bitmap->midx->num_packs; i++) + if (bitmap->midx->packs[i] == pack) + return 1; + } else if (bitmap->pack == pack) { + return 1; + } + } + + return 0; +} + struct include_data { struct bitmap_index *bitmap_git; struct bitmap *base; @@ -1625,7 +1640,7 @@ static void show_extended_objects(struct bitmap_index *bitmap_git, (obj->type == OBJ_TAG && !revs->tag_objects)) continue; - show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0); + show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0, NULL); } } @@ -1662,8 +1677,10 @@ static void init_type_iterator(struct ewah_or_iterator *it, static void show_objects_for_type( struct bitmap_index *bitmap_git, + struct bitmap *objects, enum object_type object_type, - show_reachable_fn show_reach) + 
show_reachable_fn show_reach, + void *payload) { size_t i = 0; uint32_t offset; @@ -1671,8 +1688,6 @@ static void show_objects_for_type( struct ewah_or_iterator it; eword_t filter; - struct bitmap *objects = bitmap_git->result; - init_type_iterator(&it, bitmap_git, object_type); for (i = 0; i < objects->word_alloc && @@ -1715,7 +1730,7 @@ static void show_objects_for_type( if (bitmap_git->hashes) hash = get_be32(bitmap_git->hashes + index_pos); - show_reach(&oid, object_type, 0, hash, pack, ofs); + show_reach(&oid, object_type, 0, hash, pack, ofs, payload); } } @@ -2024,6 +2039,50 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git, } } +int for_each_bitmapped_object(struct bitmap_index *bitmap_git, + struct list_objects_filter_options *filter, + show_reachable_fn show_reach, + void *payload) +{ + struct bitmap *filtered_bitmap = NULL; + uint32_t objects_nr; + size_t full_word_count; + int ret; + + if (!can_filter_bitmap(filter)) { + ret = -1; + goto out; + } + + objects_nr = bitmap_num_objects(bitmap_git); + full_word_count = objects_nr / BITS_IN_EWORD; + + /* We start from the all-1 bitmap and then filter down from there. 
*/ + filtered_bitmap = bitmap_word_alloc(full_word_count + !!(objects_nr % BITS_IN_EWORD)); + memset(filtered_bitmap->words, 0xff, full_word_count * sizeof(*filtered_bitmap->words)); + for (size_t i = full_word_count * BITS_IN_EWORD; i < objects_nr; i++) + bitmap_set(filtered_bitmap, i); + + if (filter_bitmap(bitmap_git, NULL, filtered_bitmap, filter) < 0) { + ret = -1; + goto out; + } + + show_objects_for_type(bitmap_git, filtered_bitmap, + OBJ_COMMIT, show_reach, payload); + show_objects_for_type(bitmap_git, filtered_bitmap, + OBJ_TREE, show_reach, payload); + show_objects_for_type(bitmap_git, filtered_bitmap, + OBJ_BLOB, show_reach, payload); + show_objects_for_type(bitmap_git, filtered_bitmap, + OBJ_TAG, show_reach, payload); + + ret = 0; +out: + bitmap_free(filtered_bitmap); + return ret; +} + struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects) { @@ -2518,13 +2577,17 @@ void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git, { assert(bitmap_git->result); - show_objects_for_type(bitmap_git, OBJ_COMMIT, show_reachable); + show_objects_for_type(bitmap_git, bitmap_git->result, + OBJ_COMMIT, show_reachable, NULL); if (revs->tree_objects) - show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable); + show_objects_for_type(bitmap_git, bitmap_git->result, + OBJ_TREE, show_reachable, NULL); if (revs->blob_objects) - show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable); + show_objects_for_type(bitmap_git, bitmap_git->result, + OBJ_BLOB, show_reachable, NULL); if (revs->tag_objects) - show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable); + show_objects_for_type(bitmap_git, bitmap_git->result, + OBJ_TAG, show_reachable, NULL); show_extended_objects(bitmap_git, revs, show_reachable); } diff --git a/pack-bitmap.h b/pack-bitmap.h index 0e9d25e6f2..382d39499a 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -50,7 +50,8 @@ typedef int (*show_reachable_fn)( int flags, uint32_t hash, struct packed_git 
*found_pack, - off_t found_offset); + off_t found_offset, + void *payload); struct bitmap_index; @@ -66,6 +67,13 @@ struct bitmapped_pack { struct bitmap_index *prepare_bitmap_git(struct repository *r); struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx); + +/* + * Given a bitmap index, determine whether it contains the pack either directly + * or via the multi-pack-index. + */ +int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack); + void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits, uint32_t *trees, uint32_t *blobs, uint32_t *tags); void traverse_bitmap_commit_list(struct bitmap_index *, @@ -78,6 +86,18 @@ int test_bitmap_pseudo_merges(struct repository *r); int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n); int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n); +struct list_objects_filter_options; + +/* + * Filter bitmapped objects and iterate through all resulting objects, + * executing `show_reach` for each of them. Returns `-1` in case the filter is + * not supported, `0` otherwise. 
+ */ +int for_each_bitmapped_object(struct bitmap_index *bitmap_git, + struct list_objects_filter_options *filter, + show_reachable_fn show_reach, + void *payload); + #define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \ "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL" diff --git a/reachable.c b/reachable.c index 1b26b9b1d7..299e129249 100644 --- a/reachable.c +++ b/reachable.c @@ -341,7 +341,8 @@ static int mark_object_seen(const struct object_id *oid, int exclude UNUSED, uint32_t name_hash UNUSED, struct packed_git *found_pack UNUSED, - off_t found_offset UNUSED) + off_t found_offset UNUSED, + void *payload UNUSED) { struct object *obj = lookup_object_by_type(the_repository, oid, type); if (!obj) diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index aa64678c64..ce8b27bf54 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -1410,4 +1410,103 @@ test_expect_success PERL_IPC_OPEN2 '--batch-command info is unbuffered by defaul perl -e "$perl_script" -- --batch-command $hello_oid "$expect" "info " ' +test_expect_success 'setup for objects filter' ' + git init repo && + ( + # Seed the repository with four different sets of objects: + # + # - The first set is fully packed and has a bitmap. + # - The second set is packed, but has no bitmap. + # - The third set is loose. + # - The fourth set is loose and contains big objects. + # + # This ensures that we cover all these types as expected. 
+ cd repo && + test_commit first && + git repack -Adb && + test_commit second && + git repack -d && + test_commit third && + + for n in 1000 10000 + do + printf "%"$n"s" X >large.$n || return 1 + done && + git add large.* && + git commit -m fourth + ) +' + +test_expect_success 'objects filter with unknown option' ' + cat >expect <<-EOF && + fatal: invalid filter-spec ${SQ}unknown${SQ} + EOF + test_must_fail git -C repo cat-file --filter=unknown 2>err && + test_cmp expect err +' + +for option in sparse:oid=1234 tree:1 sparse:path=x +do + test_expect_success "objects filter with unsupported option $option" ' + case "$option" in + tree:1) + echo "usage: objects filter not supported: ${SQ}tree${SQ}" >expect + ;; + sparse:path=x) + echo "fatal: sparse:path filters support has been dropped" >expect + ;; + *) + option_name=$(echo "$option" | cut -d= -f1) && + printf "usage: objects filter not supported: ${SQ}%s${SQ}\n" "$option_name" >expect + ;; + esac && + test_must_fail git -C repo cat-file --filter=$option 2>err && + test_cmp expect err + ' +done + +test_expect_success 'objects filter: disabled' ' + git -C repo cat-file --batch-check="%(objectname)" --batch-all-objects --no-filter >actual && + sort actual >actual.sorted && + git -C repo rev-list --objects --no-object-names --all >expect && + sort expect >expect.sorted && + test_cmp expect.sorted actual.sorted +' + +test_objects_filter () { + filter="$1" + + test_expect_success "objects filter: $filter" ' + git -C repo cat-file --batch-check="%(objectname)" --batch-all-objects --filter="$filter" >actual && + sort actual >actual.sorted && + git -C repo rev-list --objects --no-object-names --all --filter="$filter" --filter-provided-objects >expect && + sort expect >expect.sorted && + test_cmp expect.sorted actual.sorted + ' + + test_expect_success "objects filter prints excluded objects: $filter" ' + # Find all objects that would be excluded by the current filter. 
+ git -C repo rev-list --objects --no-object-names --all >all && + git -C repo rev-list --objects --no-object-names --all --filter="$filter" --filter-provided-objects >filtered && + sort all >all.sorted && + sort filtered >filtered.sorted && + comm -23 all.sorted filtered.sorted >expected.excluded && + test_line_count -gt 0 expected.excluded && + + git -C repo cat-file --batch-check="%(objectname)" --filter="$filter" <expected.excluded >actual && + awk "/excluded/{ print \$1 }" actual | sort >actual.excluded && + test_cmp expected.excluded actual.excluded + ' +} + +test_objects_filter "blob:none" +test_objects_filter "blob:limit=1" +test_objects_filter "blob:limit=500" +test_objects_filter "blob:limit=1000" +test_objects_filter "blob:limit=1k" +test_objects_filter "object:type=blob" +test_objects_filter "object:type=commit" +test_objects_filter "object:type=tag" +test_objects_filter "object:type=tree" + test_done |
