diff options
| author | Junio C Hamano <gitster@pobox.com> | 2025-04-16 13:54:20 -0700 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-04-16 13:54:21 -0700 |
| commit | a271b05066a1fd2c3a62508d9908d6c5df14a1cb (patch) | |
| tree | 57e55fd6b395f8e5da4e23e1344c55d0d38ed80c /builtin | |
| parent | 9bdd7ecf7ec90433fc1803bf5d608d08857b3b49 (diff) | |
| parent | 8002e8ee1829f0c727aa2f7d9c18ad706cb63565 (diff) | |
| download | git-a271b05066a1fd2c3a62508d9908d6c5df14a1cb.tar.gz | |
Merge branch 'ps/cat-file-filter-batch'

"git cat-file --batch" and friends learned to allow "--filter=" to
omit certain objects, just like the transport layer does.

* ps/cat-file-filter-batch:
  builtin/cat-file: use bitmaps to efficiently filter by object type
  builtin/cat-file: deduplicate logic to iterate over all objects
  pack-bitmap: introduce function to check whether a pack is bitmapped
  pack-bitmap: add function to iterate over filtered bitmapped objects
  pack-bitmap: allow passing payloads to `show_reachable_fn()`
  builtin/cat-file: support "object:type=" objects filter
  builtin/cat-file: support "blob:limit=" objects filter
  builtin/cat-file: support "blob:none" objects filter
  builtin/cat-file: wire up an option to filter objects
  builtin/cat-file: introduce function to report object status
  builtin/cat-file: rename variable that tracks usage
Diffstat (limited to 'builtin')
| -rw-r--r-- | builtin/cat-file.c | 256 |
| -rw-r--r-- | builtin/pack-objects.c | 3 |
| -rw-r--r-- | builtin/rev-list.c | 3 |
3 files changed, 191 insertions, 71 deletions
diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b13561cf73..ead7554a57 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -15,11 +15,13 @@ #include "gettext.h" #include "hex.h" #include "ident.h" +#include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" #include "streaming.h" #include "oid-array.h" #include "packfile.h" +#include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "object-store-ll.h" @@ -35,6 +37,7 @@ enum batch_mode { }; struct batch_options { + struct list_objects_filter_options objects_filter; int enabled; int follow_symlinks; enum batch_mode batch_mode; @@ -455,6 +458,16 @@ static void print_default_format(struct strbuf *scratch, struct expand_data *dat (uintmax_t)data->size, opt->output_delim); } +static void report_object_status(struct batch_options *opt, + const char *obj_name, + const struct object_id *oid, + const char *status) +{ + printf("%s %s%c", obj_name ? obj_name : oid_to_hex(oid), + status, opt->output_delim); + fflush(stdout); +} + /* * If "pack" is non-NULL, then "offset" is the byte offset within the pack from * which the object may be accessed (though note that we may also rely on @@ -470,8 +483,13 @@ static void batch_object_write(const char *obj_name, if (!data->skip_object_info) { int ret; - if (use_mailmap) + if (use_mailmap || + opt->objects_filter.choice == LOFC_BLOB_NONE || + opt->objects_filter.choice == LOFC_BLOB_LIMIT || + opt->objects_filter.choice == LOFC_OBJECT_TYPE) data->info.typep = &data->type; + if (opt->objects_filter.choice == LOFC_BLOB_LIMIT) + data->info.sizep = &data->size; if (pack) ret = packed_object_info(the_repository, pack, offset, @@ -481,12 +499,42 @@ static void batch_object_write(const char *obj_name, &data->oid, &data->info, OBJECT_INFO_LOOKUP_REPLACE); if (ret < 0) { - printf("%s missing%c", - obj_name ? 
obj_name : oid_to_hex(&data->oid), opt->output_delim); - fflush(stdout); + report_object_status(opt, obj_name, &data->oid, "missing"); return; } + switch (opt->objects_filter.choice) { + case LOFC_DISABLED: + break; + case LOFC_BLOB_NONE: + if (data->type == OBJ_BLOB) { + if (!opt->all_objects) + report_object_status(opt, obj_name, + &data->oid, "excluded"); + return; + } + break; + case LOFC_BLOB_LIMIT: + if (data->type == OBJ_BLOB && + data->size >= opt->objects_filter.blob_limit_value) { + if (!opt->all_objects) + report_object_status(opt, obj_name, + &data->oid, "excluded"); + return; + } + break; + case LOFC_OBJECT_TYPE: + if (data->type != opt->objects_filter.object_type) { + if (!opt->all_objects) + report_object_status(opt, obj_name, + &data->oid, "excluded"); + return; + } + break; + default: + BUG("unsupported objects filter"); + } + if (use_mailmap && (data->type == OBJ_COMMIT || data->type == OBJ_TAG)) { size_t s = data->size; char *buf = NULL; @@ -535,10 +583,10 @@ static void batch_one_object(const char *obj_name, if (result != FOUND) { switch (result) { case MISSING_OBJECT: - printf("%s missing%c", obj_name, opt->output_delim); + report_object_status(opt, obj_name, &data->oid, "missing"); break; case SHORT_NAME_AMBIGUOUS: - printf("%s ambiguous%c", obj_name, opt->output_delim); + report_object_status(opt, obj_name, &data->oid, "ambiguous"); break; case DANGLING_SYMLINK: printf("dangling %"PRIuMAX"%c%s%c", @@ -595,25 +643,18 @@ static int batch_object_cb(const struct object_id *oid, void *vdata) return 0; } -static int collect_loose_object(const struct object_id *oid, - const char *path UNUSED, - void *data) -{ - oid_array_append(data, oid); - return 0; -} - -static int collect_packed_object(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data) +static int collect_object(const struct object_id *oid, + struct packed_git *pack UNUSED, + off_t offset UNUSED, + void *data) { oid_array_append(data, oid); return 
0; } static int batch_unordered_object(const struct object_id *oid, - struct packed_git *pack, off_t offset, + struct packed_git *pack, + off_t offset, void *vdata) { struct object_cb_data *data = vdata; @@ -627,23 +668,6 @@ static int batch_unordered_object(const struct object_id *oid, return 0; } -static int batch_unordered_loose(const struct object_id *oid, - const char *path UNUSED, - void *data) -{ - return batch_unordered_object(oid, NULL, 0, data); -} - -static int batch_unordered_packed(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data) -{ - return batch_unordered_object(oid, pack, - nth_packed_object_offset(pack, pos), - data); -} - typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *, struct strbuf *, struct expand_data *); @@ -776,6 +800,76 @@ static void batch_objects_command(struct batch_options *opt, #define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)" +typedef int (*for_each_object_fn)(const struct object_id *oid, struct packed_git *pack, + off_t offset, void *data); + +struct for_each_object_payload { + for_each_object_fn callback; + void *payload; +}; + +static int batch_one_object_loose(const struct object_id *oid, + const char *path UNUSED, + void *_payload) +{ + struct for_each_object_payload *payload = _payload; + return payload->callback(oid, NULL, 0, payload->payload); +} + +static int batch_one_object_packed(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *_payload) +{ + struct for_each_object_payload *payload = _payload; + return payload->callback(oid, pack, nth_packed_object_offset(pack, pos), + payload->payload); +} + +static int batch_one_object_bitmapped(const struct object_id *oid, + enum object_type type UNUSED, + int flags UNUSED, + uint32_t hash UNUSED, + struct packed_git *pack, + off_t offset, + void *_payload) +{ + struct for_each_object_payload *payload = _payload; + return payload->callback(oid, pack, offset, payload->payload); +} + 
+static void batch_each_object(struct batch_options *opt, + for_each_object_fn callback, + unsigned flags, + void *_payload) +{ + struct for_each_object_payload payload = { + .callback = callback, + .payload = _payload, + }; + struct bitmap_index *bitmap = prepare_bitmap_git(the_repository); + + for_each_loose_object(batch_one_object_loose, &payload, 0); + + if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, + batch_one_object_bitmapped, &payload)) { + struct packed_git *pack; + + for (pack = get_all_packs(the_repository); pack; pack = pack->next) { + if (bitmap_index_contains_pack(bitmap, pack) || + open_pack_index(pack)) + continue; + for_each_object_in_pack(pack, batch_one_object_packed, + &payload, flags); + } + } else { + for_each_packed_object(the_repository, batch_one_object_packed, + &payload, flags); + } + + free_bitmap_index(bitmap); +} + static int batch_objects(struct batch_options *opt) { struct strbuf input = STRBUF_INIT; @@ -812,7 +906,8 @@ static int batch_objects(struct batch_options *opt) struct object_cb_data cb; struct object_info empty = OBJECT_INFO_INIT; - if (!memcmp(&data.info, &empty, sizeof(empty))) + if (!memcmp(&data.info, &empty, sizeof(empty)) && + opt->objects_filter.choice == LOFC_DISABLED) data.skip_object_info = 1; if (repo_has_promisor_remote(the_repository)) @@ -829,18 +924,14 @@ static int batch_objects(struct batch_options *opt) cb.seen = &seen; - for_each_loose_object(batch_unordered_loose, &cb, 0); - for_each_packed_object(the_repository, batch_unordered_packed, - &cb, FOR_EACH_OBJECT_PACK_ORDER); + batch_each_object(opt, batch_unordered_object, + FOR_EACH_OBJECT_PACK_ORDER, &cb); oidset_clear(&seen); } else { struct oid_array sa = OID_ARRAY_INIT; - for_each_loose_object(collect_loose_object, &sa, 0); - for_each_packed_object(the_repository, collect_packed_object, - &sa, 0); - + batch_each_object(opt, collect_object, 0, &sa); oid_array_for_each_unique(&sa, batch_object_cb, &cb); oid_array_clear(&sa); @@ 
-936,12 +1027,15 @@ int cmd_cat_file(int argc, int opt_cw = 0; int opt_epts = 0; const char *exp_type = NULL, *obj_name = NULL; - struct batch_options batch = {0}; + struct batch_options batch = { + .objects_filter = LIST_OBJECTS_FILTER_INIT, + }; int unknown_type = 0; int input_nul_terminated = 0; int nul_terminated = 0; + int ret; - const char * const usage[] = { + const char * const builtin_catfile_usage[] = { N_("git cat-file <type> <object>"), N_("git cat-file (-e | -p) <object>"), N_("git cat-file (-t | -s) [--allow-unknown-type] <object>"), @@ -1000,6 +1094,7 @@ int cmd_cat_file(int argc, N_("run filters on object's content"), 'w'), OPT_STRING(0, "path", &force_path, N_("blob|tree"), N_("use a <path> for (--textconv | --filters); Not with 'batch'")), + OPT_PARSE_LIST_OBJECTS_FILTER(&batch.objects_filter), OPT_END() }; @@ -1007,13 +1102,27 @@ int cmd_cat_file(int argc, batch.buffer_output = -1; - argc = parse_options(argc, argv, prefix, options, usage, 0); + argc = parse_options(argc, argv, prefix, options, builtin_catfile_usage, 0); opt_cw = (opt == 'c' || opt == 'w'); opt_epts = (opt == 'e' || opt == 'p' || opt == 't' || opt == 's'); if (use_mailmap) read_mailmap(&mailmap); + switch (batch.objects_filter.choice) { + case LOFC_DISABLED: + break; + case LOFC_BLOB_NONE: + case LOFC_BLOB_LIMIT: + case LOFC_OBJECT_TYPE: + if (!batch.enabled) + usage(_("objects filter only supported in batch mode")); + break; + default: + usagef(_("objects filter not supported: '%s'"), + list_object_filter_config_name(batch.objects_filter.choice)); + } + /* --batch-all-objects? 
*/ if (opt == 'b') batch.all_objects = 1; @@ -1021,7 +1130,7 @@ int cmd_cat_file(int argc, /* Option compatibility */ if (force_path && !opt_cw) usage_msg_optf(_("'%s=<%s>' needs '%s' or '%s'"), - usage, options, + builtin_catfile_usage, options, "--path", _("path|tree-ish"), "--filters", "--textconv"); @@ -1029,20 +1138,20 @@ int cmd_cat_file(int argc, if (batch.enabled) ; else if (batch.follow_symlinks) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "--follow-symlinks"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "--follow-symlinks"); else if (batch.buffer_output >= 0) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "--buffer"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "--buffer"); else if (batch.all_objects) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "--batch-all-objects"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "--batch-all-objects"); else if (input_nul_terminated) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "-z"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "-z"); else if (nul_terminated) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "-Z"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "-Z"); batch.input_delim = batch.output_delim = '\n'; if (input_nul_terminated) @@ -1063,33 +1172,37 @@ int cmd_cat_file(int argc, batch.transform_mode = opt; else if (opt && opt != 'b') usage_msg_optf(_("'-%c' is incompatible with batch mode"), - usage, options, opt); + builtin_catfile_usage, options, opt); else if (argc) - usage_msg_opt(_("batch modes take no arguments"), usage, - options); + usage_msg_opt(_("batch modes take no arguments"), + builtin_catfile_usage, options); - return batch_objects(&batch); + ret = batch_objects(&batch); + goto out; } if (opt) { 
if (!argc && opt == 'c') usage_msg_optf(_("<rev> required with '%s'"), - usage, options, "--textconv"); + builtin_catfile_usage, options, + "--textconv"); else if (!argc && opt == 'w') usage_msg_optf(_("<rev> required with '%s'"), - usage, options, "--filters"); + builtin_catfile_usage, options, + "--filters"); else if (!argc && opt_epts) usage_msg_optf(_("<object> required with '-%c'"), - usage, options, opt); + builtin_catfile_usage, options, opt); else if (argc == 1) obj_name = argv[0]; else - usage_msg_opt(_("too many arguments"), usage, options); + usage_msg_opt(_("too many arguments"), builtin_catfile_usage, + options); } else if (!argc) { - usage_with_options(usage, options); + usage_with_options(builtin_catfile_usage, options); } else if (argc != 2) { usage_msg_optf(_("only two arguments allowed in <type> <object> mode, not %d"), - usage, options, argc); + builtin_catfile_usage, options, argc); } else if (argc) { exp_type = argv[0]; obj_name = argv[1]; @@ -1097,5 +1210,10 @@ int cmd_cat_file(int argc, if (unknown_type && opt != 't' && opt != 's') die("git cat-file --allow-unknown-type: use with -s or -t"); - return cat_one_file(opt, exp_type, obj_name, unknown_type); + + ret = cat_one_file(opt, exp_type, obj_name, unknown_type); + +out: + list_objects_filter_release(&batch.objects_filter); + return ret; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 4764aa1b8c..3973267e9e 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1820,7 +1820,8 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, static int add_object_entry_from_bitmap(const struct object_id *oid, enum object_type type, int flags UNUSED, uint32_t name_hash, - struct packed_git *pack, off_t offset) + struct packed_git *pack, off_t offset, + void *payload UNUSED) { display_progress(progress_state, ++nr_seen); diff --git a/builtin/rev-list.c b/builtin/rev-list.c index e6ee3f82ee..4a84f18f9e 100644 --- a/builtin/rev-list.c +++ 
b/builtin/rev-list.c @@ -461,7 +461,8 @@ static int show_object_fast( int exclude UNUSED, uint32_t name_hash UNUSED, struct packed_git *found_pack UNUSED, - off_t found_offset UNUSED) + off_t found_offset UNUSED, + void *payload UNUSED) { fprintf(stdout, "%s\n", oid_to_hex(oid)); return 1; |
