aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2025-04-16 13:54:20 -0700
committerJunio C Hamano <gitster@pobox.com>2025-04-16 13:54:21 -0700
commita271b05066a1fd2c3a62508d9908d6c5df14a1cb (patch)
tree57e55fd6b395f8e5da4e23e1344c55d0d38ed80c
parent9bdd7ecf7ec90433fc1803bf5d608d08857b3b49 (diff)
parent8002e8ee1829f0c727aa2f7d9c18ad706cb63565 (diff)
downloadgit-a271b05066a1fd2c3a62508d9908d6c5df14a1cb.tar.gz
Merge branch 'ps/cat-file-filter-batch'
"git cat-file --batch" and friends learned to allow "--filter=" to omit certain objects, just like the transport layer does. * ps/cat-file-filter-batch: builtin/cat-file: use bitmaps to efficiently filter by object type builtin/cat-file: deduplicate logic to iterate over all objects pack-bitmap: introduce function to check whether a pack is bitmapped pack-bitmap: add function to iterate over filtered bitmapped objects pack-bitmap: allow passing payloads to `show_reachable_fn()` builtin/cat-file: support "object:type=" objects filter builtin/cat-file: support "blob:limit=" objects filter builtin/cat-file: support "blob:none" objects filter builtin/cat-file: wire up an option to filter objects builtin/cat-file: introduce function to report object status builtin/cat-file: rename variable that tracks usage
-rw-r--r--Documentation/git-cat-file.adoc26
-rw-r--r--builtin/cat-file.c256
-rw-r--r--builtin/pack-objects.c3
-rw-r--r--builtin/rev-list.c3
-rw-r--r--pack-bitmap.c81
-rw-r--r--pack-bitmap.h22
-rw-r--r--reachable.c3
-rwxr-xr-xt/t1006-cat-file.sh99
8 files changed, 411 insertions, 82 deletions
diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc
index 30359f5dbd..fc4b92f104 100644
--- a/Documentation/git-cat-file.adoc
+++ b/Documentation/git-cat-file.adoc
@@ -81,6 +81,25 @@ OPTIONS
end-of-line conversion, etc). In this case, `<object>` has to be of
the form `<tree-ish>:<path>`, or `:<path>`.
+--filter=<filter-spec>::
+--no-filter::
+ Omit objects from the list of printed objects. This can only be used in
+ combination with one of the batched modes. Excluded objects that have
+ been explicitly requested via any of the batch modes that read objects
+ via standard input (`--batch`, `--batch-check`) will be reported as
+ "filtered". Excluded objects in `--batch-all-objects` mode will not be
+ printed at all. The '<filter-spec>' may be one of the following:
++
+The form '--filter=blob:none' omits all blobs.
++
+The form '--filter=blob:limit=<n>[kmg]' omits blobs of size at least n
+bytes or units. n may be zero. The suffixes k, m, and g can be used to name
+units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same as
+'blob:limit=1024'.
++
+The form '--filter=object:type=(tag|commit|tree|blob)' omits all objects which
+are not of the requested type.
+
--path=<path>::
For use with `--textconv` or `--filters`, to allow specifying an object
name and a path separately, e.g. when it is difficult to figure out
@@ -340,6 +359,13 @@ the repository, then `cat-file` will ignore any custom format and print:
<object> SP missing LF
------------
+If a name is specified on stdin that is filtered out via `--filter=`,
+then `cat-file` will ignore any custom format and print:
+
+------------
+<object> SP excluded LF
+------------
+
If a name is specified that might refer to more than one object (an ambiguous short sha), then `cat-file` will ignore any custom format and print:
------------
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index b13561cf73..ead7554a57 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -15,11 +15,13 @@
#include "gettext.h"
#include "hex.h"
#include "ident.h"
+#include "list-objects-filter-options.h"
#include "parse-options.h"
#include "userdiff.h"
#include "streaming.h"
#include "oid-array.h"
#include "packfile.h"
+#include "pack-bitmap.h"
#include "object-file.h"
#include "object-name.h"
#include "object-store-ll.h"
@@ -35,6 +37,7 @@ enum batch_mode {
};
struct batch_options {
+ struct list_objects_filter_options objects_filter;
int enabled;
int follow_symlinks;
enum batch_mode batch_mode;
@@ -455,6 +458,16 @@ static void print_default_format(struct strbuf *scratch, struct expand_data *dat
(uintmax_t)data->size, opt->output_delim);
}
+static void report_object_status(struct batch_options *opt,
+ const char *obj_name,
+ const struct object_id *oid,
+ const char *status)
+{
+ printf("%s %s%c", obj_name ? obj_name : oid_to_hex(oid),
+ status, opt->output_delim);
+ fflush(stdout);
+}
+
/*
* If "pack" is non-NULL, then "offset" is the byte offset within the pack from
* which the object may be accessed (though note that we may also rely on
@@ -470,8 +483,13 @@ static void batch_object_write(const char *obj_name,
if (!data->skip_object_info) {
int ret;
- if (use_mailmap)
+ if (use_mailmap ||
+ opt->objects_filter.choice == LOFC_BLOB_NONE ||
+ opt->objects_filter.choice == LOFC_BLOB_LIMIT ||
+ opt->objects_filter.choice == LOFC_OBJECT_TYPE)
data->info.typep = &data->type;
+ if (opt->objects_filter.choice == LOFC_BLOB_LIMIT)
+ data->info.sizep = &data->size;
if (pack)
ret = packed_object_info(the_repository, pack, offset,
@@ -481,12 +499,42 @@ static void batch_object_write(const char *obj_name,
&data->oid, &data->info,
OBJECT_INFO_LOOKUP_REPLACE);
if (ret < 0) {
- printf("%s missing%c",
- obj_name ? obj_name : oid_to_hex(&data->oid), opt->output_delim);
- fflush(stdout);
+ report_object_status(opt, obj_name, &data->oid, "missing");
return;
}
+ switch (opt->objects_filter.choice) {
+ case LOFC_DISABLED:
+ break;
+ case LOFC_BLOB_NONE:
+ if (data->type == OBJ_BLOB) {
+ if (!opt->all_objects)
+ report_object_status(opt, obj_name,
+ &data->oid, "excluded");
+ return;
+ }
+ break;
+ case LOFC_BLOB_LIMIT:
+ if (data->type == OBJ_BLOB &&
+ data->size >= opt->objects_filter.blob_limit_value) {
+ if (!opt->all_objects)
+ report_object_status(opt, obj_name,
+ &data->oid, "excluded");
+ return;
+ }
+ break;
+ case LOFC_OBJECT_TYPE:
+ if (data->type != opt->objects_filter.object_type) {
+ if (!opt->all_objects)
+ report_object_status(opt, obj_name,
+ &data->oid, "excluded");
+ return;
+ }
+ break;
+ default:
+ BUG("unsupported objects filter");
+ }
+
if (use_mailmap && (data->type == OBJ_COMMIT || data->type == OBJ_TAG)) {
size_t s = data->size;
char *buf = NULL;
@@ -535,10 +583,10 @@ static void batch_one_object(const char *obj_name,
if (result != FOUND) {
switch (result) {
case MISSING_OBJECT:
- printf("%s missing%c", obj_name, opt->output_delim);
+ report_object_status(opt, obj_name, &data->oid, "missing");
break;
case SHORT_NAME_AMBIGUOUS:
- printf("%s ambiguous%c", obj_name, opt->output_delim);
+ report_object_status(opt, obj_name, &data->oid, "ambiguous");
break;
case DANGLING_SYMLINK:
printf("dangling %"PRIuMAX"%c%s%c",
@@ -595,25 +643,18 @@ static int batch_object_cb(const struct object_id *oid, void *vdata)
return 0;
}
-static int collect_loose_object(const struct object_id *oid,
- const char *path UNUSED,
- void *data)
-{
- oid_array_append(data, oid);
- return 0;
-}
-
-static int collect_packed_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data)
+static int collect_object(const struct object_id *oid,
+ struct packed_git *pack UNUSED,
+ off_t offset UNUSED,
+ void *data)
{
oid_array_append(data, oid);
return 0;
}
static int batch_unordered_object(const struct object_id *oid,
- struct packed_git *pack, off_t offset,
+ struct packed_git *pack,
+ off_t offset,
void *vdata)
{
struct object_cb_data *data = vdata;
@@ -627,23 +668,6 @@ static int batch_unordered_object(const struct object_id *oid,
return 0;
}
-static int batch_unordered_loose(const struct object_id *oid,
- const char *path UNUSED,
- void *data)
-{
- return batch_unordered_object(oid, NULL, 0, data);
-}
-
-static int batch_unordered_packed(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
- void *data)
-{
- return batch_unordered_object(oid, pack,
- nth_packed_object_offset(pack, pos),
- data);
-}
-
typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
struct strbuf *, struct expand_data *);
@@ -776,6 +800,76 @@ static void batch_objects_command(struct batch_options *opt,
#define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
+typedef int (*for_each_object_fn)(const struct object_id *oid, struct packed_git *pack,
+ off_t offset, void *data);
+
+struct for_each_object_payload {
+ for_each_object_fn callback;
+ void *payload;
+};
+
+static int batch_one_object_loose(const struct object_id *oid,
+ const char *path UNUSED,
+ void *_payload)
+{
+ struct for_each_object_payload *payload = _payload;
+ return payload->callback(oid, NULL, 0, payload->payload);
+}
+
+static int batch_one_object_packed(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *_payload)
+{
+ struct for_each_object_payload *payload = _payload;
+ return payload->callback(oid, pack, nth_packed_object_offset(pack, pos),
+ payload->payload);
+}
+
+static int batch_one_object_bitmapped(const struct object_id *oid,
+ enum object_type type UNUSED,
+ int flags UNUSED,
+ uint32_t hash UNUSED,
+ struct packed_git *pack,
+ off_t offset,
+ void *_payload)
+{
+ struct for_each_object_payload *payload = _payload;
+ return payload->callback(oid, pack, offset, payload->payload);
+}
+
+static void batch_each_object(struct batch_options *opt,
+ for_each_object_fn callback,
+ unsigned flags,
+ void *_payload)
+{
+ struct for_each_object_payload payload = {
+ .callback = callback,
+ .payload = _payload,
+ };
+ struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
+
+ for_each_loose_object(batch_one_object_loose, &payload, 0);
+
+ if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter,
+ batch_one_object_bitmapped, &payload)) {
+ struct packed_git *pack;
+
+ for (pack = get_all_packs(the_repository); pack; pack = pack->next) {
+ if (bitmap_index_contains_pack(bitmap, pack) ||
+ open_pack_index(pack))
+ continue;
+ for_each_object_in_pack(pack, batch_one_object_packed,
+ &payload, flags);
+ }
+ } else {
+ for_each_packed_object(the_repository, batch_one_object_packed,
+ &payload, flags);
+ }
+
+ free_bitmap_index(bitmap);
+}
+
static int batch_objects(struct batch_options *opt)
{
struct strbuf input = STRBUF_INIT;
@@ -812,7 +906,8 @@ static int batch_objects(struct batch_options *opt)
struct object_cb_data cb;
struct object_info empty = OBJECT_INFO_INIT;
- if (!memcmp(&data.info, &empty, sizeof(empty)))
+ if (!memcmp(&data.info, &empty, sizeof(empty)) &&
+ opt->objects_filter.choice == LOFC_DISABLED)
data.skip_object_info = 1;
if (repo_has_promisor_remote(the_repository))
@@ -829,18 +924,14 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
- for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(the_repository, batch_unordered_packed,
- &cb, FOR_EACH_OBJECT_PACK_ORDER);
+ batch_each_object(opt, batch_unordered_object,
+ FOR_EACH_OBJECT_PACK_ORDER, &cb);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
- for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(the_repository, collect_packed_object,
- &sa, 0);
-
+ batch_each_object(opt, collect_object, 0, &sa);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
oid_array_clear(&sa);
@@ -936,12 +1027,15 @@ int cmd_cat_file(int argc,
int opt_cw = 0;
int opt_epts = 0;
const char *exp_type = NULL, *obj_name = NULL;
- struct batch_options batch = {0};
+ struct batch_options batch = {
+ .objects_filter = LIST_OBJECTS_FILTER_INIT,
+ };
int unknown_type = 0;
int input_nul_terminated = 0;
int nul_terminated = 0;
+ int ret;
- const char * const usage[] = {
+ const char * const builtin_catfile_usage[] = {
N_("git cat-file <type> <object>"),
N_("git cat-file (-e | -p) <object>"),
N_("git cat-file (-t | -s) [--allow-unknown-type] <object>"),
@@ -1000,6 +1094,7 @@ int cmd_cat_file(int argc,
N_("run filters on object's content"), 'w'),
OPT_STRING(0, "path", &force_path, N_("blob|tree"),
N_("use a <path> for (--textconv | --filters); Not with 'batch'")),
+ OPT_PARSE_LIST_OBJECTS_FILTER(&batch.objects_filter),
OPT_END()
};
@@ -1007,13 +1102,27 @@ int cmd_cat_file(int argc,
batch.buffer_output = -1;
- argc = parse_options(argc, argv, prefix, options, usage, 0);
+ argc = parse_options(argc, argv, prefix, options, builtin_catfile_usage, 0);
opt_cw = (opt == 'c' || opt == 'w');
opt_epts = (opt == 'e' || opt == 'p' || opt == 't' || opt == 's');
if (use_mailmap)
read_mailmap(&mailmap);
+ switch (batch.objects_filter.choice) {
+ case LOFC_DISABLED:
+ break;
+ case LOFC_BLOB_NONE:
+ case LOFC_BLOB_LIMIT:
+ case LOFC_OBJECT_TYPE:
+ if (!batch.enabled)
+ usage(_("objects filter only supported in batch mode"));
+ break;
+ default:
+ usagef(_("objects filter not supported: '%s'"),
+ list_object_filter_config_name(batch.objects_filter.choice));
+ }
+
/* --batch-all-objects? */
if (opt == 'b')
batch.all_objects = 1;
@@ -1021,7 +1130,7 @@ int cmd_cat_file(int argc,
/* Option compatibility */
if (force_path && !opt_cw)
usage_msg_optf(_("'%s=<%s>' needs '%s' or '%s'"),
- usage, options,
+ builtin_catfile_usage, options,
"--path", _("path|tree-ish"), "--filters",
"--textconv");
@@ -1029,20 +1138,20 @@ int cmd_cat_file(int argc,
if (batch.enabled)
;
else if (batch.follow_symlinks)
- usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
- "--follow-symlinks");
+ usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
+ options, "--follow-symlinks");
else if (batch.buffer_output >= 0)
- usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
- "--buffer");
+ usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
+ options, "--buffer");
else if (batch.all_objects)
- usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
- "--batch-all-objects");
+ usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
+ options, "--batch-all-objects");
else if (input_nul_terminated)
- usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
- "-z");
+ usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
+ options, "-z");
else if (nul_terminated)
- usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
- "-Z");
+ usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage,
+ options, "-Z");
batch.input_delim = batch.output_delim = '\n';
if (input_nul_terminated)
@@ -1063,33 +1172,37 @@ int cmd_cat_file(int argc,
batch.transform_mode = opt;
else if (opt && opt != 'b')
usage_msg_optf(_("'-%c' is incompatible with batch mode"),
- usage, options, opt);
+ builtin_catfile_usage, options, opt);
else if (argc)
- usage_msg_opt(_("batch modes take no arguments"), usage,
- options);
+ usage_msg_opt(_("batch modes take no arguments"),
+ builtin_catfile_usage, options);
- return batch_objects(&batch);
+ ret = batch_objects(&batch);
+ goto out;
}
if (opt) {
if (!argc && opt == 'c')
usage_msg_optf(_("<rev> required with '%s'"),
- usage, options, "--textconv");
+ builtin_catfile_usage, options,
+ "--textconv");
else if (!argc && opt == 'w')
usage_msg_optf(_("<rev> required with '%s'"),
- usage, options, "--filters");
+ builtin_catfile_usage, options,
+ "--filters");
else if (!argc && opt_epts)
usage_msg_optf(_("<object> required with '-%c'"),
- usage, options, opt);
+ builtin_catfile_usage, options, opt);
else if (argc == 1)
obj_name = argv[0];
else
- usage_msg_opt(_("too many arguments"), usage, options);
+ usage_msg_opt(_("too many arguments"), builtin_catfile_usage,
+ options);
} else if (!argc) {
- usage_with_options(usage, options);
+ usage_with_options(builtin_catfile_usage, options);
} else if (argc != 2) {
usage_msg_optf(_("only two arguments allowed in <type> <object> mode, not %d"),
- usage, options, argc);
+ builtin_catfile_usage, options, argc);
} else if (argc) {
exp_type = argv[0];
obj_name = argv[1];
@@ -1097,5 +1210,10 @@ int cmd_cat_file(int argc,
if (unknown_type && opt != 't' && opt != 's')
die("git cat-file --allow-unknown-type: use with -s or -t");
- return cat_one_file(opt, exp_type, obj_name, unknown_type);
+
+ ret = cat_one_file(opt, exp_type, obj_name, unknown_type);
+
+out:
+ list_objects_filter_release(&batch.objects_filter);
+ return ret;
}
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 4764aa1b8c..3973267e9e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1820,7 +1820,8 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
static int add_object_entry_from_bitmap(const struct object_id *oid,
enum object_type type,
int flags UNUSED, uint32_t name_hash,
- struct packed_git *pack, off_t offset)
+ struct packed_git *pack, off_t offset,
+ void *payload UNUSED)
{
display_progress(progress_state, ++nr_seen);
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index e6ee3f82ee..4a84f18f9e 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -461,7 +461,8 @@ static int show_object_fast(
int exclude UNUSED,
uint32_t name_hash UNUSED,
struct packed_git *found_pack UNUSED,
- off_t found_offset UNUSED)
+ off_t found_offset UNUSED,
+ void *payload UNUSED)
{
fprintf(stdout, "%s\n", oid_to_hex(oid));
return 1;
diff --git a/pack-bitmap.c b/pack-bitmap.c
index aa67cb40ab..5299f49d59 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -745,6 +745,21 @@ struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
return NULL;
}
+int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack)
+{
+ for (; bitmap; bitmap = bitmap->base) {
+ if (bitmap_is_midx(bitmap)) {
+ for (size_t i = 0; i < bitmap->midx->num_packs; i++)
+ if (bitmap->midx->packs[i] == pack)
+ return 1;
+ } else if (bitmap->pack == pack) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
struct include_data {
struct bitmap_index *bitmap_git;
struct bitmap *base;
@@ -1625,7 +1640,7 @@ static void show_extended_objects(struct bitmap_index *bitmap_git,
(obj->type == OBJ_TAG && !revs->tag_objects))
continue;
- show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0);
+ show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0, NULL);
}
}
@@ -1662,8 +1677,10 @@ static void init_type_iterator(struct ewah_or_iterator *it,
static void show_objects_for_type(
struct bitmap_index *bitmap_git,
+ struct bitmap *objects,
enum object_type object_type,
- show_reachable_fn show_reach)
+ show_reachable_fn show_reach,
+ void *payload)
{
size_t i = 0;
uint32_t offset;
@@ -1671,8 +1688,6 @@ static void show_objects_for_type(
struct ewah_or_iterator it;
eword_t filter;
- struct bitmap *objects = bitmap_git->result;
-
init_type_iterator(&it, bitmap_git, object_type);
for (i = 0; i < objects->word_alloc &&
@@ -1715,7 +1730,7 @@ static void show_objects_for_type(
if (bitmap_git->hashes)
hash = get_be32(bitmap_git->hashes + index_pos);
- show_reach(&oid, object_type, 0, hash, pack, ofs);
+ show_reach(&oid, object_type, 0, hash, pack, ofs, payload);
}
}
@@ -2024,6 +2039,50 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
}
}
+int for_each_bitmapped_object(struct bitmap_index *bitmap_git,
+ struct list_objects_filter_options *filter,
+ show_reachable_fn show_reach,
+ void *payload)
+{
+ struct bitmap *filtered_bitmap = NULL;
+ uint32_t objects_nr;
+ size_t full_word_count;
+ int ret;
+
+ if (!can_filter_bitmap(filter)) {
+ ret = -1;
+ goto out;
+ }
+
+ objects_nr = bitmap_num_objects(bitmap_git);
+ full_word_count = objects_nr / BITS_IN_EWORD;
+
+ /* We start from the all-1 bitmap and then filter down from there. */
+ filtered_bitmap = bitmap_word_alloc(full_word_count + !!(objects_nr % BITS_IN_EWORD));
+ memset(filtered_bitmap->words, 0xff, full_word_count * sizeof(*filtered_bitmap->words));
+ for (size_t i = full_word_count * BITS_IN_EWORD; i < objects_nr; i++)
+ bitmap_set(filtered_bitmap, i);
+
+ if (filter_bitmap(bitmap_git, NULL, filtered_bitmap, filter) < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ show_objects_for_type(bitmap_git, filtered_bitmap,
+ OBJ_COMMIT, show_reach, payload);
+ show_objects_for_type(bitmap_git, filtered_bitmap,
+ OBJ_TREE, show_reach, payload);
+ show_objects_for_type(bitmap_git, filtered_bitmap,
+ OBJ_BLOB, show_reach, payload);
+ show_objects_for_type(bitmap_git, filtered_bitmap,
+ OBJ_TAG, show_reach, payload);
+
+ ret = 0;
+out:
+ bitmap_free(filtered_bitmap);
+ return ret;
+}
+
struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
int filter_provided_objects)
{
@@ -2518,13 +2577,17 @@ void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git,
{
assert(bitmap_git->result);
- show_objects_for_type(bitmap_git, OBJ_COMMIT, show_reachable);
+ show_objects_for_type(bitmap_git, bitmap_git->result,
+ OBJ_COMMIT, show_reachable, NULL);
if (revs->tree_objects)
- show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable);
+ show_objects_for_type(bitmap_git, bitmap_git->result,
+ OBJ_TREE, show_reachable, NULL);
if (revs->blob_objects)
- show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable);
+ show_objects_for_type(bitmap_git, bitmap_git->result,
+ OBJ_BLOB, show_reachable, NULL);
if (revs->tag_objects)
- show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable);
+ show_objects_for_type(bitmap_git, bitmap_git->result,
+ OBJ_TAG, show_reachable, NULL);
show_extended_objects(bitmap_git, revs, show_reachable);
}
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 0e9d25e6f2..382d39499a 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -50,7 +50,8 @@ typedef int (*show_reachable_fn)(
int flags,
uint32_t hash,
struct packed_git *found_pack,
- off_t found_offset);
+ off_t found_offset,
+ void *payload);
struct bitmap_index;
@@ -66,6 +67,13 @@ struct bitmapped_pack {
struct bitmap_index *prepare_bitmap_git(struct repository *r);
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx);
+
+/*
+ * Given a bitmap index, determine whether it contains the pack either directly
+ * or via the multi-pack-index.
+ */
+int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack);
+
void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits,
uint32_t *trees, uint32_t *blobs, uint32_t *tags);
void traverse_bitmap_commit_list(struct bitmap_index *,
@@ -78,6 +86,18 @@ int test_bitmap_pseudo_merges(struct repository *r);
int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n);
int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n);
+struct list_objects_filter_options;
+
+/*
+ * Filter bitmapped objects and iterate through all resulting objects,
+ * executing `show_reach` for each of them. Returns `-1` in case the filter is
+ * not supported, `0` otherwise.
+ */
+int for_each_bitmapped_object(struct bitmap_index *bitmap_git,
+ struct list_objects_filter_options *filter,
+ show_reachable_fn show_reach,
+ void *payload);
+
#define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL"
diff --git a/reachable.c b/reachable.c
index 1b26b9b1d7..299e129249 100644
--- a/reachable.c
+++ b/reachable.c
@@ -341,7 +341,8 @@ static int mark_object_seen(const struct object_id *oid,
int exclude UNUSED,
uint32_t name_hash UNUSED,
struct packed_git *found_pack UNUSED,
- off_t found_offset UNUSED)
+ off_t found_offset UNUSED,
+ void *payload UNUSED)
{
struct object *obj = lookup_object_by_type(the_repository, oid, type);
if (!obj)
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index aa64678c64..ce8b27bf54 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -1410,4 +1410,103 @@ test_expect_success PERL_IPC_OPEN2 '--batch-command info is unbuffered by defaul
perl -e "$perl_script" -- --batch-command $hello_oid "$expect" "info "
'
+test_expect_success 'setup for objects filter' '
+ git init repo &&
+ (
+ # Seed the repository with four different sets of objects:
+ #
+ # - The first set is fully packed and has a bitmap.
+ # - The second set is packed, but has no bitmap.
+ # - The third set is loose.
+ # - The fourth set is loose and contains big objects.
+ #
+ # This ensures that we cover all these types as expected.
+ cd repo &&
+ test_commit first &&
+ git repack -Adb &&
+ test_commit second &&
+ git repack -d &&
+ test_commit third &&
+
+ for n in 1000 10000
+ do
+ printf "%"$n"s" X >large.$n || return 1
+ done &&
+ git add large.* &&
+ git commit -m fourth
+ )
+'
+
+test_expect_success 'objects filter with unknown option' '
+ cat >expect <<-EOF &&
+ fatal: invalid filter-spec ${SQ}unknown${SQ}
+ EOF
+ test_must_fail git -C repo cat-file --filter=unknown 2>err &&
+ test_cmp expect err
+'
+
+for option in sparse:oid=1234 tree:1 sparse:path=x
+do
+ test_expect_success "objects filter with unsupported option $option" '
+ case "$option" in
+ tree:1)
+ echo "usage: objects filter not supported: ${SQ}tree${SQ}" >expect
+ ;;
+ sparse:path=x)
+ echo "fatal: sparse:path filters support has been dropped" >expect
+ ;;
+ *)
+ option_name=$(echo "$option" | cut -d= -f1) &&
+ printf "usage: objects filter not supported: ${SQ}%s${SQ}\n" "$option_name" >expect
+ ;;
+ esac &&
+ test_must_fail git -C repo cat-file --filter=$option 2>err &&
+ test_cmp expect err
+ '
+done
+
+test_expect_success 'objects filter: disabled' '
+ git -C repo cat-file --batch-check="%(objectname)" --batch-all-objects --no-filter >actual &&
+ sort actual >actual.sorted &&
+ git -C repo rev-list --objects --no-object-names --all >expect &&
+ sort expect >expect.sorted &&
+ test_cmp expect.sorted actual.sorted
+'
+
+test_objects_filter () {
+ filter="$1"
+
+ test_expect_success "objects filter: $filter" '
+ git -C repo cat-file --batch-check="%(objectname)" --batch-all-objects --filter="$filter" >actual &&
+ sort actual >actual.sorted &&
+ git -C repo rev-list --objects --no-object-names --all --filter="$filter" --filter-provided-objects >expect &&
+ sort expect >expect.sorted &&
+ test_cmp expect.sorted actual.sorted
+ '
+
+ test_expect_success "objects filter prints excluded objects: $filter" '
+ # Find all objects that would be excluded by the current filter.
+ git -C repo rev-list --objects --no-object-names --all >all &&
+ git -C repo rev-list --objects --no-object-names --all --filter="$filter" --filter-provided-objects >filtered &&
+ sort all >all.sorted &&
+ sort filtered >filtered.sorted &&
+ comm -23 all.sorted filtered.sorted >expected.excluded &&
+ test_line_count -gt 0 expected.excluded &&
+
+ git -C repo cat-file --batch-check="%(objectname)" --filter="$filter" <expected.excluded >actual &&
+ awk "/excluded/{ print \$1 }" actual | sort >actual.excluded &&
+ test_cmp expected.excluded actual.excluded
+ '
+}
+
+test_objects_filter "blob:none"
+test_objects_filter "blob:limit=1"
+test_objects_filter "blob:limit=500"
+test_objects_filter "blob:limit=1000"
+test_objects_filter "blob:limit=1k"
+test_objects_filter "object:type=blob"
+test_objects_filter "object:type=commit"
+test_objects_filter "object:type=tag"
+test_objects_filter "object:type=tree"
+
test_done