#include "git-compat-util.h" #include "bloom.h" #include "builtin.h" #include "commit-graph.h" #include "commit-slab.h" #include "commit.h" #include "config.h" #include "diff.h" #include "diffcore.h" #include "environment.h" #include "ewah/ewok.h" #include "hashmap.h" #include "hex.h" #include "object-name.h" #include "object.h" #include "parse-options.h" #include "prio-queue.h" #include "quote.h" #include "repository.h" #include "revision.h" /* Remember to update object flag allocation in object.h */ #define PARENT1 (1u<<16) /* used instead of SEEN */ #define PARENT2 (1u<<17) /* used instead of BOTTOM, BOUNDARY */ struct last_modified_entry { struct hashmap_entry hashent; struct object_id oid; struct bloom_key key; size_t diff_idx; const char path[FLEX_ARRAY]; }; static int last_modified_entry_hashcmp(const void *unused UNUSED, const struct hashmap_entry *hent1, const struct hashmap_entry *hent2, const void *path) { const struct last_modified_entry *ent1 = container_of(hent1, const struct last_modified_entry, hashent); const struct last_modified_entry *ent2 = container_of(hent2, const struct last_modified_entry, hashent); return strcmp(ent1->path, path ? path : ent2->path); } /* * Hold a bitmap for each commit we're working with. In the bitmap, each bit * represents a path in `lm->all_paths`. An active bit indicates the path still * needs to be associated to a commit. */ define_commit_slab(active_paths_for_commit, struct bitmap *); struct last_modified { struct hashmap paths; struct rev_info rev; bool recursive; bool show_trees; const char **all_paths; size_t all_paths_nr; struct active_paths_for_commit active_paths; /* 'scratch' to avoid allocating a bitmap every process_parent() */ struct bitmap *scratch; }; static struct bitmap *active_paths_for(struct last_modified *lm, struct commit *c) { struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c); if (!*bitmap) *bitmap = bitmap_word_alloc(lm->all_paths_nr / BITS_IN_EWORD + 1); return *bitmap; } static void active_paths_free(struct last_modified *lm, struct commit *c) { struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c); if (*bitmap) { bitmap_free(*bitmap); *bitmap = NULL; } } static void last_modified_release(struct last_modified *lm) { struct hashmap_iter iter; struct last_modified_entry *ent; hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) bloom_key_clear(&ent->key); hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); release_revisions(&lm->rev); free(lm->all_paths); } struct last_modified_callback_data { struct last_modified *lm; struct commit *commit; }; static void add_path_from_diff(struct diff_queue_struct *q, struct diff_options *opt UNUSED, void *data) { struct last_modified *lm = data; for (int i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; struct last_modified_entry *ent; const char *path = p->two->path; FLEX_ALLOC_STR(ent, path, path); oidcpy(&ent->oid, &p->two->oid); if (lm->rev.bloom_filter_settings) bloom_key_fill(&ent->key, path, strlen(path), lm->rev.bloom_filter_settings); hashmap_entry_init(&ent->hashent, strhash(ent->path)); hashmap_add(&lm->paths, &ent->hashent); } } static int populate_paths_from_revs(struct last_modified *lm) { int num_interesting = 0; struct diff_options diffopt; /* * Create a copy of `struct diff_options`. In this copy a callback is * set that when called adds entries to `paths` in `struct last_modified`. * This copy is used to diff the tree of the target revision against an * empty tree. This results in all paths in the target revision being * listed. After `paths` is populated, we don't need this copy no more. */ memcpy(&diffopt, &lm->rev.diffopt, sizeof(diffopt)); copy_pathspec(&diffopt.pathspec, &lm->rev.diffopt.pathspec); diffopt.output_format = DIFF_FORMAT_CALLBACK; diffopt.format_callback = add_path_from_diff; diffopt.format_callback_data = lm; for (size_t i = 0; i < lm->rev.pending.nr; i++) { struct object_array_entry *obj = lm->rev.pending.objects + i; if (obj->item->flags & UNINTERESTING) continue; if (num_interesting++) return error(_("last-modified can only operate on one tree at a time")); diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, &obj->item->oid, "", &diffopt); diff_flush(&diffopt); } clear_pathspec(&diffopt.pathspec); return 0; } static void last_modified_emit(struct last_modified *lm, const char *path, const struct commit *commit) { if (commit->object.flags & BOUNDARY) putchar('^'); printf("%s\t", oid_to_hex(&commit->object.oid)); if (lm->rev.diffopt.line_termination) write_name_quoted(path, stdout, '\n'); else printf("%s%c", path, '\0'); } static void mark_path(const char *path, const struct object_id *oid, struct last_modified_callback_data *data) { struct last_modified_entry *ent; /* Is it even a path that we are interested in? */ ent = hashmap_get_entry_from_hash(&data->lm->paths, strhash(path), path, struct last_modified_entry, hashent); if (!ent) return; /* * Is it arriving at a version of interest, or is it from a side branch * which did not contribute to the final state? */ if (oid && !oideq(oid, &ent->oid)) return; last_modified_emit(data->lm, path, data->commit); hashmap_remove(&data->lm->paths, &ent->hashent, path); bloom_key_clear(&ent->key); free(ent); } static void last_modified_diff(struct diff_queue_struct *q, struct diff_options *opt UNUSED, void *cbdata) { struct last_modified_callback_data *data = cbdata; for (int i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; switch (p->status) { case DIFF_STATUS_DELETED: /* * There's no point in feeding a deletion, as it could * not have resulted in our current state, which * actually has the file. */ break; default: /* * Otherwise, we care only that we somehow arrived at * a final oid state. Note that this covers some * potentially controversial areas, including: * * 1. A rename or copy will be found, as it is the * first time the content has arrived at the given * path. * * 2. Even a non-content modification like a mode or * type change will trigger it. * * We take the inclusive approach for now, and find * anything which impacts the path. Options to tweak * the behavior (e.g., to "--follow" the content across * renames) can come later. */ mark_path(p->two->path, &p->two->oid, data); break; } } } static void pass_to_parent(struct bitmap *c, struct bitmap *p, size_t pos) { bitmap_unset(c, pos); bitmap_set(p, pos); } static bool maybe_changed_path(struct last_modified *lm, struct commit *origin, struct bitmap *active) { struct bloom_filter *filter; struct last_modified_entry *ent; struct hashmap_iter iter; if (!lm->rev.bloom_filter_settings) return true; if (commit_graph_generation(origin) == GENERATION_NUMBER_INFINITY) return true; filter = get_bloom_filter(lm->rev.repo, origin); if (!filter) return true; hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { if (active && !bitmap_get(active, ent->diff_idx)) continue; if (bloom_filter_contains(filter, &ent->key, lm->rev.bloom_filter_settings)) return true; } return false; } static void process_parent(struct last_modified *lm, struct prio_queue *queue, struct commit *c, struct bitmap *active_c, struct commit *parent, int parent_i) { struct bitmap *active_p; repo_parse_commit(lm->rev.repo, parent); active_p = active_paths_for(lm, parent); /* * The first time entering this function for this commit (i.e. first parent) * see if Bloom filters will tell us it's worth to do the diff. */ if (parent_i || maybe_changed_path(lm, c, active_c)) { diff_tree_oid(&parent->object.oid, &c->object.oid, "", &lm->rev.diffopt); diffcore_std(&lm->rev.diffopt); } /* * Test each path for TREESAME-ness against the parent. If a path is * TREESAME, pass it on to this parent. * * First, collect all paths that are *not* TREESAME in 'scratch'. * Then, pass paths that *are* TREESAME and active to the parent. */ for (int i = 0; i < diff_queued_diff.nr; i++) { struct diff_filepair *fp = diff_queued_diff.queue[i]; const char *path = fp->two->path; struct last_modified_entry *ent = hashmap_get_entry_from_hash(&lm->paths, strhash(path), path, struct last_modified_entry, hashent); if (ent) { size_t k = ent->diff_idx; if (bitmap_get(active_c, k)) bitmap_set(lm->scratch, k); } } for (size_t i = 0; i < lm->all_paths_nr; i++) { if (bitmap_get(active_c, i) && !bitmap_get(lm->scratch, i)) pass_to_parent(active_c, active_p, i); } /* * If parent has any active paths, put it on the queue (if not already). */ if (!bitmap_is_empty(active_p) && !(parent->object.flags & PARENT1)) { parent->object.flags |= PARENT1; prio_queue_put(queue, parent); } if (!(parent->object.flags & PARENT1)) active_paths_free(lm, parent); memset(lm->scratch->words, 0x0, lm->scratch->word_alloc); diff_queue_clear(&diff_queued_diff); } static int last_modified_run(struct last_modified *lm) { int max_count, queue_popped = 0; struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct prio_queue not_queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *list; struct last_modified_callback_data data = { .lm = lm }; lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK; lm->rev.diffopt.format_callback = last_modified_diff; lm->rev.diffopt.format_callback_data = &data; lm->rev.no_walk = 1; prepare_revision_walk(&lm->rev); max_count = lm->rev.max_count; init_active_paths_for_commit(&lm->active_paths); lm->scratch = bitmap_word_alloc(lm->all_paths_nr); /* * lm->rev.commits holds the set of boundary commits for our walk. * * Loop through each such commit, and place it in the appropriate queue. */ for (list = lm->rev.commits; list; list = list->next) { struct commit *c = list->item; if (c->object.flags & BOTTOM) { prio_queue_put(¬_queue, c); c->object.flags |= PARENT2; } else if (!(c->object.flags & PARENT1)) { /* * If the commit is a starting point (and hasn't been * seen yet), then initialize the set of interesting * paths, too. */ struct bitmap *active; prio_queue_put(&queue, c); c->object.flags |= PARENT1; active = active_paths_for(lm, c); for (size_t i = 0; i < lm->all_paths_nr; i++) bitmap_set(active, i); } } while (queue.nr) { int parent_i; struct commit_list *p; struct commit *c = prio_queue_get(&queue); struct bitmap *active_c = active_paths_for(lm, c); if ((0 <= max_count && max_count < ++queue_popped) || (c->object.flags & PARENT2)) { /* * Either a boundary commit, or we have already seen too * many others. Either way, stop here. */ c->object.flags |= PARENT2 | BOUNDARY; data.commit = c; diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, &c->object.oid, "", &lm->rev.diffopt); diff_flush(&lm->rev.diffopt); goto cleanup; } /* * Otherwise, make sure that 'c' isn't reachable from anything * in the '--not' queue. */ repo_parse_commit(lm->rev.repo, c); while (not_queue.nr) { struct commit_list *np; struct commit *n = prio_queue_get(¬_queue); repo_parse_commit(lm->rev.repo, n); for (np = n->parents; np; np = np->next) { if (!(np->item->object.flags & PARENT2)) { prio_queue_put(¬_queue, np->item); np->item->object.flags |= PARENT2; } } if (commit_graph_generation(n) < commit_graph_generation(c)) break; } /* * Look at each parent and pass on each path that's TREESAME * with that parent. Stop early when no active paths remain. */ for (p = c->parents, parent_i = 0; p; p = p->next, parent_i++) { process_parent(lm, &queue, c, active_c, p->item, parent_i); if (bitmap_is_empty(active_c)) break; } /* * Paths that remain active, or not TREESAME with any parent, * were changed by 'c'. */ if (!bitmap_is_empty(active_c)) { data.commit = c; for (size_t i = 0; i < lm->all_paths_nr; i++) { if (bitmap_get(active_c, i)) mark_path(lm->all_paths[i], NULL, &data); } } cleanup: active_paths_free(lm, c); } if (hashmap_get_size(&lm->paths)) BUG("paths remaining beyond boundary in last-modified"); clear_prio_queue(¬_queue); clear_prio_queue(&queue); clear_active_paths_for_commit(&lm->active_paths); bitmap_free(lm->scratch); return 0; } static int last_modified_init(struct last_modified *lm, struct repository *r, const char *prefix, int argc, const char **argv) { struct hashmap_iter iter; struct last_modified_entry *ent; hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0); repo_init_revisions(r, &lm->rev, prefix); lm->rev.def = "HEAD"; lm->rev.combine_merges = 1; lm->rev.show_root_diff = 1; lm->rev.boundary = 1; lm->rev.no_commit_id = 1; lm->rev.diff = 1; lm->rev.diffopt.flags.no_recursive_diff_tree_combined = 1; lm->rev.diffopt.flags.recursive = lm->recursive; lm->rev.diffopt.flags.tree_in_recursive = lm->show_trees; argc = setup_revisions(argc, argv, &lm->rev, NULL); if (argc > 1) { error(_("unknown last-modified argument: %s"), argv[1]); return argc; } lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo); if (populate_paths_from_revs(lm) < 0) return error(_("unable to setup last-modified")); CALLOC_ARRAY(lm->all_paths, hashmap_get_size(&lm->paths)); lm->all_paths_nr = 0; hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { ent->diff_idx = lm->all_paths_nr++; lm->all_paths[ent->diff_idx] = ent->path; } return 0; } int cmd_last_modified(int argc, const char **argv, const char *prefix, struct repository *repo) { int ret; struct last_modified lm = { 0 }; const char * const last_modified_usage[] = { N_("git last-modified [--recursive] [--show-trees] " "[] [[--] ...]"), NULL }; struct option last_modified_options[] = { OPT_BOOL('r', "recursive", &lm.recursive, N_("recurse into subtrees")), OPT_BOOL('t', "show-trees", &lm.show_trees, N_("show tree entries when recursing into subtrees")), OPT_END() }; argc = parse_options(argc, argv, prefix, last_modified_options, last_modified_usage, PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); repo_config(repo, git_default_config, NULL); ret = last_modified_init(&lm, repo, prefix, argc, argv); if (ret > 0) usage_with_options(last_modified_usage, last_modified_options); if (ret) goto out; ret = last_modified_run(&lm); if (ret) goto out; out: last_modified_release(&lm); return ret; }