top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = ilist.o binaryheap.o hyperloglog.o pairingheap.o rbtree.o stringinfo.o
+OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
+ rbtree.o stringinfo.o
include $(top_srcdir)/src/backend/common.mk
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * bipartite_match.c
+ * Hopcroft-Karp maximum cardinality algorithm for bipartite graphs
+ *
+ * This implementation is based on pseudocode found at:
+ *
+ * http://en.wikipedia.org/w/index.php?title=Hopcroft%E2%80%93Karp_algorithm&oldid=593898016
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/lib/hk.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+
+#include "lib/bipartite_match.h"
+#include "miscadmin.h"
+#include "utils/palloc.h"
+
+static bool hk_breadth_search(BipartiteMatchState *state);
+static bool hk_depth_search(BipartiteMatchState *state, int u, int depth);
+
+BipartiteMatchState *
+BipartiteMatch(int u_size, int v_size, short **adjacency)
+{
+ BipartiteMatchState *state = palloc(sizeof(BipartiteMatchState));
+
+ Assert(u_size < SHRT_MAX);
+ Assert(v_size < SHRT_MAX);
+
+ state->u_size = u_size;
+ state->v_size = v_size;
+ state->matching = 0;
+ state->adjacency = adjacency;
+ state->pair_uv = palloc0((u_size + 1) * sizeof(short));
+ state->pair_vu = palloc0((v_size + 1) * sizeof(short));
+ state->distance = palloc((u_size + 1) * sizeof(float));
+ state->queue = palloc((u_size + 2) * sizeof(short));
+
+ while (hk_breadth_search(state))
+ {
+ int u;
+
+ for (u = 1; u <= u_size; ++u)
+ if (state->pair_uv[u] == 0)
+ if (hk_depth_search(state, u, 1))
+ state->matching++;
+
+ CHECK_FOR_INTERRUPTS(); /* just in case */
+ }
+
+ return state;
+}
+
+void
+BipartiteMatchFree(BipartiteMatchState *state)
+{
+ /* adjacency matrix is treated as owned by the caller */
+ pfree(state->pair_uv);
+ pfree(state->pair_vu);
+ pfree(state->distance);
+ pfree(state->queue);
+ pfree(state);
+}
+
+static bool
+hk_breadth_search(BipartiteMatchState *state)
+{
+ int usize = state->u_size;
+ short *queue = state->queue;
+ float *distance = state->distance;
+ int qhead = 0; /* we never enqueue any node more than once */
+ int qtail = 0; /* so don't have to worry about wrapping */
+ int u;
+
+ distance[0] = INFINITY;
+
+ for (u = 1; u <= usize; ++u)
+ {
+ if (state->pair_uv[u] == 0)
+ {
+ distance[u] = 0;
+ queue[qhead++] = u;
+ }
+ else
+ distance[u] = INFINITY;
+ }
+
+ while (qtail < qhead)
+ {
+ u = queue[qtail++];
+
+ if (distance[u] < distance[0])
+ {
+ short *u_adj = state->adjacency[u];
+ int i = u_adj ? u_adj[0] : 0;
+
+ for (; i > 0; --i)
+ {
+ int u_next = state->pair_vu[u_adj[i]];
+
+ if (isinf(distance[u_next]))
+ {
+ distance[u_next] = 1 + distance[u];
+ queue[qhead++] = u_next;
+ Assert(qhead <= usize+2);
+ }
+ }
+ }
+ }
+
+ return !isinf(distance[0]);
+}
+
+static bool
+hk_depth_search(BipartiteMatchState *state, int u, int depth)
+{
+ float *distance = state->distance;
+ short *pair_uv = state->pair_uv;
+ short *pair_vu = state->pair_vu;
+ short *u_adj = state->adjacency[u];
+ int i = u_adj ? u_adj[0] : 0;
+
+ if (u == 0)
+ return true;
+
+ if ((depth % 8) == 0)
+ check_stack_depth();
+
+ for (; i > 0; --i)
+ {
+ int v = u_adj[i];
+
+ if (distance[pair_vu[v]] == distance[u] + 1)
+ {
+ if (hk_depth_search(state, pair_vu[v], depth+1))
+ {
+ pair_vu[v] = u;
+ pair_uv[u] = v;
+ return true;
+ }
+ }
+ }
+
+ distance[u] = INFINITY;
+ return false;
+}
+
#include "executor/executor.h"
#include "executor/nodeAgg.h"
#include "miscadmin.h"
+#include "lib/bipartite_match.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
return new_groupclause;
}
-
-/*
- * We want to produce the absolute minimum possible number of lists here to
- * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
- * of finding the minimal partition of a poset into chains (which is what we
- * need, taking the list of grouping sets as a poset ordered by set inclusion)
- * can be mapped to the problem of finding the maximum cardinality matching on
- * a bipartite graph, which is solvable in polynomial time with a worst case of
- * no worse than O(n^2.5) and usually much better. Since our N is at most 4096,
- * we don't need to consider fallbacks to heuristic or approximate methods.
- * (Planning time for a 12-d cube is under half a second on my modest system
- * even with optimization off and assertions on.)
- *
- * We use the Hopcroft-Karp algorithm for the graph matching; it seems to work
- * well enough for our purposes. This implementation is based on pseudocode
- * found at:
- *
- * http://en.wikipedia.org/w/index.php?title=Hopcroft%E2%80%93Karp_algorithm&oldid=593898016
- *
- * This implementation uses the same indices for elements of U and V (the two
- * halves of the graph) because in our case they are always the same size, and
- * we always know whether an index represents a u or a v. Index 0 is reserved
- * for the NIL node.
- */
-
-struct hk_state
-{
- int graph_size; /* size of half the graph plus NIL node */
- int matching;
- short **adjacency; /* adjacency[u] = [n, v1,v2,v3,...,vn] */
- short *pair_uv; /* pair_uv[u] -> v */
- short *pair_vu; /* pair_vu[v] -> u */
- float *distance; /* distance[u], float so we can have +inf */
- short *queue; /* queue storage for breadth search */
-};
-
-static bool
-hk_breadth_search(struct hk_state *state)
-{
- int gsize = state->graph_size;
- short *queue = state->queue;
- float *distance = state->distance;
- int qhead = 0; /* we never enqueue any node more than once */
- int qtail = 0; /* so don't have to worry about wrapping */
- int u;
-
- distance[0] = INFINITY;
-
- for (u = 1; u < gsize; ++u)
- {
- if (state->pair_uv[u] == 0)
- {
- distance[u] = 0;
- queue[qhead++] = u;
- }
- else
- distance[u] = INFINITY;
- }
-
- while (qtail < qhead)
- {
- u = queue[qtail++];
-
- if (distance[u] < distance[0])
- {
- short *u_adj = state->adjacency[u];
- int i = u_adj ? u_adj[0] : 0;
-
- for (; i > 0; --i)
- {
- int u_next = state->pair_vu[u_adj[i]];
-
- if (isinf(distance[u_next]))
- {
- distance[u_next] = 1 + distance[u];
- queue[qhead++] = u_next;
- Assert(qhead <= gsize+1);
- }
- }
- }
- }
-
- return !isinf(distance[0]);
-}
-
-static bool
-hk_depth_search(struct hk_state *state, int u, int depth)
-{
- float *distance = state->distance;
- short *pair_uv = state->pair_uv;
- short *pair_vu = state->pair_vu;
- short *u_adj = state->adjacency[u];
- int i = u_adj ? u_adj[0] : 0;
-
- if (u == 0)
- return true;
-
- if ((depth % 8) == 0)
- check_stack_depth();
-
- for (; i > 0; --i)
- {
- int v = u_adj[i];
-
- if (distance[pair_vu[v]] == distance[u] + 1)
- {
- if (hk_depth_search(state, pair_vu[v], depth+1))
- {
- pair_vu[v] = u;
- pair_uv[u] = v;
- return true;
- }
- }
- }
-
- distance[u] = INFINITY;
- return false;
-}
-
-static struct hk_state *
-hk_match(int graph_size, short **adjacency)
-{
- struct hk_state *state = palloc(sizeof(struct hk_state));
-
- state->graph_size = graph_size;
- state->matching = 0;
- state->adjacency = adjacency;
- state->pair_uv = palloc0(graph_size * sizeof(short));
- state->pair_vu = palloc0(graph_size * sizeof(short));
- state->distance = palloc(graph_size * sizeof(float));
- state->queue = palloc((graph_size + 2) * sizeof(short));
-
- while (hk_breadth_search(state))
- {
- int u;
-
- for (u = 1; u < graph_size; ++u)
- if (state->pair_uv[u] == 0)
- if (hk_depth_search(state, u, 1))
- state->matching++;
-
- CHECK_FOR_INTERRUPTS(); /* just in case */
- }
-
- return state;
-}
-
-static void
-hk_free(struct hk_state *state)
-{
- /* adjacency matrix is treated as owned by the caller */
- pfree(state->pair_uv);
- pfree(state->pair_vu);
- pfree(state->distance);
- pfree(state->queue);
- pfree(state);
-}
-
/*
* Extract lists of grouping sets that can be implemented using a single
* rollup-type aggregate pass each. Returns a list of lists of grouping sets.
int *chains;
short **adjacency;
short *adjacency_buf;
- struct hk_state *state;
+ BipartiteMatchState *state;
int i;
int j;
int j_size;
num_sets = i - 1;
/*
- * Apply the matching algorithm to do the work.
+ * Apply the graph matching algorithm to do the work.
*/
- state = hk_match(num_sets + 1, adjacency);
+ state = BipartiteMatch(num_sets, num_sets, adjacency);
/*
* Now, the state->pair* fields have the info we need to assign sets to
* up a nontrivial amount of memory.)
*/
- hk_free(state);
+ BipartiteMatchFree(state);
pfree(results);
pfree(chains);
for (i = 1; i <= num_sets; ++i)
--- /dev/null
+/*
+ * bipartite_match.h
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * src/include/lib/bipartite_match.h
+ */
+
+#ifndef BIPARTITE_MATCH_H
+#define BIPARTITE_MATCH_H
+
+/*
+ * Given a bipartite graph consisting of nodes U numbered 1..nU, nodes V
+ * numbered 1..nV, and an adjacency map of undirected edges in the form
+ * adjacency[u] = [n, v1, v2, v3, ... vn], we wish to find a "maximum
+ * cardinality matching", which is defined as follows: a matching is a subset
+ * of the original edges such that no node has more than one edge, and a
+ * matching has maximum cardinality if there exists no other matching with a
+ * greater number of edges.
+ *
+ * This matching has various applications in graph theory, but the motivating
+ * example here is Dilworth's theorem: a partially-ordered set can be divided
+ * into the minimum number of chains (i.e. subsets X where x1 < x2 < x3 ...) by
+ * a bipartite graph construction. This gives us a polynomial-time solution to
+ * the problem of planning a collection of grouping sets with the provably
+ * minimal number of sort operations.
+ */
+
+typedef struct bipartite_match_state
+{
+ int u_size; /* size of U */
+ int v_size; /* size of V */
+ int matching; /* number of edges in matching */
+ short **adjacency; /* adjacency[u] = [n, v1,v2,v3,...,vn] */
+ short *pair_uv; /* pair_uv[u] -> v */
+ short *pair_vu; /* pair_vu[v] -> u */
+
+ float *distance; /* distance[u], float so we can have +inf */
+ short *queue; /* queue storage for breadth search */
+} BipartiteMatchState;
+
+/*
+ * Given the size of U and V, where each is indexed 1..size, and an adjacency
+ * list, perform the matching and return the resulting state.
+ */
+
+BipartiteMatchState *BipartiteMatch(int u_size, int v_size, short **adjacency);
+
+/*
+ * Free a state returned by BipartiteMatch, except for the original adjacency
+ * list, which is owned by the caller. This only frees memory, so it's optional.
+ */
+
+void BipartiteMatchFree(BipartiteMatchState *state);
+
+#endif /* BIPARTITE_MATCH_H */