Peter Norvig
Decembers 2016–2023

Advent of Code Utilities¶

Stuff I might need for Advent of Code.

First, some imports that I have used in past AoC years:

In [8]:

from collections import Counter, defaultdict, namedtuple, deque, abc
from dataclasses import dataclass, field
from itertools   import permutations, combinations, cycle, chain, islice, filterfalse
from itertools   import count as count_from, product as cross_product, takewhile
from typing      import *
from statistics  import mean, median
from math        import ceil, floor, factorial, gcd, log, log2, log10, sqrt, inf, atan2

import matplotlib.pyplot as plt
import ast
import fractions
import functools
import heapq
import operator
import pathlib
import re
import string
import sys
import time

Daily Workflow¶

Each day's work will consist of three tasks, denoted by three sections in the notebook:

Input: Parse the day's input file with the function parse.
Part 1: Understand the day's instructions and:
- Write code to compute the answer to Part 1.
- Once I have computed the answer and submitted it to the AoC site to verify it is correct, I record it with the answer class.
Part 2: Repeat the above steps for Part 2.
Occasionally I'll introduce a Part 3 where I explore beyond the official instructions.

Parsing Input Files¶

The function parse is meant to handle each day's input. A call parse(day, parser, sections) does the following:

Reads the input file for day.
Breaks the file into a sections. By default, this is lines, but you can use paragraphs, or pass in a custom function.
Applies parser to each section and returns the results as a tuple of records.
- Useful parser functions include ints, digits, atoms, words, and the built-ins int and str.
Prints the first few input lines and output records. This is useful to me as a debugging tool, and to the reader.
The defaults are parser=str, sections=lines, so by default parse(n) gives a tuple of lines from fuile day.

In [7]:

current_year = 2023    # Subdirectory name for input files

lines = str.splitlines # By default, split input text into lines

def paragraphs(text): "Split text into paragraphs"; return text.split('\n\n')
def whole(text): "The whole text"; return [text]

def parse(day_or_text:Union[int, str], parser=str, sections=lines, show=8) -> tuple:
    """Split the input text into `sections`, and apply `parser` to each.
    The first argument is either the text itself, or the day number of a text file."""
    if isinstance(day_or_text, str) and show == 8: 
        show = 0 # By default, don't show lines when parsing example text.
    start = time.time()
    text = get_text(day_or_text)
    show_items('Puzzle input', text.splitlines(), show)
    records = mapt(parser, sections(text.rstrip()))
    if parser != str or sections != lines:
        show_items('Parsed representation', records, show)
    return records

def get_text(day_or_text: Union[int, str]) -> str:
    """The text used as input to the puzzle: either a string or the day number,
    which denotes the file 'AOC/year/input{day}.txt'."""
    if isinstance(day_or_text, str):
        return day_or_text
    else:
        filename = f'AOC/{current_year}/input{day_or_text}.txt'
        return pathlib.Path(filename).read_text()

def show_items(source, items, show:int, hr="─"*100):
    """Show the first few items, in a pretty format."""
    if show:
        types = Counter(map(type, items))
        counts = ', '.join(f'{n} {t.__name__}{"" if n == 1 else "s"}' for t, n in types.items())
        print(f'{hr}\n{source} ➜ {counts}:\n{hr}')
        for line in items[:show]:
            print(truncate(line))
        if show < len(items):
            print('...')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[7], line 6
      3 lines = str.splitlines # By default, split input text into lines
      5 def paragraphs(text): "Split text into paragraphs"; return text.split('\n\n')
----> 6 whole
      8 def parse(day_or_text:Union[int, str], parser=str, sections=lines, show=8) -> tuple:
      9     """Split the input text into `sections`, and apply `parser` to each.
     10     The first argument is either the text itself, or the day number of a text file."""

NameError: name 'whole' is not defined

Functions that can be used as the parser argument to parse (also, consider str.split to split the line on whitespace):

In [12]:

Char = str # Intended as the type of a one-character string
Atom = Union[str, float, int] # The type of a string or number
Ints = Sequence[int]

def ints(text: str) -> Tuple[int]:
    """A tuple of all the integers in text, ignoring non-number characters."""
    return mapt(int, re.findall(r'-?[0-9]+', text))

def positive_ints(text: str) -> Tuple[int]:
    """A tuple of all the integers in text, ignoring non-number characters."""
    return mapt(int, re.findall(r'[0-9]+', text))

def digits(text: str) -> Tuple[int]:
    """A tuple of all the digits in text (as ints 0–9), ignoring non-digit characters."""
    return mapt(int, re.findall(r'[0-9]', text))

def words(text: str) -> Tuple[str]:
    """A tuple of all the alphabetic words in text, ignoring non-letters."""
    return tuple(re.findall(r'[a-zA-Z]+', text))

def atoms(text: str) -> Tuple[Atom]:
    """A tuple of all the atoms (numbers or identifiers) in text. Skip punctuation."""
    return mapt(atom, re.findall(r'[+-]?\d+\.?\d*|\w+', text))

def atom(text: str) -> Atom:
    """Parse text into a single float or int or str."""
    try:
        x = float(text)
        return round(x) if x.is_integer() else x
    except ValueError:
        return text.strip()

Daily Answers¶

Here is the answer class, which gives verification of a correct computation (or an error message for an incorrect computation), times how long the computation took, and stores the result in the dict answers.

In [78]:

answers = {} # `answers` is a dict of {puzzle_number: answer}
unknown = 'unknown'

class answer:
    """Verify that calling `code` computes the `solution` to `puzzle`. 
    Record results in the dict `answers`."""
    def __init__(self, puzzle: float, solution, code:Callable=lambda:unknown):
        self.puzzle, self.solution, self.code = puzzle, solution, code
        answers[puzzle] = self
        self.check()
        
    def check(self) -> bool:
        """Check if the code computes the correct solution; record run time."""
        start     = time.time()
        self.got  = self.code()
        self.secs = time.time() - start
        self.ok   = (self.got == self.solution)
        return self.ok
    
    def __repr__(self) -> str:
        """The repr of an answer shows what happened."""
        secs    = f'{self.secs:6.3f}'.replace(' 0.', '  .')
        comment = (f'' if self.got == unknown else
                   f' ok' if self.ok else 
                   f' WRONG; expected answer is {self.solution}')
        return f'Puzzle {self.puzzle:4.1f}: {secs} seconds, answer {self.got:<17}{comment}'

def summary(answers):
    """Print a report that summarizes the answers."""
    for d in sorted(answers):
        print(answers[d])
    times = [answers[d].secs for d in answers]
    print(f'\nCorrect: {quantify(answers[d].ok for d in answers)}/{len(answers)}')
    print(f'\nTime in seconds: {median(times):.3f} median, {mean(times):.3f} mean, {sum(times):.3f} total.')

Additional utility functions¶

All of the following have been used in solutions to multiple puzzles in the past, so I pulled them all in here:

In [16]:

class multimap(defaultdict):
    """A mapping of {key: [val1, val2, ...]}."""
    def __init__(self, pairs:Iterable[tuple]=(), symmetric=False):
        """Given (key, val) pairs, return {key: [val, ...], ...}.
        If `symmetric` is True, treat (key, val) as (key, val) plus (val, key)."""
        self.default_factory = list
        for (key, val) in pairs:
            self[key].append(val)
            if symmetric:
                self[val].append(key)

def prod(numbers) -> float: # Will be math.prod in Python 3.8
    """The product formed by multiplying `numbers` together."""
    result = 1
    for x in numbers:
        result *= x
    return result

def T(matrix: Sequence[Sequence]) -> List[Tuple]:
    """The transpose of a matrix: T([(1,2,3), (4,5,6)]) == [(1,4), (2,5), (3,6)]"""
    return list(zip(*matrix))

def total(counter: Counter) -> int: 
    """The sum of all the counts in a Counter."""
    return sum(counter.values())

def minmax(numbers) -> Tuple[int, int]:
    """A tuple of the (minimum, maximum) of numbers."""
    numbers = list(numbers)
    return min(numbers), max(numbers)

def cover(*integers) -> range:
    """A `range` that covers all the given integers, and any in between them.
    cover(lo, hi) is an inclusive (or closed) range, equal to range(lo, hi + 1).
    The same range results from cover(hi, lo) or cover([hi, lo])."""
    if len(integers) == 1: integers = the(integers)
    return range(min(integers), max(integers) + 1)

def the(sequence) -> object:
    """Return the one item in a sequence. Raise error if not exactly one."""
    for i, item in enumerate(sequence, 1):
        if i > 1: raise ValueError(f'Expected exactly one item in the sequence.')
    return item

def split_at(sequence, i) -> Tuple[Sequence, Sequence]:
    """The sequence split into two pieces: (before position i, and i-and-after)."""
    return sequence[:i], sequence[i:]

def ignore(*args) -> None: "Just return None."; return None

def is_int(x) -> bool: "Is x an int?"; return isinstance(x, int)  

def sign(x) -> int: "0, +1, or -1"; return (0 if x == 0 else +1 if x > 0 else -1)

def lcm(i, j) -> int: "Least common multiple"; return i * j // gcd(i, j)

def union(sets) -> set: "Union of several sets"; return set().union(*sets)

def intersection(sets):
    "Intersection of several sets; error if no sets."
    first, *rest = sets
    return set(first).intersection(*rest)

def accumulate(item_count_pairs: Iterable[Tuple[object, int]]) -> Counter:
    """Add up all the (item, count) pairs into a Counter."""
    counter = Counter()
    for (item, count) in item_count_pairs:
        counter[item] += count
    return counter

def range_intersection(range1, range2) -> range:
    """Return a range that is the intersection of these two ranges."""
    return range(max(range1.start, range2.start), min(range1.stop, range2.stop))
    
def naked_plot(points, marker='o', size=(10, 10), invert=True, square=False, **kwds):
    """Plot `points` without any axis lines or tick marks.
    Optionally specify size, whether square or not, and whether to invery y axis."""
    if size: plt.figure(figsize=((size, size) if is_int(size) else size))
    plt.plot(*T(points), marker, **kwds)
    if square: plt.axis('square')
    plt.axis('off')
    if invert: plt.gca().invert_yaxis()
    
def clock_mod(i, m) -> int:
    """i % m, but replace a result of 0 with m"""
    # This is like a clock, where 24 mod 12 is 12, not 0.
    return (i % m) or m

def invert_dict(dic) -> dict:
    """Invert a dict, e.g. {1: 'a', 2: 'b'} -> {'a': 1, 'b': 2}."""
    return {dic[x]: x for x in dic}

def walrus(name, value):
    """If you're not in 3.8 or more, and you can't do `x := val`,
    then you can use `walrus('x', val)`, if `x` is global."""
    globals()[name] = value
    return value

def truncate(object, width=100, ellipsis=' ...') -> str:
    """Use elipsis to truncate `str(object)` to `width` characters, if necessary."""
    string = str(object)
    return string if len(string) <= width else string[:width-len(ellipsis)] + ellipsis

def mapt(function: Callable, *sequences) -> tuple:
    """`map`, with the result as a tuple."""
    return tuple(map(function, *sequences))

def mapl(function: Callable, *sequences) -> list:
    """`map`, with the result as a list."""
    return list(map(function, *sequences))

def cat(things: Collection, sep='') -> str:
    """Concatenate the things."""
    return sep.join(map(str, things))
    
cache   = functools.lru_cache(None)
Ø       = frozenset() # empty set

Itertools Recipes¶

The Python docs for the itertools module has some "recipes" that I include here (some I have slightly modified):

In [18]:

def quantify(iterable, pred=bool) -> int:
    """Count the number of items in iterable for which pred is true."""
    return sum(1 for item in iterable if pred(item))

def dotproduct(vec1, vec2):
    """The dot product of two vectors."""
    return sum(map(operator.mul, vec1, vec2))

def powerset(iterable) -> Iterable[tuple]:
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return flatten(combinations(s, r) for r in range(len(s) + 1))

flatten = chain.from_iterable # Yield items from each sequence in turn

def append(sequences) -> Sequence: "Append into a list"; return list(flatten(sequences))

def batched(iterable, n) -> Iterable[tuple]:
    "Batch data into non-overlapping tuples of length n. The last batch may be shorter."
    # batched('ABCDEFG', 3) --> ABC DEF G
    it = iter(iterable)
    while True:
        batch = tuple(islice(it, n))
        if batch:
            yield batch
        else:
            return

def sliding_window(sequence, n) -> Iterable[Sequence]:
    """All length-n subsequences of sequence."""
    return (sequence[i:i+n] for i in range(len(sequence) + 1 - n))

def first(iterable, default=None) -> Optional[object]: 
    """The first element in an iterable, or the default if iterable is empty."""
    return next(iter(iterable), default)

def last(iterable) -> Optional[object]: 
    """The last element in an iterable."""
    for item in iterable:
        pass
    return item

def nth(iterable, n, default=None):
    "Returns the nth item or a default value"
    return next(islice(iterable, n, None), default)

def first_true(iterable, default=False):
    """Returns the first true value in the iterable.
    If no true value is found, returns `default`."""
    return next((x for x in iterable if x), default)

Points in Space¶

Many puzzles involve points; usually two-dimensional points on a plane. A few puzzles involve three-dimensional points, and perhaps one might involve non-integers, so I'll try to make my Point implementation flexible in a duck-typing way. A point can also be considered a Vector; that is, (1, 0) can be a Point that means "this is location x=1, y=0 in the plane" and it also can be a Vector that means "move Eat (+1 in the along the x axis)." First we'll define points/vectors:

In [154]:

Point  = Tuple[int, ...]      # Type for points
Point2D = Tuple[int, int]     # Type for 2-dimensional point
Vector = Point                # E.g., (1, 0) can be a point, or can be a direction, a Vector
Zero   = (0, 0)

directions4 = East, South, West, North = ((1, 0), (0, 1),  (-1, 0), (0, -1))
diagonals   = SE,   NE,    SW,   NW    = ((1, 1), (1, -1), (-1, 1), (-1, -1))
directions8 = directions4 + diagonals
directions5 = directions4 + (Zero,)
directions9 = directions8 + (Zero,)
arrow_direction = {'^': North, 'v': South, '>': East, '<': West, '.': Zero,
                   'U': North, 'D': South, 'R': East, 'L': West}

def X_(point) -> int: "X coordinate of a point"; return point[0]
def Y_(point) -> int: "Y coordinate of a point"; return point[1]
def Z_(point) -> int: "Z coordinate of a point"; return point[2]

def Xs(points) -> Tuple[int]: "X coordinates of a collection of points"; return mapt(X_, points)
def Ys(points) -> Tuple[int]: "Y coordinates of a collection of points"; return mapt(Y_, points)
def Zs(points) -> Tuple[int]: "X coordinates of a collection of points"; return mapt(Z_, points)

## I define point arithmetic for general points and separate versions for 2D points,
## because profiling showed the 2D versions are significantly faster

def add3(p: Point, q: Point) -> Point:  "Add points";      return mapt(operator.add, p, q)
def sub3(p: Point, q: Point) -> Point:  "Subtract points"; return mapt(operator.sub, p, q)
def mul3(p: Point, k: float) -> Vector: "Scalar multiply"; return tuple(k * c for c in p)
def neg(p: Point2D)             -> Vector:  "Negate a 2D Point";  return mapt(operator.neg, p)

def add(p: Point2D, q: Point2D) -> Point2D: "Add 2D Points";      return (p[0] + q[0], p[1] + q[1])
def sub(p: Point2D, q: Point2D) -> Point2D: "Subtract 2D Points"; return (p[0] - q[0], p[1] - q[1])
def mul(p: Point2D, k: float)   -> Point2D: "Scalar multiply";    return (p[0] * k, p[1] * k)
def neg(p: Point2D)             -> Vector:  "Negate a 2D Point";  return (-p[0], -p[1])

def slide(points: Set[Point], delta: Vector) -> Set[Point]: 
    """Slide all the points in the set of points by the amount delta."""
    return {add(p, delta) for p in points}

def make_turn(facing: Vector, turn: str) -> Vector:
    """Turn 90 degrees left or right. `turn` can be 'L' or 'Left' or 'R' or 'Right' or lowercase."""
    (x, y) = facing
    return (y, -x) if turn[0] in ('L', 'l') else (-y, x)

def distance(p: Point2D, q: Point2D) -> float:
    """Euclidean (L2) distance between two points."""
    d = sum((pi - qi) ** 2 for pi, qi in zip(p, q)) ** 0.5
    return int(d) if d.is_integer() else d
    
def distance_squared(p: Point2D, q: Point2D) -> float:
    """Square of the Euclidean (L2) distance between two 2D points."""
    return (p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2
    
def taxi_distance(p: Point2D, q: Point2D) -> int:
    """Manhattan (L1) distance between two 2D Points."""
    return abs(p[0] - q[0]) + abs(p[1] - q[1])

Points on a Grid¶

Many puzzles seem to involve a two-dimensional rectangular grid with integer coordinates. A Grid is a rectangular array of (integer, integer) points, where each point holds some contents. Important things to know:

Grid is a subclass of dict
Usually the contents will be a character or an integer, but that's not specified or restricted.
A Grid can be initialized three ways:
- With another dict of {point: contents}, or an iterable of `(point, contents) pairs.
- With an iterable of strings, each depicting a row (e.g. ["#..", "..#"].
- With a single string, which will be split on newlines.
Contents that are a member of skip will be skipped. (For example, you could do skip=[' '] to not store any point that has a space as its contents.
There is a grid.neighbors(point) method. By default it returns the 4 orthogonal neighbors but you could make it all 8 adjacent squares, or something else, by specifying the directions keyword value in the Grid constructor.
By default, grids have bounded size; accessing a point outside the grid results in a KeyError. But some grids extend in all directions without limit; you can implement that by specifying, say, default='.' to make '.' contents in all directions.

In [22]:

class Grid(dict):
    """A 2D grid, implemented as a mapping of {(x, y): cell_contents}."""
    def __init__(self, grid=(), directions=directions4, skip=(), default=None):
        """Initialize one of four ways: 
        `Grid({(0, 0): '#', (1, 0): '.', ...})`
        `Grid(another_grid)
        `Grid(["#..", "..#"])
        `Grid("#..\n..#")`."""
        self.directions = directions
        self.skip = skip
        self.default = default
        if isinstance(grid, abc.Mapping): 
            self.update(grid) 
            self.size = (len(cover(Xs(self))), len(cover(Ys(self))))
        else:
            if isinstance(grid, str): 
                grid = grid.splitlines()
            self.size = (max(map(len, grid)), len(grid))
            self.update({(x, y): val 
                         for y, row in enumerate(grid) 
                         for x, val in enumerate(row)
                         if val not in skip})
        
    def __missing__(self, point): 
        """If asked for a point off the grid, either return default or raise error."""
        if self.default == KeyError:
            raise KeyError(point)
        else:
            return self.default

    def in_range(self, point) -> bool:
        """Is the point within the range of the grid's size?"""
        return (0 <= X_(point) < X_(self.size) and
                0 <= Y_(point) < Y_(self.size))

    def follow_line(self, start: Point, direction: Vector) -> Iterable[Point]:
        """All points from start going in direction, until the edge of the grid."""
        while self.in_range(start):
            yield start
            start = add(start, direction)

    def copy(self, updates={}): 
        """Make a copy of this grid, and optionally update some positions with new values."""
        grid = Grid(self, directions=self.directions, skip=self.skip, default=self.default)
        grid.update(updates)
        return grid
    
    def neighbors(self, point) -> List[Point]:
        """Points on the grid that neighbor `point`."""
        return [add(point, Δ) for Δ in self.directions 
                if (add(point, Δ) in self) 
                or (self.default not in (KeyError, None))]
    
    def neighbor_contents(self, point) -> Iterable:
        """The contents of the neighboring points."""
        return (self[p] for p in self.neighbors(point))

    def findall(self, contents: Collection) -> List[Point]:
        """All points that contain one of the given contents, e.g. grid.findall('#')."""
        return [p for p in self if self[p] in contents]
    
    def to_rows(self, xrange=None, yrange=None) -> List[List[object]]:
        """The contents of the grid, as a rectangular list of lists.
        You can define a window with an xrange and yrange; or they default to the whole grid."""
        xrange = xrange or cover(Xs(self))
        yrange = yrange or cover(Ys(self))
        default = ' ' if self.default in (KeyError, None) else self.default
        return [[self.get((x, y), default) for x in xrange] 
                for y in yrange]

    def print(self, sep='', xrange=None, yrange=None):
        """Print a representation of the grid."""
        for row in self.to_rows(xrange, yrange):
            print(*row, sep=sep)

    def __str__(self): return cat(self.to_rows())
    
    def plot(self, markers={'#': 's', '.': ','}, figsize=(14, 14), **kwds):
        """Plot a representation of the grid."""
        plt.figure(figsize=figsize)
        plt.gca().invert_yaxis()
        for m in markers:
            plt.plot(*T(p for p in self if self[p] == m), markers[m], **kwds)
            
def neighbors(point, directions=directions4) -> List[Point]:
    """Neighbors of this point, in the given directions.
    (This function can be used outside of a Grid class.)"""
    return [add(point, Δ) for Δ in directions]

A* Search¶

Many puzzles involve searching over a branching tree of possibilities. For many puzzles, an ad-hoc solution is fine. Different problems require different things from a search:

Some just need to know the final goal state.
Some need to know the sequence of actions that led to the final state.
Some neeed to know the sequence of intermediate states.
Some need to know the number of steps (or the total cost) to get to the final state.

But sometimes you need all of that (or you think you might need it in Part 2), and sometimes you have a good heuristic estimate of the distance to a goal state, and you want to make sure to use it. If that's the case, then my SearchProblem class and A_star_search function may be approopriate.

In [24]:

def A_star_search(problem, h=None):
    """Search nodes with minimum f(n) = path_cost(n) + h(n) value first."""
    h = h or problem.h
    return best_first_search(problem, f=lambda n: n.path_cost + h(n))

def best_first_search(problem, f) -> 'Node':
    "Search nodes with minimum f(node) value first."
    node = Node(problem.initial)
    frontier = PriorityQueue([node], key=f)
    reached = {problem.initial: node}
    while frontier:
        node = frontier.pop()
        if problem.is_goal(node.state):
            return node
        for child in expand(problem, node):
            s = child.state
            if s not in reached or child.path_cost < reached[s].path_cost:
                reached[s] = child
                frontier.add(child)
    return search_failure

In [25]:

class SearchProblem:
    """The abstract class for a search problem. A new domain subclasses this,
    overriding `actions` and perhaps other methods.
    The default heuristic is 0 and the default action cost is 1 for all states.
    When you create an instance of a subclass, specify `initial`, and `goal` states 
    (or give an `is_goal` method) and perhaps other keyword args for the subclass."""

    def __init__(self, initial=None, goal=None, **kwds): 
        self.__dict__.update(initial=initial, goal=goal, **kwds) 
        
    def __str__(self):
        return '{}({!r}, {!r})'.format(type(self).__name__, self.initial, self.goal)
    
    def actions(self, state):        raise NotImplementedError
    def result(self, state, action): return action # Simplest case: action is result state
    def is_goal(self, state):        return state == self.goal
    def action_cost(self, s, a, s1): return 1
    def h(self, node):               return 0 # Never overestimate!
    
class GridProblem(SearchProblem):
    """Problem for searching a grid from a start to a goal location.
    A state is just an (x, y) location in the grid."""
    def actions(self, pos):           return [p for p in self.grid.neighbors(pos) if self.grid[pos] != '#']
    def h(self, node):                return taxi_distance(node.state, self.goal) 

class Node:
    "A Node in a search tree."
    def __init__(self, state, parent=None, action=None, path_cost=0):
        self.__dict__.update(state=state, parent=parent, action=action, path_cost=path_cost)

    def __repr__(self):      return f'Node({self.state}, path_cost={self.path_cost})'
    def __len__(self):       return 0 if self.parent is None else (1 + len(self.parent))
    def __lt__(self, other): return self.path_cost < other.path_cost
    
search_failure = Node('failure', path_cost=inf) # Indicates an algorithm couldn't find a solution.
    
def expand(problem, node):
    "Expand a node, generating the children nodes."
    s = node.state
    for action in problem.actions(s):
        s2 = problem.result(s, action)
        cost = node.path_cost + problem.action_cost(s, action, s2)
        yield Node(s2, node, action, cost)
        
def path_actions(node):
    "The sequence of actions to get to this node."
    if node.parent is None:
        return []  
    return path_actions(node.parent) + [node.action]

def path_states(node):
    "The sequence of states to get to this node."
    if node in (search_failure, None): 
        return []
    return path_states(node.parent) + [node.state]

Other Data Structures¶

Here I define a few data types:

The priority queue, which is needed for A* search.
Hashable versions of dicts and Counters. These can be used in sets or as keys in dicts. Beware: unlike the frozenset, these are not safe: if you modify one after inserting it in a set or dict, it probably will not be found.
Graphs of {node: [neighboring_node, ...]}.
An AttrCounter, which is just like a Counter, but can be accessed with, say, ctr.name as well as ctr['name'].

In [27]:

class PriorityQueue:
    """A queue in which the item with minimum key(item) is always popped first."""

    def __init__(self, items=(), key=lambda x: x): 
        self.key = key
        self.items = [] # a heap of (score, item) pairs
        for item in items:
            self.add(item)
         
    def add(self, item):
        """Add item to the queue."""
        pair = (self.key(item), item)
        heapq.heappush(self.items, pair)

    def pop(self):
        """Pop and return the item with min f(item) value."""
        return heapq.heappop(self.items)[1]
    
    def top(self): return self.items[0][1]

    def __len__(self): return len(self.items)

In [28]:

class Hdict(dict):
    """A dict, but it is hashable."""
    def __hash__(self): return hash(tuple(sorted(self.items())))
    
class HCounter(Counter):
    """A Counter, but it is hashable."""
    def __hash__(self): return hash(tuple(sorted(self.items())))

class EqualityIsIdentity:
    """A mixin to say that objects of this class are equal only if they are identical."""
    def __hash__(self): return id(self)
    def __eq__(self, other): return self is other

In [29]:

class Graph(defaultdict):
    """A graph of {node: [neighboring_nodes...]}. 
    Can store other kwd attributes on it (which you can't do with a dict)."""
    def __init__(self, contents=(), **kwds):
        self.update(contents)
        self.default_factory = list
        self.__dict__.update(**kwds)

In [30]:

class AttrCounter(Counter):
    """A Counter, but `ctr['name']` and `ctr.name` are the same."""
    def __getattr__(self, attr):
        return self[attr]
    def __setattr__(self, attr, value):
        self[attr] = value

In [ ]:

def get_size(obj, seen: Optional[Set[int]] = None) -> int:
  """Recursively finds size of objects."""
  seen = set() if seen is None else seen

  if id(obj) in seen: return 0  # to handle self-referential objects
  seen.add(id(obj))

  size = sys.getsizeof(obj, 0) # pypy3 always returns default (necessary)
  if isinstance(obj, dict):
    size += sum(getSize(v, seen) + getSize(k, seen) for k, v in obj.items())
  elif hasattr(obj, '__dict__'):
    size += getSize(obj.__dict__, seen)
  elif hasattr(obj, '__slots__'): # in case slots are in use
    slotList = [getattr(C, "__slots__", []) for C in obj.__class__.__mro__]
    slotList = [[slot] if isinstance(slot, str) else slot for slot in slotList]
    size += sum(getSize(getattr(obj, a, None), seen) for slot in slotList for a in slot)
  elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
    size += sum(getSize(i, seen) for i in obj)
  return size

Tests¶

In [32]:

def tests():
    """Run tests on utility functions. Also serves as usage examples."""
    
    # PARSER

    assert parse("hello\nworld", show=0) == ('hello', 'world')
    assert parse("123\nabc7", digits, show=0) == ((1, 2, 3), (7,))
    assert truncate('hello world', 99) == 'hello world'
    assert truncate('hello world', 8)  == 'hell ...'

    assert         atoms('hello, cruel_world! 24-7') == ('hello', 'cruel_world', 24, -7)
    assert         words('hello, cruel_world! 24-7') == ('hello', 'cruel', 'world')
    assert        digits('hello, cruel_world! 24-7') == (2, 4, 7)
    assert          ints('hello, cruel_world! 24-7') == (24, -7)
    assert positive_ints('hello, cruel_world! 24-7') == (24, 7)

    # UTILITIES

    assert multimap(((i % 3), i) for i in range(9)) == {0: [0, 3, 6], 1: [1, 4, 7], 2: [2, 5, 8]}
    assert prod([2, 3, 5]) == 30
    assert total(Counter('hello, world')) == 12
    assert cover(3, 1, 4, 1, 5) == range(1, 6)
    assert minmax([3, 1, 4, 1, 5, 9]) == (1, 9)
    assert T([(1, 2, 3), (4, 5, 6)]) == [(1, 4), (2, 5), (3, 6)]
    assert the({1}) == 1
    assert split_at('hello, world', 6) == ('hello,', ' world')
    assert is_int(-42) and not is_int('one')
    assert sign(-42) == -1 and sign(0) == 0 and sign(42) == +1
    assert union([{1, 2}, {3, 4}, {5, 6}]) == {1, 2, 3, 4, 5, 6}
    assert intersection([{1, 2, 3}, {2, 3, 4}, {2, 4, 6, 8}]) == {2}
    assert clock_mod(24, 12) == 12 and 24 % 12 == 0
    assert cat(['hello', 'world']) == 'helloworld'

    # ITERTOOL RECIPES

    assert quantify(words('This is a test'), str.islower) == 3
    assert dotproduct([1, 2, 3, 4], [1000, 100, 10, 1]) == 1234
    assert list(flatten([{1, 2, 3}, (4, 5, 6), [7, 8, 9]])) == [1, 2, 3, 4, 5, 6, 7, 8, 9]
    assert append(([1, 2], [3, 4], [5, 6])) == [1, 2, 3, 4, 5, 6]
    assert list(batched(range(11), 3)) == [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10)]
    assert list(sliding_window('abcdefghi', 3)) == ['abc', 'bcd', 'cde', 'def', 'efg', 'fgh', 'ghi']
    assert first('abc') == 'a'
    assert first('') == None
    assert last('abc') == 'c'
    assert first_true([0, None, False, 42, 99]) == 42
    assert first_true([0, None, '', 0.0]) == False

    # POINTS

    p, q = (0, 3), (4, 0)
    assert Y_(p) == 3 and X_(q) == 4
    assert distance(p, q) == 5
    assert taxi_distance(p, q) == 7
    assert add(p, q) == (4, 3)
    assert sub(p, q) == (-4, 3)
    assert add(North, South) == (0, 0)
    
tests()