I'm trying to implement the depth-first search (DFS) algorithm for directed graphs as described in Cormen et al., Introduction to Algorithms (3rd ed.). Here is my implementation so far:

import pytest
from collections import OrderedDict
import copy


class Node(object):
    def __init__(self, color='white', parent=None, d=None, f=None):
        self.color = color
        self.parent = parent
        self.d = d          # Discovery time
        self.f = f          # Finishing time


class Graph(object):
    def __init__(self, edges, node_indices=None):
        self.edges = edges
        self.nodes = self.initialize_nodes(node_indices)
        self.adj = self.initialize_adjacency_list()

    def initialize_nodes(self, node_indices=None):
        if node_indices is None:
            node_indices = sorted(set(node for edge in self.edges for node in edge))
        return OrderedDict([(node_index, Node()) for node_index in node_indices])

    def initialize_adjacency_list(self):
        A = {node: [] for node in self.nodes}
        for edge in self.edges:
            u, v = edge
            A[u].append(v)
        return A

    def dfs(self):
        self.time = 0
        for u, node in self.nodes.items():
            if node.color == 'white':
                self.dfs_visit(u)

    def dfs_visit(self, u):
        self.time += 1
        self.nodes[u].d = self.time
        self.nodes[u].color = 'gray'
        for v in self.adj[u]:
            if self.nodes[v].color == 'white':
                self.nodes[v].parent = u
                self.dfs_visit(v)
        self.nodes[u].color = 'black'
        self.time += 1
        self.nodes[u].f = self.time

    @staticmethod
    def transpose(edges):
        return [(v,u) for (u,v) in edges]

    def strongly_connected_components(self):
        self.dfs()
        finishing_times = {u: node.f for u, node in self.nodes.items()}
        self.__init__(self.transpose(self.edges))
        node_indices = sorted(finishing_times, key=finishing_times.get, reverse=True)
        self.nodes = self.initialize_nodes(node_indices)
        self.dfs()
        return self.trees()

    def trees(self):
        _trees = []
        nodes = copy.deepcopy(self.nodes)
        while nodes:
            for u, node in nodes.items():
                if node.parent is None:
                    _trees.append([u])
                    nodes.pop(u)
                else:
                    for tree in _trees:
                        if node.parent in tree:
                            tree.append(u)
                            nodes.pop(u)
        return _trees

To test that it works, I've taken an example from Figure 22.9 of the book:

[Figure 22.9: a directed graph whose strongly connected components are shaded gray]

After renaming the nodes a through h to 1 through 8, respectively, I ran the following test:

def test_strongly_connected_components():
    edges = [(1,2), (5,1), (2,5), (5,6), (2,6), (6,7), (7,6), (2,3), (3,7), (3,4), (4,3), (4,8), (7,8), (8,8)]
    graph = Graph(edges)
    assert graph.strongly_connected_components() == [[1, 5, 2], [3, 4], [6, 7], [8]]


if __name__ == "__main__":
    pytest.main([__file__+"::test_strongly_connected_components", "-s"])

This test passes, confirming the gray-shaded SCCs in the figure.

For the 'real' exercise, however, I need to use an input file, SCC.txt, which contains 875,714 lines, each representing an edge as a head-tail pair of integers, and output the sizes of the five largest SCCs. To this end I tried the following test:

@pytest.fixture
def edges():
    with open('SCC.txt') as f:
        return [tuple(map(int, line.split())) for line in f.read().splitlines()]

def test_SCC_on_full_graph(edges):
    graph = Graph(edges)
    SCCs = graph.strongly_connected_components()
    print(sorted(map(len, SCCs), reverse=True))     # Read off the sizes of the largest SCCs


if __name__ == "__main__":
    pytest.main([__file__+"::test_SCC_on_full_graph", "-s"])

However, I run into a RuntimeError: maximum recursion depth exceeded in cmp:

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <scc.Graph object at 0x103253690>, u = 209099

    def dfs_visit(self, u):
            self.time += 1
            self.nodes[u].d = self.time
            self.nodes[u].color = 'gray'
            for v in self.adj[u]:
>                   if self.nodes[v].color == 'white':
E                   RuntimeError: maximum recursion depth exceeded in cmp

scc.py:53: RuntimeError
========================== 1 failed in 21.79 seconds ===========================

I've read about raising the limit with sys.setrecursionlimit, but this doesn't seem to be recommended practice. Other than that, I'm not sure how I could improve the code, as it implements the pseudocode from the book fairly literally. Any ideas on how I can overcome this error?

2 Answers

I managed to solve the problem using the threading library with an increased stack_size and recursion limit. Here is the full solution:

import sys
from collections import OrderedDict
import copy
import threading


class Node(object):
    def __init__(self, color='white', parent=None, d=None, f=None):
        self.color = color
        self.parent = parent
        self.d = d          # Discovery time
        self.f = f          # Finishing time


class Graph(object):
    def __init__(self, edges, node_indices=None):
        self.edges = edges
        self.nodes = self.initialize_nodes(node_indices)
        self.adj = self.initialize_adjacency_list()
        self.trees = dict()

    def initialize_nodes(self, node_indices=None):
        if node_indices is None:
            node_indices = sorted(set(node for edge in self.edges for node in edge))
        return OrderedDict([(node_index, Node()) for node_index in node_indices])

    def initialize_adjacency_list(self):
        A = {node: [] for node in self.nodes}
        for edge in self.edges:
            u, v = edge
            A[u].append(v)
        return A

    def dfs(self):
        self.time = 0
        for u, node in self.nodes.items():
            if node.color == 'white':
                self.dfs_visit(u, root=u)

    def dfs_visit(self, u, root=None):
        if u == root:
            self.trees[root] = set()
        self.time += 1
        self.nodes[u].d = self.time
        self.nodes[u].color = 'gray'
        for v in self.adj[u]:
            if self.nodes[v].color == 'white':
                self.nodes[v].parent = u
                self.trees[root].add(v)
                self.dfs_visit(v, root=root)
        self.nodes[u].color = 'black'
        self.time += 1
        self.nodes[u].f = self.time

    @staticmethod
    def transpose(edges):
        return [(v,u) for (u,v) in edges]

    def strongly_connected_components(self):
        self.dfs()
        finishing_times = {u: node.f for u, node in self.nodes.items()}
        self.__init__(self.transpose(self.edges))
        node_indices = sorted(finishing_times, key=finishing_times.get, reverse=True)
        self.nodes = self.initialize_nodes(node_indices)
        self.dfs()

        trees = copy.deepcopy(self.trees)
        for k, v in trees.items():
            v.add(k)
        return trees.values()


def edges():
    with open('SCC.txt') as f:
        return [tuple(map(int, line.split())) for line in f.read().splitlines()]

def SCC_on_full_graph():
    E = edges()
    graph = Graph(E)
    SCCs = graph.strongly_connected_components()

    SCC_sizes = sorted(list(map(len, SCCs)), reverse=True)
    print(SCC_sizes[:5])                            # Read off the size of the 5 largest SCCs


if __name__ == "__main__":
    threading.stack_size(67108864)      # 64 MiB stack for threads started after this call
    sys.setrecursionlimit(2**20)
    thread = threading.Thread(target=SCC_on_full_graph)
    thread.start()
    thread.join()                       # Wait for the DFS thread to finish
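
This works because threading.stack_size() only applies to threads started after the call, and the main thread's stack size is fixed by the operating system: the deeply recursive DFS therefore has to run in a freshly created worker thread with the enlarged 64 MiB stack, while sys.setrecursionlimit(2**20) lifts the interpreter's own recursion guard.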

The algorithm has to remain a DFS logically, but programmatically you can work around the recursion limit:

  1. Write the DFS in such a way that you can restart it from one of the top-level calls if it gets near the recursion limit (see the sketch below).

  2. Try using multiprocessing.

PS: Is it possible that infinite recursion occurs on the larger dataset, i.e. a logical error that only surfaces on larger input? If you have datasets of incremental sizes, you could also identify the limits of your implementation of the algorithm in Python.
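
To make suggestion 1 concrete, the usual way to sidestep the limit entirely is to make dfs_visit iterative. Below is a minimal sketch (hypothetical, written against the Graph class from the question) that keeps an explicit stack of (vertex, neighbor-iterator) pairs in place of the call stack and produces the same discovery and finishing times as the recursive version:

def dfs_visit(self, u):
    # Hypothetical drop-in replacement for Graph.dfs_visit: an explicit
    # stack of (vertex, iterator over its remaining neighbors) plays the
    # role of the call stack, so Python's recursion limit never applies.
    self.time += 1
    self.nodes[u].d = self.time
    self.nodes[u].color = 'gray'
    stack = [(u, iter(self.adj[u]))]
    while stack:
        w, neighbors = stack[-1]
        advanced = False
        for v in neighbors:
            if self.nodes[v].color == 'white':
                # Discover v and descend into it, as the recursive call would.
                self.nodes[v].parent = w
                self.time += 1
                self.nodes[v].d = self.time
                self.nodes[v].color = 'gray'
                stack.append((v, iter(self.adj[v])))
                advanced = True
                break
        if not advanced:
            # All of w's neighbors are explored: finish w and backtrack.
            stack.pop()
            self.nodes[w].color = 'black'
            self.time += 1
            self.nodes[w].f = self.time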
