1

I'm testing functional style programming with C++. Having the compiler to optimize recursion to a loop when possible is necessary to avoid stack overflows. The below program crashes due to stack overflow when BlendingTable::print() is called. I don't know why it doesn't crash in the constructor of BlendingTable, which does a same form of recursion as print. I need to allocate 1GB of stack to avoid stack overflow, and this is not acceptable.

Do I need to pass some special options to hint gcc to optimize the recursion? How do functional programming languages handle the problem of deep recursion? Or is it simply an unsolvable problem with current compiler technology?


EDIT1

I kind of solved the problem by placing __attribute__((always_inline)) at Stdout::print. Then gcc does remove the recursion in BlendingTable::print.

#include <cstdlib>
#include <iostream>
#include <memory>
#include <array>
#include <limits>

class Stdout {
public:
    template<typename T, typename... Ts>
    static void print(const T& t, const Ts&... ts) {
        std::cout << t << std::flush;
        print(ts...);
    }

    static void print() {}

    template<typename... Ts>
    static void printLine(const Ts&... ts) {
        print(ts..., '\n');
    }
};

class Error {
public:
    template<typename... Ts>
    static void assert(bool b, const Ts&... ts) {
        if (!b) {
            Stdout::printLine(ts...);
            throw;
        }
    }
};

template<typename T, int dimension = 1>
class Array {
private:
    std::unique_ptr<T[]> pointer;
    std::array<int, dimension> sizes;
    int realSize;

public:
    template<typename... Ns>
    Array(Ns... ns):
    realSize(1) {
        checkArguments(ns...);
        create(1, ns...);
    }

    Array() {}

private:
    template<typename... Ns>
    static constexpr void checkArguments(Ns...) {
        static_assert(sizeof...(Ns) == dimension, "dimension mismatch");
    }

    template<typename... Ns>
    void create(int d, int n, Ns... ns) {
        realSize *= n;
        sizes[d - 1] = n;
        create(d + 1, ns...);
    }

    void create(int) {
        pointer = std::unique_ptr<T[]>(new T[realSize]);
    }

    int computeSubSize(int d) const {
        if (d == dimension) {
            return 1;
        }
        return sizes[d] * computeSubSize(d + 1);
    }

    template<typename... Ns>
    int getIndex(int d, int n, Ns... ns) const {
        return n * computeSubSize(d) + getIndex(d + 1, ns...);
    }

    int getIndex(int) const {
        return 0;
    }

public:
    template<typename... Ns>
    T& operator()(Ns... ns) const {
        checkArguments(ns...);
        return pointer[getIndex(1, ns...)];
    }

    int getSize(int d = 1) const {
        return sizes[d - 1];
    }
};

class BlendingTable : public Array<unsigned char, 3> {
private:
    enum {
        SIZE = 0x100,
        FF = SIZE - 1,
    };

public:
    BlendingTable():
    Array<unsigned char, 3>(SIZE, SIZE, SIZE) {
        static_assert(std::numeric_limits<unsigned char>::max() == FF, "unsupported byte format");
        create(FF, FF, FF);
    }

private:
    void create(int dst, int src, int a) {
        (*this)(dst, src, a) = (src * a + dst * (FF - a)) / FF;
        if (a > 0) {
            create(dst, src, a - 1);
        } else if (src > 0) {
            create(dst, src - 1, FF);
        } else if (dst > 0) {
            create(dst - 1, FF, FF);
        } else {
            return;
        }
    }

    void print(int dst, int src, int a) {
        Stdout::print(static_cast<int>((*this)(FF - dst, FF - src, FF - a)), ' ');
        if (a > 0) {
            print(dst, src, a - 1);
        } else if (src > 0) {
            print(dst, src - 1, FF);
        } else if (dst > 0) {
            print(dst - 1, FF, FF);
        } else {
            Stdout::printLine();
            return;
        }
    }

public:
    void print() {
        print(FF, FF, FF);
    }
};

int main() {
    BlendingTable().print();
    return EXIT_SUCCESS;
}
8
  • 1
    Check this stackoverflow.com/questions/15897452/tail-recursion-in-gcc-g Commented Jul 7, 2015 at 9:27
  • @pablochan print() and create() already uses the tail recursion mechanism. Commented Jul 7, 2015 at 9:28
  • Yes, but the important part is the -O2 switch when invoking the compiler Commented Jul 7, 2015 at 9:36
  • I would welcome such a missed-optimization report in gcc's bugzilla (your code does not compile with clang, you may want to fix that first). Though it seems that it considers the calls to operator<< too heavy to make it worth turning into a loop, and the function needs enough stack that replacing call by jmp is harder. Commented Jul 7, 2015 at 9:39
  • @pablochan compiled with -fwhole-program -s -O3 Commented Jul 7, 2015 at 9:40

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.