0

In Python, I have a pair of classes like

class Internal:
    """Keeps a reference to an object supplied by the caller."""
    def __init__(self, ref):
        # Stores the object itself, not a copy: changes made elsewhere are visible through `ref`.
        self.ref = ref

class External:
    """Owns a list (`storage`) and an `Internal` that refers to that same list."""
    def __init__(self):
        self.storage = [1,2,3]
        # `Internal` receives the very same list object as `storage`, not a copy.
        self.int = Internal(self.storage)

    def change(self):
        """Modify `storage` in place; the change is also visible via `self.int.ref`."""
        self.storage[1] = 10

Deep copy of External works incredibly well

from copy import deepcopy

# Copy first, then mutate only the original.
s = External()
s1 = deepcopy(s)
s.change()
print(s.int.ref)  # [1,10,3] - the original sees the change
print(s1.int.ref)   # [1,2,3] - the copy kept its own list

I want to implement Internal as a Python C extension. As far as I understand, I have to implement either __reduce__ or __deepcopy__. I decided to go with __deepcopy__. Currently, I have implemented it as

// First attempt at `Internal.__deepcopy__(memo)`: allocates a new object of the
// same type and points it at the *same* `ref` as `self`. `memo` is never consulted
// and `ref` is never copied, so the result still shares its storage with the original.
PyObject* internal_deepcopy(InternalObj* self, PyObject* memo) {
    InternalObj* obj = reinterpret_cast<InternalObj*>(PyType_GenericNew(Py_TYPE(self), nullptr, nullptr));
    if (!obj) return nullptr;
    obj->ref = self->ref;  // same object as in `self`, not a copy
    Py_INCREF(obj->ref);  // the new object holds its own strong reference to it
    return reinterpret_cast<PyObject*>(obj);
}

And obviously, it is wrong. With this deep copy, the Internal inside the copied External still refers to the original storage instead of the copied one.

from internals import Internal

class External:
    """Owns a list (`storage`) and an extension-type `Internal` that refers to that same list."""
    def __init__(self):
        self.storage = [1,2,3]
        # `Internal` receives the very same list object as `storage`, not a copy.
        self.int = Internal(self.storage)

    def change(self):
        """Modify `storage` in place."""
        self.storage[1] = 10

# Same experiment as with the pure-Python `Internal`: copy, then mutate only the original.
s = External()
s1 = deepcopy(s)
s.change()
s.int.print()  # 1 10 3
s1.int.print()  # 1 10 3 - the copy's Internal still refers to the original list

That is, the objects were not separated.

The question: how can I detect that the enclosing External object is being copied and make Internal refer to the copied storage (as happens in the pure Python code)?


Other parts of my Python module

#include <Python.h>
#include <iostream>

// Instance layout of the `Internal` extension type.
struct InternalObj {
    PyObject_HEAD  // standard object header; must stay the first member
    PyObject* ref;  // strong reference to the wrapped object (taken in internal_new, released in internal_dealloc)
};

/// Implements `Internal.print()`: writes the items of `ref` to stdout as
/// space-separated integers followed by a newline.
///
/// @param self   the `Internal` instance whose `ref` is printed.
/// @param unused always ignored (METH_NOARGS).
/// @return a new reference to `None`, or nullptr with a Python exception set when
///         `ref` is not a sequence or an item cannot be converted to a C long.
PyObject* internal_print(InternalObj* self, PyObject* unused) {
    // The PySequence_Fast_* accessors are only valid on the result of PySequence_Fast().
    PyObject* fast = PySequence_Fast(self->ref, "Internal.ref must be a sequence");
    if (!fast) return nullptr;
    // The size is re-read on every iteration: converting an item may run Python code
    // that resizes the underlying list.
    for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(fast); ++i) {
        const long value = PyLong_AsLong(PySequence_Fast_GET_ITEM(fast, i));
        if (value == -1 && PyErr_Occurred()) {  // -1 is also a legitimate value
            Py_DECREF(fast);
            return nullptr;
        }
        std::cout << value << ' ';
    }
    std::cout << std::endl;
    Py_DECREF(fast);
    Py_RETURN_NONE;  // returns a *new* reference to None
}

// Method table of `Internal`: `__deepcopy__(memo)` takes exactly one argument,
// `print()` takes none. The all-null entry terminates the table.
PyMethodDef internal_methods[] = {
    {"__deepcopy__", reinterpret_cast<PyCFunction>(internal_deepcopy), METH_O, nullptr},
    {"print", reinterpret_cast<PyCFunction>(internal_print), METH_NOARGS, nullptr},
    {nullptr, nullptr, 0, nullptr},
};

/// tp_dealloc of `Internal`: drops the reference held in `ref` and frees the object.
void internal_dealloc(InternalObj* self) {
    // `ref` is null in an object that was allocated but never fully initialised
    // (e.g. released on an error path right after allocation), so it must not be
    // dereferenced unconditionally.
    Py_XDECREF(self->ref);  // release
    Py_TYPE(self)->tp_free(self);
}

/// tp_new of `Internal`: parses the single argument `ref` (positional or keyword),
/// allocates the instance and stores a strong reference to `ref` in it.
/// Returns nullptr with a Python exception set on failure.
PyObject* internal_new(PyTypeObject* subtype, PyObject* args, PyObject* kwds) {
    const char* keywords[] = {"ref", nullptr};
    PyObject* target = nullptr;
    const int parsed = PyArg_ParseTupleAndKeywords(args, kwds, "O:__init__", const_cast<char**>(keywords), &target);
    if (!parsed) {
        return nullptr;
    }
    PyObject* instance = PyType_GenericNew(subtype, nullptr, nullptr);
    if (instance == nullptr) {
        return nullptr;
    }
    Py_INCREF(target);  // the instance keeps `ref` alive until internal_dealloc
    reinterpret_cast<InternalObj*>(instance)->ref = target;
    return instance;
}

// Static type object for `internals.Internal`.
// The initializer is positional, so entries must stay in the exact order of the
// PyTypeObject slots named in the trailing comments. Only the name, instance size,
// deallocator, flags, method table and tp_new are set; every other slot is left at 0.
PyTypeObject internal_type{
    PyVarObject_HEAD_INIT(nullptr, 0)
    "internals.Internal",  // tp_name
    sizeof(InternalObj),  // tp_basicsize
    0,  // tp_itemsize
    (destructor)internal_dealloc,  // tp_dealloc
    0,  // tp_vectorcall_offset
    0,  // tp_getattr
    0,  // tp_setattr
    0,  // tp_as_async
    0,  // tp_repr
    0,  // tp_as_number
    0,  // tp_as_sequence
    0,  // tp_as_mapping
    0,  // tp_hash
    0,  // tp_call
    0,  // tp_str
    0,  // tp_getattro
    0,  // tp_setattro
    0,  // tp_as_buffer
    Py_TPFLAGS_DEFAULT,  // tp_flags
    0,  // tp_doc
    0,  // tp_traverse
    0,  // tp_clear
    0,  // tp_richcompare
    0,  // tp_weaklistoffset
    0,  // tp_iter
    0,  // tp_iternext
    internal_methods,  // tp_methods
    0,  // tp_members
    0,  // tp_getset
    0,  // tp_base
    0,  // tp_dict
    0,  // tp_descr_get
    0,  // tp_descr_set
    0,  // tp_dictoffset
    0,  // tp_init
    0,  // tp_alloc
    internal_new,  // tp_new
};

// Module definition for `internals`. Positional initializer; the trailing comments
// name the PyModuleDef field each entry is presumed to fill (verify against the
// CPython headers in use).
PyModuleDef internals_module{
    PyModuleDef_HEAD_INIT,
    "internals",  // m_name
    "Python interface for internals",  // m_doc
    -1,  // m_size
    nullptr,  // m_methods
    nullptr,  // m_slots
    nullptr,  // m_traverse
    nullptr,  // m_clear
    nullptr  // m_free
};

/// Module initialisation entry point for `import internals`.
///
/// Finalises the `Internal` type, creates the module and publishes the type as
/// `internals.Internal`. Returns the new module, or nullptr with a Python exception
/// set if any step fails.
PyMODINIT_FUNC PyInit_internals() {
    // Ready the type first so that a failure here cannot leak a half-built module.
    if (PyType_Ready(&internal_type) < 0) return nullptr;
    PyObject* module = PyModule_Create(&internals_module);
    if (!module) return nullptr;
    PyObject* type_object = reinterpret_cast<PyObject*>(&internal_type);
    Py_INCREF(type_object);
    // PyModule_AddObject steals the reference only on success.
    if (PyModule_AddObject(module, "Internal", type_object) < 0) {
        Py_DECREF(type_object);
        Py_DECREF(module);
        return nullptr;
    }
    return module;
}

Meson script that I use to build.

# Project 'ints' with the C and C++ compilers enabled; C++ sources are built as C++20.
project('ints', 'c', 'cpp', version: '0.1', default_options: ['c_std=c18', 'cpp_std=c++20', 'b_ndebug=if-release'])
# Locate the `python3` installation; configuration fails if it cannot be found.
py_installation = import('python').find_installation('python3', required: true)
# Build internal.cpp into the extension module `internals` against that Python.
py_installation.extension_module('internals', 'internal.cpp', dependencies: py_installation.dependency())

1 Answer 1

0

It looks scary but technically is quite straightforward. There are two parts of the answer.

  1. How does Python deep-copy reference cycles? E.g.:
l = []
l.append(l)  # the list now contains itself
q = deepcopy(l)  # deep-copying the self-referencing list

works without issue.

deepcopy temporarily records the objects it has already copied in its second argument, memo, in the form {id(old): new}. If an object has already been processed, deepcopy returns the existing copy from memo. If not, it creates a new object and deep-copies everything it references.

  2. How should this be applied in the C API?

Import deepcopy (in my case this step could be skipped, because I know that my ref must already be in memo, but it is needed in case it is not):

PyObject *copy = PyImport_ImportModule("copy");
deepcopy = PyObject_GetAttrString(copy, "deepcopy");

This is needed because list (and many other types) does not have a __deepcopy__ method; its deep copy is implemented in Python in the copy module. I've seen some people perform this import inside the deep-copy function itself, but I would suggest doing it once in module init (PyInit_internals in my case).

Implement the correct version of deep copy )))

/// Implements `Internal.__deepcopy__(memo)`.
///
/// @param self the object being copied.
/// @param memo the `{id(original): copy}` dictionary used by `copy.deepcopy`;
///             a fresh one is created when it is null or `None`.
/// @return a new reference to the copy, whose `ref` is `deepcopy(self->ref, memo)`,
///         or nullptr with a Python exception set.
///
/// Relies on the file-level `deepcopy` callable (the `copy.deepcopy` function).
PyObject* internal_deepcopy(InternalObj* self, PyObject* memo) {
    // `memo` is keyed by id(obj), which in CPython is the object's address.
    // PyLong_FromVoidPtr covers the full pointer range, unlike a conversion to `long`.
    PyObject* id = PyLong_FromVoidPtr(self);
    if (!id) return nullptr;
    if (memo && memo != Py_None) {
        PyObject* memed = PyDict_GetItemWithError(memo, id);  // borrowed reference
        if (memed || PyErr_Occurred()) {
            Py_DECREF(id);
            Py_XINCREF(memed);  // the caller must receive a new reference
            return memed;  // nullptr here means the lookup itself failed
        }
        Py_INCREF(memo);  // to unify exit code with the branch where `memo` is created
    } else {
        memo = PyDict_New();  // top-level call
        if (!memo) {
            Py_DECREF(id);
            return nullptr;
        }
    }
    InternalObj* obj = reinterpret_cast<InternalObj*>(PyType_GenericNew(Py_TYPE(self), nullptr, nullptr));
    if (!obj) {
        Py_DECREF(id);
        Py_DECREF(memo);
        return nullptr;
    }
    // Give the copy a valid placeholder before it becomes reachable through `memo`:
    // a cycle back to `self` may hand it out during the recursive copy, and it may
    // be deallocated on an error path before the real `ref` exists.
    Py_INCREF(Py_None);
    obj->ref = Py_None;
    // Register the copy *before* descending so that cycles resolve to it.
    const int stored = PyDict_SetItem(memo, id, reinterpret_cast<PyObject*>(obj));
    Py_DECREF(id);
    if (stored < 0) {
        Py_DECREF(memo);
        Py_DECREF(obj);
        return nullptr;
    }
    PyObject* copied = PyObject_CallFunctionObjArgs(deepcopy, self->ref, memo, nullptr);
    Py_DECREF(memo);  // frees the map if it was created by this call
    if (!copied) {
        Py_DECREF(obj);
        return nullptr;
    }
    PyObject* placeholder = obj->ref;
    obj->ref = copied;  // takes over the new reference returned by deepcopy
    Py_DECREF(placeholder);
    return reinterpret_cast<PyObject*>(obj);
}

I'm not sure the code above covers the general case. For instance, copy.deepcopy does some manipulation of object lifetimes that is missing here. But for simple cases it should be correct.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.