91

I want to print the memory size of all variables in my scope simultaneously.

Something similar to:

# Python 2 syntax ("print" statement); prints only the sizes, with no
# variable names — which is exactly what the question wants to improve.
for obj in locals().values():
    print sys.getsizeof(obj)

But with variable names before each value so I can see which variables I need to delete or split into batches.

Ideas?

5 Answers 5

145

A bit more code, but works in Python 3 and gives a sorted, human readable output:

import sys
def sizeof_fmt(num, suffix='B'):
    """Format a byte count with binary (1024-based) unit prefixes.

    Based on Fred Cirera's recipe,
    https://stackoverflow.com/a/1094933/1870254, modified.
    """
    value = num
    for prefix in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if abs(value) < 1024.0:
            return "%3.1f %s%s" % (value, prefix, suffix)
        value /= 1024.0
    # Anything past 1024 ZiB falls through to yobibytes.
    return "%.1f %s%s" % (value, 'Yi', suffix)

# Snapshot locals() into a list first so the dict cannot change size while
# we iterate; sort descending by byte count and keep only the ten largest.
for name, size in sorted(
        ((key, sys.getsizeof(val)) for key, val in list(locals().items())),
        key=lambda entry: entry[1], reverse=True)[:10]:
    print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))

Example output:

                  umis:   3.6 GiB
       barcodes_sorted:   3.6 GiB
          barcodes_idx:   3.6 GiB
              barcodes:   3.6 GiB
                  cbcs:   3.6 GiB
         reads_per_umi:   1.3 GiB
          umis_per_cbc:  59.1 MiB
         reads_per_cbc:  59.1 MiB
                   _40:  12.1 KiB
                     _:   1.6 KiB

Note that this will only print the 10 largest variables and stays silent about the rest. If you want all of them printed, remove the [:10] from the second-to-last line.

Sign up to request clarification or add additional context in comments.

15 Comments

Nice! Can you explain what these _40 entries are? It shows me multiple _\d+ rows. Some seem to have exactly the same size as a named variable, others don't.
@MoRe these are (probably) temporary variables holding the output of jupyter notebook cells. see documentation
"This system obviously can potentially put heavy memory demands on your system, since it prevents Python’s garbage collector from removing any previously computed results. You can control how many results are kept in memory with the configuration option InteractiveShell.cache_size. If you set it to 0, output caching is disabled. You can also use the %reset and %xdel magics to clear large items from memory"
@gocen: yes. 6340*200*200 doubles *64 bit/double / (8 bit/byte) / (2^30 bytes per GB) = 1.889 GB
I get a "dictionary changed size during iteration" when running the code above. Here is a modification exporting to a list first: import sys def sizeof_fmt(num, suffix='B'): ... (truncated due char limit) local_vars = list(locals().items()) variables = [(var, (sys.getsizeof(obj))) for var, obj in local_vars] variables = sorted(((var, size_value) for var, size_value in variables), key= lambda x: -x[1]) variables = [(var, sizeof_fmt(size_value)) for var, size_value in variables] for var, size_fmt in variables[:10]: print("{:>30}: {:>8}".format(var, size_fmt))
|
75

You can iterate over both the key and value of a dictionary using .items()

from __future__ import print_function  # for Python2
import sys

# Materialize the (name, value) pairs first; iterating locals() directly
# can raise "dictionary changed size during iteration".
local_vars = list(locals().items())
for name, value in local_vars:
    print(name, sys.getsizeof(value))

3 Comments

This resulted in RuntimeError: dictionary changed size during iteration
Use local_vars = list(locals().items()) to avoid RuntimeError.
For anyone wondering what the unit of the size is: it's bytes. See sys.getsizeof().
1

Extending the previous answers: this collapses all of the noisy system variables into one bucket, adds recursive computation of an arbitrary object's size (not just the size of its top-level pointer), sorts with the largest object first, and finally summarizes the whole.

import sys
def get_real_size(obj, _seen=None):
    """Recursively calculate the real memory size of an object.

    Follows the contents of the builtin containers (list, tuple, set,
    frozenset, dict).  Each distinct object is counted once, which both
    avoids inflating totals for shared references and prevents the
    RecursionError the original version hit on self-referential
    containers (e.g. ``a = []; a.append(a)``).

    Parameters:
        obj: any object.
        _seen: internal set of object id()s already counted; callers
            should leave it as None.

    Returns:
        int: approximate size in bytes (sys.getsizeof summed over the
        object and its transitively contained items).
    """
    if _seen is None:
        _seen = set()
    if id(obj) in _seen:
        return 0  # already counted: shared or cyclic reference
    _seen.add(id(obj))
    size = sys.getsizeof(obj)
    if isinstance(obj, (list, tuple, set, frozenset)):
        size += sum(get_real_size(item, _seen) for item in obj)
    elif isinstance(obj, dict):
        size += sum(get_real_size(key, _seen) + get_real_size(value, _seen)
                    for key, value in obj.items())
    return size

def get_memory_usage():
    """Print the memory footprint of every global variable, largest first.

    User variables (no leading underscore) are listed individually;
    system variables (leading underscore, e.g. IPython's output cache)
    are collapsed into a single '_system_vars' bucket.  Per-variable
    sizes come from get_real_size (recursive).  Totals are printed at
    the end.  Returns None; output goes to stdout.
    """
    # Separate user-defined and system variables (callables are skipped).
    user_vars = {k: v for k, v in globals().items() if not k.startswith('_') and not callable(v)}
    system_vars = {k: v for k, v in globals().items() if k.startswith('_') and not callable(v)}

    # Calculate memory usage using custom get_real_size function
    memory_usage = {k: get_real_size(v) for k, v in user_vars.items()}
    system_memory = sum(get_real_size(v) for v in system_vars.values())

    # Convert bytes to human-readable format
    def sizeof_fmt(num, suffix='B'):
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(num) < 1024.0:
                return f"{num:3.1f}{unit}{suffix}"
            num /= 1024.0
        return f"{num:.1f}Yi{suffix}"

    # Sort by the byte counts we already computed.  (The original sorted by
    # re-measuring globals().get(name, 0), which re-traversed every object a
    # second time and mis-ordered the synthetic '_system_vars' entry, since
    # that name does not exist in globals() and fell back to sizing the int 0.)
    sizes = dict(memory_usage)
    sizes['_system_vars'] = system_memory
    sorted_sizes = sorted(sizes.items(), key=lambda item: item[1], reverse=True)

    # Print the sorted list
    for var, size in sorted_sizes:
        print("{:>30}: {:>8}".format(var, sizeof_fmt(size)))

    # Print totals
    print("\nTotal Memory Usage:")
    print(f"User Variables: {sizeof_fmt(sum(memory_usage.values()))}")
    print(f"System Variables: {sizeof_fmt(system_memory)}")
    print(f"Combined Total:     {sizeof_fmt(system_memory+sum(memory_usage.values()))}")

Simply call

get_memory_usage()

Example:

                     Times:  887.5MB
                         t:  156.4KB
                        In:   13.3KB
                     tones:    1.5KB
                        EX:    1.5KB
                    extraS:    1.5KB
                       Out:   752.0B
                        ap:    72.0B
                    pickle:    72.0B
                       plt:    72.0B
                  warnings:    72.0B
                        np:    72.0B
                    signal:    72.0B
                        cm:    72.0B
                   asizeof:    72.0B
                       sys:    72.0B
                    pprint:    72.0B
                      pool:    48.0B
                        AP:    48.0B
                  wish_cpu:    28.0B
                        Fs:    28.0B
                        dt:    24.0B
                        tl:    24.0B
                toneLength:    24.0B
              _system_vars:   32.0KB

Total Memory Usage:
User Variables: 887.6MB
System Variables: 32.0KB
Combined Total: 887.7MB

Comments

0

I found that for some containers I wasn't getting the correct answer (it measured the container's overhead only, not its contents).
Combining @jan_Glx's answer above with a snippet from the post below:
How to know bytes size of python object like arrays and dictionaries? - The simple way

# (Fixed: the original imported getsizeof twice from sys.)
from __future__ import print_function
from sys import getsizeof, stderr
from itertools import chain
from collections import deque
try:
    # reprlib.repr truncates long reprs in verbose output; fall back to
    # the builtin repr if unavailable.
    from reprlib import repr
except ImportError:
    pass

def sizeof_fmt(num, suffix='B'):
    """Human-readable byte count with binary prefixes (KiB, MiB, ...).

    After Fred Cirera, https://stackoverflow.com/a/1094933/1870254, modified.
    """
    units = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']
    magnitude = num
    idx = 0
    while idx < len(units):
        if abs(magnitude) < 1024.0:
            return "%3.1f %s%s" % (magnitude, units[idx], suffix)
        magnitude /= 1024.0
        idx += 1
    # Exhausted every listed prefix: report in yobibytes.
    return "%.1f %s%s" % (magnitude, 'Yi', suffix)

def total_size(o, handlers={}, verbose=False):
    """ Returns the approximate memory footprint an object and all of its contents.

    Automatically finds the contents of the following builtin containers and
    their subclasses:  tuple, list, deque, dict, set and frozenset.
    To search other containers, add handlers to iterate over their contents:

        handlers = {SomeContainerClass: iter,
                    OtherContainerClass: OtherContainerClass.get_elements}

    """
    # Map container type -> callable yielding its children; user handlers
    # take precedence over the builtin ones.
    all_handlers = {
        tuple: iter,
        list: iter,
        deque: iter,
        dict: lambda d: chain.from_iterable(d.items()),
        set: iter,
        frozenset: iter,
    }
    all_handlers.update(handlers)
    seen = set()                   # ids already visited — count each object once
    default_size = getsizeof(0)    # fallback for objects without __sizeof__

    def _walk(obj):
        obj_id = id(obj)
        if obj_id in seen:
            # Shared or cyclic reference: do not double count.
            return 0
        seen.add(obj_id)
        subtotal = getsizeof(obj, default_size)

        if verbose:
            print(subtotal, type(obj), repr(obj), file=stderr)

        # First matching container type wins; recurse into its children.
        for container_type, get_children in all_handlers.items():
            if isinstance(obj, container_type):
                subtotal += sum(map(_walk, get_children(obj)))
                break
        return subtotal

    return _walk(o)


##### Example call #####

# Snapshot locals() first (avoids "dictionary changed size during iteration"),
# then report the 20 largest variables by deep size, biggest first.
for var_name, nbytes in sorted(
        ((key, total_size(val, verbose=False)) for key, val in list(locals().items())),
        key=lambda entry: entry[1], reverse=True)[:20]:
    print("{:>30}: {:>8}".format(var_name, sizeof_fmt(nbytes)))

    

Comments

0

I made some more edits based on dinatrina's answer, and incorporating the not-double-counting from Dara O h's.

This includes special handling for pandas DataFrames. It prints out the types of objects that aren't handled, in case one of them is a container and you're missing big chunks of memory.

import sys
# pandas is optional: when present, DataFrames are sized via their own
# memory_usage(deep=True) accounting instead of sys.getsizeof.
try:
    import pandas as pd
    has_pandas = True
except ImportError:
    has_pandas = False

# Types already reported by get_real_size's "Type not handled" warning,
# so each unhandled type is printed only once.
types_seen = set()

def get_real_size(obj, ids_seen):
    """Recursively calculate the real memory size of an object.

    Returns a (size_in_bytes, updated_ids_seen) pair; ids_seen is an
    immutable set of object ids already counted, threaded through the
    recursion so shared objects are only counted once.
    """
    # Skip the module-level type registry itself and anything already counted.
    if obj is types_seen or id(obj) in ids_seen:
        return 0, ids_seen

    ids_seen = ids_seen | {id(obj)}

    if has_pandas and isinstance(obj, pd.DataFrame):
        # DataFrames know their own deep memory usage; trust it.
        return obj.memory_usage(deep=True).sum(), ids_seen

    total = sys.getsizeof(obj)

    if isinstance(obj, dict):
        for k, v in obj.items():
            for part in (k, v):
                part_size, ids_seen = get_real_size(part, ids_seen)
                total += part_size
    elif isinstance(obj, (list, tuple, set, frozenset)):
        for element in obj:
            element_size, ids_seen = get_real_size(element, ids_seen)
            total += element_size
    else:
        # Warn once per unhandled type, in case it is a container whose
        # contents we are silently missing.
        kind = type(obj)
        if kind not in types_seen:
            print(f'Type not handled: {kind}')
            types_seen.add(kind)
    return total, ids_seen

def get_memory_usage():
    """Print per-variable memory usage of this module's globals, largest first.

    User variables are listed individually; underscore-prefixed system
    variables are collapsed into one '_system_vars' bucket (modules and
    callables are excluded entirely).  Sizes come from get_real_size with a
    single ids_seen set threaded through every call, so a shared object is
    attributed only to the first variable that reaches it.  Returns None.
    """
    import inspect
    # Separate user-defined and system variables
    user_vars = {k: v for k, v in globals().items() if not k.startswith('_') and not callable(v) and not inspect.ismodule(v)}
    system_vars = {k: v for k, v in globals().items() if k.startswith('_') and not callable(v) and not inspect.ismodule(v)}

    ids_seen = frozenset()

    memory_usage = {}
    for k, v in user_vars.items():
        size, ids_seen = get_real_size(v, ids_seen)
        memory_usage[k] = size

    system_memory = 0
    for v in system_vars.values():
        size, ids_seen = get_real_size(v, ids_seen)
        system_memory += size

    # Take the user total BEFORE the synthetic '_system_vars' entry is added.
    # The original summed memory_usage.values() afterwards, so system memory
    # leaked into the "User Variables" line and was counted twice in
    # "Combined Total".
    user_memory = sum(memory_usage.values())

    # Convert bytes to human-readable format
    def sizeof_fmt(num, suffix='B'):
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(num) < 1024.0:
                return f"{num:3.1f}{unit}{suffix}"
            num /= 1024.0
        return f"{num:.1f}Yi{suffix}"

    # Format memory usage, largest first
    memory_usage['_system_vars'] = system_memory
    memory_usage_items = sorted(memory_usage.items(), key=lambda x: x[1], reverse=True)
    formatted_usage = [(k, sizeof_fmt(v)) for k, v in memory_usage_items]

    # Print the sorted list
    for var, size in formatted_usage:
        print("{:>30}: {:>8}".format(var, size))

    # Print totals
    print("\nTotal Memory Usage:")
    print(f"User Variables: {sizeof_fmt(user_memory)}")
    print(f"System Variables: {sizeof_fmt(system_memory)}")
    print(f"Combined Total:     {sizeof_fmt(system_memory + user_memory)}")

get_memory_usage()

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.