I have an existing DataFrame that looks like this:
import pandas as pd
import numpy as np
d = pd.DataFrame({"x": ["a", "b", "c"],
"y": [7, 8, 9],
"value": [np.array([2, 3]), np.array([3, 4, 5]), np.array([4, 5, 6])]},
index=[0, 0, 0])
d
#> x y value
#> 0 a 7 [2, 3]
#> 0 b 8 [3, 4, 5]
#> 0 c 9 [4, 5, 6]
Now let's say I want to append a 9 to the array stored in the "value" column of the row where x == "b". I can create the replacement array simply enough:
ix = d['x'] == "b"
np.append(d.loc[ix, "value"].iloc[0], 9)
#> array([3, 4, 5, 9])
But the most obvious solutions for inserting into the DataFrame don't seem to work:
d[ix, 'value'] = np.append(d.loc[ix, "value"].iloc[0], 9)
#> Traceback (most recent call last):
#> File "<stdin>", line 1, in <module>
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3163, in __setitem__
#> self._set_item(key, value)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3242, in _set_item
#> value = self._sanitize_column(key, value)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3899, in _sanitize_column
#> value = sanitize_index(value, self.index)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/internals/construction.py", line 752, in sanitize_index
#> "Length of values "
#> ValueError: Length of values (4) does not match length of index (3)
d.loc[ix, 'value'] = np.append(d.loc[ix, "value"].iloc[0], 9)
#> Traceback (most recent call last):
#> File "<stdin>", line 1, in <module>
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/indexing.py", line 692, in __setitem__
#> iloc._setitem_with_indexer(indexer, value, self.name)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/indexing.py", line 1635, in _setitem_with_indexer
#> self._setitem_with_indexer_split_path(indexer, value, name)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/indexing.py", line 1689, in _setitem_with_indexer_split_path
#> "Must have equal len keys and value "
#> ValueError: Must have equal len keys and value when setting with an iterable
d.loc[ix]['value'] = np.append(d.loc[ix, "value"].iloc[0], 9)
#> Traceback (most recent call last):
#> File "<stdin>", line 1, in <module>
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3163, in __setitem__
#> self._set_item(key, value)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3242, in _set_item
#> value = self._sanitize_column(key, value)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3899, in _sanitize_column
#> value = sanitize_index(value, self.index)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/internals/construction.py", line 752, in sanitize_index
#> "Length of values "
#> ValueError: Length of values (4) does not match length of index (1)
# No error, but doesn't have any effect either:
d.loc[ix, 'value'].iloc[0] = np.append(d.loc[ix, "value"].iloc[0], 9)
d
#> x y value
#> 0 a 7 [2, 3]
#> 0 b 8 [3, 4, 5]
#> 0 c 9 [4, 5, 6]
The only thing I've figured out is to extract the row, modify the row, and then stick the row back in:
row = d.loc[ix]
# Seems like it should work, but doesn't:
row['value'] = np.append(d.loc[ix, "value"].iloc[0], 9)
#> Traceback (most recent call last):
#> File "<stdin>", line 1, in <module>
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3163, in __setitem__
#> self._set_item(key, value)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3242, in _set_item
#> value = self._sanitize_column(key, value)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3899, in _sanitize_column
#> value = sanitize_index(value, self.index)
#> File "/private/tmp/venv/lib/python3.7/site-packages/pandas/core/internals/construction.py", line 752, in sanitize_index
#> "Length of values "
#> ValueError: Length of values (4) does not match length of index (1)
# Wrapping it in a list works, for no good reason I can figure out - I get a warning, but hey, it works...
row['value'] = [np.append(d.loc[ix, "value"].iloc[0], 9)]
#> __main__:1: SettingWithCopyWarning:
#> A value is trying to be set on a copy of a slice from a DataFrame.
#> Try using .loc[row_indexer,col_indexer] = value instead
#>
#> See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
d.loc[ix] = row
d
#> x y value
#> 0 a 7 [2, 3]
#> 0 b 8 [3, 4, 5, 9]
#> 0 c 9 [4, 5, 6]
Is there a better way to replace a single array-valued cell in the DataFrame?
Here are my versions:
sys.version
#> '3.7.3 (default, Mar 27 2019, 16:54:48) \n[Clang 4.0.1 (tags/RELEASE_401/final)]'
pd.__version__
#> '1.2.3'
np.__version__
#> '1.20.2'