From e8bca03f745ea80b2b59b2d306be898ed10867cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 14 Apr 2026 12:14:15 +0200 Subject: [PATCH 1/6] CLN: remove unused code --- larray/core/group.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/larray/core/group.py b/larray/core/group.py index fa9accfa2..35830a2c3 100644 --- a/larray/core/group.py +++ b/larray/core/group.py @@ -334,10 +334,6 @@ def _can_have_groups(seq) -> bool: return _is_object_array(seq) or isinstance(seq, (tuple, list)) -def _contain_group_ticks(ticks) -> bool: - return _can_have_groups(ticks) and any(isinstance(tick, Group) for tick in ticks) - - def _seq_group_to_name(seq) -> Sequence[Any]: if _can_have_groups(seq): return [v.name if isinstance(v, Group) else v for v in seq] From d9b2d55bc2dae700de5c541315daed68e9f5e0f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 14 Apr 2026 12:29:45 +0200 Subject: [PATCH 2/6] FIX: translating array keys on mixed type labels Axis (fixes #1194) --- doc/source/changes/version_0_35_1.rst.inc | 4 ++++ larray/core/axis.py | 7 ++++++- larray/tests/test_axis.py | 19 ++++++++++++++----- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/doc/source/changes/version_0_35_1.rst.inc b/doc/source/changes/version_0_35_1.rst.inc index 6e1b67d54..de5c5b115 100644 --- a/doc/source/changes/version_0_35_1.rst.inc +++ b/doc/source/changes/version_0_35_1.rst.inc @@ -70,3 +70,7 @@ Fixes (closes :issue:`1166`). * fixed `sequence()` when both `inc` and `mult` are defined. + +* fixed taking the subset of an Axis with mixed type labels (or of an Array + using such an axis) when specifying the labels as a numpy array + (closes :issue:`1194`). \ No newline at end of file diff --git a/larray/core/axis.py b/larray/core/axis.py index ed1c52b8f..ce37014d4 100644 --- a/larray/core/axis.py +++ b/larray/core/axis.py @@ -244,6 +244,8 @@ def _mapping(self) -> Dict[Scalar, int]: def _update_key_values(self) -> Tuple[np.ndarray, np.ndarray]: mapping = self._mapping + assert self.dtype.kind != 'O', ("Axis with object dtype should not use " + "the sorted_keys/values code path") if mapping: sorted_keys, sorted_values = tuple(zip(*sorted(mapping.items()))) else: @@ -1100,7 +1102,10 @@ def index(self, key) -> Union[int, np.ndarray, slice]: # stop is inclusive in the input key and exclusive in the output ! stop = mapping[key.stop] + 1 if key.stop is not None else None return slice(start, stop, key.step) - elif isinstance(key, (tuple, list, OrderedSet)): + elif (isinstance(key, (tuple, list, OrderedSet)) or + # object axis labels can contain mixed-types and those are not + # supported by the array_lookup2 code path + (isinstance(key, np.ndarray) and self.dtype.kind == 'O')): # TODO: the result should be cached # Note that this is faster than array_lookup(np.array(key), mapping) res = np.empty(len(key), int) diff --git a/larray/tests/test_axis.py b/larray/tests/test_axis.py index f023486c4..a813e58fe 100644 --- a/larray/tests/test_axis.py +++ b/larray/tests/test_axis.py @@ -122,11 +122,7 @@ def test_getitem(): def test_index(): - # an axis with labels having the object dtype - a = Axis(np.array(["a0", "a1"], dtype=object), 'a') - assert a.index('a1') == 1 - assert a.index('a1 >> A1') == 1 - + # a normal axis time = Axis([2007, 2009], 'time') res = time.index(time.i[1]) assert res == 1 @@ -137,6 +133,19 @@ def test_index(): res = time.index('time.i[1]') assert res == 1 + # an axis with labels having the object dtype (but homogeneous types) + a = Axis(np.array(["a0", "a1"], dtype=object), 'a') + assert a.index('a1') == 1 + assert a.index('a1 >> A1') == 1 + + # an axis with labels having the object dtype and mixed types + a = Axis(np.array(["a0", 1], dtype=object), 'a') + assert a.index('a0') == 0 + assert a.index(['a0']) == [0] + assert a.index(1) == 1 + # issue #1194 + assert a.index(np.array(['a0'])) == [0] + def test_astype(): arr = ndtest(Axis('time=2015..2020,total')).drop('total') From 58625647cf73c3f70171bc3e8c68bfebb7f342d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 14 Apr 2026 12:35:49 +0200 Subject: [PATCH 3/6] DOC: fix type hint --- larray/core/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/larray/core/group.py b/larray/core/group.py index 35830a2c3..abfe79a9b 100644 --- a/larray/core/group.py +++ b/larray/core/group.py @@ -383,7 +383,7 @@ def _to_tick(v) -> Scalar: return str(v) -def _to_ticks(s, parse_single_int=False) -> Iterable[Scalar]: +def _to_ticks(s, parse_single_int=False) -> np.ndarray: r""" Make a (list of) value(s) usable as the collection of labels for an Axis (ie hashable). From 7d5ea69e1ebcc09753a4f3f87ee37cd87cd6b91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 14 Apr 2026 14:57:17 +0200 Subject: [PATCH 4/6] FIX: Pandas Dataframes with mixed-type indexes come out as mixed-type labels axis (fixes #1193) --- larray/inout/pandas.py | 53 ++++++++++++++++++++++++++++---------- larray/tests/test_array.py | 11 ++++++++ 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index 6f7c257f1..fd13981e1 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -39,27 +39,51 @@ def parse(s): return s -def index_to_labels(idx, sort=True): # -> list[np.ndarray]: +def simple_index_to_labels(idx: pd.Index, sort=True, keep_object=True) -> np.ndarray: + r""" + Return unique labels for a simple index as a numpy array + + keep_object is an option which shouldn't exist (it should be always True) + It is a bug to use keep_object=False, but I introduced the option on purpose + to avoid breaking our users existing code in a bug-fix release (see #1193). + """ + if sort: + dtype = 'O' if keep_object and idx.dtype.kind == 'O' else None + # this will fail for mixed-type labels (as does np.sort(idx.to_numpy())) + labels = np.asarray(sorted(idx.to_list()), dtype=dtype) + else: + if keep_object: + # this is NOT the same as idx.to_numpy() (which we should always + # use) because it converts mixed str-numbers object indexes to a + # single str type. + labels = idx.to_numpy() + else: + labels = np.asarray(idx.to_list()) + return labels + + +def index_to_labels(idx, sort=True, keep_object=True): # -> list[np.ndarray]: r""" Return unique labels for each dimension as a list of numpy arrays + + keep_object means that object dtype indexes will be returned as object + dtype arrays, even if they contain only strings or numbers (see #1193). """ if isinstance(idx, pd.MultiIndex): if sort: - return [level.to_numpy() for level in idx.levels] + # idx.levels is a FrozenList of Index objects (which are already + # sorted) + return [simple_index_to_labels(idx_for_level, sort=False, + keep_object=keep_object) + for idx_for_level in idx.levels] else: # requires Pandas >= 0.23 (and it does NOT sort the values) - return [idx.unique(level_num).to_numpy() + return [simple_index_to_labels(idx.unique(level_num), sort=False, + keep_object=keep_object) for level_num in range(idx.nlevels)] else: assert isinstance(idx, pd.Index) - if sort: - # TODO: we should probably only pass via Python when the dtype is - # object - # For object arrays, it is often faster to sort the labels - # in Python than to use np.sort, which is very slow in that case - return [np.asarray(sorted(idx.to_list()))] - else: - return [idx.to_numpy()] + return [simple_index_to_labels(idx, sort=sort, keep_object=keep_object)] def product_index(idx, sort=False): @@ -330,8 +354,11 @@ def from_frame(df, if sort_rows or sort_columns: raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. ' 'Please call the method sort_labels on the returned array to sort rows or columns') - index_labels = index_to_labels(df.index, sort=False) - column_labels = index_to_labels(df.columns, sort=False) + # keep_object=False is an intentional bug to avoid breaking backwards + # compatibility (see issue #1193) + index_labels = index_to_labels(df.index, sort=False, keep_object=False) + column_labels = index_to_labels(df.columns, sort=False, + keep_object=False) axes_labels = index_labels + column_labels # Pandas treats column labels as column names (strings) so we need to convert them to values diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 3886c41f2..4d22ac7b7 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -4852,6 +4852,17 @@ def test_from_frame(): assert_larray_equal(res, expected) assert res.data.flags.writeable + # test that mixed-type (str-number) multi index come out as a mixed type + # labels axis (see issue #1193) + df = pd.DataFrame([['s0', 'a', 0], ['s0', 1, 1]], + columns=['str', 'obj', 'value']).set_index(['str', 'obj']) + res = from_frame(df) + expected_axes = [Axis('str=s0'), + Axis(np.array(['a', 1], dtype=object), 'obj'), + Axis(['value'])] + expected = Array([[[0], [1]]], expected_axes) + assert_larray_equal(res, expected) + def test_asarray(): series = pd.Series([0, 1, 2], ['a0', 'a1', 'a2'], name='a') From 579bf89e4f4f024f7d7d8fca5e975c0c9ac3ea35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Tue, 14 Apr 2026 16:56:38 +0200 Subject: [PATCH 5/6] FIX: make sure Dataframes with a DatetimeIndex come out as str labels as before (fixes #1187) In fact, this is about reintroducing a bug I fixed by accident to avoid breaking users existing code in a bugfix release --- larray/inout/pandas.py | 4 ++++ larray/tests/test_array.py | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index fd13981e1..785ff1918 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -59,6 +59,10 @@ def simple_index_to_labels(idx: pd.Index, sort=True, keep_object=True) -> np.nda labels = idx.to_numpy() else: labels = np.asarray(idx.to_list()) + # this is a bug introduced on purpose to keep backward compatibility + # (see issue #1187) + if isinstance(idx, pd.DatetimeIndex): + labels = labels.astype(str) return labels diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 4d22ac7b7..343c6daa6 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -4134,6 +4134,8 @@ def test_read_excel_pandas(): def test_from_lists(): + from datetime import datetime + expected = ndtest((2, 2, 3)) # simple @@ -4200,6 +4202,15 @@ def test_from_lists(): ['c1', 'FO', 0, 0, 2]], sort_columns=True) assert_larray_equal(sorted_arr, expected) + # with datetime labels + res = from_lists([['str', 'date', 'value'], + ['abc', datetime.now(), 1]], + nb_axes=3) + # this is what we SHOULD return but we do not so far to avoid breaking + # backward compatibility (see issue #1187) + # assert res.axes[1].dtype == 'datetime64[ns]' + assert res.axes[1].dtype == ' Date: Tue, 14 Apr 2026 17:10:43 +0200 Subject: [PATCH 6/6] TEST: make the test for issue #1187 less picky as the exact length can vary by platform --- larray/tests/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 343c6daa6..8e841568d 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -4209,7 +4209,7 @@ def test_from_lists(): # this is what we SHOULD return but we do not so far to avoid breaking # backward compatibility (see issue #1187) # assert res.axes[1].dtype == 'datetime64[ns]' - assert res.axes[1].dtype == '