From 6f7dc4a44da0784c89aaaafe96a869c2c00dd7b7 Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Fri, 3 Apr 2026 18:20:03 +0300
Subject: [PATCH 1/6] (improvement) metadata: add lightweight _RowView and
 _row_factory

Introduce _RowView, a __slots__-based read-only row wrapper that stores
data as tuples with a shared column-name-to-index map, and _row_factory
that creates these views. _RowView inherits from collections.abc.Mapping,
providing a complete dict-like read interface.

This eliminates per-row dict allocation during schema parsing. All rows
from the same result set share a single index map object.
---
 cassandra/metadata.py | 50 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/cassandra/metadata.py b/cassandra/metadata.py
index 43399b7152..b49b47d0c2 100644
--- a/cassandra/metadata.py
+++ b/cassandra/metadata.py
@@ -46,6 +46,56 @@
 from cassandra.tablets import Tablets
 from cassandra.util import maybe_add_timeout_to_query
 
+
+class _RowView(Mapping):
+    """
+    Lightweight read-only view over a row tuple, supporting dict-like access.
+    Shares a single index map across all rows from the same result set,
+    avoiding per-row dict allocation overhead.
+
+    Implements the abstract :class:`collections.abc.Mapping` methods
+    (``__getitem__``, ``__iter__``, ``__len__``), inherits ``keys``, ``values``,
+    ``items`` and ``__eq__``, and overrides ``get`` and ``__contains__``.
+    """
+
+    __slots__ = ("_row", "_index_map")
+
+    def __init__(self, row, index_map):
+        self._row = row
+        self._index_map = index_map
+
+    def __getitem__(self, key):
+        return self._row[self._index_map[key]]
+
+    def __iter__(self):
+        return iter(self._index_map)
+
+    def __len__(self):
+        return len(self._index_map)
+
+    def get(self, key, default=None):
+        idx = self._index_map.get(key)
+        if idx is not None:
+            return self._row[idx]
+        return default
+
+    def __contains__(self, key):
+        return key in self._index_map
+
+    def __repr__(self):
+        return repr({k: self._row[i] for k, i in self._index_map.items()})
+
+
+def _row_factory(colnames, rows):
+    """
+    Lightweight replacement for dict_factory used internally by schema parsers.
+    Returns a list of _RowView objects that support row["key"] and row.get("key");
+    each wraps its row sequence as-is and shares one column-name-to-index map.
+    """
+    index_map = {name: i for i, name in enumerate(colnames)}
+    return [_RowView(row, index_map) for row in rows]
+
+
 log = logging.getLogger(__name__)
 
 cql_keywords = set((

From 4a8d055f240036a2eaa67a7753da0cb6ca1a905b Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Fri, 3 Apr 2026 18:20:38 +0300
Subject: [PATCH 2/6] (improvement) metadata: replace OrderedDict with dict

Python 3.7+ guarantees dict preserves insertion order, making OrderedDict
unnecessary. Replace OrderedDict() with {} in TableMetadata.columns,
TableMetadata.triggers, and MaterializedViewMetadata.columns. Remove the
now-unused OrderedDict import.
---
 cassandra/metadata.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/cassandra/metadata.py b/cassandra/metadata.py
index b49b47d0c2..1c34a24988 100644
--- a/cassandra/metadata.py
+++ b/cassandra/metadata.py
@@ -40,7 +40,7 @@
 from cassandra.marshal import varint_unpack
 from cassandra.protocol import QueryMessage
 from cassandra.query import dict_factory, bind_params
-from cassandra.util import OrderedDict, Version
+from cassandra.util import Version
 from cassandra.pool import HostDistance
 from cassandra.connection import EndPoint
 from cassandra.tablets import Tablets
@@ -1380,11 +1380,11 @@ def __init__(self, keyspace_name, name, partition_key=None, clustering_key=None,
         self.name = name
         self.partition_key = [] if partition_key is None else partition_key
         self.clustering_key = [] if clustering_key is None else clustering_key
-        self.columns = OrderedDict() if columns is None else columns
+        self.columns = {} if columns is None else columns
         self.indexes = {}
         self.options = {} if options is None else options
         self.comparator = None
-        self.triggers = OrderedDict() if triggers is None else triggers
+        self.triggers = {} if triggers is None else triggers
         self.views = {}
         self.virtual = virtual
 
@@ -2796,7 +2796,7 @@ def _build_table_columns(self, meta, col_rows, compact_static=False, is_dense=Fa
             partition_rows = sorted(partition_rows, key=lambda row: row.get('position'))
         for r in partition_rows:
             # we have to add meta here (and not in the later loop) because TableMetadata.columns is an
-            # OrderedDict, and it assumes keys are inserted first, in order, when exporting CQL
+            # dict (ordered since Python 3.7), and it assumes keys are inserted first, in order, when exporting CQL
             column_meta = self._build_column_metadata(meta, r)
             meta.columns[column_meta.name] = column_meta
             meta.partition_key.append(meta.columns[r.get('column_name')])
@@ -3378,7 +3378,7 @@ def __init__(self, keyspace_name, view_name, base_table_name, include_all_column
         self.base_table_name = base_table_name
         self.partition_key = []
         self.clustering_key = []
-        self.columns = OrderedDict()
+        self.columns = {}
         self.include_all_columns = include_all_columns
         self.where_clause = where_clause
         self.options = options or {}

From d5f0ae0efc1a118bcd6c40f56a13bb2a511d2ab7 Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Fri, 3 Apr 2026 18:21:14 +0300
Subject: [PATCH 3/6] (improvement) metadata: select only needed columns from
 system_schema.columns

Replace SELECT * with an explicit column list for the system_schema.columns
query in SchemaParserV3 (inherited by V4). Only the 7 columns actually
consumed by the parser are fetched: keyspace_name, table_name, column_name,
clustering_order, kind, position, type. This reduces network transfer and
deserialization overhead during schema refresh.
---
 cassandra/metadata.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cassandra/metadata.py b/cassandra/metadata.py
index 1c34a24988..24b9901c06 100644
--- a/cassandra/metadata.py
+++ b/cassandra/metadata.py
@@ -2619,7 +2619,10 @@ class SchemaParserV3(SchemaParserV22):
     """
     _SELECT_KEYSPACES = "SELECT * FROM system_schema.keyspaces"
     _SELECT_TABLES = "SELECT * FROM system_schema.tables"
-    _SELECT_COLUMNS = "SELECT * FROM system_schema.columns"
+    # Only fetch the columns used by _build_column_metadata / _build_table_columns.
+    # If _build_column_metadata or _build_table_columns needs more columns, this query
+    # should be updated accordingly.
+    _SELECT_COLUMNS = "SELECT keyspace_name, table_name, column_name, clustering_order, kind, position, type FROM system_schema.columns"
     _SELECT_INDEXES = "SELECT * FROM system_schema.indexes"
     _SELECT_TRIGGERS = "SELECT * FROM system_schema.triggers"
     _SELECT_TYPES = "SELECT * FROM system_schema.types"

From b23c7ae19d20d73182bf8e92c750e030f9107611 Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Fri, 3 Apr 2026 18:22:15 +0300
Subject: [PATCH 4/6] (improvement) metadata: replace dict_factory with
 lightweight _RowView

Replace dict_factory in _SchemaParser._handle_results and
get_column_from_system_local with _row_factory, eliminating per-row
dict allocation during schema parsing.

Also refactor SchemaParserV4._build_keyspace_metadata_internal to read
from the row without mutating it, since _RowView is read-only.

Note: V22-only dict_factory call sites are left unchanged as they do not
affect the V3/V4 code path (V3 and V4 fully override _query_all).
---
 cassandra/metadata.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/cassandra/metadata.py b/cassandra/metadata.py
index 24b9901c06..195a3fa50e 100644
--- a/cassandra/metadata.py
+++ b/cassandra/metadata.py
@@ -2057,7 +2057,7 @@ def get_next_pages():
                         yield next_result.parsed_rows
 
                 result.parsed_rows += itertools.chain(*get_next_pages())
-            return dict_factory(result.column_names, result.parsed_rows) if result else []
+            return _row_factory(result.column_names, result.parsed_rows) if result else []
         else:
             raise result
 
@@ -3109,11 +3109,12 @@ def get_all_keyspaces(self):
 
     @staticmethod
     def _build_keyspace_metadata_internal(row):
-        # necessary fields that aren't int virtual ks
-        row["durable_writes"] = row.get("durable_writes", None)
-        row["replication"] = row.get("replication", {})
-        row["replication"]["class"] = row["replication"].get("class", None)
-        return super(SchemaParserV4, SchemaParserV4)._build_keyspace_metadata_internal(row)
+        # Virtual keyspaces lack durable_writes/replication, so default them
+        # here; copy instead of mutating, since _RowView is read-only.
+        name = row["keyspace_name"]
+        replication = dict(row.get("replication") or {})
+        replication_class = replication.pop("class", None)
+        return KeyspaceMetadata(name, row.get("durable_writes"), replication_class, replication)
 
 
 class SchemaParserDSE67(SchemaParserV4):
@@ -3517,7 +3518,7 @@ def get_column_from_system_local(connection, column_name: str, timeout, metadata
         , timeout=timeout, fail_on_error=False)
     if not success or not local_result.parsed_rows:
         return ""
-    local_rows = dict_factory(local_result.column_names, local_result.parsed_rows)
+    local_rows = _row_factory(local_result.column_names, local_result.parsed_rows)
     local_row = local_rows[0]
     return local_row.get(column_name)
 

From a38bbfe7e30b450e49b5adbac1369e303b6b28e2 Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Fri, 3 Apr 2026 18:26:17 +0300
Subject: [PATCH 5/6] (improvement) metadata: single-pass _build_table_columns

Replace three filtering passes over col_rows (two list comprehensions
and a generator expression) with a single classification loop that
buckets columns into partition, clustering, and other rows. Also use
in-place sort() instead of sorted() and reuse the already-built
column_meta instead of a redundant dict lookup.
---
 cassandra/metadata.py | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/cassandra/metadata.py b/cassandra/metadata.py
index 195a3fa50e..f6ad7f55b5 100644
--- a/cassandra/metadata.py
+++ b/cassandra/metadata.py
@@ -2792,31 +2792,40 @@ def _build_table_options(self, row):
         return dict((o, row.get(o)) for o in self.recognized_table_options if o in row)
 
     def _build_table_columns(self, meta, col_rows, compact_static=False, is_dense=False, virtual=False):
-        # partition key
-        partition_rows = [r for r in col_rows
-                          if r.get('kind', None) == "partition_key"]
+        # Single-pass classification of column rows by kind
+        partition_rows = []
+        clustering_rows = []
+        other_rows = []
+        for r in col_rows:
+            kind = r.get('kind', None)
+            if kind == "partition_key":
+                partition_rows.append(r)
+            elif kind == "clustering":
+                if not compact_static:
+                    clustering_rows.append(r)
+                # else: skip clustering rows entirely for compact_static tables
+            else:
+                other_rows.append(r)
+
+        # partition key - must be inserted first into meta.columns for CQL export ordering
         if len(partition_rows) > 1:
-            partition_rows = sorted(partition_rows, key=lambda row: row.get('position'))
+            partition_rows.sort(key=lambda row: row.get('position'))
         for r in partition_rows:
-            # we have to add meta here (and not in the later loop) because TableMetadata.columns is an
-            # dict (ordered since Python 3.7), and it assumes keys are inserted first, in order, when exporting CQL
             column_meta = self._build_column_metadata(meta, r)
             meta.columns[column_meta.name] = column_meta
-            meta.partition_key.append(meta.columns[r.get('column_name')])
+            meta.partition_key.append(column_meta)
 
         # clustering key
-        if not compact_static:
-            clustering_rows = [r for r in col_rows
-                               if r.get('kind', None) == "clustering"]
+        if clustering_rows:
             if len(clustering_rows) > 1:
-                clustering_rows = sorted(clustering_rows, key=lambda row: row.get('position'))
+                clustering_rows.sort(key=lambda row: row.get('position'))
             for r in clustering_rows:
                 column_meta = self._build_column_metadata(meta, r)
                 meta.columns[column_meta.name] = column_meta
-                meta.clustering_key.append(meta.columns[r.get('column_name')])
+                meta.clustering_key.append(column_meta)
 
-        for col_row in (r for r in col_rows
-                        if r.get('kind', None) not in ('partition_key', 'clustering')):
+        # remaining columns (static, regular, etc.)
+        for col_row in other_rows:
             column_meta = self._build_column_metadata(meta, col_row)
             if is_dense and column_meta.cql_type == types.cql_empty_type:
                 continue

From eda10866497186efe97b15ead968a1a76a40adb1 Mon Sep 17 00:00:00 2001
From: Yaniv Michael Kaul <yaniv.kaul@scylladb.com>
Date: Fri, 3 Apr 2026 18:27:18 +0300
Subject: [PATCH 6/6] tests: add _RowView and _row_factory unit tests

Cover __getitem__, get(), __contains__, __repr__, shared index map,
read-only enforcement, empty input, single-column, and multi-row
scenarios.
---
 tests/unit/test_metadata.py | 66 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_metadata.py b/tests/unit/test_metadata.py
index dcbb840447..6d33ccec93 100644
--- a/tests/unit/test_metadata.py
+++ b/tests/unit/test_metadata.py
@@ -32,7 +32,8 @@
                                 _UnknownStrategy, ColumnMetadata, TableMetadata,
                                 IndexMetadata, Function, Aggregate,
                                 Metadata, TokenMap, ReplicationFactor,
-                                SchemaParserDSE68)
+                                SchemaParserDSE68,
+                                _RowView, _row_factory)
 from cassandra.policies import SimpleConvictionPolicy
 from cassandra.pool import Host
 from cassandra.protocol import QueryMessage
@@ -846,3 +847,66 @@ def test_strip_frozen(self):
         for argument, expected_result in argument_to_expected_results:
             result = strip_frozen(argument)
             assert result == expected_result, "strip_frozen() arg: {}".format(argument)
+
+class RowViewTest(unittest.TestCase):
+    """Tests for the internal _RowView and _row_factory helpers."""
+
+    def test_getitem(self):
+        rv = _RowView(("a_val", "b_val"), {"a": 0, "b": 1})
+        self.assertEqual(rv["a"], "a_val")
+        self.assertEqual(rv["b"], "b_val")
+
+    def test_getitem_missing_key(self):
+        rv = _RowView(("a_val",), {"a": 0})
+        with self.assertRaises(KeyError):
+            rv["missing"]
+
+    def test_get_present(self):
+        rv = _RowView(("a_val", "b_val"), {"a": 0, "b": 1})
+        self.assertEqual(rv.get("a"), "a_val")
+        self.assertEqual(rv.get("b"), "b_val")
+
+    def test_get_missing_returns_default(self):
+        rv = _RowView(("a_val",), {"a": 0})
+        self.assertIsNone(rv.get("missing"))
+        self.assertEqual(rv.get("missing", 42), 42)
+
+    def test_contains(self):
+        rv = _RowView(("a_val",), {"a": 0})
+        self.assertIn("a", rv)
+        self.assertNotIn("b", rv)
+
+    def test_repr(self):
+        rv = _RowView(("a_val", "b_val"), {"a": 0, "b": 1})
+        r = repr(rv)
+        self.assertIn("'a'", r)
+        self.assertIn("'a_val'", r)
+
+    def test_shared_index_map(self):
+        """All _RowView objects from the same _row_factory call share one index map."""
+        rows = _row_factory(["x", "y"], [("x1", "y1"), ("x2", "y2")])
+        self.assertIs(rows[0]._index_map, rows[1]._index_map)
+
+    def test_read_only(self):
+        """_RowView must not allow item assignment or deletion."""
+        rv = _RowView(("val",), {"col": 0})
+        with self.assertRaises(TypeError):
+            rv["col"] = "new"
+        with self.assertRaises(TypeError):
+            del rv["col"]
+
+    def test_row_factory_empty(self):
+        result = _row_factory(["a", "b"], [])
+        self.assertEqual(result, [])
+
+    def test_row_factory_single_column(self):
+        rows = _row_factory(["only"], [("v1",), ("v2",)])
+        self.assertEqual(rows[0]["only"], "v1")
+        self.assertEqual(rows[1]["only"], "v2")
+
+    def test_row_factory_values(self):
+        rows = _row_factory(["id", "name"], [(1, "alice"), (2, "bob")])
+        self.assertEqual(rows[0]["id"], 1)
+        self.assertEqual(rows[0]["name"], "alice")
+        self.assertEqual(rows[1]["id"], 2)
+        self.assertEqual(rows[1]["name"], "bob")