Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Also accept any iterables for row_ids
  • Loading branch information
plamut committed Jul 1, 2021
commit 3940498a30306b9312de5541e9894441242ceae9
27 changes: 22 additions & 5 deletions google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3350,7 +3350,7 @@ def insert_rows_json(
self,
table: Union[Table, TableReference, str],
json_rows: Sequence[Dict],
row_ids: Union[Sequence[str], AutoRowIDs, None] = AutoRowIDs.GENERATE_UUID,
row_ids: Union[Iterable[str], AutoRowIDs, None] = AutoRowIDs.GENERATE_UUID,
skip_invalid_rows: bool = None,
ignore_unknown_values: bool = None,
template_suffix: str = None,
Expand All @@ -3372,15 +3372,19 @@ def insert_rows_json(
json_rows (Sequence[Dict]):
Row data to be inserted. Keys must match the table schema fields
and values must be JSON-compatible representations.
row_ids (Union[Sequence[str], AutoRowIDs, None]):
row_ids (Union[Iterable[str], AutoRowIDs, None]):
Unique IDs, one per row being inserted. An ID can also be
``None``, indicating that an explicit insert ID should **not**
be used for that row. If the argument is omitted altogether,
unique IDs are created automatically.

.. versionchanged:: 2.21.0
    The argument may now be any iterable of IDs (not only a sequence),
    or an :class:`AutoRowIDs` enum member.

.. deprecated:: 2.21.0
Passing ``None`` to explicitly request autogenerating insert IDs is
deprecated, use :attr:`.AutoRowIDs.GENERATE_UUID` instead.
deprecated, use :attr:`AutoRowIDs.GENERATE_UUID` instead.

skip_invalid_rows (Optional[bool]):
Insert all valid rows of a request, even if invalid rows exist.
Expand Down Expand Up @@ -3429,15 +3433,28 @@ def insert_rows_json(
)
row_ids = AutoRowIDs.GENERATE_UUID

for index, row in enumerate(json_rows):
if not isinstance(row_ids, AutoRowIDs):
try:
row_ids_iter = iter(row_ids)
except TypeError:
msg = "row_ids is neither an iterable nor an AutoRowIDs enum member"
raise TypeError(msg)

for i, row in enumerate(json_rows):
info = {"json": row}

if row_ids is AutoRowIDs.GENERATE_UUID:
info["insertId"] = str(uuid.uuid4())
elif row_ids is AutoRowIDs.DISABLED:
info["insertId"] = None
else:
info["insertId"] = row_ids[index]
try:
insert_id = next(row_ids_iter)
except StopIteration:
msg = f"row_ids did not generate enough IDs, error at index {i}"
raise ValueError(msg)
else:
info["insertId"] = insert_id

rows_info.append(info)

Expand Down
49 changes: 42 additions & 7 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5544,29 +5544,64 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self):
timeout=None,
)

def test_insert_rows_json_with_string_id(self):
rows = [{"col1": "val1"}]
def test_insert_rows_json_with_iterator_row_ids(self):
rows = [{"col1": "val1"}, {"col2": "val2"}, {"col3": "val3"}]
creds = _make_credentials()
http = object()
client = self._make_one(
project="default-project", credentials=creds, _http=http
)
conn = client._connection = make_connection({})

with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))):
errors = client.insert_rows_json("proj.dset.tbl", rows)
row_ids_iter = map(str, itertools.count(42))
errors = client.insert_rows_json("proj.dset.tbl", rows, row_ids=row_ids_iter)

self.assertEqual(len(errors), 0)
expected = {
"rows": [{"json": row, "insertId": str(i)} for i, row in enumerate(rows)]
expected_row_data = {
"rows": [
{"json": {"col1": "val1"}, "insertId": "42"},
{"json": {"col2": "val2"}, "insertId": "43"},
{"json": {"col3": "val3"}, "insertId": "44"},
]
}
conn.api_request.assert_called_once_with(
method="POST",
path="/projects/proj/datasets/dset/tables/tbl/insertAll",
data=expected,
data=expected_row_data,
timeout=None,
)

def test_insert_rows_json_with_non_iterable_row_ids(self):
    """A non-iterable ``row_ids`` argument must be rejected with ``TypeError``."""
    creds = _make_credentials()
    http = object()
    client = self._make_one(
        project="default-project", credentials=creds, _http=http
    )
    client._connection = make_connection({})
    rows = [{"col1": "val1"}]

    with self.assertRaises(TypeError) as exc:
        client.insert_rows_json("proj.dset.tbl", rows, row_ids=object())

    # The error message should name the offending argument and the reason.
    error_text = str(exc.exception)
    self.assertIn("row_ids", error_text)
    self.assertIn("iterable", error_text)

def test_insert_rows_json_with_too_few_row_ids(self):
    """Exhausting ``row_ids`` before all rows are processed raises ``ValueError``."""
    creds = _make_credentials()
    http = object()
    client = self._make_one(
        project="default-project", credentials=creds, _http=http
    )
    client._connection = make_connection({})

    # Three rows, but only two insert IDs supplied.
    rows = [
        {"col1": "val1"},
        {"col2": "val2"},
        {"col3": "val3"},
    ]
    too_few_ids = ["10", "20"]

    with self.assertRaisesRegex(
        ValueError, "row_ids did not generate enough IDs.*index 2"
    ):
        client.insert_rows_json("proj.dset.tbl", rows, row_ids=too_few_ids)

def test_insert_rows_json_w_explicit_none_insert_ids(self):
rows = [{"col1": "val1"}, {"col2": "val2"}]
creds = _make_credentials()
Expand Down