Skip to content

Commit 1074e9f

Browse files
committed
Support for empty InsertIds when inserting rows.
1 parent 2af8fc2 commit 1074e9f

File tree

5 files changed

+77
-24
lines changed

5 files changed

+77
-24
lines changed

apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2.IntegrationTests/InsertTest.cs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,9 @@
1313
// limitations under the License.
1414

1515
using Google.Cloud.ClientTesting;
16-
using Google.Cloud.Storage.V1;
1716
using System;
1817
using System.Collections.Generic;
1918
using System.Linq;
20-
using System.Reflection;
21-
using System.Threading;
2219
using Xunit;
2320

2421
namespace Google.Cloud.BigQuery.V2.IntegrationTests
@@ -70,6 +67,30 @@ public void InsertRows()
7067
Assert.Contains(rowsAfter, r => (string)r["player"] == "Lisa");
7168
}
7269

70+
[Fact]
71+
public void InsertRows_AllowEmptyInsertIds()
72+
{
73+
var client = BigQueryClient.Create(_fixture.ProjectId);
74+
var dataset = client.GetDataset(_fixture.DatasetId);
75+
var table = dataset.GetTable(_fixture.HighScoreTableId);
76+
var options = new InsertOptions { AllowEmptyInsertIds = true };
77+
78+
var rows = new[]
79+
{
80+
BuildRow("Helen", 125, new DateTime(2012, 5, 22, 1, 20, 30, DateTimeKind.Utc)),
81+
BuildRow("Henry", 90, new DateTime(2011, 10, 12, 0, 0, 0, DateTimeKind.Utc))
82+
};
83+
84+
_fixture.InsertAndWait(table, () => table.InsertRows(rows, options), 2);
85+
86+
Assert.Null(rows[0].InsertId);
87+
Assert.Null(rows[1].InsertId);
88+
89+
var rowsAfter = table.ListRows().ToList();
90+
Assert.Contains(rowsAfter, r => (string)r["player"] == "Helen");
91+
Assert.Contains(rowsAfter, r => (string)r["player"] == "Henry");
92+
}
93+
7394
public static IEnumerable<object[]> BadDataThrowsOptions
7495
{
7596
get

apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2.Tests/BigQueryInsertRowTest.cs

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ private void AssertInvalidValueWithListCheck<T>(string name, T value)
4242
var list = new List<T>();
4343
var row = new BigQueryInsertRow { { name, list } };
4444
list.Add(value);
45-
Assert.Throws<InvalidOperationException>(() => row.ToRowsData());
45+
Assert.Throws<InvalidOperationException>(() => row.ToRowsData(false));
4646
}
4747

4848
private void AssertInvalid(string name, object value)
@@ -54,7 +54,7 @@ private void AssertInvalid(string name, object value)
5454
}
5555

5656
[Fact]
57-
public void ToRowsData_NoSpecifiedInsertId()
57+
public void ToRowsData_NoSpecifiedInsertId_NoEmptyInsertIdAllowed()
5858
{
5959
var row = new BigQueryInsertRow
6060
{
@@ -63,22 +63,40 @@ public void ToRowsData_NoSpecifiedInsertId()
6363
};
6464
Assert.Equal("value1", row["field1"]);
6565
row["field3"] = 2;
66-
var rowData = row.ToRowsData();
66+
var rowData = row.ToRowsData(false);
6767
Assert.Equal("value1", rowData.Json["field1"]);
6868
Assert.Null(rowData.Json["field2"]);
6969
Assert.Equal(2, rowData.Json["field3"]);
7070
// The insert ID should be populated automatically if not supplied by the user.
7171
Assert.NotNull(rowData.InsertId);
7272
}
7373

74+
[Fact]
75+
public void ToRowsData_NoSpecifiedInsertId_EmptyInsertIdAllowed()
76+
{
77+
var row = new BigQueryInsertRow
78+
{
79+
{ "field1", "value1" },
80+
{ "field2", null }
81+
};
82+
Assert.Equal("value1", row["field1"]);
83+
row["field3"] = 2;
84+
var rowData = row.ToRowsData(true);
85+
Assert.Equal("value1", rowData.Json["field1"]);
86+
Assert.Null(rowData.Json["field2"]);
87+
Assert.Equal(2, rowData.Json["field3"]);
88+
// The insert ID won't be populated automatically.
89+
Assert.Null(rowData.InsertId);
90+
}
91+
7492
[Fact]
7593
public void ToRowsData_WithInsertId()
7694
{
7795
var row = new BigQueryInsertRow("my-id")
7896
{
7997
{ "field1", "value1" },
8098
};
81-
var rowData = row.ToRowsData();
99+
var rowData = row.ToRowsData(false);
82100
Assert.Equal("value1", rowData.Json["field1"]);
83101
Assert.Equal("my-id", rowData.InsertId);
84102
}
@@ -115,7 +133,7 @@ public void SupportedValueTypes_Passthrough(Type type)
115133
{
116134
object value = Activator.CreateInstance(type);
117135
var row = new BigQueryInsertRow { { "field", value } };
118-
var rowData = row.ToRowsData();
136+
var rowData = row.ToRowsData(false);
119137
Assert.Equal(value, rowData.Json["field"]);
120138
}
121139

@@ -124,7 +142,7 @@ public void Numeric_Json()
124142
{
125143
object value = BigQueryNumeric.Parse("123.456");
126144
var row = new BigQueryInsertRow { { "field", value } };
127-
var rowData = row.ToRowsData();
145+
var rowData = row.ToRowsData(false);
128146
Assert.Equal("123.456", rowData.Json["field"]);
129147
}
130148

@@ -136,7 +154,7 @@ public void DateTimeOffsetFormatting()
136154
// 3am UTC
137155
{ "field", new DateTimeOffset(2000, 1, 1, 5, 0, 0, TimeSpan.FromHours(2)) },
138156
};
139-
var rowData = row.ToRowsData();
157+
var rowData = row.ToRowsData(false);
140158
Assert.Equal("2000-01-01T03:00:00Z", rowData.Json["field"]);
141159
}
142160

@@ -160,7 +178,7 @@ public void UnspecifiedDateTimeFormatting()
160178
{
161179
{ "field", new DateTime(2000, 1, 1, 5, 0, 0, DateTimeKind.Unspecified) },
162180
};
163-
var rowData = row.ToRowsData();
181+
var rowData = row.ToRowsData(false);
164182
Assert.Equal("2000-01-01T05:00:00", rowData.Json["field"]);
165183
}
166184

@@ -171,7 +189,7 @@ public void UtcDateTimeFormatting()
171189
{
172190
{ "field", new DateTime(2000, 1, 1, 5, 0, 0, DateTimeKind.Utc) },
173191
};
174-
var rowData = row.ToRowsData();
192+
var rowData = row.ToRowsData(false);
175193
Assert.Equal("2000-01-01T05:00:00Z", rowData.Json["field"]);
176194
}
177195

@@ -182,7 +200,7 @@ public void TimespanFormatting()
182200
{
183201
{ "field", new TimeSpan(1, 2, 3) },
184202
};
185-
var rowData = row.ToRowsData();
203+
var rowData = row.ToRowsData(false);
186204
Assert.Equal("01:02:03", rowData.Json["field"]);
187205
}
188206

@@ -191,7 +209,7 @@ public void NestedRecordFormatting()
191209
{
192210
var nested = new BigQueryInsertRow { { "inner", "value" } };
193211
var outer = new BigQueryInsertRow { { "outer", nested } };
194-
var rowData = outer.ToRowsData();
212+
var rowData = outer.ToRowsData(false);
195213
var obj = (IDictionary<string, object>)rowData.Json["outer"];
196214
Assert.Equal("value", obj["inner"]);
197215
}
@@ -200,15 +218,15 @@ public void NestedRecordFormatting()
200218
public void RepeatedValue()
201219
{
202220
var row = new BigQueryInsertRow { { "numbers", new[] { 1, 2 } } };
203-
var rowData = row.ToRowsData();
221+
var rowData = row.ToRowsData(false);
204222
Assert.Equal(new object[] { 1, 2 }, rowData.Json["numbers"]);
205223
}
206224

207225
[Fact]
208226
public void RepeatedValue_NullRejectedOnConversion()
209227
{
210228
var row = new BigQueryInsertRow { { "names", new[] { "a", null, "b" } } };
211-
Assert.Throws<InvalidOperationException>(() => row.ToRowsData());
229+
Assert.Throws<InvalidOperationException>(() => row.ToRowsData(false));
212230
}
213231

214232
[Fact]

apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2/BigQueryClientImpl.InsertData.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ private InsertAllRequest CreateInsertAllRequest(TableReference tableReference, I
324324
var insertRows = rows.Select(row =>
325325
{
326326
GaxPreconditions.CheckArgument(row != null, nameof(rows), "Entries must not be null");
327-
return row.ToRowsData();
327+
return row.ToRowsData(options?.AllowEmptyInsertIds ?? false);
328328
}).ToList();
329329
var body = new TableDataInsertAllRequest
330330
{
@@ -334,7 +334,10 @@ private InsertAllRequest CreateInsertAllRequest(TableReference tableReference, I
334334
hasRows = body.Rows.Any();
335335
options?.ModifyRequest(body);
336336
var request = Service.Tabledata.InsertAll(body, tableReference.ProjectId, tableReference.DatasetId, tableReference.TableId);
337-
// We ensure that every row has an insert ID, so we can always retry.
337+
// Even though empty InsertIds might be allowed, this can be retried as per guidance from
338+
the API team. Previous de-duplication was on a best-effort basis anyway, and client code
339+
// needs to explicitly allow for empty InsertId and should be aware that doing so will be at
340+
// the expense of de-duplication efforts.
338341
RetryHandler.MarkAsRetriable(request);
339342
return request;
340343
}

apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2/BigQueryInsertRow.cs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,11 @@ public sealed class BigQueryInsertRow : IEnumerable
101101
/// To help ensure data consistency, you can supply an <see cref="InsertId" /> for each inserted row.
102102
/// BigQuery remembers this ID for at least one minute. If you try to stream the same set of rows within
103103
/// that time period and the insertId property is set, BigQuery uses the property to de-duplicate
104-
/// your data on a best effort basis. If no ID is specified, one will be generated to allow all
105-
/// insert operations to be retried.
104+
/// your data on a best effort basis. By default if no ID is specified, one will be generated to allow
105+
/// de-duplicating efforts if insert operations need to be retried.
106+
/// You can allow empty <see cref="InsertId"/> by setting <see cref="InsertOptions.AllowEmptyInsertIds"/> to true.
107+
/// This will allow for faster row inserts at the expense of possible record duplication if the operation needs to be retried.
108+
/// See https://cloud.google.com/bigquery/quotas#streaming_inserts for more information.
106109
/// </summary>
107110
public string InsertId { get; set; }
108111

@@ -184,12 +187,10 @@ public object this[string name]
184187
/// <summary>
185188
/// Converts an insert row into the API representation.
186189
/// </summary>
187-
internal TableDataInsertAllRequest.RowsData ToRowsData()
190+
internal TableDataInsertAllRequest.RowsData ToRowsData(bool allowEmptyInsertIds)
188191
=> new TableDataInsertAllRequest.RowsData
189192
{
190-
// Always provide an insert ID. If this logic is changed, the CreateInsertAllRequest() method
191-
// will need to be changed, as we now assume every insert has an ID.
192-
InsertId = InsertId ?? Guid.NewGuid().ToString(),
193+
InsertId = allowEmptyInsertIds ? InsertId : InsertId ?? Guid.NewGuid().ToString(),
193194
Json = GetJsonValues()
194195
};
195196

apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2/InsertOptions.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@ public sealed class InsertOptions
5353
/// </summary>
5454
public string TemplateSuffix { get; set; }
5555

56+
/// <summary>
57+
/// When true allows <see cref="BigQueryInsertRow.InsertId"/> to be
58+
/// unspecified. This in turn allows for faster inserts, at the expense
59+
/// of possible record duplication if the operation needs to be retried.
60+
/// See https://cloud.google.com/bigquery/quotas#streaming_inserts for
61+
/// more information.
62+
/// The default value is false.
63+
/// </summary>
64+
public bool AllowEmptyInsertIds { get; set; }
65+
5666
internal void ModifyRequest(TableDataInsertAllRequest body)
5767
{
5868
if (AllowUnknownFields != null)

0 commit comments

Comments
 (0)