Skip to content

Commit 575c61a

Browse files
committed
Convert BigQuery field validation from a regex to hand-written code
This is entirely for the sake of performance.

Benchmark results on my machine:

netcoreapp2.0:
- Short: 180ns => 9ns
- Long: 939ns => 96ns

netcoreapp3.1:
- Short: 159ns => 8ns
- Long: 887ns => 79ns

net461:
- Short: 197ns => 8ns
- Long: 1425ns => 86ns

Fixes #4975
1 parent 372df03 commit 575c61a

File tree

5 files changed

+67
-10
lines changed

5 files changed

+67
-10
lines changed

apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2.Benchmarks/Google.Cloud.BigQuery.V2.Benchmarks.csproj

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFrameworks>netcoreapp2.0;net461</TargetFrameworks>
5-
<TargetFrameworks Condition=" '$(OS)' != 'Windows_NT' ">netcoreapp2.0</TargetFrameworks>
4+
<TargetFrameworks>netcoreapp2.0;netcoreapp3.1;net461</TargetFrameworks>
65
<OutputType>Exe</OutputType>
76
<IsPackable>False</IsPackable>
87
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
98
<LangVersion>7.2</LangVersion>
109
</PropertyGroup>
1110

1211
<ItemGroup>
13-
<PackageReference Include="BenchmarkDotNet" Version="0.10.14" />
12+
<PackageReference Include="BenchmarkDotNet" Version="0.12.1" />
1413
<ProjectReference Include="../Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2.csproj" />
1514
</ItemGroup>
1615

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright 2020 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
using BenchmarkDotNet.Attributes;
16+
17+
namespace Google.Cloud.BigQuery.V2.Benchmarks
18+
{
19+
/// <summary>
/// Benchmarks for <c>TableSchemaBuilder.ValidateFieldName</c>, measured once
/// with a short field name and once with a long one.
/// </summary>
public class TableSchemaBuilderBenchmark
{
    // Three-character input for the short case.
    private const string ShortFieldName = "abc";

    // 63-character input mixing lower-case letters, digits, an underscore and upper-case letters.
    private const string LongFieldName = "abcdefghijklmnopqrstuvwxyz1234567890_ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    /// <summary>
    /// Validates the short field name.
    /// </summary>
    [Benchmark]
    public void ValidateFieldName_Short()
    {
        TableSchemaBuilder.ValidateFieldName(ShortFieldName, "param");
    }

    /// <summary>
    /// Validates the long field name.
    /// </summary>
    [Benchmark]
    public void ValidateFieldName_Long()
    {
        TableSchemaBuilder.ValidateFieldName(LongFieldName, "param");
    }
}
30+
}

apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2/AssemblyInfo.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@
1313
// limitations under the License.
1414
using System.Runtime.CompilerServices;
1515

16-
[assembly: InternalsVisibleTo("Google.Cloud.BigQuery.V2.Tests,PublicKey=0024000004800000940000000602000000240000525341310004000001000100afab79952ee22215f12b4e09337e65509c943fbc22d7006bc371d581d0f0ebf0da5d8039aab2607fb68a138a5d80a71bc02b7ebf586dbe1f2493c0ab20423ababfd15ce74d2264a6b37745f3658f016abaad662182aaef634a60f1346fcc45343acab5b6781535a3134818e13fac895a6c106c0480e34bbb06cb123e5583d8d2")]
16+
[assembly: InternalsVisibleTo("Google.Cloud.BigQuery.V2.Tests,PublicKey=0024000004800000940000000602000000240000525341310004000001000100afab79952ee22215f12b4e09337e65509c943fbc22d7006bc371d581d0f0ebf0da5d8039aab2607fb68a138a5d80a71bc02b7ebf586dbe1f2493c0ab20423ababfd15ce74d2264a6b37745f3658f016abaad662182aaef634a60f1346fcc45343acab5b6781535a3134818e13fac895a6c106c0480e34bbb06cb123e5583d8d2")]
17+
[assembly: InternalsVisibleTo("Google.Cloud.BigQuery.V2.Benchmarks,PublicKey=0024000004800000940000000602000000240000525341310004000001000100afab79952ee22215f12b4e09337e65509c943fbc22d7006bc371d581d0f0ebf0da5d8039aab2607fb68a138a5d80a71bc02b7ebf586dbe1f2493c0ab20423ababfd15ce74d2264a6b37745f3658f016abaad662182aaef634a60f1346fcc45343acab5b6781535a3134818e13fac895a6c106c0480e34bbb06cb123e5583d8d2")]

apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2/BigQueryNumeric.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public struct BigQueryNumeric : IEquatable<BigQueryNumeric>, IComparable<BigQuer
3636

3737
private static readonly BigInteger s_integerScalingFactor = new BigInteger(1_000_000_000L);
3838
// TODO: Don't require a 0 before the decimal point.
39+
// TODO: Replace with manual validation if we find this is a performance bottleneck (as it was with field name validation).
3940
private static readonly Regex s_validation = new Regex(@"^-?[0-9]+\.?[0-9]*$");
4041

4142
// Note: the following properties must be declared *after* s_maxValue and s_minValue. Initialization order matters.

apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2/TableSchemaBuilder.cs

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,6 @@ namespace Google.Cloud.BigQuery.V2
3131
/// </remarks>
3232
public sealed class TableSchemaBuilder : IEnumerable
3333
{
34-
// From BigQuery documentation:
35-
// The name must contain only letters (a-z, A-Z), numbers (0-9),
36-
// or underscores (_), and must start with a letter or underscore.
37-
// The maximum length is 128 characters.
38-
private static readonly Regex s_fieldNamePattern = new Regex("^[a-zA-Z_][a-zA-Z0-9_]{0,127}$");
3934
private readonly List<TableFieldSchema> _fields = new List<TableFieldSchema>();
4035

4136
/// <summary>
@@ -76,7 +71,38 @@ public void Add(string name, BigQueryDbType type, BigQueryFieldMode mode = BigQu
7671
// Validates a field name in two steps: a null check, then the field-name rule check.
// Both steps are delegated to GaxPreconditions, passing paramName so a failure can be
// attributed to the caller's argument. GaxPreconditions is defined outside this view;
// presumably it throws on failure - confirm against its documentation.
internal static void ValidateFieldName(string name, string paramName)
7772
{
7873
GaxPreconditions.CheckNotNull(name, paramName);
// Diff hunk: the line marked "79-" is the removed regex-based check; the line marked
// "75+" is the replacement that calls the hand-written IsValidFieldName instead.
79-
GaxPreconditions.CheckArgument(s_fieldNamePattern.IsMatch(name), paramName, "Invalid field name '{0}'", name);
74+
75+
GaxPreconditions.CheckArgument(IsValidFieldName(name), paramName, "Invalid field name '{0}'", name);
76+
}
77+
78+
// From BigQuery documentation:
79+
// The name must contain only letters (a-z, A-Z), numbers (0-9),
80+
// or underscores (_), and must start with a letter or underscore.
81+
// The maximum length is 128 characters.
82+
// This was originally a regular expression, but the manual code is very significantly faster.
83+
// (Roughly 10x faster with the benchmarks I've run.)
84+
// Returns true when the name is 1 to 128 characters long, consists only of
// ASCII letters, ASCII digits and underscores, and does not begin with a digit.
// The name is assumed to be non-null; a null name is not handled here.
private static bool IsValidFieldName(string name)
{
    int length = name.Length;
    if (length == 0 || length > 128)
    {
        return false;
    }

    for (int index = 0; index < length; index++)
    {
        char current = name[index];
        bool isLetter = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
        if (isLetter || current == '_')
        {
            // Letters and underscores are accepted at every position.
            continue;
        }

        // Anything else must be a digit, and digits are not allowed in the leading position.
        bool isDigit = current >= '0' && current <= '9';
        if (index == 0 || !isDigit)
        {
            return false;
        }
    }

    return true;
}
81107

82108
/// <summary>

0 commit comments

Comments
 (0)