From bb443e4c7a02e0c0c371654711486247aec2863e Mon Sep 17 00:00:00 2001 From: Loren Van Spronsen Date: Sat, 28 Mar 2015 10:17:35 -0700 Subject: [PATCH 1/6] Ignore .vs folder for VS2015 --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 16569ab..1e9a901 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ ###################### *.suo *.user +.vs # VS File # ########### From d02182507a8d8268fb3883ee862bd6c545d2a04b Mon Sep 17 00:00:00 2001 From: Loren Van Spronsen Date: Sat, 28 Mar 2015 10:17:45 -0700 Subject: [PATCH 2/6] Enable loading table columns in parallel --- .../Model/StorageAccountProperties.cs | 14 +++++++++ Madd0.AzureStorageDriver/SchemaBuilder.cs | 30 ++++++++++++------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/Madd0.AzureStorageDriver/Model/StorageAccountProperties.cs b/Madd0.AzureStorageDriver/Model/StorageAccountProperties.cs index 43d93cd..0fb3a5f 100644 --- a/Madd0.AzureStorageDriver/Model/StorageAccountProperties.cs +++ b/Madd0.AzureStorageDriver/Model/StorageAccountProperties.cs @@ -135,6 +135,20 @@ public int NumberOfRows set { this._driverData.SetElementValue("NumberOfRows", value); } } + /// + /// Returns the maximum number of parallel model loading + /// operations that can occur when loading schema for the azure table storage tables + /// + public int ModelLoadMaxParallelism + { + get { return (int?)this._driverData.Element("ModelLoadMaxParallelism") ?? + (System.Environment.ProcessorCount * 2); } + set + { + this._driverData.SetElementValue("ModelLoadMaxParallelism", value); + } + } + /// /// Gets a instace for the current connection. 
/// diff --git a/Madd0.AzureStorageDriver/SchemaBuilder.cs b/Madd0.AzureStorageDriver/SchemaBuilder.cs index b6ec952..e7ad918 100644 --- a/Madd0.AzureStorageDriver/SchemaBuilder.cs +++ b/Madd0.AzureStorageDriver/SchemaBuilder.cs @@ -13,11 +13,13 @@ namespace Madd0.AzureStorageDriver using System.IO; using System.Linq; using System.Reflection; + using System.Threading.Tasks; using LINQPad.Extensibility.DataContext; using Madd0.AzureStorageDriver.Properties; using Microsoft.CSharp; using Microsoft.WindowsAzure.Storage.Table; + /// /// Provides the methods necessary to determining the storage account's schema and to building /// the typed data context . @@ -63,7 +65,7 @@ public static List GetSchemaAndBuildAssembly(StorageAccountPropert private static IEnumerable GetModel(StorageAccountProperties properties) { var tableClient = properties.GetStorageAccount().CreateCloudTableClient(); - + // First get a list of all tables var model = (from tableName in tableClient.ListTables() select new CloudTable @@ -71,18 +73,24 @@ private static IEnumerable GetModel(StorageAccountProperties propert Name = tableName.Name }).ToList(); - // Then go through them - foreach (var table in model) + var options = new ParallelOptions() + { + MaxDegreeOfParallelism = properties.ModelLoadMaxParallelism + }; + + Parallel.ForEach(model, options, table => { - var tableColumns = tableClient.GetTableReference(table.Name).ExecuteQuery(new TableQuery().Take(properties.NumberOfRows)) + var threadTableClient = properties.GetStorageAccount().CreateCloudTableClient(); + + var tableColumns = threadTableClient.GetTableReference(table.Name).ExecuteQuery(new TableQuery().Take(properties.NumberOfRows)) .SelectMany(row => row.Properties) .GroupBy(column => column.Key) .Select(grp => new TableColumn - { - Name = grp.Key, - TypeName = GetType(grp.First().Value.PropertyType) - }); - + { + Name = grp.Key, + TypeName = GetType(grp.First().Value.PropertyType) + }); + var baseColumns = new List { new TableColumn { 
Name = "PartitionKey", TypeName = GetType(EdmType.String) }, @@ -92,8 +100,8 @@ private static IEnumerable GetModel(StorageAccountProperties propert }; table.Columns = tableColumns.Concat(baseColumns).ToArray(); - } - + }); + return model; } From 181cedd765c3199d22154e08c61852c045a48b2d Mon Sep 17 00:00:00 2001 From: Loren Van Spronsen Date: Sat, 28 Mar 2015 16:01:04 -0700 Subject: [PATCH 3/6] Add setting in properties for parallel schema loading --- .../ConnectionDialog.xaml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Madd0.AzureStorageDriver/ConnectionDialog.xaml b/Madd0.AzureStorageDriver/ConnectionDialog.xaml index 9de7b89..34959e8 100644 --- a/Madd0.AzureStorageDriver/ConnectionDialog.xaml +++ b/Madd0.AzureStorageDriver/ConnectionDialog.xaml @@ -80,6 +80,28 @@ Text="?" /> + + Schema loading parallelism: + + + + + The number of concurrent calls that will be made to Azure Table Storage to determine the columns available for each table. + + + + + + From cbbd62040dbb702be731c2c2bd0d3bd82ff6a3d8 Mon Sep 17 00:00:00 2001 From: Loren Van Spronsen Date: Sat, 28 Mar 2015 16:04:22 -0700 Subject: [PATCH 4/6] Up the connection limit set by ServicePointManager to speed up parallel schema loading --- Madd0.AzureStorageDriver/SchemaBuilder.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Madd0.AzureStorageDriver/SchemaBuilder.cs b/Madd0.AzureStorageDriver/SchemaBuilder.cs index e7ad918..a6413a9 100644 --- a/Madd0.AzureStorageDriver/SchemaBuilder.cs +++ b/Madd0.AzureStorageDriver/SchemaBuilder.cs @@ -12,6 +12,7 @@ namespace Madd0.AzureStorageDriver using System.Collections.Generic; using System.IO; using System.Linq; + using System.Net; using System.Reflection; using System.Threading.Tasks; using LINQPad.Extensibility.DataContext; @@ -64,6 +65,10 @@ public static List GetSchemaAndBuildAssembly(StorageAccountPropert /// storage model. 
private static IEnumerable GetModel(StorageAccountProperties properties) { + // make sure that we can make at least ModelLoadMaxParallelism concurrent + // calls to azure table storage + ServicePointManager.DefaultConnectionLimit = properties.ModelLoadMaxParallelism; + var tableClient = properties.GetStorageAccount().CreateCloudTableClient(); // First get a list of all tables From dc1ba0e5989ba41b244ec577c90103043a23a2e3 Mon Sep 17 00:00:00 2001 From: Loren Van Spronsen Date: Sat, 28 Mar 2015 16:06:15 -0700 Subject: [PATCH 5/6] Enable pruning of tables using date rollover format so that less schema data needs to be loaded --- .../ConnectionDialog.xaml | 22 ++++++++++++++ .../Model/StorageAccountProperties.cs | 16 +++++++++- Madd0.AzureStorageDriver/SchemaBuilder.cs | 30 +++++++++++++++++-- 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/Madd0.AzureStorageDriver/ConnectionDialog.xaml b/Madd0.AzureStorageDriver/ConnectionDialog.xaml index 34959e8..7283917 100644 --- a/Madd0.AzureStorageDriver/ConnectionDialog.xaml +++ b/Madd0.AzureStorageDriver/ConnectionDialog.xaml @@ -102,6 +102,28 @@ Text="?" /> + + Table rollover date format: + + + + + The date format that is appended to table names in table rollover. This is used to reduce the number of azure tables that need to be inspected to load schema information. + + + + + + diff --git a/Madd0.AzureStorageDriver/Model/StorageAccountProperties.cs b/Madd0.AzureStorageDriver/Model/StorageAccountProperties.cs index 0fb3a5f..1603037 100644 --- a/Madd0.AzureStorageDriver/Model/StorageAccountProperties.cs +++ b/Madd0.AzureStorageDriver/Model/StorageAccountProperties.cs @@ -148,7 +148,21 @@ public int ModelLoadMaxParallelism this._driverData.SetElementValue("ModelLoadMaxParallelism", value); } } - + + /// + /// A date format which table names can potentially end in + /// for table rollover. 
This prevents loading duplicate schemas + /// for rollover tables + /// + public string TableRolloverDateFormat + { + get { return (string)this._driverData.Element("TableRolloverDateFormat") ?? "yyyyMMdd"; } + set + { + this._driverData.SetElementValue("TableRolloverDateFormat", value); + } + } + /// /// Gets a instace for the current connection. /// diff --git a/Madd0.AzureStorageDriver/SchemaBuilder.cs b/Madd0.AzureStorageDriver/SchemaBuilder.cs index a6413a9..de62d81 100644 --- a/Madd0.AzureStorageDriver/SchemaBuilder.cs +++ b/Madd0.AzureStorageDriver/SchemaBuilder.cs @@ -21,6 +21,7 @@ namespace Madd0.AzureStorageDriver using Microsoft.WindowsAzure.Storage.Table; + /// /// Provides the methods necessary to determining the storage account's schema and to building /// the typed data context . @@ -71,6 +72,8 @@ private static IEnumerable GetModel(StorageAccountProperties propert var tableClient = properties.GetStorageAccount().CreateCloudTableClient(); + string rolloverFormat = properties.TableRolloverDateFormat; + // First get a list of all tables var model = (from tableName in tableClient.ListTables() select new CloudTable @@ -78,16 +81,34 @@ private static IEnumerable GetModel(StorageAccountProperties propert Name = tableName.Name }).ToList(); + var schemas = model + .GroupBy(table => + { + string schemaName = table.Name; + DateTime rollover; + for (int i = table.Name.Length - 1; i > 0; i--) + { + string tail = schemaName.Substring(i); + if(DateTime.TryParseExact(tail, rolloverFormat, null, System.Globalization.DateTimeStyles.None, out rollover)) + { + schemaName = schemaName.Substring(0, i); + break; + } + } + return schemaName; + }) + .ToList(); + var options = new ParallelOptions() { MaxDegreeOfParallelism = properties.ModelLoadMaxParallelism }; - Parallel.ForEach(model, options, table => + Parallel.ForEach(schemas, options, group => { var threadTableClient = properties.GetStorageAccount().CreateCloudTableClient(); - var tableColumns = 
threadTableClient.GetTableReference(table.Name).ExecuteQuery(new TableQuery().Take(properties.NumberOfRows)) + var tableColumns = threadTableClient.GetTableReference(group.Last().Name).ExecuteQuery(new TableQuery().Take(properties.NumberOfRows)) .SelectMany(row => row.Properties) .GroupBy(column => column.Key) .Select(grp => new TableColumn @@ -104,7 +125,10 @@ private static IEnumerable GetModel(StorageAccountProperties propert new TableColumn { Name = "ETag", TypeName = GetType(EdmType.String) } }; - table.Columns = tableColumns.Concat(baseColumns).ToArray(); + foreach(var table in group) + { + table.Columns = tableColumns.Concat(baseColumns).ToArray(); + } }); return model; From f71b7fb077885f92c7c3433c587da0fe6ff0d56c Mon Sep 17 00:00:00 2001 From: Loren Van Spronsen Date: Sat, 28 Mar 2015 16:08:36 -0700 Subject: [PATCH 6/6] Formatting improvements for date format textbox --- Madd0.AzureStorageDriver/ConnectionDialog.xaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Madd0.AzureStorageDriver/ConnectionDialog.xaml b/Madd0.AzureStorageDriver/ConnectionDialog.xaml index 7283917..6286da3 100644 --- a/Madd0.AzureStorageDriver/ConnectionDialog.xaml +++ b/Madd0.AzureStorageDriver/ConnectionDialog.xaml @@ -104,10 +104,10 @@ Table rollover date format: - + TextAlignment="Left" />