はじめに
Guid を主キーに使っているせいで、現在進行中でインデックスの断片化に悩まされている。
ソート可能な ULID を使えば、この悩みが軽減できたりするんだろうか。
気になったので実験してみた。
以下は実験に使ったサンプル
using System;
using System.Data.SqlClient;
using System.Threading.Tasks;
using System.Linq;
using Dapper;
namespace UlidVsGuid
{
class Program
{
const int Count = 1000;
const int DefaultIntervalMilliseconds = 10;
static readonly string MasterConnectionString = new SqlConnectionStringBuilder()
{
DataSource = "(local)",
InitialCatalog = "master",
IntegratedSecurity = true,
}.ToString();
static readonly string ConnectionString = new SqlConnectionStringBuilder()
{
DataSource = "(local)",
InitialCatalog = "ulid_vs_guid",
IntegratedSecurity = true,
}.ToString();
static async Task Main(string[] args)
{
DefaultTypeMap.MatchNamesWithUnderscores = true;
var intervalMilliseconds = int.TryParse(args.FirstOrDefault(), out var value)
? value
: DefaultIntervalMilliseconds;
await CleanUpAsync();
await SetUpAsync();
await Task.WhenAll(
UlidTestAsync(intervalMilliseconds),
GuidTestAsync(intervalMilliseconds),
UlidToStringTestAsync(intervalMilliseconds),
GuidToStringTestAsync(intervalMilliseconds));
await AverageFragmentationInPercentAsync();
Console.ReadLine();
}
static async Task CleanUpAsync()
{
using (var connection = new SqlConnection(MasterConnectionString))
{
await connection.OpenAsync();
await connection.ExecuteAsync(
@"IF (DB_ID('ulid_vs_guid') IS NOT NULL)
BEGIN
DROP DATABASE ulid_vs_guid
END");
}
}
static async Task SetUpAsync()
{
using (var connection = new SqlConnection(MasterConnectionString))
{
await connection.OpenAsync();
await connection.ExecuteAsync(
@"CREATE DATABASE [ulid_vs_guid];");
}
using (var connection = new SqlConnection(ConnectionString))
{
await connection.OpenAsync();
await connection.ExecuteAsync(
@"CREATE TABLE [dbo].[ulid_test] (
[id] [uniqueidentifier] NOT NULL,
[name] [nvarchar](50) NOT NULL,
CONSTRAINT [PK_ulid_test] PRIMARY KEY CLUSTERED
(
[id] ASC
)
);
CREATE TABLE [dbo].[guid_test] (
[id] [uniqueidentifier] NOT NULL,
[name] [nvarchar](50) NOT NULL,
CONSTRAINT [PK_guid_test] PRIMARY KEY CLUSTERED
(
[id] ASC
)
);
CREATE TABLE [dbo].[ulid_str_test] (
[id] [nvarchar](50) NOT NULL,
[name] [nvarchar](50) NOT NULL,
CONSTRAINT [PK_ulid_str_test] PRIMARY KEY CLUSTERED
(
[id] ASC
)
);
CREATE TABLE [dbo].[guid_str_test] (
[id] [nvarchar](50) NOT NULL,
[name] [nvarchar](50) NOT NULL,
CONSTRAINT [PK_guid_str_test] PRIMARY KEY CLUSTERED
(
[id] ASC
)
);
");
}
}
static async Task UlidTestAsync(int intervalMilliseconds)
{
using (var connection = new SqlConnection(ConnectionString))
{
await connection.OpenAsync();
for (var i = 0; i < Count; i++)
{
var id = new Guid(Ulid.NewUlid().ToByteArray());
var name = $"No.{i}";
await connection.ExecuteAsync(
@"INSERT INTO ulid_test
(id, name)
VALUES
(@id, @name)",
new
{
id,
name,
});
await Task.Delay(intervalMilliseconds);
}
}
}
static async Task GuidTestAsync(int intervalMilliseconds)
{
using (var connection = new SqlConnection(ConnectionString))
{
await connection.OpenAsync();
for (var i = 0; i < Count; i++)
{
var id = Guid.NewGuid();
var name = $"No.{i}";
await connection.ExecuteAsync(
@"INSERT INTO guid_test
(id, name)
VALUES
(@id, @name)",
new
{
id,
name,
});
await Task.Delay(intervalMilliseconds);
}
}
}
static async Task UlidToStringTestAsync(int intervalMilliseconds)
{
using (var connection = new SqlConnection(ConnectionString))
{
await connection.OpenAsync();
for (var i = 0; i < Count; i++)
{
var id = Ulid.NewUlid().ToString();
var name = $"No.{i}";
await connection.ExecuteAsync(
@"INSERT INTO ulid_str_test
(id, name)
VALUES
(@id, @name)",
new
{
id,
name,
});
await Task.Delay(intervalMilliseconds);
}
}
}
static async Task GuidToStringTestAsync(int intervalMilliseconds)
{
using (var connection = new SqlConnection(ConnectionString))
{
await connection.OpenAsync();
for (var i = 0; i < Count; i++)
{
var id = Guid.NewGuid().ToString();
var name = $"No.{i}";
await connection.ExecuteAsync(
@"INSERT INTO guid_str_test
(id, name)
VALUES
(@id, @name)",
new
{
id,
name,
});
await Task.Delay(intervalMilliseconds);
}
}
}
static async Task AverageFragmentationInPercentAsync()
{
using (var connection = new SqlConnection(ConnectionString))
{
await connection.OpenAsync();
var results = await connection.QueryAsync<Fragmentation>(
@"SELECT
t2.name AS table_name,
t3.name AS index_name,
t1.avg_fragmentation_in_percent
FROM
sys.dm_db_index_physical_stats(DB_ID(), NULL, NULL, NULL, 'LIMITED') AS t1
JOIN
sys.all_objects AS t2
ON t1.object_id = t2.object_id
JOIN
sys.indexes AS t3
ON t1.object_id = t3.object_id AND
t1.index_id = t3.index_id
WHERE
t2.is_ms_shipped = 0
ORDER BY
t1.avg_fragmentation_in_percent DESC");
foreach (var result in results)
{
Console.WriteLine(
$"{result.IndexName} = {result.AvgFragmentationInPercent}%");
}
}
}
}
class Fragmentation
{
public string TableName { get; set; }
public string IndexName { get; set; }
public double AvgFragmentationInPercent { get; set; }
}
}
下記の4パターンで、1000件登録したときのインデックスの断片化の具合を調べてみた。
- uniqueidentifier 型の主キー列に Guid を格納
- uniqueidentifier 型の主キー列に、Ulid を Guid に変換した値を格納
- nvarchar(50) 型の主キー列に、Guid を string に変換した値を格納
- nvarchar(50) 型の主キー列に、Ulid を string に変換した値を格納
1件登録するたびにインターバルをとっているが、その間隔も 1ms・10ms・100ms・1000ms の4パターンで試してみた。
実験結果は下記の通り
インターバル 1ms のとき
インターバル 10 ms のとき
インターバル 100ms のとき
インターバル 1000ms のとき
まとめ
Ulid は ToString して nvarchar 型の列に格納する必要がある。
互換性あるからといって、Guid に変換して uniqueidentifier 型の列に格納しても効果ない。