|
| 1 | +using HLL.NET.Hashing; |
| 2 | +using HLL.NET.Models; |
| 3 | +using System; |
| 4 | +using System.Text; |
| 5 | + |
namespace HLL.NET.Serialization
{
    /// <summary>
    /// Converts <see cref="HyperLogLog{T}"/> instances to and from a compact byte array.
    /// </summary>
    /// <remarks>
    /// Layout produced by <see cref="Serialize{T}"/> (see docs/serialization_format.md):
    /// <list type="number">
    /// <item><description>1 byte: format version.</description></item>
    /// <item><description>1 byte: N, the byte length of the UTF-8 encoded type name.</description></item>
    /// <item><description>N bytes: <c>typeof(T).FullName</c> encoded as UTF-8.</description></item>
    /// <item><description>1 byte: precision value.</description></item>
    /// <item><description>One byte per register, in register order.</description></item>
    /// </list>
    /// </remarks>
    internal static class HllSerializer
    {
        private const byte FormatVersion = 2;

        // Smallest buffer accepted up front; the variable-length parts are
        // validated individually as they are read.
        private const int MinimumDataLength = 4;

        /// <summary>
        /// Serializes the element type name, precision and registers of <paramref name="hll"/>.
        /// </summary>
        /// <typeparam name="T">Element type of the sketch; its full name is embedded in the output.</typeparam>
        /// <param name="hll">The sketch to serialize.</param>
        /// <returns>A new byte array in the layout described on this class.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="hll"/> is null.</exception>
        /// <exception cref="NotSupportedException">
        /// The UTF-8 encoded type name is longer than 255 bytes and cannot be
        /// described by the single length byte of the format.
        /// </exception>
        public static byte[] Serialize<T>(HyperLogLog<T> hll)
        {
            if (hll == null)
            {
                throw new ArgumentNullException(nameof(hll));
            }

            var typeNameBytes = EncodeTypeName(typeof(T));

            // The length is stored in one byte. Casting a larger value would wrap
            // silently and produce a payload that can never be read back.
            if (typeNameBytes.Length > byte.MaxValue)
            {
                throw new NotSupportedException(
                    $"Type name '{typeof(T).FullName}' is {typeNameBytes.Length} bytes when UTF-8 encoded; the format allows at most {byte.MaxValue}.");
            }

            byte typeNameLength = (byte)typeNameBytes.Length;

            // version + name length + name + precision + registers
            var data = new byte[1 + 1 + typeNameLength + 1 + hll.Registers.Length];
            int index = 0;

            data[index++] = FormatVersion;
            data[index++] = typeNameLength;
            Array.Copy(typeNameBytes, 0, data, index, typeNameLength);
            index += typeNameLength;

            data[index++] = (byte)hll.Precision.Value;
            WriteRegisters(hll, data, ref index);

            return data;
        }

        /// <summary>
        /// Rebuilds a <see cref="HyperLogLog{T}"/> from bytes produced by <see cref="Serialize{T}"/>.
        /// </summary>
        /// <typeparam name="T">Element type the caller expects; must match the serialized type name.</typeparam>
        /// <param name="data">The serialized payload.</param>
        /// <param name="hasher">Hasher passed to the constructor of the new sketch.</param>
        /// <returns>A new sketch whose registers are filled from <paramref name="data"/>.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="data"/> is null, shorter than 4 bytes, or ends before the
        /// type name, precision byte or registers it declares.
        /// </exception>
        /// <exception cref="NotSupportedException">The version byte is not the supported format version.</exception>
        /// <exception cref="InvalidOperationException">The serialized type name differs from <c>typeof(T).FullName</c>.</exception>
        public static HyperLogLog<T> Deserialize<T>(byte[] data, IHasher<T> hasher)
        {
            if (data == null || data.Length < MinimumDataLength)
            {
                throw new ArgumentException("Invalid serialized data");
            }

            int index = 0;

            ValidateVersion(data[index++]);

            var typeName = ReadTypeName(data, ref index);
            EnsureCorrectType<T>(typeName);

            // The type name may have consumed the rest of the buffer.
            if (index >= data.Length)
            {
                throw new ArgumentException("Invalid serialized data: precision byte is missing.");
            }

            var precision = new HllPrecision(data[index++]);
            var hll = new HyperLogLog<T>(precision, hasher);

            ReadRegisters(hll, data, ref index);

            return hll;
        }

        /// <summary>Encodes the full name of <paramref name="type"/> as UTF-8.</summary>
        private static byte[] EncodeTypeName(Type type) =>
            Encoding.UTF8.GetBytes(type.FullName);

        /// <summary>
        /// Copies each register value of <paramref name="hll"/> into <paramref name="data"/>
        /// starting at <paramref name="index"/>, advancing <paramref name="index"/> past them.
        /// </summary>
        private static void WriteRegisters<T>(HyperLogLog<T> hll, byte[] data, ref int index)
        {
            foreach (var reg in hll.Registers)
            {
                data[index++] = reg.Value;
            }
        }

        /// <summary>
        /// Fills every register of <paramref name="hll"/> from consecutive bytes of
        /// <paramref name="data"/> starting at <paramref name="index"/>.
        /// </summary>
        /// <exception cref="ArgumentException">Fewer bytes remain than the sketch has registers.</exception>
        private static void ReadRegisters<T>(HyperLogLog<T> hll, byte[] data, ref int index)
        {
            int registerCount = hll.Registers.Length;
            int remaining = data.Length - index;

            // Fail up front instead of leaving a half-populated sketch behind.
            if (remaining < registerCount)
            {
                throw new ArgumentException(
                    $"Invalid serialized data: expected {registerCount} register bytes but only {remaining} remain.");
            }

            for (int i = 0; i < registerCount; i++)
            {
                hll.Registers[i] = new HllRegister(data[index++]);
            }
        }

        /// <summary>Rejects any version byte other than <see cref="FormatVersion"/>.</summary>
        /// <exception cref="NotSupportedException">The version does not match.</exception>
        private static void ValidateVersion(byte version)
        {
            if (version != FormatVersion)
            {
                throw new NotSupportedException($"Unsupported HLL format version: {version}");
            }
        }

        /// <summary>
        /// Reads the length-prefixed UTF-8 type name at <paramref name="index"/> and
        /// advances <paramref name="index"/> past it.
        /// </summary>
        /// <exception cref="ArgumentException">The declared length runs past the end of <paramref name="data"/>.</exception>
        private static string ReadTypeName(byte[] data, ref int index)
        {
            int length = data[index++];

            if (length > data.Length - index)
            {
                throw new ArgumentException(
                    $"Invalid serialized data: type name length {length} exceeds the remaining {data.Length - index} bytes.");
            }

            var typeName = Encoding.UTF8.GetString(data, index, length);
            index += length;
            return typeName;
        }

        /// <summary>
        /// Verifies that the serialized type name equals <c>typeof(T).FullName</c>.
        /// </summary>
        /// <exception cref="InvalidOperationException">The names differ.</exception>
        private static void EnsureCorrectType<T>(string serializedTypeName)
        {
            var expected = typeof(T).FullName;
            if (!string.Equals(serializedTypeName, expected, StringComparison.Ordinal))
            {
                throw new InvalidOperationException($"Type mismatch. Serialized for '{serializedTypeName}', but deserializing as '{expected}'.");
            }
        }
    }
}
0 commit comments