|
25 | 25 | #ifndef DUCKDB_BUFFERPOOL_H |
26 | 26 | #define DUCKDB_BUFFERPOOL_H |
27 | 27 |
|
28 | | -#include <iostream> |
29 | | -#include <vector> |
| 28 | +#include "exception/InvalidArgumentException.h" |
| 29 | +#include "physical/BufferPool/Bitmap.h" |
| 30 | +#include "physical/BufferPool/BufferPoolEntry.h" |
30 | 31 | #include "physical/natives/ByteBuffer.h" |
31 | | -#include <memory> |
32 | 32 | #include "physical/natives/DirectIoLib.h" |
33 | | -#include "exception/InvalidArgumentException.h" |
34 | 33 | #include "utils/ColumnSizeCSVReader.h" |
| 34 | +#include <cstdio> |
| 35 | +#include <iostream> |
35 | 36 | #include <map> |
36 | | - |
37 | | -// when allocating buffer pool, we use the size of the first pxl file. Consider that |
38 | | -// the remaining pxl file has larger size than the first file, we allocate some extra |
39 | | -// size (10MB) to each column. |
| 37 | +#include <memory> |
| 38 | +#include <mutex> |
| 39 | +#include <thread> |
| 40 | +#include <vector> |
| 41 | +// when allocating buffer pool, we use the size of the first pxl file. Consider |
| 42 | +// that the remaining pxl file has larger size than the first file, we allocate |
| 43 | +// some extra size (10MB) to each column. |
40 | 44 | // TODO: how to evaluate the maximal pool size |
41 | | -#define EXTRA_POOL_SIZE 3*1024*1024 |
| 45 | +#define EXTRA_POOL_SIZE 10 * 1024 * 1024 |
42 | 46 |
|
43 | 47 | class DirectUringRandomAccessFile; |
| 48 | + |
44 | 49 | // This class is global class. The variable is shared by each thread |
45 | 50 | class BufferPool |
46 | 51 | { |
47 | 52 | public: |
48 | | - static void |
49 | | - Initialize(std::vector <uint32_t> colIds, std::vector <uint64_t> bytes, std::vector <std::string> columnNames); |
50 | 53 |
|
51 | | - static std::shared_ptr <ByteBuffer> GetBuffer(uint32_t colId); |
| 54 | + class BufferPoolManagedEntry |
| 55 | + { |
| 56 | + public: |
| 57 | + enum class State |
| 58 | + { |
| 59 | + InitizaledNotAllocated, |
| 60 | + AllocatedAndInUse, |
| 61 | + UselessButNotFree |
| 62 | + }; |
| 63 | + |
| 64 | + private: |
| 65 | + std::shared_ptr<BufferPoolEntry> bufferPoolEntry; |
| 66 | + int ringIndex; |
| 67 | + size_t currentSize; |
| 68 | + int offset; |
| 69 | + State state; |
| 70 | + |
| 71 | + public: |
| 72 | + BufferPoolManagedEntry(std::shared_ptr<BufferPoolEntry> entry, int ringIdx, |
| 73 | + size_t currSize, off_t off) |
| 74 | + : bufferPoolEntry(std::move(entry)), ringIndex(ringIdx), |
| 75 | + currentSize(currSize), offset(off), |
| 76 | + state(State::InitizaledNotAllocated) |
| 77 | + { |
| 78 | + } |
| 79 | + |
| 80 | + std::shared_ptr<BufferPoolEntry> getBufferPoolEntry() const |
| 81 | + { |
| 82 | + return bufferPoolEntry; |
| 83 | + } |
| 84 | + |
| 85 | + int getRingIndex() const |
| 86 | + { |
| 87 | + return ringIndex; |
| 88 | + } |
| 89 | + |
| 90 | + void setRingIndex(int index) |
| 91 | + { |
| 92 | + ringIndex = index; |
| 93 | + } |
| 94 | + |
| 95 | + size_t getCurrentSize() const |
| 96 | + { |
| 97 | + return currentSize; |
| 98 | + } |
| 99 | + |
| 100 | + void setCurrentSize(size_t size) |
| 101 | + { |
| 102 | + currentSize = size; |
| 103 | + } |
| 104 | + |
| 105 | + int getOffset() const |
| 106 | + { |
| 107 | + return offset; |
| 108 | + } |
52 | 109 |
|
53 | | - static int64_t GetBufferId(uint32_t index); |
| 110 | + void setOffset(int off) |
| 111 | + { |
| 112 | + offset = off; |
| 113 | + } |
| 114 | + |
| 115 | + State getStatus() const |
| 116 | + { |
| 117 | + return state; |
| 118 | + } |
| 119 | + |
| 120 | + void setStatus(State newStatus) |
| 121 | + { |
| 122 | + state = newStatus; |
| 123 | + } |
| 124 | + }; |
| 125 | + |
| 126 | + static void Initialize(std::vector<uint32_t> colIds, |
| 127 | + std::vector<uint64_t> bytes, |
| 128 | + std::vector<std::string> columnNames); |
| 129 | + |
| 130 | + static void InitializeBuffers(); |
| 131 | + |
| 132 | + static std::shared_ptr<ByteBuffer> GetBuffer(uint32_t colId, uint64_t byte, |
| 133 | + std::string columnName); |
| 134 | + |
| 135 | + static int64_t GetBufferId(); |
54 | 136 |
|
55 | 137 | static void Switch(); |
56 | 138 |
|
57 | 139 | static void Reset(); |
58 | 140 |
|
| 141 | + static std::shared_ptr<BufferPoolEntry> AddNewBuffer(size_t size); |
| 142 | + |
| 143 | + static int getRingIndex(uint32_t colId); |
| 144 | + |
| 145 | + static std::shared_ptr<ByteBuffer> AllocateNewBuffer( |
| 146 | + std::shared_ptr<BufferPoolManagedEntry> currentBufferManagedEntry, |
| 147 | + uint32_t colId, uint64_t byte, std::string columnName); |
| 148 | + |
| 149 | + static std::shared_ptr<ByteBuffer> ReusePreviousBuffer( |
| 150 | + std::shared_ptr<BufferPoolManagedEntry> currentBufferManagedEntry, |
| 151 | + uint32_t colId, uint64_t byte, std::string columnName); |
| 152 | + |
| 153 | + static void PrintStats() |
| 154 | + { |
| 155 | + // Get the ID of the current thread |
| 156 | + std::thread::id tid = std::this_thread::get_id(); |
| 157 | + |
| 158 | + // Print global buffer usage: used size / free size |
| 159 | + // Convert thread ID to integer for readability using hash |
| 160 | + printf("Thread %zu -> Global buffer usage: %ld / %ld\n", |
| 161 | + std::hash<std::thread::id>{}(tid), globalUsedSize, |
| 162 | + globalFreeSize); |
| 163 | + |
| 164 | + // Print thread-local statistics for Buffer0 |
| 165 | + printf("Thread %zu -> Buffer0 usage: %zu, Buffer count: %d\n", |
| 166 | + std::hash<std::thread::id>{}(tid), threadLocalUsedSize[0], |
| 167 | + threadLocalBufferCount[0]); |
| 168 | + |
| 169 | + // Print thread-local statistics for Buffer1 |
| 170 | + printf("Thread %zu -> Buffer1 usage: %zu, Buffer count: %d\n", |
| 171 | + std::hash<std::thread::id>{}(tid), threadLocalUsedSize[1], |
| 172 | + threadLocalBufferCount[1]); |
| 173 | + } |
59 | 174 | private: |
60 | 175 | BufferPool() = default; |
| 176 | + // global |
| 177 | + static std::mutex bufferPoolMutex; |
61 | 178 |
|
62 | | - static thread_local int colCount; |
63 | | - static thread_local std::map<uint32_t, uint64_t> |
64 | | - nrBytes; |
| 179 | + // thread local |
65 | 180 | static thread_local bool isInitialized; |
66 | | - static thread_local std::map<uint32_t, std::shared_ptr < ByteBuffer>> |
67 | | - buffers[2]; |
68 | | - static std::shared_ptr <DirectIoLib> directIoLib; |
| 181 | + static thread_local std::vector<std::shared_ptr<BufferPoolEntry>> |
| 182 | + registeredBuffers[2]; |
| 183 | + static thread_local long globalUsedSize; |
| 184 | + static thread_local long globalFreeSize; |
| 185 | + static thread_local std::shared_ptr<DirectIoLib> directIoLib; |
| 186 | + static thread_local int nextRingIndex; |
| 187 | + static thread_local std::shared_ptr<BufferPoolEntry> |
| 188 | + nextEmptyBufferPoolEntry[2]; |
| 189 | + static thread_local int colCount; |
69 | 190 | static thread_local int currBufferIdx; |
70 | 191 | static thread_local int nextBufferIdx; |
| 192 | + static thread_local std::map<uint32_t, std::shared_ptr<ByteBuffer>> |
| 193 | + buffersAllocated[2]; |
71 | 194 | friend class DirectUringRandomAccessFile; |
| 195 | + |
| 196 | + static thread_local std::unordered_map< |
| 197 | + uint32_t, std::shared_ptr<BufferPoolManagedEntry>> |
| 198 | + ringBufferMap[2]; |
| 199 | + |
| 200 | + static thread_local size_t threadLocalUsedSize[2]; |
| 201 | + static thread_local int threadLocalBufferCount[2]; |
72 | 202 | }; |
73 | 203 | #endif // DUCKDB_BUFFERPOOL_H |
0 commit comments