- optimize memory usage

This commit is contained in:
2026-06-21 14:02:40 +03:00
parent 1e020a7b5f
commit e8a71d5b03
14 changed files with 296 additions and 44 deletions
+179
View File
@@ -0,0 +1,179 @@
#pragma once
/// @file
/// @brief A thread-local fixed-block memory pool plus a std-conforming Allocator wrapper, used to
/// pool the small, short-lived, per-RTP-packet shared_ptr nodes produced by std::allocate_shared
/// (e.g. allocate_shared<jrtplib::RTPPacket> on the capture path and allocate_shared<RtpBuffer::
/// Packet> in the jitter buffer). Those objects are fixed-size and churn at the packet rate, so a
/// pool removes them from the general allocator's hot path.
///
/// Pooling is active by default. Define HL_RTP_POOL=0 at compile time to make hl::PoolAllocator a
/// transparent passthrough to the global allocator (i.e. allocate_shared behaves like make_shared)
/// for A/B benchmarking without touching the call sites.
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <new>
#include <vector>
#ifndef HL_RTP_POOL
# define HL_RTP_POOL 1
#endif
namespace hl
{
#if HL_RTP_POOL
/// @class FixedBlockPool
/// A process-wide, fixed-block pool with a lock-free thread-local fast path. Derived from the
/// pcpp Layer pool design: uniform blocks (header + 240-byte payload) carved from chunks of
/// BlocksPerChunk blocks, an intrusive thread-local free list, and a per-block header tag so
/// deallocate() can tell pooled blocks from the global-allocator fallback used for oversized
/// requests, regardless of the freeing thread.
///
/// A block may be freed on a different thread than the one that allocated it; it joins the
/// freeing thread's cache. To keep a producer/consumer hand-off (allocate on thread A, free on
/// thread B) from growing without bound, each thread's cache is capped: once it holds
/// MaxCachedBlocks blocks, a batch of BlocksPerChunk blocks is moved to a mutex-protected shared
/// list, and an empty cache is refilled from that shared list before a new chunk is carved. A
/// thread's remaining cached blocks are handed to the shared list when the thread exits.
///
/// Chunks are never returned to the global allocator: the registry is intentionally immortal so
/// that pooled objects destroyed during static destruction (or by threads still running at
/// exit) never touch freed chunk memory.
class FixedBlockPool
{
public:
    /// Usable bytes handed back to the caller from a pooled block. Comfortably covers the
    /// shared_ptr nodes we pool (control block + RTPPacket / RtpBuffer::Packet, ~90-130 bytes).
    static constexpr std::size_t PayloadSize = 240;

    /// Number of blocks carved from each chunk; also the size of a batch spilled to the shared list.
    static constexpr std::size_t BlocksPerChunk = 256;

    /// Returns uninitialised storage for at least @p size bytes, aligned for std::max_align_t.
    /// Requests up to PayloadSize are served from the calling thread's cache; larger requests go
    /// straight to the global allocator (tagged so deallocate() routes them back there).
    /// @throws std::bad_alloc if memory cannot be obtained or @p size is too large to represent
    ///         once the block header is added.
    static void* allocate(std::size_t size)
    {
        if (size > PayloadSize)
        {
            // Adding the header must not wrap: a wrapped sum would yield a tiny allocation that
            // the caller then overruns.
            if (size > static_cast<std::size_t>(-1) - HeaderSize)
                throw std::bad_alloc();
            uint8_t* raw = static_cast<uint8_t*>(::operator new(size + HeaderSize));
            tagOf(raw) = TagGlobal;
            return raw + HeaderSize;
        }

        LocalCache& cache = localCache();
        if (cache.head == nullptr)
        {
            armThreadExitFlush();
            registry().refill(cache); // throws before touching the cache on failure
        }
        uint8_t* block = static_cast<uint8_t*>(cache.head);
        cache.head = linkOf(block).next;
        --cache.count;
        return block + HeaderSize;
    }

    /// Releases storage obtained from allocate(). Null is ignored. Pooled blocks are pushed onto
    /// the calling thread's cache (which may differ from the allocating thread); anything else is
    /// returned to the global allocator.
    static void deallocate(void* ptr) noexcept
    {
        if (ptr == nullptr)
            return;

        uint8_t* block = static_cast<uint8_t*>(ptr) - HeaderSize;
        if (tagOf(block) != TagPool)
        {
            ::operator delete(static_cast<void*>(block));
            return;
        }

        LocalCache& cache = localCache();
        if (cache.head == nullptr)
            armThreadExitFlush(); // a thread that only ever frees must still flush on exit
        linkOf(block).next = cache.head;
        cache.head = block;
        ++cache.count;
        if (cache.count >= MaxCachedBlocks)
            spill(cache);
    }

private:
    static constexpr std::size_t HeaderSize =
        alignof(std::max_align_t) >= sizeof(uint64_t) ? alignof(std::max_align_t) : sizeof(uint64_t);
    static constexpr std::size_t BlockSize = HeaderSize + PayloadSize;

    /// Per-thread cache cap. Spilling one batch at twice the batch size leaves a full batch
    /// behind, so a thread hovering around the cap does not take the lock on every call.
    static constexpr std::size_t MaxCachedBlocks = 2 * BlocksPerChunk;

    static constexpr uint64_t TagPool = 0x504F4F4C52545008ULL;   // "POOLRTP\b"
    static constexpr uint64_t TagGlobal = 0x474C4F42524C0808ULL; // "GLOBRL\b\b"

    /// Free-list bookkeeping overlaid on the payload of a block while it is not in use.
    /// nextBatch/batchSize are only meaningful on the first block of a batch in the shared list.
    struct FreeLink
    {
        void* next;
        void* nextBatch;
        std::size_t batchSize;
    };

    /// Trivial per-thread state (no destructor), so it stays usable while other thread-local
    /// destructors are running.
    struct LocalCache
    {
        void* head;
        std::size_t count;
    };

    static_assert(sizeof(FreeLink) <= PayloadSize, "free-list link must fit in a block payload");
    static_assert(HeaderSize % alignof(FreeLink) == 0, "payload must be aligned for FreeLink");
    static_assert(BlockSize % alignof(std::max_align_t) == 0,
                  "every block in a chunk must keep max_align_t alignment");

    static uint64_t& tagOf(void* block) noexcept
    {
        return *reinterpret_cast<uint64_t*>(block);
    }

    static FreeLink& linkOf(void* block) noexcept
    {
        return *reinterpret_cast<FreeLink*>(static_cast<uint8_t*>(block) + HeaderSize);
    }

    static LocalCache& localCache() noexcept
    {
        static thread_local LocalCache cache = {nullptr, 0};
        return cache;
    }

    /// Hands whatever is left in the thread's cache to the shared list when the thread exits.
    struct ThreadExitFlush
    {
        ThreadExitFlush() noexcept {}
        ~ThreadExitFlush()
        {
            LocalCache& cache = localCache();
            if (cache.head == nullptr)
                return;
            void* batchHead = cache.head;
            const std::size_t batchSize = cache.count;
            cache.head = nullptr;
            cache.count = 0;
            registry().donate(batchHead, batchSize);
        }
    };

    /// Ensures the calling thread has its exit-time flush registered (no-op after the first call).
    static void armThreadExitFlush() noexcept
    {
        static thread_local ThreadExitFlush flusher;
        (void)flusher;
    }

    /// Moves the BlocksPerChunk most recently freed blocks from @p cache to the shared list.
    /// Precondition: the cache holds at least BlocksPerChunk blocks.
    static void spill(LocalCache& cache) noexcept
    {
        void* batchHead = cache.head;
        void* batchTail = batchHead;
        for (std::size_t i = 1; i < BlocksPerChunk; ++i)
            batchTail = linkOf(batchTail).next;

        cache.head = linkOf(batchTail).next;
        cache.count -= BlocksPerChunk;
        linkOf(batchTail).next = nullptr;
        registry().donate(batchHead, BlocksPerChunk);
    }

    class ChunkRegistry
    {
    public:
        ChunkRegistry() : m_FreeBatches(nullptr) {}
        ChunkRegistry(const ChunkRegistry&) = delete;
        ChunkRegistry& operator=(const ChunkRegistry&) = delete;

        /// Fills an empty @p cache, preferring a recycled batch over carving a new chunk.
        /// Leaves @p cache untouched if it throws.
        void refill(LocalCache& cache)
        {
            {
                std::lock_guard<std::mutex> lock(m_Mutex);
                if (m_FreeBatches != nullptr)
                {
                    void* batchHead = m_FreeBatches;
                    m_FreeBatches = linkOf(batchHead).nextBatch;
                    cache.head = batchHead;
                    cache.count = linkOf(batchHead).batchSize;
                    return;
                }
            }
            void* list = carveNewChunk();
            cache.head = list;
            cache.count = BlocksPerChunk;
        }

        /// Publishes a null-terminated list of @p batchSize free blocks for any thread to reuse.
        void donate(void* batchHead, std::size_t batchSize) noexcept
        {
            FreeLink& link = linkOf(batchHead);
            link.batchSize = batchSize;
            std::lock_guard<std::mutex> lock(m_Mutex);
            link.nextBatch = m_FreeBatches;
            m_FreeBatches = batchHead;
        }

    private:
        /// Obtains one chunk from the global allocator, records it, and threads its blocks into
        /// a free list (returned head first).
        void* carveNewChunk()
        {
            const std::size_t chunkBytes = BlockSize * BlocksPerChunk;
            uint8_t* chunk = static_cast<uint8_t*>(::operator new(chunkBytes));
            try
            {
                std::lock_guard<std::mutex> lock(m_Mutex);
                m_Chunks.push_back(chunk);
            }
            catch (...)
            {
                // Do not leak the chunk if it could not be recorded.
                ::operator delete(chunk);
                throw;
            }

            void* list = nullptr;
            for (std::size_t i = 0; i < BlocksPerChunk; ++i)
            {
                uint8_t* block = chunk + i * BlockSize;
                tagOf(block) = TagPool;
                linkOf(block).next = list;
                list = block;
            }
            return list;
        }

        std::mutex m_Mutex;
        std::vector<uint8_t*> m_Chunks; ///< Every chunk ever carved; kept for the process lifetime.
        void* m_FreeBatches;            ///< Intrusive stack of recycled batches (guarded by m_Mutex).
    };

    /// The registry is created on first use and deliberately never destroyed: blocks may still be
    /// live (or be freed) after static destruction has begun.
    static ChunkRegistry& registry()
    {
        static ChunkRegistry* const instance = new ChunkRegistry;
        return *instance;
    }
};
#endif // HL_RTP_POOL
/// @class PoolAllocator
/// A stateless, std-conforming Allocator suitable for std::allocate_shared. When HL_RTP_POOL is
/// enabled it forwards every request to FixedBlockPool (which pools requests that fit a block and
/// falls back to the global allocator for larger ones); otherwise it delegates directly to the
/// global allocator, matching make_shared. All instances compare equal, so storage obtained
/// through one instance may be released through any other.
///
/// Storage is obtained without an alignment argument, so over-aligned T (alignment greater than
/// std::max_align_t) is not supported.
template <class T> struct PoolAllocator
{
    using value_type = T;

    PoolAllocator() noexcept = default;

    /// Rebinding conversion required by the Allocator requirements; there is no state to copy.
    template <class U> PoolAllocator(const PoolAllocator<U>&) noexcept {}

    /// Allocates uninitialised storage for @p n objects of type T.
    /// @throws std::bad_array_new_length if n * sizeof(T) is not representable in std::size_t.
    /// @throws std::bad_alloc if the underlying allocation fails.
    T* allocate(std::size_t n)
    {
        // Reject counts whose byte size would wrap; a wrapped product would silently produce a
        // buffer far smaller than the caller asked for.
        if (n > static_cast<std::size_t>(-1) / sizeof(T))
            throw std::bad_array_new_length();
#if HL_RTP_POOL
        return static_cast<T*>(FixedBlockPool::allocate(n * sizeof(T)));
#else
        return static_cast<T*>(::operator new(n * sizeof(T)));
#endif
    }

    /// Releases storage previously returned by allocate(); the element count is not needed.
    void deallocate(T* p, std::size_t /*n*/) noexcept
    {
#if HL_RTP_POOL
        FixedBlockPool::deallocate(p);
#else
        ::operator delete(static_cast<void*>(p));
#endif
    }

    template <class U> bool operator==(const PoolAllocator<U>&) const noexcept { return true; }
    template <class U> bool operator!=(const PoolAllocator<U>&) const noexcept { return false; }
};
} // namespace hl