#pragma once

/// @file
/// @brief A thread-local fixed-block memory pool plus a std-conforming Allocator wrapper, used to
/// pool the small, short-lived, per-RTP-packet shared_ptr nodes produced by std::allocate_shared
/// (e.g. allocate_shared<RTPPacket> on the capture path and allocate_shared<RtpBuffer::Packet> in
/// the jitter buffer). Those objects are fixed-size and churn at the packet rate, so a pool removes
/// them from the general allocator's hot path.
///
/// Pooling is active by default. Define HL_RTP_POOL=0 at compile time to make hl::PoolAllocator a
/// transparent passthrough to the global allocator (i.e. allocate_shared behaves like make_shared)
/// for A/B benchmarking without touching the call sites.

#include <cstddef>
#include <cstdint>
#include <limits>
#include <mutex>
#include <new>
#include <vector>

#ifndef HL_RTP_POOL
#	define HL_RTP_POOL 1
#endif

namespace hl
{

#if HL_RTP_POOL

	/// @class FixedBlockPool
	/// A process-wide, fixed-block pool with a lock-free thread-local fast path. Identical in
	/// design to the pcpp Layer pool: uniform blocks (256 bytes when the header is 16 bytes) carved
	/// from chunks of BlocksPerChunk blocks, an intrusive thread-local free list, and a per-block
	/// header tag so deallocate() can tell pooled blocks from the global-allocator fallback used
	/// for oversized requests, regardless of the freeing thread. Uniform block size makes a block
	/// allocated on one thread safe to free on another (it joins the freeing thread's free list).
	///
	/// @note Free blocks are never moved between threads: a block freed on thread B stays on B's
	/// list, and a thread whose own list is empty always carves a new chunk. Chunks are kept for
	/// the lifetime of the process.
	class FixedBlockPool
	{
	public:
		/// Usable bytes handed back to the caller from a pooled block. Comfortably covers the
		/// shared_ptr nodes we pool (control block + RTPPacket / RtpBuffer::Packet, ~90-130 bytes).
		static constexpr std::size_t PayloadSize = 240;

		/// Number of blocks carved out of every chunk obtained from the global allocator.
		static constexpr std::size_t BlocksPerChunk = 256;

		/// Allocates storage for @p size bytes.
		/// Requests of at most PayloadSize bytes are served from the calling thread's free list
		/// (refilled with a fresh chunk when empty). Larger requests fall back to ::operator new
		/// with a header tagged TagGlobal so that deallocate() can route them back.
		/// @param[in] size Number of bytes requested.
		/// @return Pointer to the usable storage (just past the block header).
		/// @throws std::bad_alloc if the global allocator fails or size + header would overflow.
		static void* allocate(std::size_t size)
		{
			if (size > PayloadSize)
			{
				// Guard the header addition: a wrapped sum would under-allocate silently.
				if (size > std::numeric_limits<std::size_t>::max() - HeaderSize)
					throw std::bad_alloc();

				uint8_t* raw = static_cast<uint8_t*>(::operator new(size + HeaderSize));
				tagOf(raw) = TagGlobal;
				return raw + HeaderSize;
			}

			void*& head = freeListHead();
			if (head == nullptr)
				head = registry().refill();

			// Pop the first free block; its payload currently stores the next-free link.
			uint8_t* block = static_cast<uint8_t*>(head);
			head = nextOf(block);
			return block + HeaderSize;
		}

		/// Releases storage previously returned by allocate().
		/// Pooled blocks are pushed onto the *calling* thread's free list; anything else is handed
		/// to ::operator delete. Passing nullptr is a no-op.
		/// @param[in] ptr Pointer obtained from allocate(), or nullptr.
		static void deallocate(void* ptr) noexcept
		{
			if (ptr == nullptr)
				return;

			uint8_t* block = static_cast<uint8_t*>(ptr) - HeaderSize;
			if (tagOf(block) == TagPool)
			{
				void*& head = freeListHead();
				nextOf(block) = head;
				head = block;
			}
			else
			{
				::operator delete(static_cast<void*>(block));
			}
		}

	private:
		/// Header bytes in front of every payload: large enough for the 64-bit tag and padded so
		/// the payload keeps max_align_t alignment.
		static constexpr std::size_t HeaderSize =
		    alignof(std::max_align_t) >= sizeof(uint64_t) ? alignof(std::max_align_t) : sizeof(uint64_t);
		static constexpr std::size_t BlockSize = HeaderSize + PayloadSize;

		static constexpr uint64_t TagPool = 0x504F4F4C52545008ULL;    // "POOLRTP\b"
		static constexpr uint64_t TagGlobal = 0x474C4F42524C0808ULL;  // "GLOBRL\b\b"

		static_assert(PayloadSize >= sizeof(void*), "payload must be able to hold the free-list link");
		static_assert(BlockSize % alignof(std::max_align_t) == 0,
		              "block size must preserve max_align_t alignment for consecutive blocks");

		/// Accesses the tag word stored at the very start of a block.
		static uint64_t& tagOf(void* block) noexcept
		{
			return *reinterpret_cast<uint64_t*>(block);
		}

		/// Accesses the next-free link, which overlays the first bytes of a free block's payload.
		static void*& nextOf(void* block) noexcept
		{
			return *reinterpret_cast<void**>(static_cast<uint8_t*>(block) + HeaderSize);
		}

		/// Returns the calling thread's free-list head.
		static void*& freeListHead() noexcept
		{
			static thread_local void* head = nullptr;
			return head;
		}

		/// Records every chunk obtained from the global allocator and carves new chunks into
		/// free lists. Only the bookkeeping vector is guarded by the mutex.
		class ChunkRegistry
		{
		public:
			/// Allocates one chunk, registers it and threads all of its blocks into a free list.
			/// @return Head of a singly linked list containing BlocksPerChunk free blocks.
			/// @throws std::bad_alloc if the chunk or the bookkeeping entry cannot be allocated.
			void* refill()
			{
				const std::size_t chunkBytes = BlockSize * BlocksPerChunk;
				uint8_t* chunk = static_cast<uint8_t*>(::operator new(chunkBytes));

				try
				{
					std::lock_guard<std::mutex> lock(m_Mutex);
					m_Chunks.push_back(chunk);
				}
				catch (...)
				{
					// The vector could not grow: release the chunk instead of leaking it.
					::operator delete(chunk);
					throw;
				}

				void* list = nullptr;
				for (std::size_t i = 0; i < BlocksPerChunk; ++i)
				{
					uint8_t* block = chunk + i * BlockSize;
					tagOf(block) = TagPool;
					nextOf(block) = list;
					list = block;
				}
				return list;
			}

		private:
			std::mutex m_Mutex;
			std::vector<uint8_t*> m_Chunks;
		};

		/// Returns the process-wide registry.
		/// The instance is intentionally never destroyed: freeing the chunks during static
		/// destruction would leave dangling blocks behind for any pooled object that is released
		/// later (another static, or a thread that is still running). The chunks stay reachable
		/// through the registry and are reclaimed by the OS at process exit.
		static ChunkRegistry& registry()
		{
			static ChunkRegistry* const instance = new ChunkRegistry();
			return *instance;
		}
	};

#endif  // HL_RTP_POOL

	/// @class PoolAllocator
	/// A stateless, std-conforming Allocator suitable for std::allocate_shared. When HL_RTP_POOL is
	/// enabled it serves single-node allocations from FixedBlockPool; otherwise (and for any request
	/// that does not fit a pooled block) it delegates to the global allocator, matching make_shared.
	/// @tparam T The value type being allocated. Over-aligned types are not supported.
	template <typename T> struct PoolAllocator
	{
		using value_type = T;

		PoolAllocator() noexcept = default;

		/// Rebinding constructor; the allocator carries no state, so there is nothing to copy.
		template <typename U> PoolAllocator(const PoolAllocator<U>&) noexcept
		{}

		/// Allocates uninitialized storage for @p n objects of type T.
		/// @param[in] n Number of objects.
		/// @return Pointer to the storage.
		/// @throws std::bad_array_new_length if n * sizeof(T) is not representable.
		/// @throws std::bad_alloc if the underlying allocation fails.
		T* allocate(std::size_t n)
		{
			static_assert(alignof(T) <= alignof(std::max_align_t),
			              "PoolAllocator does not support over-aligned types");

			// Same contract as std::allocator: reject counts whose byte size would wrap around.
			if (n > std::numeric_limits<std::size_t>::max() / sizeof(T))
				throw std::bad_array_new_length();

#if HL_RTP_POOL
			return static_cast<T*>(FixedBlockPool::allocate(n * sizeof(T)));
#else
			return static_cast<T*>(::operator new(n * sizeof(T)));
#endif
		}

		/// Releases storage obtained from allocate(). The element count is not needed.
		/// @param[in] p Pointer previously returned by allocate().
		void deallocate(T* p, std::size_t /*n*/) noexcept
		{
#if HL_RTP_POOL
			FixedBlockPool::deallocate(p);
#else
			::operator delete(static_cast<void*>(p));
#endif
		}

		/// All PoolAllocator instances are interchangeable.
		template <typename U> bool operator==(const PoolAllocator<U>&) const noexcept
		{
			return true;
		}

		template <typename U> bool operator!=(const PoolAllocator<U>&) const noexcept
		{
			return false;
		}
	};

}  // namespace hl