Skip to content

Commit 6960aba

Browse files
committed
fix: add Boyer–Moore string-search algorithm instead of SSE2
1 parent 023776e commit 6960aba

File tree

3 files changed

+158
-170
lines changed

3 files changed

+158
-170
lines changed

cmake/platform/linux.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ set(PLATFORM_COMPILE_OPTIONS
88
-Wall
99
-Wno-array-bounds -Wno-attributes
1010

11-
-mtune=generic -mmmx -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2
1211
-fvisibility=default -fPIC
1312
)
1413

include/dynlibutils/module.hpp

Lines changed: 44 additions & 168 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111

1212
#include "memaddr.hpp"
1313

14-
#include <emmintrin.h>
15-
1614
#include <array>
1715
#include <cassert>
1816
#include <cmath>
17+
#include <limits>
18+
#include <tuple>
1919
#include <string>
2020
#include <string_view>
2121
#include <type_traits>
@@ -53,24 +53,14 @@ struct Section_t : public CMemory // Start address of the section.
5353
std::string m_svSectionName; // Name of the section.
5454
}; // struct Section_t
5555

56-
static constexpr std::size_t s_nDefaultPatternSize = 256;
57-
static constexpr std::size_t s_nMaxSimdBlocks = 1 << 6; // 64 blocks = 1024 bytes per chunk.
56+
static constexpr std::size_t s_nDefaultPatternSize = std::numeric_limits<std::uint8_t>::max() + 1;
5857

59-
template<std::size_t SIZE = 0l>
58+
template<std::size_t SIZE>
6059
struct Pattern_t
6160
{
62-
static constexpr std::size_t sm_nMaxSize = SIZE;
63-
64-
// Constructors.
65-
constexpr Pattern_t(const Pattern_t<SIZE>& copyFrom) noexcept : m_nSize(copyFrom.m_nSize), m_aBytes(copyFrom.m_aBytes), m_aMask(copyFrom.m_aMask) {}
66-
constexpr Pattern_t(Pattern_t<SIZE>&& moveFrom) noexcept : m_nSize(std::move(moveFrom.m_nSize)), m_aBytes(std::move(moveFrom.m_aBytes)), m_aMask(std::move(moveFrom.m_aMask)) {}
67-
constexpr Pattern_t(std::size_t size = 0, const std::array<uint8_t, SIZE>& bytes = {}, const std::array<char, SIZE>& mask = {}) noexcept : m_nSize(size), m_aBytes(bytes), m_aMask(mask) {} // Default one.
68-
constexpr Pattern_t(std::size_t &&size, std::array<uint8_t, SIZE>&& bytes, const std::array<char, SIZE>&& mask) noexcept : m_nSize(std::move(size)), m_aBytes(std::move(bytes)), m_aMask(std::move(mask)) {}
69-
70-
// Fields. Available to anyone (so structure).
71-
std::size_t m_nSize;
72-
std::array<std::uint8_t, SIZE> m_aBytes;
73-
std::array<char, SIZE> m_aMask;
61+
std::array<std::uint8_t, SIZE == 0 ? 1 : SIZE> m_aBytes{};
62+
std::array<char, SIZE == 0 ? 1 : SIZE> m_aMask{};
63+
std::size_t m_nSize = SIZE;
7464
}; // struct Pattern_t
7565

7666
// Concept for pattern callback.
@@ -337,7 +327,7 @@ class CAssemblyModule : public CMemory
337327

338328
public:
339329
constexpr CSignatureView() : m_pModule(nullptr) {}
340-
constexpr CSignatureView(CSignatureView&& moveFrom) : Base_t(std::move(moveFrom)), m_pModule(std::move(moveFrom.m_pModule)) {}
330+
// Move constructor: moves the pattern data out of `other` and takes over its module pointer.
// `other.m_pModule` is reset to nullptr, the same value the default constructor stores, so the
// reset does not depend on how DYNLIB_INVALID_MEMORY converts to a raw CAssemblyModule pointer.
constexpr CSignatureView(CSignatureView&& other) noexcept : Base_t(std::move(other)), m_pModule(std::exchange(other.m_pModule, nullptr)) {}
341331
constexpr CSignatureView(const Base_t& pattern, CAssemblyModule* module) : Base_t(pattern), m_pModule(module) {}
342332
constexpr CSignatureView(Base_t&& pattern, CAssemblyModule* module) : Base_t(std::move(pattern)), m_pModule(module) {}
343333

@@ -351,26 +341,27 @@ class CAssemblyModule : public CMemory
351341

352342
[[nodiscard]] CMemory Find(const CMemory pStart, const Section_t* pSection = nullptr) const
353343
{
354-
return m_pModule->FindPattern<SIZE>(CMemory(Base_t::m_aBytes.data()), std::string_view(Base_t::m_aMask.data(), Base_t::m_nSize), pStart, pSection);
344+
return m_pModule->FindPattern<SIZE>(*this, pStart, pSection);
355345
}
356346
[[nodiscard]] CMemory OffsetAndFind(const std::ptrdiff_t offset, CMemory pStart, const Section_t* pSection = nullptr) const { return Find(pStart + offset, pSection); }
357347
[[nodiscard]] CMemory OffsetFromSelfAndFind(const CMemory pStart, const Section_t* pSection = nullptr) const { return OffsetAndFind(Base_t::m_nSize, pStart, pSection); }
358348
[[nodiscard]] CMemory DerefAndFind(const std::uintptr_t deref, CMemory pStart, const Section_t* pSection = nullptr) const { return Find(pStart.Deref(deref), pSection); }
359-
}; // class CSignatureView<SIZE>
349+
}; // class CSignatureView<SIZE>
350+
351+
template<std::size_t SIZE>
352+
[[nodiscard]]
353+
inline auto CreateSignature(const Pattern_t<SIZE> &pattern)
354+
{
355+
static_assert(SIZE > 0, "Pattern size must be > 0");
356+
357+
return CSignatureView<SIZE>(pattern, this);
358+
}
360359

361360
private:
361+
[[nodiscard]] CMemory FindSignature(const std::string_view svSignature, const std::string_view svMask, const CMemory pStartAddress, const Section_t* pModuleSection) const;
362362
[[nodiscard]] CMemory GetVirtualTable(const std::string_view svTableName, bool bDecorated = false) const;
363363
[[nodiscard]] CMemory GetFunction(const std::string_view svFunctionName) const noexcept;
364-
CMemory GetAddress(const CCache& hKey) const noexcept;
365-
366-
std::string m_sPath;
367-
std::string m_sLastError;
368-
std::vector<Section_t> m_vecSections;
369-
370-
const Section_t *m_pExecutableSection;
371-
372-
mutable std::unordered_map<CCache, CMemory, CHash> m_mapCached;
373-
DYNLIB_NUA mutable Mutex m_mutex;
364+
[[nodiscard]] CMemory GetAddress(const CCache& hKey) const noexcept;
374365

375366
public:
376367
CAssemblyModule() : m_pExecutableSection(nullptr) {}
@@ -389,158 +380,34 @@ class CAssemblyModule : public CMemory
389380
bool InitFromName(const std::string_view svModuleName, bool bExtension = false);
390381
bool InitFromMemory(const CMemory pModuleMemory, bool bForce = true);
391382

392-
template<std::size_t N>
393-
[[nodiscard]]
394-
inline auto CreateSignature(const Pattern_t<N> &copyFrom)
395-
{
396-
static_assert(N > 0, "Pattern size must be > 0");
397-
398-
return CSignatureView<N>(copyFrom, this);
399-
}
400-
401-
template<std::size_t N>
402-
[[nodiscard]]
403-
inline auto CreateSignature(Pattern_t<N> &&moveFrom)
404-
{
405-
static_assert(N > 0, "Pattern size must be > 0");
406-
407-
return CSignatureView<N>(std::move(moveFrom), this);
408-
}
409-
410-
//-----------------------------------------------------------------------------
411-
// Purpose: Finds an array of bytes in process memory using SIMD instructions
412-
// Input : *pPattern
413-
// svMask
414-
// pStartAddress
415-
// *pModuleSection
416-
// Output : CMemory
417-
//-----------------------------------------------------------------------------
418-
template<std::size_t SIZE = (s_nDefaultPatternSize - 1) / 2>
419-
inline CMemory FindPattern(const CMemoryView<std::uint8_t> pPatternMem, const std::string_view svMask, const CMemory pStartAddress, const Section_t* pModuleSection) const
420-
{
421-
const auto* pPattern = pPatternMem.RCastView();
422-
423-
CCache sKey(pPattern, svMask.size(), pStartAddress, pModuleSection);
424-
if (auto pAddr = GetAddress(sKey))
425-
{
426-
return pAddr;
427-
}
428-
429-
const Section_t* pSection = pModuleSection ? pModuleSection : m_pExecutableSection;
430-
431-
if (!pSection || !pSection->IsValid())
432-
return DYNLIB_INVALID_MEMORY;
433-
434-
const std::uintptr_t base = pSection->GetAddr();
435-
const std::size_t sectionSize = pSection->m_nSectionSize;
436-
const std::size_t patternSize = svMask.size();
437-
438-
auto* pData = reinterpret_cast<std::uint8_t*>(base);
439-
const auto* pEnd = pData + sectionSize - patternSize;
440-
441-
if (pStartAddress)
442-
{
443-
auto* start = pStartAddress.RCast<std::uint8_t*>();
444-
if (start < pData || start > pEnd)
445-
return DYNLIB_INVALID_MEMORY;
446-
447-
pData = start;
448-
}
449-
450-
#if !DYNLIBUTILS_ARCH_ARM
451-
std::array<int, 64> masks = {};// 64*16 = enough masks for 1024 bytes.
452-
auto numMasks = static_cast<std::uint8_t>(std::ceil(static_cast<float>(patternSize) / 16.f));
453-
454-
for (std::uint8_t i = 0; i < numMasks; ++i)
455-
{
456-
for (std::int8_t j = static_cast<std::int8_t>(std::min<std::size_t>(patternSize - i * 16, 16)) - 1; j >= 0; --j)
457-
{
458-
if (svMask[static_cast<std::size_t>(i * 16 + j)] == 'x')
459-
{
460-
masks[i] |= 1 << j;
461-
}
462-
}
463-
}
464-
465-
const __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pPattern));
466-
__m128i xmm2, xmm3, msks;
467-
for (; pData != pEnd; _mm_prefetch(reinterpret_cast<const char*>(++pData + 64), _MM_HINT_NTA))
468-
{
469-
xmm2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pData));
470-
msks = _mm_cmpeq_epi8(xmm1, xmm2);
471-
if ((_mm_movemask_epi8(msks) & masks[0]) == masks[0])
472-
{
473-
bool found = true;
474-
for (uint8_t i = 1; i < numMasks; ++i)
475-
{
476-
xmm2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>((pData + i * 16)));
477-
xmm3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>((pPattern + i * 16)));
478-
msks = _mm_cmpeq_epi8(xmm2, xmm3);
479-
if ((_mm_movemask_epi8(msks) & masks[i]) != masks[i])
480-
{
481-
found = false;
482-
break;
483-
}
484-
}
485-
486-
if (found)
487-
{
488-
UniqueLock_t lock(m_mutex);
489-
m_mapCached[std::move(sKey)] = pData;
490-
return pData;
491-
}
492-
}
493-
}
494-
#else
495-
for (; pData != pEnd; ++pData)
496-
{
497-
bool found = false;
498-
499-
for (size_t i = 0; i < maskLen; ++i)
500-
{
501-
if (mask[i] == 'x' || pPattern[i] == *(pData + i))
502-
{
503-
found = true;
504-
}
505-
else
506-
{
507-
found = false;
508-
break;
509-
}
510-
}
511-
512-
if (found)
513-
{
514-
UniqueLock_t lock(m_mutex);
515-
m_mapCached[std::move(sKey)] = pData;
516-
return pData;
517-
}
518-
}
519-
#endif // !DYNLIBUTILS_ARCH_ARM
520-
521-
return DYNLIB_INVALID_MEMORY;
522-
}
523-
524-
template<std::size_t SIZE>
525383
[[nodiscard]]
526-
inline CMemory FindPattern(const Pattern_t<SIZE>& copyPattern, const CMemory pStartAddress = nullptr, const Section_t* pModuleSection = nullptr) const
384+
inline CMemory FindPattern(const CMemoryView<char> memory, const std::string_view svMask, const CMemory pStartAddress, const Section_t* pModuleSection) const
527385
{
528-
return FindPattern<SIZE>(copyPattern.m_aBytes.data(), std::string_view(copyPattern.m_aMask.data(), copyPattern.m_nSize), pStartAddress, pModuleSection);
386+
return FindSignature(
387+
std::string_view(memory.RCastView(), svMask.size()),
388+
svMask,
389+
pStartAddress,
390+
pModuleSection
391+
);
529392
}
530393

531394
template<std::size_t SIZE>
532395
[[nodiscard]]
533-
inline CMemory FindPattern(Pattern_t<SIZE>&& movePattern, const CMemory pStartAddress = nullptr, const Section_t* pModuleSection = nullptr) const
396+
inline CMemory FindPattern(const Pattern_t<SIZE>& pattern, const CMemory pStartAddress = nullptr, const Section_t* pModuleSection = nullptr) const
534397
{
535-
return FindPattern<SIZE>(std::move(movePattern.m_aBytes).data(), std::string_view(std::move(movePattern.m_aMask).data(), std::move(movePattern.m_nSize)), pStartAddress, pModuleSection);
398+
return FindSignature(
399+
std::string_view(pattern.m_aBytes.data(), pattern.m_nSize),
400+
std::string_view(pattern.m_aMask.data(), pattern.m_nSize),
401+
pStartAddress,
402+
pModuleSection
403+
);
536404
}
537405

538406
template<std::size_t SIZE, PatternCallback_t FUNC>
539407
[[nodiscard]]
540408
std::size_t FindAllPatterns(const CSignatureView<SIZE>& sig, const FUNC& callback, CMemory pStartAddress = nullptr, const Section_t* pModuleSection = nullptr) const
541409
{
542410
const Section_t* pSection = pModuleSection ? pModuleSection : m_pExecutableSection;
543-
544411
if (!pSection || !pSection->IsValid())
545412
return 0;
546413

@@ -586,6 +453,15 @@ class CAssemblyModule : public CMemory
586453

587454
protected:
588455
void SaveLastError();
456+
457+
std::string m_sPath;
458+
std::string m_sLastError;
459+
std::vector<Section_t> m_vecSections;
460+
461+
const Section_t *m_pExecutableSection;
462+
463+
mutable std::unordered_map<CCache, CMemory, CHash> m_mapCached;
464+
DYNLIB_NUA mutable Mutex m_mutex;
589465
}; // class CAssemblyModule
590466

591467
using CModule = CAssemblyModule<CNullMutex>;

0 commit comments

Comments
 (0)