Skip to content

Commit

Permalink
improve
Browse files Browse the repository at this point in the history
  • Loading branch information
beats-dh committed Aug 18, 2024
1 parent aad48d6 commit e367e87
Show file tree
Hide file tree
Showing 14 changed files with 177 additions and 83 deletions.
12 changes: 6 additions & 6 deletions src/creatures/combat/condition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,12 +443,12 @@ void ConditionAttributes::addCondition(std::shared_ptr<Creature> creature, const
endCondition(creature);

// Apply the new one
memcpy(skills, conditionAttrs->skills, sizeof(skills));
memcpy(skillsPercent, conditionAttrs->skillsPercent, sizeof(skillsPercent));
memcpy(stats, conditionAttrs->stats, sizeof(stats));
memcpy(statsPercent, conditionAttrs->statsPercent, sizeof(statsPercent));
memcpy(buffs, conditionAttrs->buffs, sizeof(buffs));
memcpy(buffsPercent, conditionAttrs->buffsPercent, sizeof(buffsPercent));
simd_memcpy(skills, conditionAttrs->skills, std::size(skills));
simd_memcpy(skillsPercent, conditionAttrs->skillsPercent, std::size(skillsPercent));
simd_memcpy(stats, conditionAttrs->stats, std::size(stats));
simd_memcpy(statsPercent, conditionAttrs->statsPercent, std::size(statsPercent));
simd_memcpy(buffs, conditionAttrs->buffs, std::size(buffs));
simd_memcpy(buffsPercent, conditionAttrs->buffsPercent, std::size(buffsPercent));

// Using std::array can only increment to the new instead of use memcpy
absorbs = conditionAttrs->absorbs;
Expand Down
48 changes: 48 additions & 0 deletions src/creatures/combat/condition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,54 @@ class Condition : public SharedObject {
bool isPersistent() const;
bool isRemovableOnDeath() const;

/**
 * @brief Copies `count` elements of type T from `src` into `dest`.
 *
 * The copy is a raw byte copy of `count * sizeof(T)` bytes, so T is expected
 * to be a type for which a bytewise copy is valid (the callers pass plain
 * integer arrays). The source and destination ranges must not overlap.
 *
 * The previous hand-written AVX2/SSE2 implementation finished the copy with
 * stores through `uint64_t*`/`uint32_t*`/`uint16_t*` obtained by
 * reinterpret_cast from a byte pointer. Those accesses violate the strict
 * aliasing rule and may be misaligned, which is undefined behaviour.
 * std::memcpy has no such restriction and was already the fallback used
 * when SSE2 is unavailable, so it is now used on every target.
 *
 * @param dest  Destination buffer with room for at least `count` elements.
 * @param src   Source buffer holding at least `count` elements.
 * @param count Number of elements (not bytes) to copy.
 */
template <typename T>
static void simd_memcpy(T* dest, const T* src, size_t count) {
	// Nothing to do; also avoids handing possibly-null pointers to memcpy.
	if (count == 0) {
		return;
	}

	std::memcpy(dest, src, count * sizeof(T));
}

protected:
uint8_t drainBodyStage = 0;
int64_t endTime {};
Expand Down
20 changes: 10 additions & 10 deletions src/game/scheduling/dispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ void Dispatcher::executeScheduledEvents() {

if (task->execute() && task->isCycle()) {
task->updateTime();
threadScheduledTasks.emplace_back(task);
threadScheduledTasks.emplace(task);
} else {
scheduledTasksRef.erase(task->getId());
}
Expand Down Expand Up @@ -174,14 +174,16 @@ void Dispatcher::mergeEvents() {
constexpr uint8_t serial = static_cast<uint8_t>(TaskGroup::Serial);

for (const auto &thread : threads) {
std::scoped_lock lock(thread->mutex);
if (!thread->tasks[serial].empty()) {
m_tasks[serial].insert(m_tasks[serial].end(), make_move_iterator(thread->tasks[serial].begin()), make_move_iterator(thread->tasks[serial].end()));
thread->tasks[serial].clear();
{
std::scoped_lock lock(thread->mutex);
if (!thread->tasks[serial].empty()) {
m_tasks[serial].insert(m_tasks[serial].end(), make_move_iterator(thread->tasks[serial].begin()), make_move_iterator(thread->tasks[serial].end()));
thread->tasks[serial].clear();
}
}

if (!thread->scheduledTasks.empty()) {
scheduledTasks.insert(make_move_iterator(thread->scheduledTasks.begin()), make_move_iterator(thread->scheduledTasks.end()));
scheduledTasks.insert(thread->scheduledTasks.begin(), thread->scheduledTasks.end());
thread->scheduledTasks.clear();
}
}
Expand Down Expand Up @@ -211,11 +213,9 @@ void Dispatcher::addEvent(std::function<void(void)> &&f, std::string_view contex

uint64_t Dispatcher::scheduleEvent(const std::shared_ptr<Task> &task) {
const auto &thread = getThreadTask();
std::scoped_lock lock(thread->mutex);

auto eventId = scheduledTasksRef
.emplace(task->getId(), thread->scheduledTasks.emplace_back(task))
.first->first;
thread->scheduledTasks.emplace(task);

Check warning on line 217 in src/game/scheduling/dispatcher.cpp

View workflow job for this annotation

GitHub Actions / Qodana for C/C++

misra-cpp2008-0-1-7

MISRA 0-1-7: The value returned by a function having non-void return type that is not an overloaded operator shall always be used
auto eventId = scheduledTasksRef.emplace(task->getId(), task).first->first;

notify();
return eventId;
Expand Down
13 changes: 6 additions & 7 deletions src/game/scheduling/dispatcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,10 @@ class Dispatcher {
for (uint_fast16_t i = 0; i < threads.capacity(); ++i) {
threads.emplace_back(std::make_unique<ThreadTask>());
}
};

// Ensures that we don't accidentally copy it
scheduledTasksRef.reserve(2000);
}

Dispatcher(const Dispatcher &) = delete;
Dispatcher operator=(const Dispatcher &) = delete;

Expand Down Expand Up @@ -195,9 +196,8 @@ class Dispatcher {
ThreadPool &threadPool;
std::condition_variable signalSchedule;
std::atomic_bool hasPendingTasks = false;
std::mutex dummyMutex; // This is only used for signaling the condition variable and not as an actual lock.
std::mutex dummyMutex;

// Thread Events
struct ThreadTask {
ThreadTask() {
for (auto &task : tasks) {
Expand All @@ -207,12 +207,11 @@ class Dispatcher {
}

std::array<std::vector<Task>, static_cast<uint8_t>(TaskGroup::Last)> tasks;
std::vector<std::shared_ptr<Task>> scheduledTasks;
phmap::parallel_flat_hash_set_m<std::shared_ptr<Task>> scheduledTasks;
std::mutex mutex;
};
std::vector<std::unique_ptr<ThreadTask>> threads;

// Main Events
std::vector<std::unique_ptr<ThreadTask>> threads;
std::array<std::vector<Task>, static_cast<uint8_t>(TaskGroup::Last)> m_tasks;
phmap::btree_multiset<std::shared_ptr<Task>, Task::Compare> scheduledTasks;
phmap::parallel_flat_hash_map_m<uint64_t, std::shared_ptr<Task>> scheduledTasksRef;
Expand Down
57 changes: 28 additions & 29 deletions src/game/scheduling/task.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,49 +69,48 @@ class Task {
}

bool hasTraceableContext() const {
const static auto tasksContext = std::unordered_set<std::string_view>({ "Decay::checkDecay",
"Dispatcher::asyncEvent",
"Game::checkCreatureAttack",
"Game::checkCreatureWalk",
"Game::checkCreatures",
"Game::checkImbuements",
"Game::checkLight",
"Game::createFiendishMonsters",
"Game::createInfluencedMonsters",
"Game::updateCreatureWalk",
"Game::updateForgeableMonsters",
"GlobalEvents::think",
"LuaEnvironment::executeTimerEvent",
"Modules::executeOnRecvbyte",
"OutputMessagePool::sendAll",
"ProtocolGame::addGameTask",
"ProtocolGame::parsePacketFromDispatcher",
"Raids::checkRaids",
"SpawnMonster::checkSpawnMonster",
"SpawnMonster::scheduleSpawn",
"SpawnMonster::startup",
"SpawnNpc::checkSpawnNpc",
"Webhook::run",
"Protocol::sendRecvMessageCallback" });
const static std::unordered_set<std::string_view> tasksContext = {
"Decay::checkDecay",
"Dispatcher::asyncEvent",
"Game::checkCreatureAttack",
"Game::checkCreatureWalk",
"Game::checkCreatures",
"Game::checkImbuements",
"Game::checkLight",
"Game::createFiendishMonsters",
"Game::createInfluencedMonsters",
"Game::updateCreatureWalk",
"Game::updateForgeableMonsters",
"GlobalEvents::think",
"LuaEnvironment::executeTimerEvent",
"Modules::executeOnRecvbyte",
"OutputMessagePool::sendAll",
"ProtocolGame::addGameTask",
"ProtocolGame::parsePacketFromDispatcher",
"Raids::checkRaids",
"SpawnMonster::checkSpawnMonster",
"SpawnMonster::scheduleSpawn",
"SpawnMonster::startup",
"SpawnNpc::checkSpawnNpc",
"Webhook::run",
"Protocol::sendRecvMessageCallback"
};

return tasksContext.contains(context);
}

struct Compare {
bool operator()(const std::shared_ptr<Task> &a, const std::shared_ptr<Task> &b) const {
return a->utime < b->utime;
return a->getTime() < b->getTime();
}
};

std::function<void(void)> func = nullptr;
std::function<void(void)> func;
std::string context;

int64_t utime = 0;
int64_t expiration = 0;

uint64_t id = 0;
uint32_t delay = 0;

bool cycle = false;
bool log = true;

Expand Down
3 changes: 0 additions & 3 deletions src/io/fileloader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,10 @@ namespace OTB {
Identifier fileIdentifier;

#if defined(__AVX2__)
// Use AVX2 para copiar 32 bytes de cada vez (caso aplicável)
_mm256_storeu_si256(reinterpret_cast<__m256i*>(fileIdentifier.data()), _mm256_loadu_si256(reinterpret_cast<const __m256i*>(fileContents.data())));

Check warning on line 27 in src/io/fileloader.cpp

View workflow job for this annotation

GitHub Actions / Qodana for C/C++

pro-type-reinterpret-cast

do not use reinterpret_cast

Check warning on line 27 in src/io/fileloader.cpp

View workflow job for this annotation

GitHub Actions / Qodana for C/C++

pro-type-reinterpret-cast

do not use reinterpret_cast
#elif defined(__SSE2__)
// Use SSE2 para copiar 16 bytes de cada vez
_mm_storeu_si128(reinterpret_cast<__m128i*>(fileIdentifier.data()), _mm_loadu_si128(reinterpret_cast<const __m128i*>(fileContents.data())));
#else
// Fallback para std::copy se nem AVX2 nem SSE2 estiverem disponíveis
std::copy(fileContents.begin(), fileContents.begin() + fileIdentifier.size(), fileIdentifier.begin());
#endif

Expand Down
3 changes: 0 additions & 3 deletions src/io/fileloader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ class PropStream {
remaining -= 1;
}
#else
// Fallback para memcpy se nem AVX2 nem SSE2 estiverem disponíveis
memcpy(dst, src, remaining);
#endif

Expand Down Expand Up @@ -180,7 +179,6 @@ class PropStream {
remaining -= 1;
}
#else
// Fallback para memcpy se nem AVX2 nem SSE2 estiverem disponíveis
memcpy(dst, src, remaining);
#endif

Expand Down Expand Up @@ -271,7 +269,6 @@ class PropWriteStream {
*dst = *addr;
}
#else
// Fallback para std::copy se nem AVX2 nem SSE2 estiverem disponíveis
std::copy(addr, addr + remaining, std::back_inserter(buffer));
#endif
}
Expand Down
6 changes: 6 additions & 0 deletions src/security/rsa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,13 @@ uint16_t RSA::decodeLength(char*&pos) const {
default:
break;
}

#if defined(__SSE2__)
__m128i temp = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buffer));

Check warning on line 186 in src/security/rsa.cpp

View workflow job for this annotation

GitHub Actions / Qodana for C/C++

pro-type-reinterpret-cast

do not use reinterpret_cast
length = _mm_cvtsi128_si32(temp);

Check warning on line 187 in src/security/rsa.cpp

View workflow job for this annotation

GitHub Actions / Qodana for C/C++

misra-cpp2008-5-0-6

MISRA 5-0-6: An implicit integral or floating-point conversion shall not reduce the size of the underlying type
#else
std::memcpy(&length, buffer, sizeof(length));
#endif
}
return length;
}
Expand Down
3 changes: 0 additions & 3 deletions src/server/network/message/networkmessage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ void NetworkMessage::addString(const std::string &value, const std::string &func
remaining -= 1;
}
#else
// Fallback to original method using memcpy if neither AVX2 nor SSE2 are available
memcpy(dst, src, remaining);
#endif

Expand Down Expand Up @@ -173,7 +172,6 @@ void NetworkMessage::addBytes(const char* bytes, size_t size) {
remaining -= 1;
}
#else
// Fallback to original method using memcpy if neither AVX2 nor SSE2 are available
memcpy(dst, src, remaining);
#endif

Expand Down Expand Up @@ -231,7 +229,6 @@ void NetworkMessage::addPaddingBytes(size_t n) {
remaining -= 1;
}
#else
// Fallback para o método original usando memset se nem AVX2 nem SSE2 estiverem disponíveis
memset(dst, 0x33, remaining);
#endif

Expand Down
4 changes: 0 additions & 4 deletions src/server/network/message/networkmessage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,7 @@ class NetworkMessage {
remaining -= 1;
}
#else
// Fallback para o método original usando memcpy se nem AVX2 nem SSE2 estiverem disponíveis
memcpy(&v, buffer + info.position, sizeof(T));
spdlog::info("[{}]: AVX2/SSE2 not available, used memcpy to load {} bytes.", __FUNCTION__, sizeof(T));
#endif

info.position += sizeof(T);
Expand Down Expand Up @@ -178,9 +176,7 @@ class NetworkMessage {
remaining -= 1;
}
#else
// Fallback to original method using memcpy if neither AVX2 nor SSE2 are available
memcpy(dst, src, remaining);
spdlog::info("[{}]: AVX2/SSE2 not available, used memcpy to copy {} bytes.", __FUNCTION__, remaining);
#endif

info.position += sizeof(T);
Expand Down
3 changes: 0 additions & 3 deletions src/server/network/message/outputmessage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ class OutputMessage : public NetworkMessage {
remaining -= 1;
}
#else
// Fallback to original method using memcpy if neither AVX2 nor SSE2 are available
memcpy(dst, src, remaining);
#endif

Expand Down Expand Up @@ -123,7 +122,6 @@ class OutputMessage : public NetworkMessage {
remaining -= 1;
}
#else
// Fallback to original method using memcpy if neither AVX2 nor SSE2 are available
memcpy(dst, src, remaining);
#endif

Expand Down Expand Up @@ -160,7 +158,6 @@ class OutputMessage : public NetworkMessage {
*reinterpret_cast<uint8_t*>(buffer + outputBufferStart) = *reinterpret_cast<const uint8_t*>(&addHeader);
}
#else
// Fallback to original method using memcpy if SSE2 is not available
memcpy(buffer + outputBufferStart, &addHeader, sizeof(T));
#endif

Expand Down
Loading

0 comments on commit e367e87

Please sign in to comment.