Skip to content

Commit

Permalink
test
Browse files Browse the repository at this point in the history
  • Loading branch information
beats-dh committed Aug 18, 2024
1 parent 7583913 commit aad48d6
Show file tree
Hide file tree
Showing 8 changed files with 682 additions and 20 deletions.
16 changes: 15 additions & 1 deletion src/creatures/creature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,14 @@ void Creature::onCreatureMove(const std::shared_ptr<Creature> &creature, const s
if (oldPos.y > newPos.y) { // north
// shift y south
for (int32_t y = mapWalkHeight - 1; --y >= 0;) {
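// Copy the row using SIMD.
// NOTE(review): the intrinsic paths move a fixed 32 (AVX2) or 16 (SSE2) bytes, while the
// fallback copies sizeof(localMapCache[y]) bytes — confirm these match the actual row size.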
#if defined(__AVX2__)
_mm256_storeu_si256(reinterpret_cast<__m256i*>(localMapCache[y + 1]), _mm256_loadu_si256(reinterpret_cast<const __m256i*>(localMapCache[y])));
#elif defined(__SSE2__)
_mm_storeu_si128(reinterpret_cast<__m128i*>(localMapCache[y + 1]), _mm_loadu_si128(reinterpret_cast<const __m128i*>(localMapCache[y])));
#else
memcpy(localMapCache[y + 1], localMapCache[y], sizeof(localMapCache[y]));
#endif
}

// update 0
Expand All @@ -521,7 +528,14 @@ void Creature::onCreatureMove(const std::shared_ptr<Creature> &creature, const s
} else if (oldPos.y < newPos.y) { // south
// shift y north
for (int32_t y = 0; y <= mapWalkHeight - 2; ++y) {
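// Copy the row using SIMD.
// NOTE(review): the intrinsic paths move a fixed 32 (AVX2) or 16 (SSE2) bytes, while the
// fallback copies sizeof(localMapCache[y]) bytes — confirm these match the actual row size.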
#if defined(__AVX2__)
_mm256_storeu_si256(reinterpret_cast<__m256i*>(localMapCache[y]), _mm256_loadu_si256(reinterpret_cast<const __m256i*>(localMapCache[y + 1])));
#elif defined(__SSE2__)
_mm_storeu_si128(reinterpret_cast<__m128i*>(localMapCache[y]), _mm_loadu_si128(reinterpret_cast<const __m128i*>(localMapCache[y + 1])));
#else
memcpy(localMapCache[y], localMapCache[y + 1], sizeof(localMapCache[y]));
#endif
}

// update mapWalkHeight - 1
Expand Down Expand Up @@ -614,7 +628,7 @@ void Creature::onCreatureMove(const std::shared_ptr<Creature> &creature, const s
onCreatureDisappear(attackedCreature, false);
} else {
if (hasExtraSwing()) {
// our target is moving lets see if we can get in hit
// our target is moving, let's see if we can get a hit
g_dispatcher().addEvent([creatureId = getID()] { g_game().checkCreatureAttack(creatureId); }, "Game::checkCreatureAttack");
}

Expand Down
11 changes: 11 additions & 0 deletions src/io/fileloader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,18 @@ namespace OTB {
}

Identifier fileIdentifier;

#if defined(__AVX2__)
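// Use AVX2 to copy 32 bytes at once (where applicable).
// NOTE(review): this moves 32 bytes, whereas the fallback copies only fileIdentifier.size()
// bytes — confirm both buffers are at least that large.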
_mm256_storeu_si256(reinterpret_cast<__m256i*>(fileIdentifier.data()), _mm256_loadu_si256(reinterpret_cast<const __m256i*>(fileContents.data())));
#elif defined(__SSE2__)
// Use SSE2 to copy 16 bytes at once
_mm_storeu_si128(reinterpret_cast<__m128i*>(fileIdentifier.data()), _mm_loadu_si128(reinterpret_cast<const __m128i*>(fileContents.data())));
#else
// Fall back to std::copy when neither AVX2 nor SSE2 is available
std::copy(fileContents.begin(), fileContents.begin() + fileIdentifier.size(), fileIdentifier.begin());
#endif

if (fileIdentifier != acceptedIdentifier && fileIdentifier != wildcard) {
throw InvalidOTBFormat {};
}
Expand Down
204 changes: 199 additions & 5 deletions src/io/fileloader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,55 @@ class PropStream {
return false;
}

memcpy(&ret, p, sizeof(T));
const char* src = p;
char* dst = reinterpret_cast<char*>(&ret);
size_t remaining = sizeof(T);

#if defined(__AVX2__)
// Use AVX2 to copy 32 bytes at a time
while (remaining >= 32) {
_mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src)));
src += 32;
dst += 32;
remaining -= 32;
}
#endif

#if defined(__SSE2__)
// Use SSE2 to copy the remaining bytes
while (remaining >= 16) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(dst), _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)));
src += 16;
dst += 16;
remaining -= 16;
}
while (remaining >= 8) {
_mm_storel_epi64(reinterpret_cast<__m128i*>(dst), _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)));
src += 8;
dst += 8;
remaining -= 8;
}
while (remaining >= 4) {
*reinterpret_cast<uint32_t*>(dst) = *reinterpret_cast<const uint32_t*>(src);
src += 4;
dst += 4;
remaining -= 4;
}
while (remaining >= 2) {
*reinterpret_cast<uint16_t*>(dst) = *reinterpret_cast<const uint16_t*>(src);
src += 2;
dst += 2;
remaining -= 2;
}
while (remaining == 1) {
*dst = *src;
remaining -= 1;
}
#else
// Fall back to memcpy when neither AVX2 nor SSE2 is available
memcpy(dst, src, remaining);
#endif

p += sizeof(T);
return true;
}
Expand All @@ -87,8 +135,56 @@ class PropStream {
}

char* str = new char[strLen + 1];
memcpy(str, p, strLen);
str[strLen] = 0;
const char* src = p;
char* dst = str;
size_t remaining = strLen;

#if defined(__AVX2__)
// Use AVX2 to copy 32 bytes at a time
while (remaining >= 32) {
_mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src)));
src += 32;
dst += 32;
remaining -= 32;
}
#endif

#if defined(__SSE2__)
// Use SSE2 to copy the remaining bytes
while (remaining >= 16) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(dst), _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)));
src += 16;
dst += 16;
remaining -= 16;
}
while (remaining >= 8) {
_mm_storel_epi64(reinterpret_cast<__m128i*>(dst), _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)));
src += 8;
dst += 8;
remaining -= 8;
}
while (remaining >= 4) {
*reinterpret_cast<uint32_t*>(dst) = *reinterpret_cast<const uint32_t*>(src);
src += 4;
dst += 4;
remaining -= 4;
}
while (remaining >= 2) {
*reinterpret_cast<uint16_t*>(dst) = *reinterpret_cast<const uint16_t*>(src);
src += 2;
dst += 2;
remaining -= 2;
}
while (remaining == 1) {
*dst = *src;
remaining -= 1;
}
#else
// Fall back to memcpy when neither AVX2 nor SSE2 is available
memcpy(dst, src, remaining);
#endif

str[strLen] = 0; // Null-terminate the string
ret.assign(str, strLen);
delete[] str;
p += strLen;
Expand Down Expand Up @@ -129,7 +225,55 @@ class PropWriteStream {
/**
 * Appends the raw object representation of `add` to the end of `buffer`.
 *
 * Exactly sizeof(T) bytes are appended, in the order they are laid out in
 * memory; no byte-order conversion is performed.
 *
 * @tparam T Type of the value to serialize.
 *           NOTE(review): a byte-wise copy is only meaningful for trivially
 *           copyable types — confirm callers only pass such types.
 * @param add Value whose bytes are appended (taken by value).
 */
template <typename T>
void write(T add) {
	const char* addr = reinterpret_cast<const char*>(&add);
	// A single range insert appends the bytes exactly once and grows the
	// buffer in one step. It replaces the hand-written AVX2/SSE2 ladder,
	// which stored through uint32_t*/uint16_t* casts on unaligned storage and
	// whose fallback path resized the buffer and then appended again.
	buffer.insert(buffer.end(), addr, addr + sizeof(T));
}

void writeString(const std::string &str) {
Expand All @@ -140,7 +284,57 @@ class PropWriteStream {
}

write(static_cast<uint16_t>(strLength));
std::copy(str.begin(), str.end(), std::back_inserter(buffer));

const char* src = str.data();
size_t remaining = strLength;
size_t pos = buffer.size();
buffer.resize(pos + remaining);

char* dst = buffer.data() + pos;

#if defined(__AVX2__)
// Use AVX2 to copy 32 bytes at a time
while (remaining >= 32) {
_mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src)));
src += 32;
dst += 32;
remaining -= 32;
}
#endif

#if defined(__SSE2__)
// Use SSE2 to copy the remaining bytes
while (remaining >= 16) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(dst), _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)));
src += 16;
dst += 16;
remaining -= 16;
}
while (remaining >= 8) {
_mm_storel_epi64(reinterpret_cast<__m128i*>(dst), _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)));
src += 8;
dst += 8;
remaining -= 8;
}
while (remaining >= 4) {
*reinterpret_cast<uint32_t*>(dst) = *reinterpret_cast<const uint32_t*>(src);
src += 4;
dst += 4;
remaining -= 4;
}
while (remaining >= 2) {
*reinterpret_cast<uint16_t*>(dst) = *reinterpret_cast<const uint16_t*>(src);
src += 2;
dst += 2;
remaining -= 2;
}
if (remaining == 1) {
*dst = *src;
}
#else
// Fall back to std::copy when neither AVX2 nor SSE2 is available
std::copy(src, src + remaining, std::back_inserter(buffer));
#endif
}

private:
Expand Down
83 changes: 81 additions & 2 deletions src/io/filestream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,97 @@ bool FileStream::read(T &ret, bool escape) {
throw std::ios_base::failure("Read failed");
}

std::array<uint8_t, sizeof(T)> array;
if (escape) {
for (int_fast8_t i = -1; ++i < size;) {
std::array<uint8_t, sizeof(T)> array;
size_t i = 0;
while (i < size) {
if (m_data[m_pos] == OTB::Node::ESCAPE) {
++m_pos;
}
array[i] = m_data[m_pos];
++m_pos;
++i;
}
uint8_t* dst = reinterpret_cast<uint8_t*>(&ret);

#if defined(__AVX2__)
// Use AVX2 to copy 32 bytes at a time
for (i = 0; i + 32 <= size; i += 32) {
_mm256_storeu_si256(reinterpret_cast<__m256i*>(dst + i), _mm256_loadu_si256(reinterpret_cast<const __m256i*>(array.data() + i)));
}
#endif

#if defined(__SSE2__)
// Use SSE2 to copy 16 bytes at a time
for (i = 0; i + 16 <= size; i += 16) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(dst + i), _mm_loadu_si128(reinterpret_cast<const __m128i*>(array.data() + i)));
}

for (; i + 8 <= size; i += 8) {
_mm_storel_epi64(reinterpret_cast<__m128i*>(dst + i), _mm_loadl_epi64(reinterpret_cast<const __m128i*>(array.data() + i)));
}
for (; i + 4 <= size; i += 4) {
*reinterpret_cast<uint32_t*>(dst + i) = *reinterpret_cast<const uint32_t*>(array.data() + i);
}
for (; i + 2 <= size; i += 2) {
*reinterpret_cast<uint16_t*>(dst + i) = *reinterpret_cast<const uint16_t*>(array.data() + i);
}
if (i < size) {
dst[i] = array[i];
}
#elif
memcpy(&ret, array.data(), size);
#endif
} else {
uint8_t* dst = reinterpret_cast<uint8_t*>(&ret);
const uint8_t* src = &m_data[m_pos];
size_t remaining = size;

#if defined(__AVX2__)
// Use AVX2 to copy 32 bytes at a time
while (remaining >= 32) {
_mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src)));
dst += 32;
src += 32;
remaining -= 32;
}
#endif

#if defined(__SSE2__)
// Use SSE2 to copy the remaining bytes
while (remaining >= 16) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(dst), _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)));
dst += 16;
src += 16;
remaining -= 16;
}
while (remaining >= 8) {
_mm_storel_epi64(reinterpret_cast<__m128i*>(dst), _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)));
dst += 8;
src += 8;
remaining -= 8;
}

// Copy the remaining bytes using plain scalar operations
while (remaining >= 4) {
*reinterpret_cast<uint32_t*>(dst) = *reinterpret_cast<const uint32_t*>(src);
dst += 4;
src += 4;
remaining -= 4;
}
while (remaining >= 2) {
*reinterpret_cast<uint16_t*>(dst) = *reinterpret_cast<const uint16_t*>(src);
dst += 2;
src += 2;
remaining -= 2;
}
if (remaining == 1) {
*dst = *src;
}
#elif
memcpy(&ret, &m_data[m_pos], size);
#endif

m_pos += size;
}

Expand Down
Loading

0 comments on commit aad48d6

Please sign in to comment.