diff --git a/PLA-SeedFinder/CMakeLists.txt b/PLA-SeedFinder/CMakeLists.txt index a1c3367..9c04d27 100644 --- a/PLA-SeedFinder/CMakeLists.txt +++ b/PLA-SeedFinder/CMakeLists.txt @@ -16,13 +16,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) #set(CMAKE_VERBOSE_MAKEFILE ON) -# set(CMAKE_AUTOMOC ON) -# set(CMAKE_AUTORCC ON) -# set(CMAKE_AUTOUIC ON) - -add_custom_target(build-time-make-directory ALL - COMMAND ${CMAKE_COMMAND} -E make_directory Assembly/) - #Find threads library set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) @@ -92,6 +85,9 @@ target_include_directories(PLA-SeedFinder PRIVATE Source/) #enable MP with MSVC (Build with Multiple Processes) if (MSVC) + add_custom_target(build-time-make-directory ALL + COMMAND ${CMAKE_COMMAND} -E make_directory Assembly/) + target_compile_options(PLA-SeedFinder PRIVATE /FAs /FaAssembly/ /MP /W4) target_compile_options(PLA-SeedFinder PRIVATE /wd5054) # Deprecated enum arithemtic target_compile_options(PLA-SeedFinder PRIVATE /wd4505) # unreferenced local function has been removed @@ -106,11 +102,10 @@ if (MSVC) target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake) else() + target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic -O2) if (X86) target_compile_options(PLA-SeedFinder PRIVATE -msse4.2) - target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic) - set(ARCH_FLAGS_09_Nehalem -march=nehalem) set(ARCH_FLAGS_13_Haswell -march=haswell) set(ARCH_FLAGS_17_Skylake -march=skylake-avx512) @@ -119,16 +114,6 @@ else() target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_08_Nehalem) target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_13_Haswell) target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake) - else () - target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic) - SET_SOURCE_FILES_PROPERTIES( - Source/Kernels/SeedScan_Default.cpp - PROPERTIES COMPILE_FLAGS -O2 - ) - SET_SOURCE_FILES_PROPERTIES( - Source/Kernels/SeedScan_aarch64.cpp - PROPERTIES COMPILE_FLAGS -O2 - ) endif (X86) endif() @@ -162,4 +147,4 @@ if (WIN32) "*.dll" ) file(COPY ${MY_DLLS} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) -endif(WIN32) \ No newline at end of file +endif(WIN32) diff --git a/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp b/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp index dab53f6..db60a3a 100644 --- a/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp +++ b/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp @@ -6,7 +6,7 @@ namespace PokemonAutomation{ #if defined __aarch64__ && defined __APPLE__ -bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ +bool seed_scan_common_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ simd_ulong4 t = simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid); simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000); simd_ulong4 delta = simd_make_ulong4(0x500000000, 0x500000000, 0x500000000, 0x500000000); @@ -29,7 +29,7 @@ bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start return false; } -bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ +bool seed_scan_thorough_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ desired_pid &= 0xefffffff; simd_ulong4 t = simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid); simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000); @@ -57,7 +57,7 @@ bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t sta return false; } -bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ +bool seed_scan_common_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ simd_ulong8 t = simd_make_ulong8(simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid), simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid)); simd_ulong8 seed = simd_make_ulong8(simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000), @@ -83,7 +83,7 @@ bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start return false; } -bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ +bool seed_scan_thorough_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ desired_pid &= 0xefffffff; simd_ulong8 t = simd_make_ulong8(simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid), simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid)); diff --git a/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h b/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h index a791780..16dee6d 100644 --- a/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h +++ b/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h @@ -58,4 +58,4 @@ class XoroShiro8{ } #undef MAGIC_NUMBER -#endif \ No newline at end of file +#endif diff --git a/PLA-SeedFinder/Source/SeedScan.cpp b/PLA-SeedFinder/Source/SeedScan.cpp index cf92836..2978c38 100644 --- a/PLA-SeedFinder/Source/SeedScan.cpp +++ b/PLA-SeedFinder/Source/SeedScan.cpp @@ -30,16 +30,16 @@ bool seed_scan_thorough_unroll4_SSE41(size_t rolls, uint32_t desired_pid, uint64 bool seed_scan_thorough_unroll8_AVX2(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); bool seed_scan_thorough_unroll16_AVX512(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); -bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); -bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); -bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); -bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_common_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_thorough_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_common_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_thorough_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); void print_isa(){ #ifdef __aarch64__ #if defined __APPLE__ - cout << "Instruction Set: AARCH64" << endl; + cout << "Instruction Set: NEON" << endl; return; #endif #else @@ -99,7 +99,7 @@ bool seed_scan_common(size_t rolls, uint32_t desired_pid, uint64_t start_seed, u #endif #if defined __aarch64__ && defined __APPLE__ uint64_t block = iterations / 8 * 8; - if (block > 0 && seed_scan_common_unroll8(rolls, desired_pid, start_seed, block)){ + if (block > 0 && seed_scan_common_unroll8_NEON(rolls, desired_pid, start_seed, block)){ return true; } start_seed += block * 0x100000000; @@ -146,7 +146,7 @@ bool seed_scan_thorough(size_t rolls, uint32_t desired_pid, uint64_t start_seed, #endif #if defined __aarch64__ && defined __APPLE__ uint64_t block = iterations / 8 * 8; - if (block > 0 && seed_scan_thorough_unroll8(rolls, desired_pid, start_seed, block)){ + if (block > 0 && seed_scan_thorough_unroll8_NEON(rolls, desired_pid, start_seed, block)){ return true; } start_seed += block * 0x100000000;