diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index df12bb38..cfe07bb4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,8 +12,8 @@ env: CMAKE_REQUIRED_QUIET: OFF DEBIAN_FRONTEND: noninteractive UPX_CMAKE_BUILD_FLAGS: --verbose - # 2023-09-01 - ZIG_DIST_VERSION: 0.12.0-dev.252+5dc2db805 + # 2023-09-04 + ZIG_DIST_VERSION: 0.12.0-dev.263+62f727eed jobs: job-rebuild-and-verify-stubs: diff --git a/.github/workflows/weekly-ci-cc-zigcc.yml b/.github/workflows/weekly-ci-cc-zigcc.yml index 9adea1f4..99c57b3f 100644 --- a/.github/workflows/weekly-ci-cc-zigcc.yml +++ b/.github/workflows/weekly-ci-cc-zigcc.yml @@ -10,8 +10,8 @@ on: env: CMAKE_REQUIRED_QUIET: OFF DEBIAN_FRONTEND: noninteractive - # 2023-09-01 - ZIG_DIST_VERSION: 0.12.0-dev.252+5dc2db805 + # 2023-09-04 + ZIG_DIST_VERSION: 0.12.0-dev.263+62f727eed jobs: job-linux-zigcc: # uses cmake + make diff --git a/src/check/dt_check.cpp b/src/check/dt_check.cpp index 876a93e7..7c0c784d 100644 --- a/src/check/dt_check.cpp +++ b/src/check/dt_check.cpp @@ -647,20 +647,26 @@ TEST_CASE("libc qsort") { x = x * 33 + 1 + (i & 255); e[i].value = (upx_uint16_t) x; } - sort(e, n, sizeof(*e), Elem::compare); + sort(e, n, sizeof(Elem), Elem::compare); for (size_t i = 1; i < n; i++) if very_unlikely (e[i - 1].value > e[i].value) return false; return true; } }; - constexpr size_t N = 4096; Elem e[N]; for (size_t n = 0; n <= N; n = 2 * n + 1) { CHECK(Elem::check_sort(qsort, e, n)); - // CHECK(Elem::check_sort(upx_shellsort, e, n)); - // CHECK(Elem::check_sort(upx_stable_sort, e, n)); + CHECK(Elem::check_sort(upx_gnomesort, e, n)); + CHECK(Elem::check_sort(upx_shellsort_memswap, e, n)); + CHECK(Elem::check_sort(upx_shellsort_memcpy, e, n)); +#if UPX_QSORT_IS_STABLE_SORT + upx_sort_func_t wrap_stable_sort = [](void *aa, size_t nn, size_t, upx_compare_func_t cc) { + upx_std_stable_sort(aa, nn, cc); + }; + CHECK(Elem::check_sort(wrap_stable_sort, e, n)); +#endif } } #endif diff --git a/src/conf.h b/src/conf.h index fd731f6e..6f0f77f3 100644 --- a/src/conf.h +++ b/src/conf.h @@ -97,10 +97,16 @@ inline constexpr bool upx_is_integral_v = upx_is_integral::value; #if (ACC_ARCH_M68K && ACC_OS_TOS && ACC_CC_GNUC) && defined(__MINT__) // horrible hack for broken compiler / ABI #define upx_fake_alignas_1 __attribute__((__aligned__(1),__packed__)) +#define upx_fake_alignas_4 __attribute__((__aligned__(2))) #define upx_fake_alignas_16 __attribute__((__aligned__(2))) // object file maximum 2 ??? #define upx_fake_alignas__(x) upx_fake_alignas_ ## x #define alignas(x) upx_fake_alignas__(x) +#define upx_alignas_max upx_fake_alignas_4 #endif +#ifndef upx_alignas_max +#define upx_alignas_max alignas(std::max_align_t) +#endif + /************************************************************************* // core diff --git a/src/help.cpp b/src/help.cpp index 6729c82f..07383630 100644 --- a/src/help.cpp +++ b/src/help.cpp @@ -129,7 +129,7 @@ static void list_all_packers(FILE *f, int verbose) { PackerNames pn; pn.o = &o; (void) PackMaster::visitAllPackers(PackerNames::visit, nullptr, &o, &pn); - qsort(pn.names, pn.names_count, sizeof(PackerNames::Entry), PackerNames::cmp_fname); + upx_qsort(pn.names, pn.names_count, sizeof(PackerNames::Entry), PackerNames::cmp_fname); size_t pos = 0; for (size_t i = 0; i < pn.names_count; ++i) { const char *fn = pn.names[i].fname; diff --git a/src/p_exe.cpp b/src/p_exe.cpp index c57469ef..faeef76f 100644 --- a/src/p_exe.cpp +++ b/src/p_exe.cpp @@ -398,7 +398,7 @@ void PackExe::pack(OutputFile *fo) { unsigned jc = get_le32(relocs + 4 * ic); set_le32(relocs + 4 * ic, ((jc >> 16) * 16 + (jc & 0xffff)) & 0xfffff); } - qsort(raw_bytes(relocs, 4 * relocnum), relocnum, 4, le32_compare); + upx_qsort(raw_bytes(relocs, 4 * relocnum), relocnum, 4, le32_compare); SPAN_S_VAR(byte, image, ibuf + 0, ih_imagesize); SPAN_S_VAR(byte, crel, ibuf + ih_imagesize, ibuf); diff --git a/src/p_lx_elf.cpp b/src/p_lx_elf.cpp index d0d0484f..0ed4d7ee 100644 --- a/src/p_lx_elf.cpp +++ b/src/p_lx_elf.cpp @@ -2022,7 +2022,7 @@ PackLinuxElf32::sort_DT32_offsets(Elf32_Dyn const *const dynp0) n_off += !!dt_offsets[n_off]; } dt_offsets[n_off++] = file_size; // sentinel - qsort(dt_offsets, n_off, sizeof(dt_offsets[0]), qcmp_unsigned); + upx_qsort(dt_offsets, n_off, sizeof(dt_offsets[0]), qcmp_unsigned); } unsigned PackLinuxElf32::find_dt_ndx(unsigned rva) @@ -7878,7 +7878,7 @@ PackLinuxElf64::sort_DT64_offsets(Elf64_Dyn const *const dynp0) n_off += !!dt_offsets[n_off]; } dt_offsets[n_off++] = file_size; // sentinel - qsort(dt_offsets, n_off, sizeof(dt_offsets[0]), qcmp_unsigned); + upx_qsort(dt_offsets, n_off, sizeof(dt_offsets[0]), qcmp_unsigned); } unsigned PackLinuxElf64::find_dt_ndx(u64_t rva) diff --git a/src/p_mach.cpp b/src/p_mach.cpp index 988113d2..11022e1f 100644 --- a/src/p_mach.cpp +++ b/src/p_mach.cpp @@ -1550,7 +1550,7 @@ void PackMachBase::unpack(OutputFile *fo) } // Put LC_SEGMENT together at the beginning - qsort(msegcmd, ncmds, sizeof(*msegcmd), compare_segment_command); + upx_qsort(msegcmd, ncmds, sizeof(*msegcmd), compare_segment_command); n_segment = 0; for (unsigned j= 0; j < ncmds; ++j) { n_segment += (lc_seg==msegcmd[j].cmd); @@ -2025,7 +2025,7 @@ tribool PackMachBase::canPack() } // Put LC_SEGMENT together at the beginning - qsort(msegcmd, ncmds, sizeof(*msegcmd), compare_segment_command); + upx_qsort(msegcmd, ncmds, sizeof(*msegcmd), compare_segment_command); if (lc_seg==msegcmd[0].cmd && 0==msegcmd[0].vmaddr && !strcmp("__PAGEZERO", msegcmd[0].segname)) { diff --git a/src/p_vmlinx.cpp b/src/p_vmlinx.cpp index 57170f50..d9d72c0f 100644 --- a/src/p_vmlinx.cpp +++ b/src/p_vmlinx.cpp @@ -205,7 +205,7 @@ tribool PackVmlinuxBase::canPack() fi->readx(phdri, ehdri.e_phnum * sizeof(*phdri)); // Put PT_LOAD together at the beginning, ascending by .p_paddr. - qsort(phdri, ehdri.e_phnum, sizeof(*phdri), compare_Phdr); + upx_qsort(phdri, ehdri.e_phnum, sizeof(*phdri), compare_Phdr); // Find convex hull of physical addresses, and count the PT_LOAD. // Ignore ".bss": .p_filesz < .p_memsz diff --git a/src/packer_r.cpp b/src/packer_r.cpp index 6d7aae4a..4efab7c3 100644 --- a/src/packer_r.cpp +++ b/src/packer_r.cpp @@ -50,7 +50,7 @@ unsigned Packer::optimizeReloc(unsigned relocnum, SPAN_P(byte) relocs, SPAN_S(by throwCantPackExact(); if (relocnum == 0) return 0; - qsort(raw_bytes(relocs, 4 * relocnum), relocnum, 4, le32_compare); + upx_qsort(raw_bytes(relocs, 4 * relocnum), relocnum, 4, le32_compare); unsigned pc = (unsigned) -4; for (unsigned i = 0; i < relocnum; i++) { diff --git a/src/pefile.cpp b/src/pefile.cpp index 260afd64..1e0b9382 100644 --- a/src/pefile.cpp +++ b/src/pefile.cpp @@ -254,7 +254,7 @@ void PeFile::Interval::add(const Interval *iv) { void PeFile::Interval::flatten() { if (!ivnum) return; - qsort(ivarr, ivnum, sizeof(interval), Interval::compare); + upx_qsort(ivarr, ivnum, sizeof(interval), Interval::compare); for (unsigned ic = 0; ic < ivnum - 1; ic++) { unsigned jc; for (jc = ic + 1; jc < ivnum && ivarr[ic].start + ivarr[ic].len >= ivarr[jc].start; jc++) @@ -342,7 +342,7 @@ void PeFile::Reloc::add(unsigned pos, unsigned type) { void PeFile::Reloc::finish(byte *&p, unsigned &siz) { unsigned prev = 0xffffffff; set_le32(start + 1024 + 4 * counts[0]++, 0xf0000000); - qsort(start + 1024, counts[0], 4, le32_compare); + upx_qsort(start + 1024, counts[0], 4, le32_compare); rel = (reloc *) start; rel1 = (LE16 *) start; @@ -422,7 +422,7 @@ void PeFile32::processRelocs() // pass1 // remove duplicated records for (ic = 1; ic <= 3; ic++) { - qsort(fix[ic], xcounts[ic], 4, le32_compare); + upx_qsort(fix[ic], xcounts[ic], 4, le32_compare); unsigned prev = ~0u; unsigned jc = 0; for (unsigned kc = 0; kc < xcounts[ic]; kc++) @@ -522,7 +522,7 @@ void PeFile64::processRelocs() // pass1 // remove duplicated records for (ic = 1; ic < 16; ic++) { - qsort(fix[ic], xcounts[ic], 4, le32_compare); + upx_qsort(fix[ic], xcounts[ic], 4, le32_compare); unsigned prev = ~0u; unsigned jc = 0; for (unsigned kc = 0; kc < xcounts[ic]; kc++) @@ -760,7 +760,7 @@ public: outputlen = 0; // sort the sections by name before adding them all - qsort(sections, nsections, sizeof(Section *), ImportLinker::compare); + upx_qsort(sections, nsections, sizeof(Section *), ImportLinker::compare); for (unsigned ic = 0; ic < nsections; ic++) addLoader(sections[ic]->name); @@ -945,7 +945,7 @@ unsigned PeFile::processImports0(ord_mask_t ord_mask) // pass 1 mb_oimport.clear(); oimport = mb_oimport; - qsort(idlls, dllnum, sizeof(*idlls), udll::compare); + upx_qsort(idlls, dllnum, sizeof(*idlls), udll::compare); info("Processing imports: %d DLLs", dllnum); for (unsigned ic = 0; ic < dllnum; ic++) { diff --git a/src/util/util.cpp b/src/util/util.cpp index c33bee26..8976a1a5 100644 --- a/src/util/util.cpp +++ b/src/util/util.cpp @@ -42,10 +42,9 @@ // assert sane memory buffer sizes to protect against integer overflows // and malicious header fields // see C 11 standard, Annex K -// -// this limits uncompressed_size to about 682 MiB (715_128_832 bytes) **************************************************************************/ +// this limits uncompressed_size to about 682 MiB (715_128_832 bytes) ACC_COMPILE_TIME_ASSERT_HEADER(UPX_RSIZE_MAX_MEM == UPX_RSIZE_MAX) ACC_COMPILE_TIME_ASSERT_HEADER(UPX_RSIZE_MAX_STR <= UPX_RSIZE_MAX / 256) ACC_COMPILE_TIME_ASSERT_HEADER(2ull * UPX_RSIZE_MAX * 9 / 8 + 256 * 1024 * 1024 < INT_MAX) @@ -54,23 +53,6 @@ ACC_COMPILE_TIME_ASSERT_HEADER(5ull * UPX_RSIZE_MAX < UINT_MAX) ACC_COMPILE_TIME_ASSERT_HEADER(UPX_RSIZE_MAX >= 8192 * 65536) ACC_COMPILE_TIME_ASSERT_HEADER(UPX_RSIZE_MAX_STR >= 1024) -upx_rsize_t mem_size(upx_uint64_t element_size, upx_uint64_t n, upx_uint64_t extra1, - upx_uint64_t extra2) { - assert(element_size > 0); - if very_unlikely (element_size == 0 || element_size > UPX_RSIZE_MAX) - throwCantPack("mem_size 1; take care"); - if very_unlikely (n > UPX_RSIZE_MAX) - throwCantPack("mem_size 2; take care"); - if very_unlikely (extra1 > UPX_RSIZE_MAX) - throwCantPack("mem_size 3; take care"); - if very_unlikely (extra2 > UPX_RSIZE_MAX) - throwCantPack("mem_size 4; take care"); - upx_uint64_t bytes = element_size * n + extra1 + extra2; // cannot overflow - if very_unlikely (bytes > UPX_RSIZE_MAX) - throwCantPack("mem_size 5; take care"); - return ACC_ICONV(upx_rsize_t, bytes); -} - bool mem_size_valid(upx_uint64_t element_size, upx_uint64_t n, upx_uint64_t extra1, upx_uint64_t extra2) noexcept { assert_noexcept(element_size > 0); @@ -88,6 +70,23 @@ bool mem_size_valid(upx_uint64_t element_size, upx_uint64_t n, upx_uint64_t extr return true; } +upx_rsize_t mem_size(upx_uint64_t element_size, upx_uint64_t n, upx_uint64_t extra1, + upx_uint64_t extra2) { + assert(element_size > 0); + if very_unlikely (element_size == 0 || element_size > UPX_RSIZE_MAX) + throwCantPack("mem_size 1; take care"); + if very_unlikely (n > UPX_RSIZE_MAX) + throwCantPack("mem_size 2; take care"); + if very_unlikely (extra1 > UPX_RSIZE_MAX) + throwCantPack("mem_size 3; take care"); + if very_unlikely (extra2 > UPX_RSIZE_MAX) + throwCantPack("mem_size 4; take care"); + upx_uint64_t bytes = element_size * n + extra1 + extra2; // cannot overflow + if very_unlikely (bytes > UPX_RSIZE_MAX) + throwCantPack("mem_size 5; take care"); + return ACC_ICONV(upx_rsize_t, bytes); +} + TEST_CASE("mem_size") { CHECK(mem_size_valid(1, 0)); CHECK(mem_size_valid(1, 0x30000000)); @@ -277,18 +276,18 @@ void upx_memswap(void *a, void *b, size_t n) { } } -// somewhat better memswap(), optimized for our use cases in sort functions +// much better memswap(), optimized for our use case in sort functions below static void memswap_no_overlap(char *a, char *b, size_t n) { -#if defined(__clang__) && __clang_major__ < 15 && 1 - // work around a clang ICE (Internal Compiler Error); sigh +#if defined(__clang__) && __clang_major__ < 15 + // work around a clang < 15 ICE (Internal Compiler Error) upx_memswap(a, b, n); #else // clang bug - alignas(16) char tmpbuf[16]; + upx_alignas_max char tmp_buf[16]; #define SWAP(x) \ ACC_BLOCK_BEGIN \ - upx_memcpy_inline(tmpbuf, a, x); \ + upx_memcpy_inline(tmp_buf, a, x); \ upx_memcpy_inline(a, b, x); \ - upx_memcpy_inline(b, tmpbuf, x); \ + upx_memcpy_inline(b, tmp_buf, x); \ a += x; \ b += x; \ ACC_BLOCK_END @@ -310,25 +309,9 @@ static void memswap_no_overlap(char *a, char *b, size_t n) { #endif // clang bug } -// simple Shell sort using Knuth's gap; NOT stable -void upx_shellsort(void *array, size_t n, size_t element_size, upx_compare_func_t compare) { - mem_size_assert(element_size, n); // check size - size_t gap = 0; - while (gap * 3 + 1 < n) // cannot overflow - gap = gap * 3 + 1; - for (; gap > 0; gap = (gap - 1) / 3) { - const size_t gap_bytes = element_size * gap; - char *const gbase = (char *) array + gap_bytes; // gbase := &array[gap] - char *ii = gbase; - for (size_t i = gap; i < n; i += gap, ii += gap_bytes) - for (char *a = ii; a >= gbase && compare(a - gap_bytes, a) > 0; a -= gap_bytes) - memswap_no_overlap(a - gap_bytes, a, element_size); - } -} - // extremely simple (and beautiful) stable sort: Gnomesort // WARNING: O(n^2) and thus very inefficient for large n -void upx_stable_sort(void *array, size_t n, size_t element_size, upx_compare_func_t compare) { +void upx_gnomesort(void *array, size_t n, size_t element_size, upx_compare_func_t compare) { for (size_t i = 1; i < n; i++) { char *a = (char *) array + element_size * i; // a := &array[i] if (i != 0 && compare(a - element_size, a) > 0) { // if a[-1] > a[0] then @@ -338,36 +321,100 @@ void upx_stable_sort(void *array, size_t n, size_t element_size, upx_compare_fun } } -#if !defined(DOCTEST_CONFIG_DISABLE) && DEBUG -TEST_CASE("basic upx_stable_sort") { - { - unsigned a[] = {0, 1}; - upx_stable_sort(a, 2, sizeof(*a), ne32_compare); - CHECK((a[0] == 0 && a[1] == 1)); - } - { - unsigned a[] = {1, 0}; - upx_stable_sort(a, 2, sizeof(*a), ne32_compare); - CHECK((a[0] == 0 && a[1] == 1)); - } - { - BE64 a[3]; - a[0] = 257; - a[1] = 256; - a[2] = 255; - upx_stable_sort(a, 3, sizeof(*a), be64_compare); - CHECK((a[0] == 255 && a[1] == 256 && a[2] == 257)); +// simple Shell sort using Knuth's gap; NOT stable; uses memswap() +// cannot compete with modern sort algorithms, but not too bad as a generic fallback +void upx_shellsort_memswap(void *array, size_t n, size_t element_size, upx_compare_func_t compare) { + mem_size_assert(element_size, n); // check size + size_t gap = 0; // 0, 1, 4, 13, 40, 121, 364, 1093, ... + while (gap * 3 + 1 < n) // cannot overflow because of size check above + gap = gap * 3 + 1; + for (; gap > 0; gap = (gap - 1) / 3) { + const size_t gap_bytes = element_size * gap; + char *p = (char *) array + gap_bytes; + for (size_t i = gap; i < n; i += gap, p += gap_bytes) // invariant: p == &array[i] + for (char *a = p; a != array && compare(a - gap_bytes, a) > 0; a -= gap_bytes) + memswap_no_overlap(a - gap_bytes, a, element_size); } } +// simple Shell sort using Knuth's gap; NOT stable; uses memcpy() +// should be faster than memswap() in theory, but benchmarks are inconsistent +void upx_shellsort_memcpy(void *array, size_t n, size_t element_size, upx_compare_func_t compare) { + mem_size_assert(element_size, n); // check size + constexpr size_t MAX_INLINE_ELEMENT_SIZE = 256; + upx_alignas_max char tmp_buf[MAX_INLINE_ELEMENT_SIZE]; // buffer for one element + char *tmp = tmp_buf; + if (element_size > MAX_INLINE_ELEMENT_SIZE) { + tmp = (char *) malloc(element_size); + assert(tmp != nullptr); + } + size_t gap = 0; // 0, 1, 4, 13, 40, 121, 364, 1093, ... + while (gap * 3 + 1 < n) // cannot overflow because of size check above + gap = gap * 3 + 1; + for (; gap > 0; gap = (gap - 1) / 3) { + const size_t gap_bytes = element_size * gap; + char *p = (char *) array + gap_bytes; + for (size_t i = gap; i < n; i += gap, p += gap_bytes) // invariant: p == &array[i] + if (compare(p - gap_bytes, p) > 0) { + char *a = p; + memcpy(tmp, a, element_size); + do { + memcpy(a, a - gap_bytes, element_size); + a -= gap_bytes; + } while (a != array && compare(a - gap_bytes, tmp) > 0); + memcpy(a, tmp, element_size); + } + } + if (element_size > MAX_INLINE_ELEMENT_SIZE) + free(tmp); +} + +// wrap std::stable_sort() +template +void upx_std_stable_sort(void *array, size_t n, upx_compare_func_t compare) { + static_assert(ElementSize > 0 && ElementSize <= UPX_RSIZE_MAX); + mem_size_assert(ElementSize, n); // check size +#if 0 + // just for testing + upx_gnomesort(array, n, ElementSize, compare); +#else + struct alignas(1) element_type { char data[ElementSize]; }; + static_assert(sizeof(element_type) == ElementSize); + static_assert(alignof(element_type) == 1); + auto cmp = [compare](const element_type &a, const element_type &b) -> bool { + return compare(&a, &b) < 0; + }; + std::stable_sort((element_type *) array, (element_type *) array + n, cmp); +#endif +} + +#if UPX_QSORT_IS_STABLE_SORT +// instantiate function templates for all element sizes we need +// efficient, but code size bloat +template void upx_std_stable_sort<1>(void *, size_t, upx_compare_func_t); +template void upx_std_stable_sort<2>(void *, size_t, upx_compare_func_t); +template void upx_std_stable_sort<4>(void *, size_t, upx_compare_func_t); +template void upx_std_stable_sort<8>(void *, size_t, upx_compare_func_t); +template void upx_std_stable_sort<16>(void *, size_t, upx_compare_func_t); +template void upx_std_stable_sort<32>(void *, size_t, upx_compare_func_t); +template void upx_std_stable_sort<56>(void *, size_t, upx_compare_func_t); +template void upx_std_stable_sort<72>(void *, size_t, upx_compare_func_t); +#endif + +#if !defined(DOCTEST_CONFIG_DISABLE) && DEBUG >= 1 #if __cplusplus >= 202002L // use C++20 std::next_permutation() to test all permutations namespace { template struct TestSortAllPermutations { + typedef ElementType element_type; static noinline upx_uint64_t test(upx_sort_func_t sort, size_t n) { constexpr size_t N = 16; - assert(n > 0 && n <= N); + assert_noexcept(n <= N); ElementType perm[N]; + if (n == 0) { + sort(perm, 0, sizeof(ElementType), CompareFunc); // check that n == 0 works + return 0; + } for (size_t i = 0; i < n; i++) perm[i] = 255 + i; upx_uint64_t num_perms = 0; @@ -376,40 +423,53 @@ struct TestSortAllPermutations { memcpy(a, perm, sizeof(*a) * n); sort(a, n, sizeof(*a), CompareFunc); for (size_t i = 0; i < n; i++) - assert((a[i] == 255 + i)); + assert_noexcept((a[i] == 255 + i)); num_perms += 1; } while (std::next_permutation(perm, perm + n)); return num_perms; } + static bool test_permutations(upx_sort_func_t sort) { + bool ok = true; + ok &= (test(sort, 0) == 0); + ok &= (test(sort, 1) == 1); + ok &= (test(sort, 2) == 2); + ok &= (test(sort, 3) == 6); + ok &= (test(sort, 4) == 24); + ok &= (test(sort, 5) == 120); +#if DEBUG >= 2 + ok &= (test(sort, 6) == 720); + ok &= (test(sort, 7) == 5040); + ok &= (test(sort, 8) == 40320); + ok &= (test(sort, 9) == 362880); + ok &= (test(sort, 10) == 3628800); + // ok &= (test(sort, 11) == 39916800); +#endif + return ok; + } }; } // namespace -TEST_CASE("upx_shellsort") { +TEST_CASE("upx_gnomesort") { // typedef TestSortAllPermutations TestSort; typedef TestSortAllPermutations TestSort; - CHECK(TestSort::test(upx_shellsort, 1) == 1); - CHECK(TestSort::test(upx_shellsort, 2) == 2); - CHECK(TestSort::test(upx_shellsort, 3) == 6); - CHECK(TestSort::test(upx_shellsort, 4) == 24); - CHECK(TestSort::test(upx_shellsort, 5) == 120); - // CHECK(TestSort::test(upx_shellsort, 6) == 720); - // CHECK(TestSort::test(upx_shellsort, 7) == 5040); - // CHECK(TestSort::test(upx_shellsort, 8) == 40320); - // CHECK(TestSort::test(upx_shellsort, 9) == 362880); - // CHECK(TestSort::test(upx_shellsort, 10) == 3628800); + CHECK(TestSort::test_permutations(upx_gnomesort)); } -TEST_CASE("upx_stable_sort") { +TEST_CASE("upx_shellsort_memswap") { // typedef TestSortAllPermutations TestSort; typedef TestSortAllPermutations TestSort; - CHECK(TestSort::test(upx_stable_sort, 1) == 1); - CHECK(TestSort::test(upx_stable_sort, 2) == 2); - CHECK(TestSort::test(upx_stable_sort, 3) == 6); - CHECK(TestSort::test(upx_stable_sort, 4) == 24); - CHECK(TestSort::test(upx_stable_sort, 5) == 120); - // CHECK(TestSort::test(upx_stable_sort, 6) == 720); - // CHECK(TestSort::test(upx_stable_sort, 7) == 5040); - // CHECK(TestSort::test(upx_stable_sort, 8) == 40320); - // CHECK(TestSort::test(upx_stable_sort, 9) == 362880); - // CHECK(TestSort::test(upx_stable_sort, 10) == 3628800); + CHECK(TestSort::test_permutations(upx_shellsort_memswap)); +} +TEST_CASE("upx_shellsort_memcpy") { + // typedef TestSortAllPermutations TestSort; + typedef TestSortAllPermutations TestSort; + CHECK(TestSort::test_permutations(upx_shellsort_memcpy)); +} +TEST_CASE("upx_std_stable_sort") { + // typedef TestSortAllPermutations TestSort; + typedef TestSortAllPermutations TestSort; + upx_sort_func_t wrap_stable_sort = [](void *a, size_t n, size_t, upx_compare_func_t compare) { + upx_std_stable_sort(a, n, compare); + }; + CHECK(TestSort::test_permutations(wrap_stable_sort)); } #endif // C++20 #endif // DEBUG diff --git a/src/util/util.h b/src/util/util.h index f3eac5a2..e301e204 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -130,9 +130,22 @@ void upx_memswap(void *a, void *b, size_t n); typedef int(__acc_cdecl_qsort *upx_compare_func_t)(const void *, const void *); typedef void (*upx_sort_func_t)(void *array, size_t n, size_t element_size, upx_compare_func_t); -void upx_shellsort(void *array, size_t n, size_t element_size, upx_compare_func_t compare); +void upx_gnomesort(void *array, size_t n, size_t element_size, upx_compare_func_t compare); +void upx_shellsort_memswap(void *array, size_t n, size_t element_size, upx_compare_func_t compare); +void upx_shellsort_memcpy(void *array, size_t n, size_t element_size, upx_compare_func_t compare); -void upx_stable_sort(void *array, size_t n, size_t element_size, upx_compare_func_t compare); +// this wraps std::stable_sort() +template +void upx_std_stable_sort(void *array, size_t n, upx_compare_func_t compare); + +#if 1 +// use libc qsort() +#define upx_qsort qsort +#else +// use std::stable_sort() +#define upx_qsort(a, b, c, d) upx_std_stable_sort<(c)>(a, b, d) +#define UPX_QSORT_IS_STABLE_SORT 1 +#endif /************************************************************************* // misc. support functions