src: implement upx_shellsort()

This commit is contained in:
Markus F.X.J. Oberhumer 2023-09-01 10:49:47 +02:00
parent bfb438dc89
commit bb6b087862
3 changed files with 122 additions and 16 deletions

View File

@ -93,6 +93,22 @@ build/extra/clang-static/release: PHONY; $(call run_config_and_build,$@,Release)
build/extra/clang-static/%: export CC = clang -static build/extra/clang-static/%: export CC = clang -static
build/extra/clang-static/%: export CXX = clang++ -static build/extra/clang-static/%: export CXX = clang++ -static
# force building with clang/clang++ C17/C++20
# debug and release both go through run_config_and_build; only the build type differs
build/extra/clang-std-cxx20/debug: PHONY; $(call run_config_and_build,$@,Debug)
build/extra/clang-std-cxx20/release: PHONY; $(call run_config_and_build,$@,Release)
# target-specific exports: the language standard is chosen via -std= on the compiler command
build/extra/clang-std-cxx20/%: export CC = clang -std=gnu17
build/extra/clang-std-cxx20/%: export CXX = clang++ -std=gnu++20
# NOTE(review): presumably these keep the build configuration from forcing its own
# C/C++ standard so that the -std= flags above take effect -- confirm in the CMake files
build/extra/clang-std-cxx20/%: export UPX_CONFIG_DISABLE_C_STANDARD=ON
build/extra/clang-std-cxx20/%: export UPX_CONFIG_DISABLE_CXX_STANDARD=ON
# force building with clang/clang++ C23/C++23
# debug and release both go through run_config_and_build; only the build type differs
build/extra/clang-std-cxx23/debug: PHONY; $(call run_config_and_build,$@,Debug)
build/extra/clang-std-cxx23/release: PHONY; $(call run_config_and_build,$@,Release)
# target-specific exports: gnu2x / gnu++2b are the compiler's spellings used here for C23 / C++23
build/extra/clang-std-cxx23/%: export CC = clang -std=gnu2x
build/extra/clang-std-cxx23/%: export CXX = clang++ -std=gnu++2b
# NOTE(review): presumably these keep the build configuration from forcing its own
# C/C++ standard so that the -std= flags above take effect -- confirm in the CMake files
build/extra/clang-std-cxx23/%: export UPX_CONFIG_DISABLE_C_STANDARD=ON
build/extra/clang-std-cxx23/%: export UPX_CONFIG_DISABLE_CXX_STANDARD=ON
# force building with gcc/g++ # force building with gcc/g++
build/extra/gcc/debug: PHONY; $(call run_config_and_build,$@,Debug) build/extra/gcc/debug: PHONY; $(call run_config_and_build,$@,Debug)
build/extra/gcc/release: PHONY; $(call run_config_and_build,$@,Release) build/extra/gcc/release: PHONY; $(call run_config_and_build,$@,Release)
@ -123,6 +139,22 @@ build/extra/gcc-static/release: PHONY; $(call run_config_and_build,$@,Release)
build/extra/gcc-static/%: export CC = gcc -static build/extra/gcc-static/%: export CC = gcc -static
build/extra/gcc-static/%: export CXX = g++ -static build/extra/gcc-static/%: export CXX = g++ -static
# force building with gcc/g++ C17/C++20
# debug and release both go through run_config_and_build; only the build type differs
build/extra/gcc-std-cxx20/debug: PHONY; $(call run_config_and_build,$@,Debug)
build/extra/gcc-std-cxx20/release: PHONY; $(call run_config_and_build,$@,Release)
# target-specific exports: the language standard is chosen via -std= on the compiler command
build/extra/gcc-std-cxx20/%: export CC = gcc -std=gnu17
build/extra/gcc-std-cxx20/%: export CXX = g++ -std=gnu++20
# NOTE(review): presumably these keep the build configuration from forcing its own
# C/C++ standard so that the -std= flags above take effect -- confirm in the CMake files
build/extra/gcc-std-cxx20/%: export UPX_CONFIG_DISABLE_C_STANDARD=ON
build/extra/gcc-std-cxx20/%: export UPX_CONFIG_DISABLE_CXX_STANDARD=ON
# force building with gcc/g++ C23/C++23
# debug and release both go through run_config_and_build; only the build type differs
build/extra/gcc-std-cxx23/debug: PHONY; $(call run_config_and_build,$@,Debug)
build/extra/gcc-std-cxx23/release: PHONY; $(call run_config_and_build,$@,Release)
# target-specific exports: the language standard is chosen via -std= on the compiler command
build/extra/gcc-std-cxx23/%: export CC = gcc -std=gnu2x
build/extra/gcc-std-cxx23/%: export CXX = g++ -std=gnu++23
# NOTE(review): presumably these keep the build configuration from forcing its own
# C/C++ standard so that the -std= flags above take effect -- confirm in the CMake files
build/extra/gcc-std-cxx23/%: export UPX_CONFIG_DISABLE_C_STANDARD=ON
build/extra/gcc-std-cxx23/%: export UPX_CONFIG_DISABLE_CXX_STANDARD=ON
# cross compiler: Linux glibc aarch64-linux-gnu (arm64) # cross compiler: Linux glibc aarch64-linux-gnu (arm64)
build/extra/cross-linux-gnu-aarch64/debug: PHONY; $(call run_config_and_build,$@,Debug) build/extra/cross-linux-gnu-aarch64/debug: PHONY; $(call run_config_and_build,$@,Debug)
build/extra/cross-linux-gnu-aarch64/release: PHONY; $(call run_config_and_build,$@,Release) build/extra/cross-linux-gnu-aarch64/release: PHONY; $(call run_config_and_build,$@,Release)

View File

@ -277,6 +277,55 @@ void upx_memswap(void *a, void *b, size_t n) {
} }
} }
// exchange the n bytes at a with the n bytes at b; the two ranges must not
// overlap; a somewhat better memswap() for our use cases in the sort functions
static void memswap_no_overlap(char *a, char *b, size_t n) {
#if defined(__clang__) && __clang_major__ < 15 && 1
    // older clang versions hit an ICE on the code below, so use the plain version
    upx_memswap(a, b, n);
#else // clang bug
    alignas(16) char scratch[16];
// swap the next `len` bytes through the scratch buffer and advance both pointers
#define SWAP_CHUNK(len)                                                                            \
    ACC_BLOCK_BEGIN                                                                                \
    upx_memcpy_inline(scratch, a, len);                                                            \
    upx_memcpy_inline(a, b, len);                                                                  \
    upx_memcpy_inline(b, scratch, len);                                                            \
    a += len;                                                                                      \
    b += len;                                                                                      \
    ACC_BLOCK_END
    // whole 16-byte chunks first
    while (n >= 16) {
        SWAP_CHUNK(16);
        n -= 16;
    }
    // now n < 16: every set bit of n selects one remaining power-of-two sized chunk
    if ((n & 8) != 0)
        SWAP_CHUNK(8);
    if ((n & 4) != 0)
        SWAP_CHUNK(4);
    if ((n & 2) != 0)
        SWAP_CHUNK(2);
    if ((n & 1) != 0)
        SWAP_CHUNK(1);
    // the last pointer increments are never read; silence pedantic warnings about them
    UNUSED(a);
    UNUSED(b);
#undef SWAP_CHUNK
#endif // clang bug
}
// simple Shell sort using Knuth's gap sequence (1, 4, 13, 40, ...); NOT stable
//
//   array         the elements to sort; sorted in place
//   n             number of elements in array
//   element_size  size of a single element in bytes
//   compare       compare(x, y) > 0 means that x has to be placed after y
void upx_shellsort(void *array, size_t n, size_t element_size,
                   int (*compare)(const void *, const void *)) {
    mem_size_assert(element_size, n); // check size
    if (n < 2)
        return; // already sorted; also avoids forming &array[1] for an empty array
    size_t gap = 1;
    while (gap * 3 <= n) // cannot overflow
        gap = gap * 3 + 1;
    for (; gap > 0; gap = (gap - 1) / 3) {
        if (gap >= n)
            continue; // no element pairs at this distance; &array[gap] would be out of bounds
        const size_t gap_bytes = element_size * gap;
        char *const first = (char *) array + gap_bytes; // first := &array[gap]
        char *p = first;
        // gapped insertion sort: visit EVERY element from array[gap] on, so that all
        // of the interleaved gap-spaced chains get sorted, not only the chain at index 0
        for (size_t i = gap; i < n; i++, p += element_size) // invariant: p == &array[i]
            // sift array[i] down its chain while its predecessor compares greater;
            // a >= first guarantees that a - gap_bytes stays inside the array
            for (char *a = p; a >= first && compare(a - gap_bytes, a) > 0; a -= gap_bytes)
                memswap_no_overlap(a - gap_bytes, a, element_size);
    }
}
// extremely simple (and beautiful) stable sort: Gnomesort // extremely simple (and beautiful) stable sort: Gnomesort
// WARNING: O(n^2) and thus very inefficient for large n // WARNING: O(n^2) and thus very inefficient for large n
void upx_stable_sort(void *array, size_t n, size_t element_size, void upx_stable_sort(void *array, size_t n, size_t element_size,
@ -284,14 +333,14 @@ void upx_stable_sort(void *array, size_t n, size_t element_size,
for (size_t i = 1; i < n; i++) { for (size_t i = 1; i < n; i++) {
char *a = (char *) array + element_size * i; // a := &array[i] char *a = (char *) array + element_size * i; // a := &array[i]
if (i != 0 && compare(a - element_size, a) > 0) { // if a[-1] > a[0] then if (i != 0 && compare(a - element_size, a) > 0) { // if a[-1] > a[0] then
upx_memswap(a - element_size, a, element_size); // swap elements a[-1] <=> a[0] memswap_no_overlap(a - element_size, a, element_size); // swap elements a[-1] <=> a[0]
i -= 2; // and decrease i i -= 2; // and decrease i
} }
} }
} }
#if !defined(DOCTEST_CONFIG_DISABLE) && DEBUG #if !defined(DOCTEST_CONFIG_DISABLE) && DEBUG
TEST_CASE("upx_stable_sort") { TEST_CASE("basic upx_stable_sort") {
{ {
unsigned a[] = {0, 1}; unsigned a[] = {0, 1};
upx_stable_sort(a, 2, sizeof(*a), ne32_compare); upx_stable_sort(a, 2, sizeof(*a), ne32_compare);
@ -314,18 +363,21 @@ TEST_CASE("upx_stable_sort") {
#if __cplusplus >= 202002L // use C++20 std::next_permutation() to test all permutations #if __cplusplus >= 202002L // use C++20 std::next_permutation() to test all permutations
namespace { namespace {
typedef int (*compare_func)(const void *, const void *);
typedef void (*sort_func)(void *array, size_t n, size_t element_size, compare_func compare);
template <class ElementType, compare_func CompareFunc>
struct TestSortAllPermutations { struct TestSortAllPermutations {
static upx_uint64_t test(size_t n) { static noinline upx_uint64_t test(sort_func sort, size_t n) {
constexpr size_t N = 16; constexpr size_t N = 16;
assert(n > 0 && n <= N); assert(n > 0 && n <= N);
LE16 perm[N]; ElementType perm[N];
for (size_t i = 0; i < n; i++) for (size_t i = 0; i < n; i++)
perm[i] = 255 + i; perm[i] = 255 + i;
upx_uint64_t num_perms = 0; upx_uint64_t num_perms = 0;
do { do {
LE16 a[N]; ElementType a[N];
memcpy(a, perm, sizeof(*a) * n); memcpy(a, perm, sizeof(*a) * n);
upx_stable_sort(a, n, sizeof(*a), le16_compare); sort(a, n, sizeof(*a), CompareFunc);
for (size_t i = 0; i < n; i++) for (size_t i = 0; i < n; i++)
assert((a[i] == 255 + i)); assert((a[i] == 255 + i));
num_perms += 1; num_perms += 1;
@ -334,14 +386,33 @@ struct TestSortAllPermutations {
} }
}; };
} // namespace } // namespace
TEST_CASE("upx_shellsort") {
    // run upx_shellsort over all permutations of n elements; the value returned
    // by test() is checked against n!
    // alternative element type: TestSortAllPermutations<BE64, be64_compare>
    using TestSort = TestSortAllPermutations<LE16, le16_compare>;
    static const upx_uint64_t expected[] = {1, 2, 6, 24, 120}; // 1! .. 5!
    for (size_t n = 1; n <= 5; n++)
        CHECK(TestSort::test(upx_shellsort, n) == expected[n - 1]);
    // disabled larger sizes, expected results:
    //   6 => 720, 7 => 5040, 8 => 40320, 9 => 362880, 10 => 3628800
}
TEST_CASE("upx_stable_sort") { TEST_CASE("upx_stable_sort") {
CHECK(TestSortAllPermutations::test(1) == 1); // typedef TestSortAllPermutations<BE64, be64_compare> TestSort;
CHECK(TestSortAllPermutations::test(2) == 2); typedef TestSortAllPermutations<LE16, le16_compare> TestSort;
CHECK(TestSortAllPermutations::test(3) == 6); CHECK(TestSort::test(upx_stable_sort, 1) == 1);
CHECK(TestSortAllPermutations::test(4) == 24); CHECK(TestSort::test(upx_stable_sort, 2) == 2);
CHECK(TestSortAllPermutations::test(5) == 120); CHECK(TestSort::test(upx_stable_sort, 3) == 6);
// CHECK(TestSortAllPermutations::test(6) == 720); CHECK(TestSort::test(upx_stable_sort, 4) == 24);
// CHECK(TestSortAllPermutations::test(7) == 5040); CHECK(TestSort::test(upx_stable_sort, 5) == 120);
// CHECK(TestSort::test(upx_stable_sort, 6) == 720);
// CHECK(TestSort::test(upx_stable_sort, 7) == 5040);
// CHECK(TestSort::test(upx_stable_sort, 8) == 40320);
// CHECK(TestSort::test(upx_stable_sort, 9) == 362880);
// CHECK(TestSort::test(upx_stable_sort, 10) == 3628800);
} }
#endif // C++20 #endif // C++20
#endif // DEBUG #endif // DEBUG

View File

@ -127,6 +127,9 @@ void *upx_calloc(size_t n, size_t element_size);
void upx_memswap(void *a, void *b, size_t n); void upx_memswap(void *a, void *b, size_t n);
// Shell sort; NOT stable. Sorts the n elements of element_size bytes each that
// start at array in place; compare(x, y) > 0 means x has to be placed after y.
void upx_shellsort(void *array, size_t n, size_t element_size,
int (*compare)(const void *, const void *));
void upx_stable_sort(void *array, size_t n, size_t element_size, void upx_stable_sort(void *array, size_t n, size_t element_size,
int (*compare)(const void *, const void *)); int (*compare)(const void *, const void *));