src: implement upx_shellsort()
This commit is contained in:
parent
bfb438dc89
commit
bb6b087862
32
Makefile
32
Makefile
@ -93,6 +93,22 @@ build/extra/clang-static/release: PHONY; $(call run_config_and_build,$@,Release)
|
|||||||
build/extra/clang-static/%: export CC = clang -static
|
build/extra/clang-static/%: export CC = clang -static
|
||||||
build/extra/clang-static/%: export CXX = clang++ -static
|
build/extra/clang-static/%: export CXX = clang++ -static
|
||||||
|
|
||||||
|
# force building with clang/clang++ C17/C++20
|
||||||
|
build/extra/clang-std-cxx20/debug: PHONY; $(call run_config_and_build,$@,Debug)
|
||||||
|
build/extra/clang-std-cxx20/release: PHONY; $(call run_config_and_build,$@,Release)
|
||||||
|
build/extra/clang-std-cxx20/%: export CC = clang -std=gnu17
|
||||||
|
build/extra/clang-std-cxx20/%: export CXX = clang++ -std=gnu++20
|
||||||
|
build/extra/clang-std-cxx20/%: export UPX_CONFIG_DISABLE_C_STANDARD=ON
|
||||||
|
build/extra/clang-std-cxx20/%: export UPX_CONFIG_DISABLE_CXX_STANDARD=ON
|
||||||
|
|
||||||
|
# force building with clang/clang++ C23/C++23
|
||||||
|
build/extra/clang-std-cxx23/debug: PHONY; $(call run_config_and_build,$@,Debug)
|
||||||
|
build/extra/clang-std-cxx23/release: PHONY; $(call run_config_and_build,$@,Release)
|
||||||
|
build/extra/clang-std-cxx23/%: export CC = clang -std=gnu2x
|
||||||
|
build/extra/clang-std-cxx23/%: export CXX = clang++ -std=gnu++2b
|
||||||
|
build/extra/clang-std-cxx23/%: export UPX_CONFIG_DISABLE_C_STANDARD=ON
|
||||||
|
build/extra/clang-std-cxx23/%: export UPX_CONFIG_DISABLE_CXX_STANDARD=ON
|
||||||
|
|
||||||
# force building with gcc/g++
|
# force building with gcc/g++
|
||||||
build/extra/gcc/debug: PHONY; $(call run_config_and_build,$@,Debug)
|
build/extra/gcc/debug: PHONY; $(call run_config_and_build,$@,Debug)
|
||||||
build/extra/gcc/release: PHONY; $(call run_config_and_build,$@,Release)
|
build/extra/gcc/release: PHONY; $(call run_config_and_build,$@,Release)
|
||||||
@ -123,6 +139,22 @@ build/extra/gcc-static/release: PHONY; $(call run_config_and_build,$@,Release)
|
|||||||
build/extra/gcc-static/%: export CC = gcc -static
|
build/extra/gcc-static/%: export CC = gcc -static
|
||||||
build/extra/gcc-static/%: export CXX = g++ -static
|
build/extra/gcc-static/%: export CXX = g++ -static
|
||||||
|
|
||||||
|
# force building with gcc/g++ C17/C++20
|
||||||
|
build/extra/gcc-std-cxx20/debug: PHONY; $(call run_config_and_build,$@,Debug)
|
||||||
|
build/extra/gcc-std-cxx20/release: PHONY; $(call run_config_and_build,$@,Release)
|
||||||
|
build/extra/gcc-std-cxx20/%: export CC = gcc -std=gnu17
|
||||||
|
build/extra/gcc-std-cxx20/%: export CXX = g++ -std=gnu++20
|
||||||
|
build/extra/gcc-std-cxx20/%: export UPX_CONFIG_DISABLE_C_STANDARD=ON
|
||||||
|
build/extra/gcc-std-cxx20/%: export UPX_CONFIG_DISABLE_CXX_STANDARD=ON
|
||||||
|
|
||||||
|
# force building with gcc/g++ C23/C++23
|
||||||
|
build/extra/gcc-std-cxx23/debug: PHONY; $(call run_config_and_build,$@,Debug)
|
||||||
|
build/extra/gcc-std-cxx23/release: PHONY; $(call run_config_and_build,$@,Release)
|
||||||
|
build/extra/gcc-std-cxx23/%: export CC = gcc -std=gnu2x
|
||||||
|
build/extra/gcc-std-cxx23/%: export CXX = g++ -std=gnu++23
|
||||||
|
build/extra/gcc-std-cxx23/%: export UPX_CONFIG_DISABLE_C_STANDARD=ON
|
||||||
|
build/extra/gcc-std-cxx23/%: export UPX_CONFIG_DISABLE_CXX_STANDARD=ON
|
||||||
|
|
||||||
# cross compiler: Linux glibc aarch64-linux-gnu (arm64)
|
# cross compiler: Linux glibc aarch64-linux-gnu (arm64)
|
||||||
build/extra/cross-linux-gnu-aarch64/debug: PHONY; $(call run_config_and_build,$@,Debug)
|
build/extra/cross-linux-gnu-aarch64/debug: PHONY; $(call run_config_and_build,$@,Debug)
|
||||||
build/extra/cross-linux-gnu-aarch64/release: PHONY; $(call run_config_and_build,$@,Release)
|
build/extra/cross-linux-gnu-aarch64/release: PHONY; $(call run_config_and_build,$@,Release)
|
||||||
|
|||||||
@ -277,6 +277,55 @@ void upx_memswap(void *a, void *b, size_t n) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// somewhat better memswap(), optimized for our use cases in sort functions
|
||||||
|
static void memswap_no_overlap(char *a, char *b, size_t n) {
|
||||||
|
#if defined(__clang__) && __clang_major__ < 15 && 1
|
||||||
|
// avoid a clang ICE; sigh
|
||||||
|
upx_memswap(a, b, n);
|
||||||
|
#else // clang bug
|
||||||
|
alignas(16) char tmpbuf[16];
|
||||||
|
#define SWAP(x) \
|
||||||
|
ACC_BLOCK_BEGIN \
|
||||||
|
upx_memcpy_inline(tmpbuf, a, x); \
|
||||||
|
upx_memcpy_inline(a, b, x); \
|
||||||
|
upx_memcpy_inline(b, tmpbuf, x); \
|
||||||
|
a += x; \
|
||||||
|
b += x; \
|
||||||
|
ACC_BLOCK_END
|
||||||
|
|
||||||
|
for (; n >= 16; n -= 16)
|
||||||
|
SWAP(16);
|
||||||
|
if (n & 8)
|
||||||
|
SWAP(8);
|
||||||
|
if (n & 4)
|
||||||
|
SWAP(4);
|
||||||
|
if (n & 2)
|
||||||
|
SWAP(2);
|
||||||
|
if (n & 1)
|
||||||
|
SWAP(1);
|
||||||
|
UNUSED(a); // avoid pedantic warning about final assignment
|
||||||
|
UNUSED(b); // avoid pedantic warning about final assignment
|
||||||
|
#undef SWAP
|
||||||
|
#endif // clang bug
|
||||||
|
}
|
||||||
|
|
||||||
|
// simple Shell sort using Knuth's gap; NOT stable
|
||||||
|
void upx_shellsort(void *array, size_t n, size_t element_size,
|
||||||
|
int (*compare)(const void *, const void *)) {
|
||||||
|
mem_size_assert(element_size, n); // check size
|
||||||
|
size_t gap = 1;
|
||||||
|
while (gap * 3 <= n) // cannot overflow
|
||||||
|
gap = gap * 3 + 1;
|
||||||
|
for (; gap > 0; gap = (gap - 1) / 3) {
|
||||||
|
const size_t gap_bytes = element_size * gap;
|
||||||
|
char *g = (char *) array + gap_bytes; // g := &array[gap]
|
||||||
|
char *ii = g;
|
||||||
|
for (size_t i = gap; i < n; i += gap, ii += gap_bytes)
|
||||||
|
for (char *a = ii; a >= g && compare(a - gap_bytes, a) > 0; a -= gap_bytes)
|
||||||
|
memswap_no_overlap(a - gap_bytes, a, element_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// extremely simple (and beautiful) stable sort: Gnomesort
|
// extremely simple (and beautiful) stable sort: Gnomesort
|
||||||
// WARNING: O(n^2) and thus very inefficient for large n
|
// WARNING: O(n^2) and thus very inefficient for large n
|
||||||
void upx_stable_sort(void *array, size_t n, size_t element_size,
|
void upx_stable_sort(void *array, size_t n, size_t element_size,
|
||||||
@ -284,14 +333,14 @@ void upx_stable_sort(void *array, size_t n, size_t element_size,
|
|||||||
for (size_t i = 1; i < n; i++) {
|
for (size_t i = 1; i < n; i++) {
|
||||||
char *a = (char *) array + element_size * i; // a := &array[i]
|
char *a = (char *) array + element_size * i; // a := &array[i]
|
||||||
if (i != 0 && compare(a - element_size, a) > 0) { // if a[-1] > a[0] then
|
if (i != 0 && compare(a - element_size, a) > 0) { // if a[-1] > a[0] then
|
||||||
upx_memswap(a - element_size, a, element_size); // swap elements a[-1] <=> a[0]
|
memswap_no_overlap(a - element_size, a, element_size); // swap elements a[-1] <=> a[0]
|
||||||
i -= 2; // and decrease i
|
i -= 2; // and decrease i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(DOCTEST_CONFIG_DISABLE) && DEBUG
|
#if !defined(DOCTEST_CONFIG_DISABLE) && DEBUG
|
||||||
TEST_CASE("upx_stable_sort") {
|
TEST_CASE("basic upx_stable_sort") {
|
||||||
{
|
{
|
||||||
unsigned a[] = {0, 1};
|
unsigned a[] = {0, 1};
|
||||||
upx_stable_sort(a, 2, sizeof(*a), ne32_compare);
|
upx_stable_sort(a, 2, sizeof(*a), ne32_compare);
|
||||||
@ -314,18 +363,21 @@ TEST_CASE("upx_stable_sort") {
|
|||||||
|
|
||||||
#if __cplusplus >= 202002L // use C++20 std::next_permutation() to test all permutations
|
#if __cplusplus >= 202002L // use C++20 std::next_permutation() to test all permutations
|
||||||
namespace {
|
namespace {
|
||||||
|
typedef int (*compare_func)(const void *, const void *);
|
||||||
|
typedef void (*sort_func)(void *array, size_t n, size_t element_size, compare_func compare);
|
||||||
|
template <class ElementType, compare_func CompareFunc>
|
||||||
struct TestSortAllPermutations {
|
struct TestSortAllPermutations {
|
||||||
static upx_uint64_t test(size_t n) {
|
static noinline upx_uint64_t test(sort_func sort, size_t n) {
|
||||||
constexpr size_t N = 16;
|
constexpr size_t N = 16;
|
||||||
assert(n > 0 && n <= N);
|
assert(n > 0 && n <= N);
|
||||||
LE16 perm[N];
|
ElementType perm[N];
|
||||||
for (size_t i = 0; i < n; i++)
|
for (size_t i = 0; i < n; i++)
|
||||||
perm[i] = 255 + i;
|
perm[i] = 255 + i;
|
||||||
upx_uint64_t num_perms = 0;
|
upx_uint64_t num_perms = 0;
|
||||||
do {
|
do {
|
||||||
LE16 a[N];
|
ElementType a[N];
|
||||||
memcpy(a, perm, sizeof(*a) * n);
|
memcpy(a, perm, sizeof(*a) * n);
|
||||||
upx_stable_sort(a, n, sizeof(*a), le16_compare);
|
sort(a, n, sizeof(*a), CompareFunc);
|
||||||
for (size_t i = 0; i < n; i++)
|
for (size_t i = 0; i < n; i++)
|
||||||
assert((a[i] == 255 + i));
|
assert((a[i] == 255 + i));
|
||||||
num_perms += 1;
|
num_perms += 1;
|
||||||
@ -334,14 +386,33 @@ struct TestSortAllPermutations {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
TEST_CASE("upx_shellsort") {
|
||||||
|
// typedef TestSortAllPermutations<BE64, be64_compare> TestSort;
|
||||||
|
typedef TestSortAllPermutations<LE16, le16_compare> TestSort;
|
||||||
|
CHECK(TestSort::test(upx_shellsort, 1) == 1);
|
||||||
|
CHECK(TestSort::test(upx_shellsort, 2) == 2);
|
||||||
|
CHECK(TestSort::test(upx_shellsort, 3) == 6);
|
||||||
|
CHECK(TestSort::test(upx_shellsort, 4) == 24);
|
||||||
|
CHECK(TestSort::test(upx_shellsort, 5) == 120);
|
||||||
|
// CHECK(TestSort::test(upx_shellsort, 6) == 720);
|
||||||
|
// CHECK(TestSort::test(upx_shellsort, 7) == 5040);
|
||||||
|
// CHECK(TestSort::test(upx_shellsort, 8) == 40320);
|
||||||
|
// CHECK(TestSort::test(upx_shellsort, 9) == 362880);
|
||||||
|
// CHECK(TestSort::test(upx_shellsort, 10) == 3628800);
|
||||||
|
}
|
||||||
TEST_CASE("upx_stable_sort") {
|
TEST_CASE("upx_stable_sort") {
|
||||||
CHECK(TestSortAllPermutations::test(1) == 1);
|
// typedef TestSortAllPermutations<BE64, be64_compare> TestSort;
|
||||||
CHECK(TestSortAllPermutations::test(2) == 2);
|
typedef TestSortAllPermutations<LE16, le16_compare> TestSort;
|
||||||
CHECK(TestSortAllPermutations::test(3) == 6);
|
CHECK(TestSort::test(upx_stable_sort, 1) == 1);
|
||||||
CHECK(TestSortAllPermutations::test(4) == 24);
|
CHECK(TestSort::test(upx_stable_sort, 2) == 2);
|
||||||
CHECK(TestSortAllPermutations::test(5) == 120);
|
CHECK(TestSort::test(upx_stable_sort, 3) == 6);
|
||||||
// CHECK(TestSortAllPermutations::test(6) == 720);
|
CHECK(TestSort::test(upx_stable_sort, 4) == 24);
|
||||||
// CHECK(TestSortAllPermutations::test(7) == 5040);
|
CHECK(TestSort::test(upx_stable_sort, 5) == 120);
|
||||||
|
// CHECK(TestSort::test(upx_stable_sort, 6) == 720);
|
||||||
|
// CHECK(TestSort::test(upx_stable_sort, 7) == 5040);
|
||||||
|
// CHECK(TestSort::test(upx_stable_sort, 8) == 40320);
|
||||||
|
// CHECK(TestSort::test(upx_stable_sort, 9) == 362880);
|
||||||
|
// CHECK(TestSort::test(upx_stable_sort, 10) == 3628800);
|
||||||
}
|
}
|
||||||
#endif // C++20
|
#endif // C++20
|
||||||
#endif // DEBUG
|
#endif // DEBUG
|
||||||
|
|||||||
@ -127,6 +127,9 @@ void *upx_calloc(size_t n, size_t element_size);
|
|||||||
|
|
||||||
void upx_memswap(void *a, void *b, size_t n);
|
void upx_memswap(void *a, void *b, size_t n);
|
||||||
|
|
||||||
|
void upx_shellsort(void *array, size_t n, size_t element_size,
|
||||||
|
int (*compare)(const void *, const void *));
|
||||||
|
|
||||||
void upx_stable_sort(void *array, size_t n, size_t element_size,
|
void upx_stable_sort(void *array, size_t n, size_t element_size,
|
||||||
int (*compare)(const void *, const void *));
|
int (*compare)(const void *, const void *));
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user