我正在用C写一些数据结构,我想我会对合并排序和快速排序进行基准测试。下面的代码是一个更大的代码库的一部分,所以它缺少一些功能,但它是自包含的,应该编译和运行。
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
double execution_time;
clock_t start, end;
typedef struct {
double qs_time;
double ms_time;
} Tuple;
typedef struct vector {
int* vec;
int len;
int cap;
} Vector;
Vector* ds_new_vector() {
Vector* new_vec = malloc(sizeof(Vector));
new_vec->vec = malloc(1024 * sizeof(int));
new_vec->len = 0;
new_vec->cap = 1024;
return new_vec;
}
static void double_vec_cap(Vector* vec) {
int* new_ptr = realloc(vec->vec, (sizeof(int) * (u_int64_t) vec->cap * 2));
if (new_ptr == NULL) {
printf("Error: realloc failed in vector_double_vec_cap\n");
}
else {
vec->vec = new_ptr;
vec->cap *= 2;
}
return;
}
void vector_push(Vector* vec, int x) {
if (vec == NULL) {
vec = ds_new_vector();
} else if (vec->cap == vec->len) {
double_vec_cap(vec);
}
vec->vec[vec->len] = x;
vec->len++;
return;
}
void vector_print(Vector* vec) {
printf("[");
for (int i = 0; i < vec->len - 1; i++) {
printf("%d, ", vec->vec[i]);
}
printf("%d]\n", vec->vec[vec->len - 1]);
return;
}
void vector_print_partial(Vector* vec) {
if (vec->len <= 10) {
vector_print(vec);
return;
}
printf("[");
for (int i = 0; i < 5; i++) {
printf("%d, ", vec->vec[i]);
}
printf("... , ");
for (int i = vec->len - 5; i < vec->len - 1; i++) {
printf("%d, ", vec->vec[i]);
}
printf("%d]\n", vec->vec[vec->len - 1]);
return;
}
void vector_destroy(Vector* vec) {
free(vec->vec);
vec->vec = NULL;
free(vec);
vec = NULL;
return;
}
static int* merge(int* left_arr, int left_arr_len, int* right_arr, int right_arr_len) {
int* result = malloc(sizeof(int) * (u_int64_t) (left_arr_len + right_arr_len));
int i = 0; int l = 0; int r = 0;
while (l < left_arr_len && r < right_arr_len) {
if (left_arr[l] <= right_arr[r]) {
result[i] = left_arr[l];
i++; l++;
} else {
result[i] = right_arr[r];
i++; r++;
}
}
while (l < left_arr_len) {
result[i] = left_arr[l];
i++; l++;
}
while (r < right_arr_len) {
result[i] = right_arr[r];
i++; r++;
}
free(left_arr);
left_arr = NULL;
free(right_arr);
right_arr = NULL;
return result;
}
static int* ds_mergesort(int* arr, int length) {
if (length <= 1) return arr;
int mid = length / 2;
int* left_arr = malloc(sizeof(int) * (u_int64_t) mid);
int* right_arr = malloc(sizeof(int) * (u_int64_t) (length - mid));
int j = 0;
for (int i = 0; i < length; i++) {
if (i < mid) {
left_arr[i] = arr[i];
} else {
right_arr[j] = arr[i];
j++;
}
}
free(arr);
arr = NULL;
left_arr = ds_mergesort(left_arr, mid);
right_arr = ds_mergesort(right_arr, length - mid);
return merge(left_arr, mid, right_arr, (length - mid));
}
void sort_vector_mergesort(Vector* vec) {
vec->vec = ds_mergesort(vec->vec, vec->len);
return;
}
static void quicksort(int arr[], int left, int right) {
if (right < left) return;
int pivot = arr[right];
int i = left - 1;
for (int j = left; j < right; j++) {
if (arr[j] < pivot) {
i++;
int temp = arr[j];
arr[j] = arr[i];
arr[i] = temp;
}
}
i++;
int temp = arr[i];
arr[i] = arr[right];
arr[right] = temp;
quicksort(arr, left, i - 1);
quicksort(arr, i + 1, right);
}
void sort_vector_quicksort(Vector* vec) {
quicksort(vec->vec, 0, vec->len - 1);
return;
}
static double test_mergesort(Vector* vec) {
start = clock();
sort_vector_mergesort(vec);
end = clock();
execution_time = ((double)(end - start))/CLOCKS_PER_SEC;
return execution_time;
}
static double test_quicksort(Vector* vec) {
start = clock();
sort_vector_quicksort(vec);
end = clock();
execution_time = ((double)(end - start))/CLOCKS_PER_SEC;
return execution_time;
}
static void test_exponential_sort(Tuple* t, int size) {
Vector* vec1 = ds_new_vector();
Vector* vec2 = ds_new_vector();
srand((u_int32_t) time(NULL));
int num;
for (int i = 0; i < size; i++) {
num = rand() % 1000;
vector_push(vec1, num);
vector_push(vec2, num);
}
t->ms_time = test_mergesort(vec1);
t->qs_time = test_quicksort(vec2);
vector_destroy(vec1);
vector_destroy(vec2);
}
int main () {
Tuple* t = malloc(sizeof(Tuple));
printf("\nSorting Exponetially larger vectors\n\n");
for (int i = 1024; i < 10000000; i = i * 2) {
test_exponential_sort(t, i);
printf("Vector size: %d\n", i);
printf("Mergesort Time: %fs Quicksort Time: %fs\n", t->ms_time, t->qs_time);
if (t->qs_time > t->ms_time) {
printf("Mergesort was faster than Quicksort by: %lfs\n", t->qs_time - t->ms_time);
} else {
printf("Quicksort was faster than Mergesort by: %lfs\n", t->ms_time - t->qs_time);
}
printf("----------------------------------------------------\n\n");
}
free(t);
}
我认为,因为quicksort在适当的位置进行排序,它总是比mergesort执行得更快,因为后者除了排序之外还必须处理内存分配,直到“vector”大小达到大约500,000。
Sorting Exponetially larger vectors
Vector size: 1024
Mergesort Time: 0.000318s Quicksort Time: 0.000062s
Quicksort was faster than Mergesort by: 0.000256s
----------------------------------------------------
Vector size: 2048
Mergesort Time: 0.000638s Quicksort Time: 0.000127s
Quicksort was faster than Mergesort by: 0.000511s
----------------------------------------------------
Vector size: 4096
Mergesort Time: 0.001377s Quicksort Time: 0.000265s
Quicksort was faster than Mergesort by: 0.001112s
----------------------------------------------------
Vector size: 8192
Mergesort Time: 0.003064s Quicksort Time: 0.000539s
Quicksort was faster than Mergesort by: 0.002525s
----------------------------------------------------
Vector size: 16384
Mergesort Time: 0.005424s Quicksort Time: 0.001347s
Quicksort was faster than Mergesort by: 0.004077s
----------------------------------------------------
Vector size: 32768
Mergesort Time: 0.010996s Quicksort Time: 0.002865s
Quicksort was faster than Mergesort by: 0.008131s
----------------------------------------------------
Vector size: 65536
Mergesort Time: 0.022966s Quicksort Time: 0.007522s
Quicksort was faster than Mergesort by: 0.015444s
----------------------------------------------------
Vector size: 131072
Mergesort Time: 0.045921s Quicksort Time: 0.021228s
Quicksort was faster than Mergesort by: 0.024693s
----------------------------------------------------
Vector size: 262144
Mergesort Time: 0.098435s Quicksort Time: 0.067185s
Quicksort was faster than Mergesort by: 0.031250s
----------------------------------------------------
Vector size: 524288
Mergesort Time: 0.186068s Quicksort Time: 0.230357s
Mergesort was faster than Quicksort by: 0.044289s
----------------------------------------------------
Vector size: 1048576
Mergesort Time: 0.377109s Quicksort Time: 0.853521s
Mergesort was faster than Quicksort by: 0.476412s
----------------------------------------------------
Vector size: 2097152
Mergesort Time: 0.765805s Quicksort Time: 3.259530s
Mergesort was faster than Quicksort by: 2.493725s
----------------------------------------------------
Vector size: 4194304
Mergesort Time: 1.534298s Quicksort Time: 12.558161s
Mergesort was faster than Quicksort by: 11.023863s
----------------------------------------------------
Vector size: 8388608
Mergesort Time: 3.118347s Quicksort Time: 48.325201s
Mergesort was faster than Quicksort by: 45.206854s
----------------------------------------------------
Quicksort最终花费了更长的时间来对大小为8,388,608的数组进行排序,而不是mergesort。是否有什么与内存缓存有关的事情我不知道?任何关于这一点的想法或我如何实现这些功能的想法都将受到赞赏。我确实尝试使用不同的枢轴进行快速排序,随机选择索引,选择最后一个索引似乎是最有效的,大概是因为数组中的数字都是随机的。
1条答案
按热度按时间db2dz4w81#
正如@CraigEstey所指出的,选择一个糟糕的枢轴点可能会导致快速排序在最坏情况下的O(n^2)。这个新的更新的快速排序使用中间元素作为枢轴,在所有情况下都比合并排序表现得更好。
下面是一个大小为8,388,608的数组的结果:
快速排序从大约48秒缩短到不到1秒
我在这里找到了这个实现:
https://www.algolist.net/Algorithms/Sorting/Quicksort