I'm trying to measure the write bandwidth of my memory. I created an 8 GB char array and call memset on it from 128 threads. Below is the code snippet.
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Benchmark configuration: threads * res_num equals char_num. */
uint8_t *arr;                       /* buffer the worker threads write into */
int threads = 128;                  /* number of worker threads */
int res_num = 62500000;             /* bytes written by each worker thread */
int64_t char_num = 8000000000LL;    /* total size of the buffer in bytes */
/*
 * Convert a struct timespec into a single value in seconds.
 *
 * t: the time to convert (whole seconds in tv_sec, nanoseconds in tv_nsec).
 * Returns tv_sec plus tv_nsec scaled down to seconds, as a double.
 */
static inline double timespec_to_sec(struct timespec t)
{
    const double nanos_per_sec = 1000000000.0;

    return (double)t.tv_sec + (double)t.tv_nsec / nanos_per_sec;
}
/*
 * Thread entry point: fill one thread's slice of the global buffer `arr`
 * with the byte value 1.
 *
 * val: pointer to an int holding this thread's index. The slice begins at
 *      byte offset res_num * index and is res_num bytes long.
 * Returns NULL unconditionally.
 */
void *multithread_memset(void *val)
{
    int thread_id = *(const int *)val;

    /*
     * The offset must be computed in size_t. Evaluated in int,
     * res_num * thread_id overflows (undefined behaviour) as soon as the
     * product exceeds INT_MAX; with 32-bit int and res_num = 62500000 that
     * already happens at thread index 35.
     */
    size_t chunk = (size_t)res_num;
    size_t offset = chunk * (size_t)thread_id;

    memset(arr + offset, 1, chunk);
    return NULL;
}
void start_parallel()
{
int* thread_id = malloc(sizeof(int) * threads);
for (int i = 0; i < threads; i++) {
thread_id[i] = i;
}
pthread_t* thread_array = malloc(sizeof(pthread_t) * threads);
for (int i = 0; i < threads; i++) {
pthread_create(&thread_array[i], NULL, multithread_memset, &thread_id[i]);
}
for (int i = 0; i < threads; i++) {
pthread_join(thread_array[i], NULL);
}
}
/*
 * Allocate the char_num-byte buffer, time one start_parallel() pass over it
 * using CLOCK_MONOTONIC, and print the elapsed time in seconds.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated or
 * the clock cannot be read.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    arr = malloc((size_t)char_num);
    if (arr == NULL) {
        fprintf(stderr, "failed to allocate %lld bytes\n", (long long)char_num);
        return EXIT_FAILURE;
    }

    /*
     * NOTE(review): the buffer has never been written before the timed
     * region. On demand-paged systems the first write to each page
     * presumably includes page-fault cost, which would understate the
     * memory bandwidth; consider an untimed warm-up pass - confirm on the
     * target platform.
     */
    struct timespec before;
    struct timespec after;
    if (clock_gettime(CLOCK_MONOTONIC, &before) != 0) {
        perror("clock_gettime");
        free(arr);
        return EXIT_FAILURE;
    }
    start_parallel();
    if (clock_gettime(CLOCK_MONOTONIC, &after) != 0) {
        perror("clock_gettime");
        free(arr);
        return EXIT_FAILURE;
    }

    /* Keep the result in double: a float cannot carry the 8 decimal places
     * requested by the format below. */
    double elapsed = timespec_to_sec(after) - timespec_to_sec(before);
    printf("sequential = %10.8f\n", elapsed);

    free(arr);
    arr = NULL;
    return 0;
}
According to the output, it took 0.6 seconds to finish all the memset calls. To my understanding, this implies a memory write bandwidth of 8 GB / 0.6 s ≈ 13 GB/s. However, I have 2667 MHz DDR4, which should have a bandwidth of 21.3 GB/s. Is there anything wrong with my code or my calculation? Thanks for any help!
memset won't do only write cycles. The first write instruction in each cache line will necessarily read that line into cache, because the CPU doesn't know you will later overwrite all of it.