Recently I wrote my own implementation of a utility that recursively lists the contents of directories, similar to the Linux ls utility with the -Ro flags. As a training exercise, I wrote it in C.
I would love to hear your feedback on possible optimizations, better error handling, flaws in the implementation, or the code in general.
I usually code in C++ and am not very familiar with C, so some parts could probably have been implemented better, but I need someone experienced to point them out. Thanks!
#include <sys/stat.h>
#include <dirent.h>
#include <pwd.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <errno.h>
#include <error.h>
#include <limits.h>
#include <locale.h>
#include <malloc.h>
#include <stddef.h>
#define BUF_SIZE 1024 /* initial size of the buffer for storing file information */
#define UNAME_MAX 32
#define DATE_LEN 20
#define PERM_LEN 10
#define MINOR_ERROR 1
#define FATAL_ERROR 2
/*
 * Classification of a file as computed by get_file_type().
 * The enumerators are used as indices into file_type_str[], so both must be
 * kept in sync.
 */
enum file_type {
REGULAR,
DIRECTORY,
SYMLINK,
FIFO,
SOCKET,
/* owner-execute bit is set; see get_file_type() for the exact rule */
EXECUTABLE,
/* anything get_file_type() does not recognise */
UNKNOWN
};
/*
 * Marker printed by print_file_info() immediately in front of the file name,
 * indexed by enum file_type.
 * Author's note: an array of strings measured faster than an array of single
 * chars here; the reason (possibly alignment) was not investigated.
 */
const char *file_type_str[] = {
[REGULAR] = "",
[DIRECTORY] = "/",
[SYMLINK] = "@",
[FIFO] = "|",
[SOCKET] = "=",
[EXECUTABLE] = "*",
[UNKNOWN] = "?"
};
/*
 * Everything needed to print one entry; filled in by read_file_info() and
 * released by free_file_info().
 */
struct file_info {
/* result of lstat() on the entry (a symlink itself is described, not its target) */
struct stat st;
/* classification derived from st.st_mode by get_file_type() */
enum file_type type;
/* heap-allocated name to display */
char *name;
/* heap-allocated full path; only meaningful (and only freed) when type == DIRECTORY */
char *path;
/* heap-allocated symlink target from read_link(), or NULL when absent */
char *target;
};
/*
 * Node of the singly linked uid -> user name cache maintained by get_owner().
 */
struct user_info {
/* login name, or the uid printed in decimal when the lookup found nothing */
char name[UNAME_MAX];
/* key of the cache entry */
uid_t uid;
/* next cached entry, NULL at the end of the list */
struct user_info *next;
};
/* Head of the uid -> name cache; get_owner() pushes new entries at the front to avoid repeated getpwuid() calls. */
static struct user_info *user_info_cache;
/* Set to MINOR_ERROR by file_error(); myrls() returns it as the program's status. Zero-initialised as a file-scope static. */
static int exit_status;
/*
 * One-time process setup: enable locale-aware collation and load the
 * timezone information.
 */
static void
init(void)
{
    /* take LC_COLLATE from the environment so that strcoll() sorts names
       according to the user's locale */
    setlocale(LC_COLLATE, "");

    /* the reentrant localtime_r() is not required to initialise the timezone
       data itself (POSIX.1-2004), so do it explicitly, once, up front */
    tzset();
}
/*
 * qsort() comparator for arrays of struct file_info: orders two entries by
 * name using strcoll(), i.e. according to the current LC_COLLATE locale.
 */
static int
compare_file_name(const void *a, const void *b)
{
    const struct file_info *lhs = a;
    const struct file_info *rhs = b;

    return strcoll(lhs->name, rhs->name);
}
/*
 * Allocate `size` bytes or terminate the program.
 *
 * A zero-byte request is rounded up to one byte: malloc(0) is allowed to
 * return NULL, which must not be mistaken for an out-of-memory condition.
 *
 * Returns a non-NULL pointer the caller releases with free(). On failure
 * error() prints the reason and exits with status FATAL_ERROR.
 */
static void *
xmalloc(size_t size)
{
    void *ptr = malloc(size != 0 ? size : 1);

    if (ptr == NULL) {
        error(FATAL_ERROR, errno, "malloc");
        __builtin_unreachable(); /* error() calls exit() when given a nonzero status */
    }
    return ptr;
}
/*
 * Resize the allocation `ptr` to `size` bytes or terminate the program.
 *
 * A zero-byte request is rounded up to one byte: realloc(ptr, 0) may free
 * `ptr` and return NULL, which would otherwise be reported as a fatal
 * allocation failure (and leave the caller with a dangling pointer).
 *
 * Returns the (possibly moved) non-NULL block. On failure error() prints the
 * reason and exits with status FATAL_ERROR.
 */
static void *
xrealloc(void *ptr, size_t size)
{
    void *new_ptr = realloc(ptr, size != 0 ? size : 1);

    if (new_ptr == NULL) {
        error(FATAL_ERROR, errno, "realloc");
        __builtin_unreachable(); /* error() calls exit() when given a nonzero status */
    }
    return new_ptr;
}
/*
 * Report a non-fatal, file-related failure and remember that one occurred.
 *
 * message: short description of the operation that failed
 * path:    file or directory the operation was applied to
 *
 * The current errno is handed to error() together with "message: path";
 * because the status argument is 0, error() returns and the program goes on.
 * The global exit status becomes MINOR_ERROR.
 */
static void
file_error(const char *message, const char *path)
{
    const int saved_errno = errno;

    exit_status = MINOR_ERROR;
    error(0, saved_errno, "%s: %s", message, path);
}
/*
 * Render the nine user/group/other permission bits of `mode` as an
 * "rwxrwxrwx"-style string, using '-' for every bit that is not set.
 *
 * formatted: output buffer of at least 10 bytes (nine characters plus NUL)
 */
static void
format_permissions(const mode_t mode, char *formatted)
{
    /* one row per output column, in display order */
    static const struct {
        mode_t bit;
        char symbol;
    } columns[] = {
        { S_IRUSR, 'r' }, { S_IWUSR, 'w' }, { S_IXUSR, 'x' },
        { S_IRGRP, 'r' }, { S_IWGRP, 'w' }, { S_IXGRP, 'x' },
        { S_IROTH, 'r' }, { S_IWOTH, 'w' }, { S_IXOTH, 'x' },
    };
    const size_t column_count = sizeof columns / sizeof columns[0];

    for (size_t i = 0; i < column_count; i++) {
        if (mode & columns[i].bit) {
            formatted[i] = columns[i].symbol;
        } else {
            formatted[i] = '-';
        }
    }
    formatted[column_count] = '\0';
}
static char
*get_owner(uid_t uid) {
struct user_info *user_info;
for (user_info = user_info_cache; user_info; user_info = user_info->next) {
if (user_info->uid == uid) {
return user_info->name;
}
}
user_info = xmalloc(sizeof(struct user_info));
const struct passwd *pwd = getpwuid(uid);
if (pwd == NULL) {
sprintf(user_info->name, "%u", uid); /* FIXME: known error:
if given uid actually exists in the database and it's getpwuid() that just fails to retrieve it,
we end up writing wrong username to cache */
} else {
strcpy(user_info->name, pwd->pw_name);
}
user_info->uid = uid;
user_info->next = user_info_cache;
user_info_cache = user_info;
return user_info->name;
}
static void
format_time(const time_t mtime, char *formatted) {
struct tm tm_info;
localtime_r(&mtime, &tm_info); /* use reentrant version of localtime,
as it's not required to call tzset() that causes overhead */
strftime(formatted, 20, "%Y-%m-%d %H:%M:%S", &tm_info);
}
/*
 * Classify a file from the st_mode value reported by lstat().
 *
 * Returns one of the enum file_type values. EXECUTABLE is reported only for
 * regular files with the owner-execute bit set; other types that merely carry
 * an execute bit (e.g. character or block devices) are UNKNOWN rather than
 * being mislabelled as executables.
 */
static int
get_file_type(const mode_t st_mode)
{
    switch (st_mode & S_IFMT) {
    case S_IFDIR:
        return DIRECTORY;
    case S_IFLNK:
        return SYMLINK;
    case S_IFIFO:
        return FIFO;
    case S_IFSOCK:
        return SOCKET;
    case S_IFREG:
        return (st_mode & S_IXUSR) ? EXECUTABLE : REGULAR;
    default:
        return UNKNOWN;
    }
}
/*
 * Print one long-format line for `file_info` on stdout:
 *
 *   <permissions> <owner> <size> <mtime> <type marker><name>[ -> <target>]
 *
 * owner_w: minimum width of the (right-aligned) owner column
 * size_w:  minimum width of the (right-aligned) size column
 *
 * The size is converted to long long before printing because the width of
 * off_t differs between platforms, so "%ld" is not a portable match for it.
 */
static void
print_file_info(const struct file_info *file_info, const int owner_w, const int size_w)
{
    char permissions[PERM_LEN];
    char date[DATE_LEN];

    format_permissions(file_info->st.st_mode, permissions);
    format_time(file_info->st.st_mtime, date);
    const char *owner = get_owner(file_info->st.st_uid);

    printf("%s %*s %*lld %s %s%s",
           permissions,
           owner_w, owner,
           size_w, (long long) file_info->st.st_size,
           date,
           file_type_str[file_info->type],
           file_info->name);
    if (file_info->target != NULL) {
        printf(" -> %s", file_info->target);
    }
    printf("\n");
}
/*
 * Join `dir_name` and `file_name` into `full_path`.
 *
 * A single '/' is inserted between the two parts unless `dir_name` already
 * ends in one, `file_name` is empty, or `dir_name` is empty. The last case
 * also keeps the function from inspecting the byte in front of the buffer.
 *
 * full_path: output buffer of at least
 *            strlen(dir_name) + strlen(file_name) + 2 bytes
 *
 * Only ISO C string functions are used, so no POSIX-2008 extension such as
 * stpcpy() is required.
 */
static void
concat_path(const char *dir_name, const char *file_name, char *full_path)
{
    const size_t dir_len = strlen(dir_name);
    char *end = full_path + dir_len;

    memcpy(full_path, dir_name, dir_len);
    if (dir_len > 0 && end[-1] != '/' && *file_name != '\0') {
        *end++ = '/';
    }
    strcpy(end, file_name); /* also writes the terminating NUL */
}
/*
 * Read the target of the symbolic link `path`.
 *
 * expected_size: link length as reported by lstat(), used as the initial
 *                buffer size guess. lstat() can report 0 for pseudo-files
 *                that behave like symlinks (e.g. /proc/self/exe); in that
 *                case _POSIX_SYMLINK_MAX is used as the guess to limit the
 *                number of readlink()/realloc() rounds.
 *
 * Returns a heap-allocated, NUL-terminated string the caller must free(), or
 * NULL when readlink() fails or the target does not fit into PATH_MAX bytes.
 *
 * The result of readlink() is kept in a signed ssize_t so that an error (-1)
 * is recognised at once instead of being taken for a huge length and retried
 * with ever larger buffers.
 */
static char *
read_link(const char *path, size_t expected_size)
{
    if (expected_size == 0) {
        expected_size = _POSIX_SYMLINK_MAX;
    }

    size_t buf_size = expected_size < PATH_MAX ? expected_size + 1 : PATH_MAX;
    char *buffer = xmalloc(buf_size);

    for (;;) {
        const ssize_t len = readlink(path, buffer, buf_size);

        if (len < 0) { /* readlink() failed; a bigger buffer will not help */
            free(buffer);
            return NULL;
        }
        if ((size_t) len < buf_size) { /* whole target read, room left for the NUL */
            buffer[len] = '\0';
            return buffer;
        }
        /* the buffer was filled completely, so the target may be truncated */
        if (buf_size >= PATH_MAX) {
            free(buffer);
            return NULL;
        }
        buf_size = buf_size <= PATH_MAX / 2 ? buf_size * 2 : PATH_MAX;
        buffer = xrealloc(buffer, buf_size);
    }
}
/*
 * Collect the information needed to print the entry `file_name` located in
 * `dir_path`.
 *
 * dir_path:  directory containing the entry, or the file's own path when
 *            `file_name` is empty
 * file_name: entry name inside dir_path; "" means dir_path itself is the file
 * file_info: output; fully initialised on success, untouched on failure
 *
 * The entry is examined with lstat(), so a symbolic link is described itself
 * and its target is stored in file_info->target (NULL if it cannot be read).
 * file_info->path takes ownership of the full path for directories and is
 * NULL for everything else. The display name is copied with xmalloc(), so
 * running out of memory terminates the program instead of leaving a NULL
 * name behind.
 *
 * Returns 0 (EXIT_SUCCESS) on success; on lstat() failure the error is
 * reported through file_error() and MINOR_ERROR is returned.
 */
static int
read_file_info(const char *dir_path, const char *file_name, struct file_info *file_info)
{
    /* +2: room for a '/' separator and the terminating NUL */
    char *full_path = xmalloc(strlen(dir_path) + strlen(file_name) + 2);
    concat_path(dir_path, file_name, full_path);

    struct stat st;
    if (lstat(full_path, &st) == -1) {
        file_error("failed to get information", full_path);
        free(full_path);
        return MINOR_ERROR;
    }

    file_info->st = st;
    file_info->type = get_file_type(st.st_mode);
    file_info->target = NULL;
    if (file_info->type == SYMLINK) {
        file_info->target = read_link(full_path, (size_t) st.st_size);
    }

    const char *real_name = (file_name[0] == '\0') ? dir_path : file_name;
    const size_t name_size = strlen(real_name) + 1;
    file_info->name = xmalloc(name_size);
    memcpy(file_info->name, real_name, name_size);

    if (file_info->type == DIRECTORY) {
        file_info->path = full_path; /* kept for the recursive descent */
    } else {
        file_info->path = NULL;
        free(full_path);
    }
    return EXIT_SUCCESS;
}
static void
free_file_info(struct file_info *file_info, const int count) {
for (int i = 0; i < count; i++) {
free(file_info[i].name);
if (file_info[i].target != NULL) {
free(file_info[i].target);
}
if (file_info[i].type == DIRECTORY) {
free(file_info[i].path);
}
}
}
/*
 * Print a long-format listing of the directory `path`, then do the same for
 * each of its subdirectories (depth first, in sorted order).
 *
 * Entries "." and ".." are skipped; entries whose information cannot be read
 * are reported by read_file_info() and left out. The remaining entries are
 * sorted with compare_file_name() and printed with the owner and size columns
 * padded to the widest value found in this directory. Every listing except
 * the very first one printed is preceded by a blank line.
 *
 * Failures to open, read or close the directory are reported through
 * file_error() and do not abort the program. errno is cleared before each
 * readdir() call because a NULL result alone does not distinguish the end of
 * the directory from a read error.
 *
 * The directory stream is closed before recursing, so the number of open
 * descriptors does not grow with the depth of the tree.
 */
static void
list_dir(const char *path) // NOLINT(*-no-recursion): recursion is required
{
    static int first_call = 1; /* suppresses the separator before the first listing */

    DIR *dir = opendir(path);
    if (dir == NULL) {
        file_error("failed to open directory", path);
        return;
    }

    int buf_size = BUF_SIZE;
    int owner_w = 0;
    int size_w = 0;
    int entry_no = 0;
    struct file_info *files = xmalloc(buf_size * sizeof *files);

    for (;;) {
        errno = 0;
        const struct dirent *entry = readdir(dir);
        if (entry == NULL) {
            if (errno != 0) {
                file_error("failed to read directory", path);
            }
            break;
        }
        if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
            continue;
        }

        if (entry_no == buf_size) { /* array is full: double its capacity */
            buf_size *= 2;
            files = xrealloc(files, buf_size * sizeof *files);
        }

        struct file_info *info = &files[entry_no];
        if (read_file_info(path, entry->d_name, info) != 0) {
            continue; /* already reported; the slot is reused for the next entry */
        }

        const int owner_len = (int) strlen(get_owner(info->st.st_uid));
        /* off_t has no portable conversion specifier of its own, hence the cast */
        const int size_len = snprintf(NULL, 0, "%lld", (long long) info->st.st_size);
        if (owner_len > owner_w) {
            owner_w = owner_len;
        }
        if (size_len > size_w) {
            size_w = size_len;
        }
        entry_no++;
    }
    if (closedir(dir) == -1) {
        file_error("failed to close directory", path);
    }

    qsort(files, entry_no, sizeof *files, compare_file_name);

    if (!first_call) {
        printf("\n");
    }
    first_call = 0;
    printf("%s:\n", path);
    for (int i = 0; i < entry_no; i++) {
        print_file_info(&files[i], owner_w, size_w);
    }

    for (int i = 0; i < entry_no; i++) {
        if (files[i].type == DIRECTORY) {
            list_dir(files[i].path);
        }
    }

    free_file_info(files, entry_no);
    free(files);
}
static int
myrls(const char *path) {
init();
struct stat st;
if (lstat(path, &st) == -1) {
file_error("cannot access", path);
return MINOR_ERROR;
}
if (S_ISDIR(st.st_mode)) {
list_dir(path);
} else { /* if myrls is called against a file, pay the price of double call to lstat()
in favor of code simplicity - it would not be executed recursively anyway */
struct file_info info;
if ((read_file_info(path, "", &info)) != 0) {
return MINOR_ERROR;
}
print_file_info(&info, 0, 0);
free_file_info(&info, 1);
}
return exit_status;
}
/*
 * Usage: <program> [path]
 *
 * Lists `path`, or "./" when no argument is given. More than one argument is
 * a fatal error: error() prints the message and exits with FATAL_ERROR.
 * Otherwise the process status is whatever myrls() returns.
 */
int
main(int argc, char *argv[])
{
    if (argc > 2) {
        error(FATAL_ERROR, 0, "too many arguments");
    }

    const char *target;
    if (argc == 2) {
        target = argv[1];
    } else {
        target = "./";
    }
    return myrls(target);
}