The code is fragile in many points.
read_file() will fail if the file size is >= BUFFER_SIZE.
- there is no guarantee that there is a
\0 to end the sequence of bytes read, unless the exact size of the file is BUFFER_SIZE-1, so
char buffer[BUFFER_SIZE] = {"\0"};
if (read_file(buffer, EXAMPLE_FILE) == 1)
{
printf("Got Error To Open %s File!\n", file_name);
return errno;
}
printf("%s\n", buffer);
and the printf() call will fail.
Also
read_file() is called with EXAMPLE_FILE instead of file_name.
getopt() is a bit too much when the single purpose of the program is to load and print a string built with the contents of a file: just use the file name. And Windows does not have getopt.
3 examples
Below are 3 simple examples of common ways to do this in C.
All 3 compile under the Microsoft compiler and gcc, and run ok.
The file used in the examples is 28.txt and has 28 bytes in size
01234
FGHIJ
56789
ABCDE
In many uses we need to change encoding of \n. I am now in Windows and the default content is
SO >
SO > od -x 28.txt
0000000 3130 3332 0d34 460a 4847 4a49 0a0d 3635
0000020 3837 0d39 410a 4342 4544 0a0d
0000034
SO >
and we see that \n uses 2 bytes, as 0x0d0a
example 1: read the file at once
#ifdef __linux__
#define _GNU_SOURCE
#include <unistd.h>
#else
#pragma warning(disable : 4013)
#endif
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <iso646.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
long long int get_file_size(const char*);
/*
 * Example 1: load a whole file with a single read() and dump its bytes.
 *
 * argv[1] is the file to load; "28.txt" is used when no argument is given.
 *
 * Returns 0 on success or a negative code:
 *   -1 open failed, -2 fstat failed, -3 not a regular file,
 *   -4 empty file,  -5 out of memory, -6 read failed.
 */
int main(int argc, char** argv)
{
    // Use the name where it already lives: copying it into a fixed-size
    // array would overflow on a long path.
    const char* file_name = (argc < 2) ? "28.txt" : argv[1];

    int in = open(file_name, O_RDONLY);
    if (in < 0) return -1;  // could not open

    struct stat f_stats;
    if (fstat(in, &f_stats) < 0)
    {
        close(in);
        return -2;  // fstat error
    }

    // is it a regular file? compare the whole type field, not a single bit
#ifdef S_ISREG
    const int is_regular = S_ISREG(f_stats.st_mode);
#else
    const int is_regular = (f_stats.st_mode & S_IFMT) == S_IFREG;
#endif
    if (!is_regular)
    {
        close(in);
        return -3;
    }
    if (f_stats.st_size < 1)
    {
        close(in);
        return -4;  // nothing to load
    }
    fprintf(
        stderr, "size of file \"%s\" is %lld\n", file_name,
        (long long)f_stats.st_size);

    // one extra byte so the data can be used as a C string
    const size_t size = (size_t)f_stats.st_size;
    unsigned char* buffer = malloc(size + 1);
    if (buffer == NULL)
    {
        close(in);
        return -5;
    }

    // Ask for exactly `size` bytes so that buffer[n] is always inside the
    // allocation. read() may return fewer bytes than requested, or -1.
    const long long n = read(in, buffer, size);
    close(in);
    if (n < 0)
    {
        free(buffer);
        return -6;  // read error
    }
    buffer[n] = 0;  // terminate string

    fprintf(
        stderr,
        "%lld bytes read into buffer\nstrlen(buffer) is "
        "%zu\n\n",
        n, strlen((const char*)buffer));

    // show on screen: printable bytes as characters, the rest in hex
    printf("Bytes:\t");
    for (long long i = 0; i < n; ++i)
    {
        if (isprint(buffer[i]))
            printf("%c ", buffer[i]);
        else
            printf("0x%X ", buffer[i]);
    }
    printf("\n");
    free(buffer);
    fprintf(stderr, "\n");
    return 0;
}
The program just
- calls
fstat to get the file size
- allocates a block of memory for the file
- uses a single read to upload the file
- terminates the string
- shows the data
SO > ex1 28.txt
size of file "28.txt" is 28
24 bytes read into buffer
strlen(buffer) is 24
Bytes: 0 1 2 3 4 0xA F G H I J 0xA 5 6 7 8 9 0xA A B C D E 0xA
SO >
example 2: uses a buffer extended as needed
This one uses an initial buffer that is expanded as needed. The idea is to allocate a buffer large enough to hold most of the expected files in a single read, but then extend it as needed for files larger than the initial block.
The buffer used here has just 4 bytes, it is just for test... Use something larger, like 1MB.
#define BUF_SIZE 4
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Example 2: load a file into a buffer that grows BUF_SIZE bytes at a time.
 *
 * argv[1] is the file to load; "28.txt" is used when no argument is given.
 * The buffer always keeps one byte beyond the data for the terminator.
 *
 * Returns 0 on success or a negative code:
 *   -1 open failed, -2 out of memory, -3 nothing could be read,
 *   -4 could not grow the buffer, -5 read error.
 */
int main(int argc, char** argv)
{
    // Use the name where it already lives: copying it into a fixed-size
    // array would overflow on a long path.
    const char* file_name = (argc < 2) ? "28.txt" : argv[1];

    FILE* in = fopen(file_name, "rb");
    if (in == NULL) return -1;

    size_t b_size = 1 + BUF_SIZE;  // buffer: one block plus terminator
    size_t f_size = 0;             // file: bytes loaded so far
    unsigned char* buffer = malloc(b_size);
    if (buffer == NULL)
    {
        fclose(in);
        return -2;
    }

    size_t n = fread(buffer, 1, BUF_SIZE, in);
    if (n < 1)
    {
        fclose(in);
        free(buffer);
        return -3;
    }
    f_size += n;

    if (n < BUF_SIZE)
    {
        // the whole file fit in the first block: report and leave
        buffer[n] = 0;  // terminate string
        fprintf(
            stderr,
            "\n\n"
            "buffer size is: %zu\n"
            "file size is: %zu\n"
            "string size in memory is: %zu\n\n",
            b_size, f_size, strlen((const char*)buffer));
        fclose(in);
        free(buffer);
        return 0;
    }

    // A full block was read, so there may be more: add room for one more
    // block and read again until a short (possibly empty) read arrives.
    while (n == BUF_SIZE)
    {
        unsigned char* buf = realloc(buffer, b_size + BUF_SIZE);
        if (buf == NULL)
        {
            fclose(in);
            free(buffer);  // the old block is still valid on failure
            return -4;
        }
        buffer = buf;
        b_size += BUF_SIZE;
        n = fread(buffer + f_size, 1, BUF_SIZE, in);
        f_size += n;
    }
    if (ferror(in))
    {
        fclose(in);
        free(buffer);
        return -5;  // short read caused by an error, not end of file
    }
    buffer[f_size] = 0;  // terminate string

    fprintf(
        stderr,
        "\n\n"
        "buffer size is: %zu\n"
        "file size is: %zu\n"
        "string size in memory os: %zu\n\n",
        b_size, f_size, strlen((const char*)buffer));

    // show on screen: printable bytes as characters, the rest in hex
    printf("Bytes:\t");
    fclose(in);
    for (size_t i = 0; i < f_size; ++i)
    {
        if (isprint(buffer[i]))
            printf("%c ", buffer[i]);
        else
            printf("0x%X ", buffer[i]);
    }
    printf("\n");
    free(buffer);
    return 0;
}
The important part is inside the loop, the logic around realloc and the changes in buffer size.
SO > ex2 28.txt
buffer size is: 33
file size is: 28
string size in memory os: 28
Bytes: 0 1 2 3 4 0xD 0xA F G H I J 0xD 0xA 5 6 7 8 9 0xD 0xA A B C D E 0xD 0xA
SO >
Example 3: uploading the file as an array of pointers to individual lines
In general this is the more useful way, since just after loading we can have direct access to any line of the original file. As an example, here the input file is sorted and then printed again.
This is the struct used to hold the file:
// A growable array of text lines. Each entry of `line` is a separately
// allocated string; the array itself is enlarged by resize_blk().
typedef struct
{
size_t incr; // number of pointer slots added on each resize
size_t limit; // number of pointer slots currently allocated in `line`
size_t size; // number of slots in use, i.e. lines stored so far
char** line; // the lines
} Block;
Block* create_blk(size_t size, size_t increment);
Block* delete_blk(Block* block_to_go);
int resize_blk(Block* block_to_go);
int show_blk(Block* block, const char* msg);
Block* load_file(const char*);
int cmp_line(const void*, const void*);
void usage();
and the functions used to load a file, show its contents, et al.
The main program turns to simply:
- get the file name and call
load_file().
- show the lines
- sort them
- show the lines again, sorted
- free all
/*
 * Example 3: load a file as an array of lines, show it, sort the lines
 * and show it again.
 *
 * argv[1] is the file to load; without it usage() prints a help line and
 * exits. Returns 0 on success, -1 if the file could not be loaded.
 */
int main(int argc, char** argv)
{
    if (argc < 2) usage();  // does not return
    const char* f_name = argv[1];

    Block* test = load_file(f_name);
    if (test == NULL) return -1;

    // snprintf truncates a long file name instead of overflowing msg
    char msg[80] = {0};
    snprintf(
        msg, sizeof msg, "\n\n==> Loading \"%s\" into memory", f_name);
    show_blk(test, msg);

    qsort(test->line, test->size, sizeof *test->line, cmp_line);
    snprintf(
        msg, sizeof msg, "\n\n==> \"%s\" after sort in-memory", f_name);
    show_blk(test, msg);

    test = delete_blk(test);
    return 0;
}
the complete code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// A growable array of text lines. Each entry of `line` is a separately
// allocated string (see load_file) released by delete_blk(); the array
// itself is enlarged by resize_blk().
typedef struct
{
size_t incr; // number of pointer slots added on each resize
size_t limit; // number of pointer slots currently allocated in `line`
size_t size; // number of slots in use, i.e. lines stored so far
char** line; // the lines
} Block;
Block* create_blk(size_t size, size_t increment);
Block* delete_blk(Block* block_to_go);
int resize_blk(Block* block_to_go);
int show_blk(Block* block, const char* msg);
Block* load_file(const char*);
int cmp_line(const void*, const void*);
void usage();
/*
 * Example 3: load a file as an array of lines, show it, sort the lines
 * and show it again.
 *
 * argv[1] is the file to load; without it usage() prints a help line and
 * exits. Returns 0 on success, -1 if the file could not be loaded.
 */
int main(int argc, char** argv)
{
    if (argc < 2) usage();  // does not return
    const char* f_name = argv[1];

    Block* test = load_file(f_name);
    if (test == NULL) return -1;

    // snprintf truncates a long file name instead of overflowing msg
    char msg[80] = {0};
    snprintf(
        msg, sizeof msg, "\n\n==> Loading \"%s\" into memory", f_name);
    show_blk(test, msg);

    qsort(test->line, test->size, sizeof *test->line, cmp_line);
    snprintf(
        msg, sizeof msg, "\n\n==> \"%s\" after sort in-memory", f_name);
    show_blk(test, msg);

    test = delete_blk(test);
    return 0;
}
/*
 * qsort() comparator for an array of C strings.
 * `one` and `other` each point to an array element (a char pointer);
 * the result is that of strcmp() on the two strings.
 */
int cmp_line(const void* one, const void* other)
{
    const char* const* lhs = one;
    const char* const* rhs = other;
    return strcmp(*lhs, *rhs);
}
/*
 * Allocate an empty Block with room for `size` line pointers, growing by
 * `increment` pointers on each resize.
 *
 * Returns the new Block, or NULL if any allocation fails, so a caller
 * never receives a Block whose `line` array is missing while `limit`
 * says otherwise. Release the Block with delete_blk().
 */
Block* create_blk(size_t size, size_t increment)
{
    Block* nb = malloc(sizeof *nb);
    if (nb == NULL) return NULL;

    nb->incr  = increment;
    nb->limit = size;
    nb->size  = 0;
    nb->line  = NULL;

    if (size > 0)
    {
        // refuse sizes whose byte count would wrap around
        if (size <= (size_t)-1 / sizeof *nb->line)
            nb->line = malloc(size * sizeof *nb->line);
        if (nb->line == NULL)
        {
            free(nb);
            return NULL;
        }
    }
    return nb;
}
/*
 * Release a Block: every stored line, then the pointer array, then the
 * struct itself. Accepts NULL. Always returns NULL, so the caller can
 * write `blk = delete_blk(blk);`.
 */
Block* delete_blk(Block* blk)
{
    if (blk != NULL)
    {
        size_t remaining = blk->size;
        while (remaining > 0)
        {
            remaining -= 1;
            free(blk->line[remaining]);  // one line
        }
        free(blk->line);  // the array of pointers
        free(blk);        // the struct
    }
    return NULL;
}
/*
 * Enlarge the pointer array of `nb` by `nb->incr` slots.
 *
 * Returns 0 on success. Returns -1 if `nb` is NULL or the reallocation
 * fails; in that case the Block is left unchanged and still valid.
 */
int resize_blk(Block* nb)
{
    if (nb == NULL) return -1;

    // A zero increment would report success without adding any room, and
    // the caller would then write past the array: grow by at least one.
    const size_t step   = (nb->incr > 0) ? nb->incr : 1;
    const size_t new_sz = nb->limit + step;

    // keep the result apart so the old array is not lost on failure
    char** new_block = realloc(nb->line, new_sz * sizeof *new_block);
    if (new_block == NULL)
    {
        fprintf(
            stderr,
            "\tCould not extend block to %zu "
            "lines\n",
            new_sz);
        return -1;
    }
    nb->limit = new_sz;
    nb->line  = new_block;
    return 0;
}  // resize_blk()
/*
 * Print an optional title, a status line and every stored line, numbered
 * from 1, to stdout.
 *
 * `msg` may be NULL, in which case no title is printed.
 * Returns 0, or -1 when `bl` is NULL.
 *
 * No newline is appended after each line: the text is printed exactly
 * as stored.
 */
int show_blk(Block* bl, const char* msg)
{
    if (msg != NULL) printf("%s\n", msg);
    if (bl == NULL)
    {
        printf("Status: not allocated\n");
        return -1;
    }
    // the counters are size_t, so they need the %zu conversion
    printf(
        "Status: %zu of %zu lines. [Incr. is %zu]:\n",
        bl->size, bl->limit, bl->incr);
    for (size_t i = 0; i < bl->size; i += 1)
        printf("%4zu\t%s", 1 + i, bl->line[i]);
    return 0;
}
/*
 * Load a text file into a new Block, one entry per fgets() call.
 *
 * Each entry keeps its trailing newline when the file has one. Input
 * lines longer than the 200-byte read buffer are stored as several
 * consecutive entries.
 *
 * Returns the Block (release it with delete_blk()), or NULL if `f_name`
 * is NULL, the file cannot be opened, or memory runs out. Nothing is
 * leaked on failure.
 */
Block* load_file(const char* f_name)
{
    if (f_name == NULL) return NULL;
    fprintf(stderr, "loading \"%s\" into memory\n", f_name);
    FILE* F = fopen(f_name, "r");
    if (F == NULL) return NULL;
    // file is open

    Block* nb = create_blk(4, 16);  // room for 4 lines, grows by 16
    if (nb == NULL || nb->line == NULL)
    {
        delete_blk(nb);
        fclose(F);
        return NULL;
    }

    char line[200];
    while (fgets(line, sizeof line, F) != NULL)
    {
        // is block full?
        if (nb->size >= nb->limit)
        {
            if (resize_blk(nb) != 0) goto fail;
            printf(
                "Block extended for a total of %zu "
                "pointers\n",
                nb->limit);
        }
        // now copy the line
        char* copy = malloc(1 + strlen(line));
        if (copy == NULL) goto fail;
        strcpy(copy, line);
        nb->line[nb->size] = copy;
        nb->size += 1;
    }
    fclose(F);
    return nb;

fail:
    // out of memory: give back everything loaded so far
    fclose(F);
    delete_blk(nb);
    return NULL;
}
/* Write the one-line help text to stderr and end the program with
 * EXIT_FAILURE. Does not return. */
void usage()
{
    fputs("Use: program file_to_load\n", stderr);
    exit(EXIT_FAILURE);
}
sample output
SO > ex3
Use: program file_to_load
SO > ex3 28.txt
loading "28.txt" into memory
==> Loading "28.txt" into memory
Status: 4 of 4 lines. [Incr. is 16]:
1 01234
2 FGHIJ
3 56789
4 ABCDE
==> "28.txt" after sort in-memory
Status: 4 of 4 lines. [Incr. is 16]:
1 01234
2 56789
3 ABCDE
4 FGHIJ
SO >
snprintf(file_name, ...) but no memory allocated to file_name. The compiler should warn about that, don't ignore it. %s). fread(buffer, sizeof(char), BUFFER_SIZE, file) to know how much was read.