11
\$\begingroup\$

I have created a cross-platform CLI tool to quickly retrieve file size and present it to the user in different units of data. The whole program is written in C and for Windows Win32 API is used to check if a location given is a directory or not. I have tried to be as minimal as I could be while coding this project.

/** * @file fsize.c * @license This file is licensed under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007. You may obtain a copy of this license at https://www.gnu.org/licenses/gpl-3.0.en.html. * @author Tushar Chaurasia (Dark-CodeX) */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> #include <stdbool.h> #if defined _WIN32 || defined _WIN64 || defined __CYGWIN__ #include <Windows.h> #else #include <sys/stat.h> #include <sys/types.h> #endif #define FSIZE_VER "1.0.0" enum FILE_SIZE_TYPE { BIT = 8, // multiply BYTES = 1, // as-it-is BASE KIB = 1024, // divide MIB = 1048576, // divide GIB = 1073741824, // divide KB = 1000, // divide MB = 1000000, // divide GB = 1000000000 // divide }; bool is_directory(const char *loc) { if (!loc) return false; #if defined _WIN32 || defined _WIN64 || defined __CYGWIN__ DWORD fileAttributes = GetFileAttributesA(loc); if (fileAttributes == INVALID_FILE_ATTRIBUTES) return false; else if (fileAttributes & FILE_ATTRIBUTE_DIRECTORY) return true; else return false; #else struct stat buffer; if (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode)) return true; #endif return false; } int main(int argc, char **argv) { if (argc == 1) { fprintf(stderr, "err: no file was given\n"); return EXIT_FAILURE; } int i = 1; if (strcmp(argv[i], "--help") == 0) { puts("Usage: [option] [files]"); puts("Options:"); puts(" --help Display this help message."); puts(" --version Display the version of the app."); puts(" --size=<unit_of_byte> Converts bytes into other units."); puts(" =bit, bits, BIT, BITS Converts bytes into bits."); puts(" =byte, bytes, BYTE, BYTES Default size."); puts(" =kib, KIB Converts bytes into kibibytes."); puts(" =mib, MIB Converts bytes into mebibytes."); puts(" =gib, GIB Converts bytes into gibibytes."); puts(" =kb, KB Converts bytes into kilobytes."); puts(" =mb, MB Converts bytes into megabytes."); puts(" =gb, GB Converts bytes into gigabytes."); return EXIT_SUCCESS; } if (strcmp(argv[i], "--version") == 0) { printf("%s: %s\n", argv[0], FSIZE_VER); return EXIT_SUCCESS; } enum FILE_SIZE_TYPE type = BYTES; // default type is BYTES if (strncmp(argv[i], "--size=", 7) == 0) { argv[i] += 7; // skip `--size=` which is total of 7 characters/bytes excluding null-terminating character if (strcmp(argv[i], "bit") == 0 || strcmp(argv[i], "bits") == 0 || strcmp(argv[i], "BIT") == 0 || strcmp(argv[i], "BITS") == 0) { type = BIT; } else if (strcmp(argv[i], "byte") == 0 || strcmp(argv[i], "bytes") == 0 || strcmp(argv[i], "BYTE") == 0 || strcmp(argv[i], "BYTES") == 0) { type = BYTES; } else if (strcmp(argv[i], "kib") == 0 || strcmp(argv[i], "KIB") == 0) { type = KIB; } else if (strcmp(argv[i], "mib") == 0 || strcmp(argv[i], "MIB") == 0) { type = MIB; } else if (strcmp(argv[i], "gib") == 0 || strcmp(argv[i], "GIB") == 0) { type = GIB; } else if (strcmp(argv[i], "kb") == 0 || strcmp(argv[i], "KB") == 0) { type = KB; } else if (strcmp(argv[i], "mb") == 0 || strcmp(argv[i], "MB") == 0) { type = MB; } else if (strcmp(argv[i], "gb") == 0 || strcmp(argv[i], "GB") == 0) { type = GB; } else { fprintf(stderr, "err: '%s' is not a valid unit of byte, try using '--help' flag\n", argv[i]); return EXIT_FAILURE; } i++; if (i == argc) { fprintf(stderr, "err: no file was given\n"); return EXIT_FAILURE; } } for (; i < argc; i++) { if (is_directory(argv[i]) == true) { fprintf(stderr, "err: '%s': Is a directory\n", argv[i]); continue; } FILE *fptr = fopen(argv[i], "rb"); if (!fptr) { fprintf(stderr, "err: fopen(): '%s': %s\n", argv[i], strerror(errno)); continue; } fseek(fptr, 0, SEEK_END); size_t len = ftell(fptr); // size of file in bytes fseek(fptr, 0, SEEK_SET); switch (type) { case BIT: printf("%zu bits : %s\n", len * BIT, argv[i]); break; case BYTES: printf("%zu bytes : %s\n", len, argv[i]); break; case KIB: printf("%0.3f kibibytes : %s \n", (float)len / KIB, argv[i]); break; case MIB: printf("%0.3f mebibytes : %s \n", (float)len / MIB, argv[i]); break; case GIB: printf("%0.3f gibibytes : %s \n", (float)len / GIB, argv[i]); break; case KB: printf("%0.3f kilobytes : %s \n", (float)len / KB, argv[i]); break; case MB: printf("%0.3f megabytes : %s \n", (float)len / MB, argv[i]); break; case GB: printf("%0.3f gigabytes : %s \n", (float)len / GB, argv[i]); break; } fclose(fptr); } return EXIT_SUCCESS; } 

I'm open to any improvements and better practices, if I have done something here which might be non-standard practice.

\$\endgroup\$
4
  • 1
    \$\begingroup\$Too trivial to post as an answer: A bit inconsistent with BIT vs BYTES... After REAL effort allowing user both "byte" and "bytes", a tiny file will be reported with 1 bytes : filename. Most software users quickly grow-out-of fussing about plural/singular. Best not to offer too much accommodation to language rules. (Those are arcane!!)\$\endgroup\$
    – Fe2O3
    CommentedJan 31 at 3:09
  • 2
    \$\begingroup\$Darth, "--size=<unit_of_byte>" leads to an interesting option idea, allow user to enter a unsigned long long number here. Say I wanted to know how many 8k byte blocks?, I could use "--size=8192". Might be good step toward future proofing. Your thoughts?\$\endgroup\$
    – chux
    CommentedFeb 2 at 23:10
  • 1
    \$\begingroup\$@chux, Yeah, that could be a very practical option as well to the user\$\endgroup\$CommentedFeb 3 at 6:02
  • 1
    \$\begingroup\$@chux Re: 'blocksize' Still remember my 'mentor' rubbing his wounds after an 'oopsey' with the Unix util "dd" did what it was told to do. Thankfully, the damage was repairable.\$\endgroup\$
    – Fe2O3
    CommentedFeb 3 at 18:16

6 Answers 6

14
\$\begingroup\$

Multiple puts() calls

For the --help message, I get why you made a puts() call for each line, because having all that in one line separated with \n would not be readable. However, there is an even better way:

puts( "Usage: [option] [files]\n" "Options:\n" " --help Display this help message.\n" " --version Display the version of the app.\n" " --size=<unit_of_byte> Converts bytes into other units.\n" " =bit, bits, BIT, BITS Converts bytes into bits.\n" " =byte, bytes, BYTE, BYTES Default size.\n" " =kib, KIB Converts bytes into kibibytes.\n" " =mib, MIB Converts bytes into mebibytes.\n" " =gib, GIB Converts bytes into gibibytes.\n" " =kb, KB Converts bytes into kilobytes.\n" " =mb, MB Converts bytes into megabytes.\n" " =gb, GB Converts bytes into gigabytes." ); 

This way, you do not call puts() multiple times unnecessarily, but the code is still readable. This works because adjacent string constants are automatically concatenated at compile time.

Repetitive capital checks for size units

Instead of manually checking for the all caps versions, another option is to make the arguments case-insensitive by converting them to lower case before checking:

char *arg = argv[i]; // Additionally, I would store this in a variable instead of indexing argv with i multiple times for (int c = 0; arg[c]; ++c) { arg[c] = tolower((unsigned char)arg[c]); // #include <ctype.h> } if (strcmp(arg, "byte") || strcmp(arg, "bytes")) { // ... } // else ... 

Alternatively, instead of using an elseif chain, you could store a list, either using an array of struct or a parallel array, and then either linear search using a loop or binary search if you ensure that the strings are in lexicographically increasing order.

struct SizeType { char *name; enum FILE_SIZE_TYPE type; } size_types[] = { {"byte", BYTES}, {"bytes", BYTES}, // ... } 

if (...) return true; else return false;

Replace:

if (fileAttributes == INVALID_FILE_ATTRIBUTES) return false; else if (fileAttributes & FILE_ATTRIBUTE_DIRECTORY) return true; else return false; 

With:

if (fileAttributes == INVALID_FILE_ATTRIBUTES) return false; else return (bool)(fileAttributes & FILE_ATTRIBUTE_DIRECTORY); // Explicitly cast to bool to ensure a value of 0 or 1 

Or better yet:

return (fileAttributes != INVALID_FILE_ATTRIBUTES) && (fileAttributes & FILE_ATTRIBUTE_DIRECTORY); 

Using standard logical operators is both more concise and generally more readable than a chain of if/else and return true/false.

Also the POSIX branch can be simplified:

#else struct stat buffer; if (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode)) return true; #endif return false; 

Can be:

#else struct stat buffer; return (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode)); #endif 

In this case, the final return false; will be unreachable and can be omitted.

\$\endgroup\$
7
  • 2
    \$\begingroup\$Re: OP's "Usage:" puts()... My experience is argv[0] is echoed to the user, confirming the name of the program they've just run... Maybe need printf() here... And, why not use a char * ptr so the long-ish help text isn't buried as a parameter to an output function... And (being a grumpy sort), I wouldn't try to accommodate plural/singular as a parameter. "How many bits?" "One." "Oh, just one bits?" "Yes, and don't be a jerk!"... :-)\$\endgroup\$
    – Fe2O3
    CommentedJan 31 at 3:01
  • 1
    \$\begingroup\$@Fe2O3 printf("Usage: %s [option] [files] ...", argv[0]); You mean like that?\$\endgroup\$
    – CPlus
    CommentedJan 31 at 3:08
  • 1
    \$\begingroup\$Looks good to me! :-) Cheers!\$\endgroup\$
    – Fe2O3
    CommentedJan 31 at 3:10
  • 1
    \$\begingroup\$Also, check some standard man pages... The 1st filename shouldn't be shown as optional, but provide for multiple filenames...\$\endgroup\$
    – Fe2O3
    CommentedJan 31 at 3:12
  • 1
    \$\begingroup\$ALSO rather than letting OP print error for no arguments, no arguments should, perhaps, default to spewing the usage help... Just a thought... Nice answer, btw! :-)\$\endgroup\$
    – Fe2O3
    CommentedJan 31 at 3:13
9
\$\begingroup\$
  • Invalid options are silently ignored. You should print a help message if the option is not recognized.

    Along the same line, you should print a help message if a size unit is not recognized.

    Together, it means that help deserves to be factored out into a function.

  • А filename --help is perfectly valid. However, the code can't compute its size. A standard way to deal with such problem is to have -- option, meaning end of options. Everything after it is a filename no matter what.

  • Short options are very user friendly. -v is simpler to type than --version. getopt is your friend.

  • Avoid magic constants. Consider

     argv[i] += strlen("--size="); 

    or something along this line.

  • Closing a file is done way too late. Better do it right after ftell. BTW, there is no need to fseek(fptr, 0, SEEK_SET);.

  • KIB, MIB, GIB values are unnecessarily cryptic. Better use hex constants, eg

     KIB = 0x400, MIB = 0x100000, GIB = 0x40000000, 
\$\endgroup\$
3
  • \$\begingroup\$Re argv[i] += strlen("--size="); I would prefer argv[i] += sizeof("--size=")-1; because that is guaranteed to be evaluated at compile time.\$\endgroup\$
    – CPlus
    CommentedJan 31 at 5:40
  • 2
    \$\begingroup\$@CPlus This is a possibility. But honestly I equally dislike my version and your version (expicit --size== is as bad as a magic number). Hence a suggestion for getopt.\$\endgroup\$
    – vnp
    CommentedJan 31 at 5:47
  • 2
    \$\begingroup\$@CPlus: Even with optimization disabled, GCC and Clang do optimize strlen("literal") to a compile-time constant. godbolt.org/z/Pb5vj3bcW So it's fine for real use, unless you're compiling with -ffreestanding, -fno-builtin, or -fno-builtin-strlen. MSVC needs optimization to be enabled, but we normally only care about quality of the machine code in optimized builds.\$\endgroup\$CommentedFeb 2 at 18:19
9
\$\begingroup\$

Not bad for a beginner who wants to write something useful. Keep at it!

Mystical knowledge

The pointer to the array of pointers to character strings (*argv[]) CAN be manipulated by application code.

The "runtime loader" will add a NULL pointer to the end of the array. i.e. argv[ argc ] = NULL
(Some esoteric versions of the compiler may not provide this NULL. You'll likely see a leprechaun before you'll see one of those.)

We can use this fact to simplify the code a bit:

/* int main(int argc, char **argv) */ int main(int argc, char *argv[]) // 'feels' like ptr to an array of ptrs. { // Remember progname, and start advancing through argv[] char *progname = *argv; ++argv; // avoid precedence worries if (*argv == NULL || strcmp(*argv, "--help") == 0) // Always try to be useful! { puts("Usage: [option] [files]"); puts("Options:"); ... return EXIT_SUCCESS; } if (strcmp(*argv, "--version") == 0) { printf("%s: %s\n", progname, FSIZE_VER); return EXIT_SUCCESS; } enum FILE_SIZE_TYPE type = BYTES; // default type is BYTES if (strncmp(*argv, "--size=", 7) == 0) { char *cp = strchr(*argv, '=') + 1; // ptr to just beyond that '=' if (strcmp(cp, "bit") == 0 || strcmp(cp, "bits") == 0 || strcmp(cp, "BIT") == 0 || strcmp(cp, "BITS") == 0) // Note where '||' moved to { type = BIT; } else if (strcmp(cp, "byte") == 0 || strcmp(cp, "bytes") == 0 || strcmp(cp, "BYTE") == 0 || strcmp(cp, "BYTES") == 0) { type = BYTES; } ... else // see remark in description below { fprintf(stderr, "err: '%s' is not a valid unit of byte, try using '--help' flag\n", cp); return EXIT_FAILURE; } argv++; // Done with this argument if (*argv == NULL) // OP! Do you want this outside enclosing if() block? { fprintf(stderr, "%s: err: no file was given\n", progname); return EXIT_FAILURE; } } for( ; *argv; ++argv) // until encountering NULL { ... FILE *fptr = fopen(*argv, "rb"); if (!fptr) { fprintf(stderr, "err: fopen(): '%s': %s\n", *argv, strerror(errno)); continue; } ... switch (type) { case BIT: printf("%zu bits : %s\n", len * BIT, *argv); break; case BYTES: printf("%zu bytes : %s\n", len, *argv); break; case KIB: printf("%0.3f kibibytes : %s \n", (float)len / KIB, *argv); break; ... } ... } ... } 

More practise and confidence with pointers (and arrays of pointers) will form a solid foundation to write more professional code. The above is merely an attempt to point the way.

Don't be too quick to scold the user and terminate the program. If I'd unknowingly entered
--size=bots instead of ...bits, consider how helpful the program could be if it showed me the help menu, rather than suggesting I ask for it... Be kind to users and they'll be kinder to you...

Final comment: type is such an overused term. Best to find another word for your variable. Perhaps mode as its meaning is the "mode of delivered information; KayBee or Kibbies. What's your pleasure?"

Also: look into stricmp() or strcasecmp() as two possibilities to use a library "compare" function that is case INsensitive...


DRY

 ...: %s \n", (float)len / KIB, argv[i]); ...: %s \n", (float)len / MIB, argv[i]); ...: %s \n", (float)len / GIB, argv[i]); ...: %s \n", (float)len / KB, argv[i]); ...: %s \n", (float)len / MB, argv[i]); ...: %s \n", (float)len / GB, argv[i]); 

That's a lot of floating

 float flen = (float)len; ...: %s \n", flen / KIB, argv[i]); ...: %s \n", flen / MIB, argv[i]); ...: %s \n", flen / GIB, argv[i]); ...: %s \n", flen / KB, argv[i]); ...: %s \n", flen / MB, argv[i]); ...: %s \n", flen / GB, argv[i]); 
\$\endgroup\$
0
    7
    \$\begingroup\$

    Compiling for Windows

    I couldn't compile your code on MSVC.

    I had to add

    #define _CRT_SECURE_NO_WARNINGS 

    in order to use the strerror() function.

    Also, I suggest you define

    #define WIN32_LEAN_AND_MEAN 

    if you don't need most of the stuff that the Windows header gives you.

    i

    I like reserving i for for loops. Here, it can be named current_arg or similar.

    Help

    [files] is not described in help.

    It's unclear whether options can be combined or not. The implementation does not allow it.

    It sounds a bit strange to me that bit is a unit of bytes.

    Output

    I have never seen a program output "kibibytes" and similar. Maybe use "kiB" instead.

    Constants

    Someone has suggested to use 0x400, 0x100000 etc. for the KIB and MIB definitions. I'd prefer

    KIB = 1<<10, MIB = 1<<20, GIB = 1<<30, 

    Consistency

    I somehow don't like the idea that some of the values need to be used with multiplication while others need division.

    enum FILE_SIZE_TYPE { BIT = 8, // multiply BYTES = 1, // as-it-is BASE KIB = 1024, // divide 

    If everything were bits, then the calculation would always be the same. To do that, you need C23, because 8*1<<30 is 2^3*2^30 = 2^33, which does not fit into an int anymore. And :unsigned long is only available since C23. Visual Studio 2022 (17.13) does not have support for that... Meh...

    But since I'll rework that anyways, it's not a big deal.

    Redundant == true

    if (is_directory(argv[i]) == true) 

    Moving Help into a method

    Moving help into its own method makes main() shorter. If you put it to the top of the file, a reader will see it and understand the purpose of the program quickly.

    I'll apply the string chaining technique proposed by @CPlus as well.

    Moving unit parsing into its own method

    Moving the unit parsing into its own method makes main() shorter. Many readers will start reading the main() method to get an overview.

    Comments instead of names

    size_t len = ftell(fptr); // size of file in bytes 

    can be

    size_t size_in_bytes = ftell(fptr); 

    Maintainability: enums and switch/case

    Enums are inherently bad for maintainability. Typically you need a switch/case or else/if cascade somewhere to handle all the cases. This means: if you add or remove an item from the list, you need to adapt all the switch/case or else/if cascades.

    When removing an item from an enum, the compiler will help you finding all occurrences. But adding an item is dangerous, because the code will fall into the default branch.

    Since this is a major redesign, simply look at the resulting code.

    Suggested code

    /** * @file fsize.c * @license This file is licensed under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007. You may obtain a copy of this license at https://www.gnu.org/licenses/gpl-3.0.en.html. * @author Tushar Chaurasia (Dark-CodeX) */ /** * @file fsize.c * @license This file is licensed under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007. You may obtain a copy of this license at https://www.gnu.org/licenses/gpl-3.0.en.html. * @author Tushar Chaurasia (Dark-CodeX) */ #define _CRT_SECURE_NO_WARNINGS #include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> #include <stdbool.h> #include <ctype.h> #if defined _WIN32 || defined _WIN64 || defined __CYGWIN__ #define WIN32_LEAN_AND_MEAN #include <Windows.h> #else #include <sys/stat.h> #include <sys/types.h> #endif #define FSIZE_VER "1.0.0" void show_help(void) { puts( "Usage: [option] [files]\n" "Options:\n" " --help Display this help message.\n" " --version Display the version of the app.\n" " --size=<unit_of_byte> Define output unit.\n" " =bit, bits Output in bits.\n" " =byte, bytes Output in bytes (default).\n" " =kib Output in kibibytes.\n" " =mib Output in mebibytes.\n" " =gib Output in gibibytes.\n" " =kb Output in kilobytes.\n" " =mb Output in megabytes.\n" " =gb Output in gigabytes.\n" ); } bool is_directory(const char* loc) { if (!loc) return false; #if defined _WIN32 || defined _WIN64 || defined __CYGWIN__ DWORD fileAttributes = GetFileAttributesA(loc); if (fileAttributes == INVALID_FILE_ATTRIBUTES) return false; return fileAttributes & FILE_ATTRIBUTE_DIRECTORY; #else struct stat buffer; return stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode); #endif } typedef struct UNIT { const char* input_name; const char* output_name; const char* output_format; const unsigned long long divider; } UNIT; UNIT UNITS[] = { // Put the default unit first { "byte", "bytes", "%0.f", 8ULL }, {"bit", "bits", "%0.f", 1ULL}, {"bits", "bits", "%0.f", 1ULL}, {"bytes", "bytes", "%0.f", 8ULL}, {"kib", "kibibytes" , "%0.3f", 8ULL<<10}, {"mib", "mebibytes", "%0.3f", 8ULL<<20}, {"gib", "gibibytes", "%0.3f", 8ULL<<30}, {"kb", "kilobytes", "%0.3f", 8ULL*1000}, {"mb", "megabytes", "%0.3f", 8ULL*1000*1000}, {"gb", "gigabytes", "%0.3f", 8ULL*1000*1000*1000}, // Put the "no unit" last { NULL, NULL, NULL, 0 }, }; UNIT* parse_unit(char* unit_name, int const argc) { UNIT* NO_UNIT = &UNITS[sizeof(UNITS) / sizeof(UNITS[0]) - 1]; UNIT* result = &UNITS[0]; // Parse unit as lower case for (int c = 0; unit_name[c]; ++c) { unit_name[c] = tolower((unsigned char)unit_name[c]); } // Unit lookup by input name bool found = false; for (size_t j = 0; UNITS[j].input_name != NULL; j++) { if (strcmp(unit_name, UNITS[j].input_name) == 0) { result = &UNITS[j]; found = true; } } // Error handling if (!found) { fprintf(stderr, "err: '%s' is not a valid unit, try using '--help' flag\n", unit_name); return NO_UNIT; } if (argc == 2) { fprintf(stderr, "err: no file was given\n"); return NO_UNIT; } return result; } void print_file_size(char* file_name, UNIT unit) { if (is_directory(file_name)) { fprintf(stderr, "err: '%s': Is a directory\n", file_name); return; } FILE* fptr = fopen(file_name, "rb"); if (!fptr) { fprintf(stderr, "err: fopen(): '%s': %s\n", file_name, strerror(errno)); return; } int result = fseek(fptr, 0, SEEK_END); if (result != 0) { fprintf(stderr, "err: fseek(): '%s': %s\n", file_name, strerror(errno)); fclose(fptr); return; } size_t size_in_bytes = ftell(fptr); fclose(fptr); char format[] = "xxxxxx %s : %s\n"; sprintf_s(format, sizeof(format) - 1, "%s %%s : %%s\n", unit.output_format); printf(format, 8.f * (float)size_in_bytes / (float)unit.divider, unit.output_name, file_name); } int main(int argc, char** argv) { if (argc == 1) { fprintf(stderr, "err: no file was given\n"); return EXIT_FAILURE; } int current_arg = 1; if (strcmp(argv[current_arg], "--help") == 0) { show_help(); return EXIT_SUCCESS; } if (strcmp(argv[current_arg], "--version") == 0) { printf("%s: %s\n", argv[0], FSIZE_VER); return EXIT_SUCCESS; } UNIT* unit = &UNITS[0]; if (strncmp(argv[current_arg], "--size=", sizeof("--size=") - 1) == 0) { unit = parse_unit(argv[current_arg] + sizeof("--size=") - 1, argc); if (unit->input_name == NULL) return EXIT_FAILURE; current_arg++; } for (; current_arg < argc; current_arg++) { print_file_size(argv[current_arg], *unit); } return EXIT_SUCCESS; } 

    Result of the change

    Before: 182 lines, 1 helper method, main has 130 lines.

    After: 180 lines, 4 helper methods, main has 33 lines and the 5 steps of processing are clearly visible. No switch/case and no if/else cascade.

    \$\endgroup\$
    5
    • 1
      \$\begingroup\$Nice review. Detailed. Oops: show_help() :: puts() needs \n at the end of each string fragment... Oops 2: tolower() not quite right. Look at answer by @Chux... Braces inconsistent for that loop... Y'know, if loop counter j had larger scope, &UNITS[ j ] WOULD be that empty result to return. Less confusion... Oops 3: current_arg not used in strncmp( ..., "--size-", ....). Finally, (very, very few do this) fseek() can fail, returning an error code... Pedantry... Cheers! :-)\$\endgroup\$
      – Fe2O3
      CommentedFeb 1 at 2:45
    • \$\begingroup\$Re: "I'd prefer ... MIB = 1<<20, GIB = 1<<30," has growth weaknesses when the shift is more than the 31. If the scale was a floating point, code could use 0x1.0p10, 0x1.0p20, 0x1.0p30. Of course FP impact other things, yet I find the enum approach limiting.\$\endgroup\$
      – chux
      CommentedFeb 1 at 22:29
    • 2
      \$\begingroup\$Minor: rather than UNITS[sizeof(UNITS) / sizeof(UNIT) - 1], which obliges a check of an object and a type, consider UNITS[sizeof(UNITS) / sizeof(UNITS[0]) - 1] which only depends on the object and not its or any type.\$\endgroup\$
      – chux
      CommentedFeb 1 at 22:32
    • 1
      \$\begingroup\$Thanks for the comments.\$\endgroup\$CommentedFeb 2 at 16:17
    • \$\begingroup\$Thomas Weller, on deeper review, I really like the convert it all to bits approach. To support bits and future files sizes, perhaps go to long long size, constants.\$\endgroup\$
      – chux
      CommentedFeb 2 at 23:06
    7
    \$\begingroup\$

    Just a few left over points not yet reported:

    Use a function to get file size

    The fseek(... SEEK_END); len = ftell(fptr);trick to get the file size is weak and inadequate for large files. I am not suggesting you write a better implementation, yet realize it has short-comings.

    When code has significant short-comings, yet one lacks good alternatives, make it a function call:

    long long len = my_get_file_size(fptr); 

    Inside your my_get_file_size(), you can code the current fseek/ftell trick and allow for easier updates (e.g. only local impact).

    1024 or 1000?

    Help deserved a line of explanation to clearly indicate when the 1024 or 1000 scale is used.

    Why stop at GB?

    The metric prefixes are kMGTPEZYRQ. Even if one does not desire to support terabytes, etc. files, consider forming your code so it can gracefully grow.

    Deeper: Consider your code base's lifetime. Do you expect users of this code today as well as a later version of fsize in 10 years just might need bigger files?
    Perhaps not so much bigger: Hard drive capacity over timeHard DiskHDD capacity by year, by power of 10?

    Improve error checking

    ftell(fptr) returns a long and -1 on error. Detect that. long may or may not have a wider range than size_t. No need to inject the size_t here. size_t is for object memory sizes, not file lengths.

    // size_t len = ftell(fptr); long len = ftell(fptr); if (len == -1) { Handle_Error(); } if (len < 0) { TBD code(); } 

    Avoid overflow

    len * BIT risks overflow. Use wider math.

    //printf("%zu bits : %s\n", len * BIT, argv[i]); printf("%ju bits : %s\n", (uintmax_t) len * BIT, argv[i]); 

    Avoid imprecision

    (float)len / KIB, as a printf() argument, has weaknesses.

    The float cast can readily loses precision for large values.

    Strange that float was chosen as the quotient. (float)len / KIB is up converted to double before passing to printf(), so why not use double division right away?

    Consider double

     printf("%0.3f kibibytes : %s \n", (double)len / KIB, argv[i]); printf("%0.3f kibibytes : %s \n", len / (double)KIB, argv[i]); ... // or maybe long double printf("%0.3Lf kibibytes : %s \n", len / (long double)KIB, argv[i]); 
    \$\endgroup\$
    2
    • \$\begingroup\$You forgot Yotta.\$\endgroup\$CommentedFeb 3 at 8:09
    • \$\begingroup\$@OskarSkog Answer amended.\$\endgroup\$
      – chux
      CommentedFeb 3 at 13:38
    2
    \$\begingroup\$

    This is a non-portable assumption:

     BIT = 8, // multiply 

    We should be using CHAR_BIT to ensure our units are consistent with the target platform.


    Most of is_directory() is platform-dependent. Consider refactoring into two separate implementations - these might even move into two separate header files, especially if we add more functions to our filesystem abstraction:

    #ifdef USE_WINAPI #include <files_windows.h> #elifdef POSIX_SOURCE #include <files_unix.h> #else #error Unsupported target #endif 

    (#elifdef requires C23 or later, but the legacy equivalent should be obvious if you need it)

    The implementations become much more readable without conditional preprocessing to parse:

    /* files_windows.h */ #define WIN32_LEAN_AND_MEAN #include <Windows.h> bool is_directory(const char *loc) { if (!loc) { return false; } DWORD const fileAttributes = GetFileAttributesA(loc); return fileAttributes != INVALID_FILE_ATTRIBUTES && fileAttributes & FILE_ATTRIBUTE_DIRECTORY; } 
    /* files_unix.h */ #include <sys/stat.h> #include <sys/types.h> bool is_directory(const char *loc) { if (!loc) { return false; } /* Probably not needed as stat() will check */ struct stat buffer; return stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode); } 

    When one or more files cannot be handled, we emit a message to standard error, but still exit with a success status. I think most users would expect success only if all arguments were successfully processed.

    \$\endgroup\$

      Start asking to get answers

      Find the answer to your question by asking.

      Ask question

      Explore related questions

      See similar questions with these tags.