/* Source path: root/src/printf.c
   blob: 2a73bb7fed892347eafb40f497ce5080f511fc9b (plain)  */
/* printf - format and print data
   Copyright (C) 1990-2025 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#include <config.h>
#include <stdio.h>
#include <sys/types.h>
#include <wchar.h>

#include "system.h"
#include "c-ctype.h"
#include "cl-strtod.h"
#include "octhexdigits.h"
#include "quote.h"
#include "unicodeio.h"
#include "xprintf.h"

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "printf"

#define AUTHORS proper_name ("David MacKenzie")

/* The value to return to the calling program.  */
static int exit_status;

/* True if the POSIXLY_CORRECT environment variable is set.  */
static bool posixly_correct;

/* This message appears in N_() here rather than just in _() below because
   the sole use would have been in a #define.  */
static char const *const cfcc_msg =
 N_("warning: %s: character(s) following character constant have been ignored");

/* Print the "try --help" hint when STATUS is not EXIT_SUCCESS, otherwise
   print the full help text to stdout.  In both cases exit with STATUS;
   this function does not return.  */

void
usage (int status)
{
  if (status != EXIT_SUCCESS)
    emit_try_help ();
  else
    {
      printf (_("\
Usage: %s FORMAT [ARGUMENT]...\n\
  or:  %s OPTION\n\
"),
              program_name, program_name);
      fputs (_("\
Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
\n\
"), stdout);
      fputs (HELP_OPTION_DESCRIPTION, stdout);
      fputs (VERSION_OPTION_DESCRIPTION, stdout);
      fputs (_("\
\n\
FORMAT controls the output as in C printf.  Interpreted sequences are:\n\
\n\
  \\\"      double quote\n\
"), stdout);
      fputs (_("\
  \\\\      backslash\n\
  \\a      alert (BEL)\n\
  \\b      backspace\n\
  \\c      produce no further output\n\
  \\e      escape\n\
  \\f      form feed\n\
  \\n      new line\n\
  \\r      carriage return\n\
  \\t      horizontal tab\n\
  \\v      vertical tab\n\
"), stdout);
      fputs (_("\
  \\NNN    byte with octal value NNN (1 to 3 digits)\n\
  \\xHH    byte with hexadecimal value HH (1 to 2 digits)\n\
  \\uHHHH  Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
  \\UHHHHHHHH  Unicode character with hex value HHHHHHHH (8 digits)\n\
"), stdout);
      fputs (_("\
  %%      a single %\n\
  %b      ARGUMENT as a string with '\\' escapes interpreted,\n\
          except that octal escapes should have a leading 0 like \\0NNN\n\
  %q      ARGUMENT is printed in a format that can be reused as shell input,\n\
          escaping non-printable characters with the POSIX $'' syntax\
\n\n\
and all C format specifications ending with one of diouxXfeEgGcs, with\n\
ARGUMENTs converted to proper type first.  Variable widths are handled.\n\
"), stdout);
      printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
      emit_ancillary_info (PROGRAM_NAME);
    }
  exit (status);
}

/* Diagnose an unsuccessful numeric conversion of S, where END is the
   end pointer stored by the conversion function and errno was cleared
   before the conversion.  A diagnostic is issued and exit_status is set
   to EXIT_FAILURE if nothing was converted (S == END), if errno is
   nonzero, or if unconverted characters remain at END.  */

static void
verify_numeric (char const *s, char const *end)
{
  if (s == end)
    {
      error (0, 0, _("%s: expected a numeric value"), quote (s));
      exit_status = EXIT_FAILURE;
    }
  else if (errno)
    {
      error (0, errno, "%s", quote (s));
      exit_status = EXIT_FAILURE;
    }
  else if (*end)
    {
      error (0, 0, _("%s: value not completely converted"), quote (s));
      exit_status = EXIT_FAILURE;
    }
}

/* Define a function FUNC_NAME (S) that converts the string S to TYPE.
   If S starts with a single or double quote followed by at least one
   more byte, the result is the value of the character after the quote
   (decoded with mbrtowc when MB_CUR_MAX > 1 and more bytes follow).
   Otherwise the result is LIB_FUNC_EXPR, which must store its end
   pointer in END, and the conversion is checked by verify_numeric.  */

#define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR)                           \
static TYPE                                                              \
FUNC_NAME (char const *s)                                                \
{                                                                        \
  char *end;                                                             \
  TYPE val;                                                              \
                                                                         \
  if ((*s == '\"' || *s == '\'') && *(s + 1))                            \
    {                                                                    \
      unsigned char ch = *++s;                                           \
      val = ch;                                                          \
                                                                         \
      if (MB_CUR_MAX > 1 && *(s + 1))                                    \
        {                                                                \
          mbstate_t mbstate; mbszero (&mbstate);                         \
          wchar_t wc;                                                    \
          size_t slen = strlen (s);                                      \
          ssize_t bytes;                                                 \
          /* Use mbrtowc not mbrtoc32, as per POSIX.  */                 \
          bytes = mbrtowc (&wc, s, slen, &mbstate);                      \
          if (0 < bytes)                                                 \
            {                                                            \
              val = wc;                                                  \
              s += bytes - 1;                                            \
            }                                                            \
        }                                                                \
                                                                         \
      /* If POSIXLY_CORRECT is not set, then give a warning that there   \
         are characters following the character constant and that GNU    \
         printf is ignoring those characters.  If POSIXLY_CORRECT *is*   \
         set, then don't give the warning.  */                           \
      if (*++s != 0 && !posixly_correct)                                 \
        error (0, 0, _(cfcc_msg), s);                                    \
    }                                                                    \
  else                                                                   \
    {                                                                    \
      errno = 0;                                                         \
      val = (LIB_FUNC_EXPR);                                             \
      verify_numeric (s, end);                                           \
    }                                                                    \
  return val;                                                            \
}

STRTOX (intmax_t,    vstrtoimax, strtoimax (s, &end, 0))
STRTOX (uintmax_t,   vstrtoumax, strtoumax (s, &end, 0))
STRTOX (long double, vstrtold,   cl_strtold (s, &end))

/* Output a single-character \ escape.  */

static void
print_esc_char (char c)
{
  switch (c)
    {
    case 'a':                   /* Alert. */
      putchar ('\a');
      break;
    case 'b':                   /* Backspace. */
      putchar ('\b');
      break;
    case 'c':                   /* Cancel the rest of the output. */
      exit (EXIT_SUCCESS);
      break;
    case 'e':                   /* Escape. */
      putchar ('\x1B');
      break;
    case 'f':                   /* Form feed. */
      putchar ('\f');
      break;
    case 'n':                   /* New line. */
      putchar ('\n');
      break;
    case 'r':                   /* Carriage return. */
      putchar ('\r');
      break;
    case 't':                   /* Horizontal tab. */
      putchar ('\t');
      break;
    case 'v':                   /* Vertical tab. */
      putchar ('\v');
      break;
    default:
      putchar (c);
      break;
    }
}

/* Print a \ escape sequence starting at ESCSTART.
   Return the number of characters in the escape sequence
   besides the backslash.
   If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o
   is an octal digit; otherwise they are of the form \ooo.  */

static int
print_esc (char const *escstart, bool octal_0)
{
  char const *p = escstart + 1;
  int esc_value = 0;            /* Value of \nnn escape. */
  int esc_length;               /* Length of \nnn escape. */

  if (*p == 'x')
    {
      /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits.  */
      for (esc_length = 0, ++p;
           esc_length < 2 && c_isxdigit (*p);
           ++esc_length, ++p)
        esc_value = esc_value * 16 + fromhex (*p);
      if (esc_length == 0)
        error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
      putchar (esc_value);
    }
  else if (isoct (*p))
    {
      /* Parse \0ooo (if octal_0 && *p == '0') or \ooo (otherwise).
         Allow \ooo if octal_0 && *p != '0'; this is an undocumented
         extension to POSIX that is compatible with Bash 2.05b.  */
      for (esc_length = 0, p += octal_0 && *p == '0';
           esc_length < 3 && isoct (*p);
           ++esc_length, ++p)
        esc_value = esc_value * 8 + fromoct (*p);
      putchar (esc_value);
    }
  else if (*p && strchr ("\"\\abcefnrtv", *p))
    print_esc_char (*p++);
  else if (*p == 'u' || *p == 'U')
    {
      char esc_char = *p;
      unsigned int uni_value;

      uni_value = 0;
      for (esc_length = (esc_char == 'u' ? 4 : 8), ++p;
           esc_length > 0;
           --esc_length, ++p)
        {
          if (! c_isxdigit (*p))
            error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
          uni_value = uni_value * 16 + fromhex (*p);
        }

      /* Error for invalid code points 0000D800 through 0000DFFF inclusive.
         Note print_unicode_char() would print the literal \u.. in this case. */
      if (uni_value >= 0xd800 && uni_value <= 0xdfff)
        error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"),
               esc_char, (esc_char == 'u' ? 4 : 8), uni_value);

      print_unicode_char (stdout, uni_value, 0);
    }
  else
    {
      /* Unrecognized escape: emit the backslash and the following
         character (if any) unchanged.  */
      putchar ('\\');
      if (*p)
        {
          putchar (*p);
          p++;
        }
    }
  return p - escstart - 1;
}

/* Print string STR, evaluating \ escapes. */

static void
print_esc_string (char const *str)
{
  for (; *str; str++)
    if (*str == '\\')
      str += print_esc (str, true);
    else
      putchar (*str);
}

/* Evaluate a printf conversion specification.  START is the start of
   the directive, and CONVERSION specifies the type of conversion.
   FIELD_WIDTH and PRECISION are the field width and precision for '*'
   values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
   ARGUMENT is the argument to be formatted.  */

static void
print_direc (char const *start, char conversion,
             bool have_field_width, int field_width,
             bool have_precision, int precision,
             char const *argument)
{
  char *p;              /* Null-terminated copy of % directive. */

  /* Create a null-terminated copy of the % directive, with an
     intmax_t-wide length modifier substituted for any existing
     integer length modifier.  */
  {
    char *q;
    char const *length_modifier;
    size_t length_modifier_len;

    switch (conversion)
      {
      case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
        length_modifier = "j";
        length_modifier_len = 1;
        break;

      case 'a': case 'e': case 'f': case 'g':
      case 'A': case 'E': case 'F': case 'G':
        length_modifier = "L";
        length_modifier_len = 1;
        break;

      default:
        length_modifier = start;  /* Any valid pointer will do.  */
        length_modifier_len = 0;
        break;
      }

    /* Room for START, the length modifier, CONVERSION and the NUL.  */
    size_t length = strlen (start);
    p = xmalloc (length + length_modifier_len + 2);
    q = mempcpy (p, start, length);
    q = mempcpy (q, length_modifier, length_modifier_len);
    *q++ = conversion;
    *q = '\0';
  }

  switch (conversion)
    {
    case 'd':
    case 'i':
      {
        intmax_t arg = argument ? vstrtoimax (argument) : 0;
        if (!have_field_width)
          {
            if (!have_precision)
              xprintf (p, arg);
            else
              xprintf (p, precision, arg);
          }
        else
          {
            if (!have_precision)
              xprintf (p, field_width, arg);
            else
              xprintf (p, field_width, precision, arg);
          }
      }
      break;

    case 'o':
    case 'u':
    case 'x':
    case 'X':
      {
        uintmax_t arg = argument ? vstrtoumax (argument) : 0;
        if (!have_field_width)
          {
            if (!have_precision)
              xprintf (p, arg);
            else
              xprintf (p, precision, arg);
          }
        else
          {
            if (!have_precision)
              xprintf (p, field_width, arg);
            else
              xprintf (p, field_width, precision, arg);
          }
      }
      break;

    case 'a':
    case 'A':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
      {
        long double arg = argument ? vstrtold (argument) : 0;
        if (!have_field_width)
          {
            if (!have_precision)
              xprintf (p, arg);
            else
              xprintf (p, precision, arg);
          }
        else
          {
            if (!have_precision)
              xprintf (p, field_width, arg);
            else
              xprintf (p, field_width, precision, arg);
          }
      }
      break;

    case 'c':
      {
        char c = argument ? *argument : '\0';
        if (!have_field_width)
          xprintf (p, c);
        else
          xprintf (p, field_width, c);
      }
      break;

    case 's':
      if (!argument)
        argument = "";
      if (!have_field_width)
        {
          if (!have_precision)
            xprintf (p, argument);
          else
            xprintf (p, precision, argument);
        }
      else
        {
          if (!have_precision)
            xprintf (p, field_width, argument);
          else
            xprintf (p, field_width, precision, argument);
        }
      break;
    }

  free (p);
}

/* Set curr_arg from indexed %i$ or otherwise next in sequence.
   POS can be 0,1,2,3 corresponding to
   [%][width][.precision][conversion] respectively.  */

struct arg_cursor
{
  char const *f;                /* Pointer into 'format'.  */
  int curr_arg;                 /* Current offset.  */
  int curr_s_arg;               /* Current sequential offset.  */
  int end_arg;                  /* End arg processed.  */
  int direc_arg;                /* Arg for main directive.  */
};

ATTRIBUTE_PURE static struct arg_cursor
get_curr_arg (int pos, struct arg_cursor ac)
{
  /* Convert sequences like "123$" by hand to avoid problems with strtol,
     which might treat "$" as part of the number in some locales.  */
  int arg = 0;
  char const *f = ac.f;
  if (pos < 3 && c_isdigit (*f))
    {
      bool v = false;           /* True if the index overflowed int.  */
      int a = *f++ - '0';
      for (; c_isdigit (*f); f++)
        {
          v |= ckd_mul (&a, a, 10);
          v |= ckd_add (&a, a, *f - '0');
        }
      if (*f == '$')
        arg = v ? INT_MAX : a;
    }

  if (0 < arg)
    {
      /* Process indexed %i$ format.  */
      arg--;
      ac.f = f + 1;
      if (pos == 0)
        ac.direc_arg = arg;
    }
  else
    {
      /* Process sequential arg.  */
      arg = (pos == 0 ? (ac.direc_arg = -1)
             : pos < 3 || ac.direc_arg < 0 ? ++ac.curr_s_arg
             : ac.direc_arg);
    }

  if (0 <= arg)
    {
      ac.curr_arg = arg;
      ac.end_arg = MAX (ac.end_arg, arg);
    }
  return ac;
}

/* Print the text in FORMAT, using ARGV (with ARGC elements) for
   arguments to any '%' directives.
   Return the number of elements of ARGV used.  */

static int
print_formatted (char const *format, int argc, char **argv)
{
  struct arg_cursor ac;
  ac.curr_arg = ac.curr_s_arg = ac.end_arg = ac.direc_arg = -1;
  char const *direc_start;      /* Start of % directive.  */
  char *direc;                  /* Generated % directive.  */
  char *pdirec;                 /* Pointer to current end of directive.  */
  bool have_field_width;        /* True if FIELD_WIDTH is valid.  */
  int field_width = 0;          /* Arg to first '*'.  */
  bool have_precision;          /* True if PRECISION is valid.  */
  int precision = 0;            /* Arg to second '*'.  */
  char ok[UCHAR_MAX + 1];       /* ok['x'] is true if %x is allowed.  */

  /* No generated directive can be longer than FORMAT itself.  */
  direc = xmalloc (strlen (format) + 1);

  for (ac.f = format; *ac.f; ac.f++)
    {
      switch (*ac.f)
        {
        case '%':
          direc_start = ac.f;
          pdirec = direc;
          *pdirec++ = *ac.f++;
          have_field_width = have_precision = false;
          if (*ac.f == '%')
            {
              putchar ('%');
              break;
            }

          ac = get_curr_arg (0, ac);

          if (*ac.f == 'b')
            {
              /* FIXME: Field width and precision are not supported
                 for %b, even though POSIX requires it.  */
              ac = get_curr_arg (3, ac);
              if (ac.curr_arg < argc)
                print_esc_string (argv[ac.curr_arg]);
              break;
            }

          if (*ac.f == 'q')
            {
              ac = get_curr_arg (3, ac);
              if (ac.curr_arg < argc)
                {
                  fputs (quotearg_style (shell_escape_quoting_style,
                                         argv[ac.curr_arg]), stdout);
                }
              break;
            }

          memset (ok, 0, sizeof ok);
          ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] =
            ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
            ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;

          /* Copy flag characters, ruling out the conversions that
             each flag is incompatible with.  */
          for (;; ac.f++)
            {
              switch (*ac.f)
                {
#if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
                case 'I':
#endif
                case '\'':
                  ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
                    ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
                  break;
                case '-': case '+': case ' ':
                  break;
                case '#':
                  ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
                  break;
                case '0':
                  ok['c'] = ok['s'] = 0;
                  break;
                default:
                  goto no_more_flag_characters;
                }
              *pdirec++ = *ac.f;
            }
        no_more_flag_characters:

          if (*ac.f == '*')
            {
              *pdirec++ = *ac.f++;

              ac = get_curr_arg (1, ac);

              if (ac.curr_arg < argc)
                {
                  intmax_t width = vstrtoimax (argv[ac.curr_arg]);
                  if (INT_MIN <= width && width <= INT_MAX)
                    field_width = width;
                  else
                    error (EXIT_FAILURE, 0, _("invalid field width: %s"),
                           quote (argv[ac.curr_arg]));
                }
              else
                field_width = 0;
              have_field_width = true;
            }
          else
            while (c_isdigit (*ac.f))
              *pdirec++ = *ac.f++;

          if (*ac.f == '.')
            {
              *pdirec++ = *ac.f++;
              ok['c'] = 0;
              if (*ac.f == '*')
                {
                  *pdirec++ = *ac.f++;

                  ac = get_curr_arg (2, ac);

                  if (ac.curr_arg < argc)
                    {
                      intmax_t prec = vstrtoimax (argv[ac.curr_arg]);
                      if (prec < 0)
                        {
                          /* A negative precision is taken as if the
                             precision were omitted, so -1 is safe
                             here even if prec < INT_MIN.  */
                          precision = -1;
                        }
                      else if (INT_MAX < prec)
                        error (EXIT_FAILURE, 0, _("invalid precision: %s"),
                               quote (argv[ac.curr_arg]));
                      else
                        precision = prec;
                    }
                  else
                    precision = 0;
                  have_precision = true;
                }
              else
                while (c_isdigit (*ac.f))
                  *pdirec++ = *ac.f++;
            }

          *pdirec++ = '\0';

          /* Skip any length modifiers; print_direc supplies its own.  */
          while (*ac.f == 'l' || *ac.f == 'L' || *ac.f == 'h'
                 || *ac.f == 'j' || *ac.f == 't' || *ac.f == 'z')
            ++ac.f;

          {
            unsigned char conversion = *ac.f;
            int speclen = MIN (ac.f + 1 - direc_start, INT_MAX);
            if (! ok[conversion])
              error (EXIT_FAILURE, 0,
                     _("%.*s: invalid conversion specification"),
                     speclen, direc_start);
          }

          ac = get_curr_arg (3, ac);

          print_direc (direc, *ac.f,
                       have_field_width, field_width,
                       have_precision, precision,
                       ac.curr_arg < argc ? argv[ac.curr_arg] : nullptr);

          break;

        case '\\':
          ac.f += print_esc (ac.f, false);
          break;

        default:
          putchar (*ac.f);
        }
    }

  free (direc);
  return MIN (argc, ac.end_arg + 1);
}

/* Entry point.  Handle a lone --help or --version and an optional
   leading "--", then print argv[1] as the format, reusing it while it
   keeps consuming arguments and arguments remain.  Return exit_status.  */

int
main (int argc, char **argv)
{
  char *format;
  int args_used;

  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  exit_status = EXIT_SUCCESS;

  posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);

  /* We directly parse options, rather than use parse_long_options, in
     order to avoid accepting abbreviations.  */
  if (argc == 2)
    {
      if (STREQ (argv[1], "--help"))
        usage (EXIT_SUCCESS);

      if (STREQ (argv[1], "--version"))
        {
          version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS,
                       (char *) nullptr);
          return EXIT_SUCCESS;
        }
    }

  /* The above handles --help and --version.
     Since there is no other invocation of getopt, handle '--' here.  */
  if (1 < argc && STREQ (argv[1], "--"))
    {
      --argc;
      ++argv;
    }

  if (argc <= 1)
    {
      error (0, 0, _("missing operand"));
      usage (EXIT_FAILURE);
    }

  format = argv[1];
  argc -= 2;
  argv += 2;

  do
    {
      args_used = print_formatted (format, argc, argv);
      argc -= args_used;
      argv += args_used;
    }
  while (args_used > 0 && argc > 0);

  if (argc > 0)
    error (0, 0,
           _("warning: ignoring excess arguments, starting with %s"),
           quote (argv[0]));

  return exit_status;
}
close