/* printf - format and print data
Copyright (C) 1990-2025 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include
#include
#include
#include
#include "system.h"
#include "c-ctype.h"
#include "cl-strtod.h"
#include "octhexdigits.h"
#include "quote.h"
#include "unicodeio.h"
#include "xprintf.h"
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "printf"
/* Author list; main passes it to version_etc when handling --version. */
#define AUTHORS proper_name ("David MacKenzie")
/* The value to return to the calling program. main starts it at
EXIT_SUCCESS and verify_numeric sets it to EXIT_FAILURE after
diagnosing a bad numeric argument. */
static int exit_status;
/* True if the POSIXLY_CORRECT environment variable is set. When true,
the character-constant converters generated by STRTOX do not warn about
bytes that follow the character. */
static bool posixly_correct;
/* This message appears in N_() here rather than just in _() below because
the sole use would have been in a #define. */
static char const *const cfcc_msg =
N_("warning: %s: character(s) following character constant have been ignored");
/* Print usage information and exit with STATUS; this function does not
return. If STATUS is not EXIT_SUCCESS, only emit_try_help () is called.
Otherwise the full help text is written to stdout in several pieces,
each piece being a separately translatable string. */
void
usage (int status)
{
if (status != EXIT_SUCCESS)
emit_try_help ();
else
{
/* Synopsis: the two accepted invocation forms. */
printf (_("\
Usage: %s FORMAT [ARGUMENT]...\n\
or: %s OPTION\n\
"),
program_name, program_name);
fputs (_("\
Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
/* Backslash escapes recognized in FORMAT. */
fputs (_("\
\n\
FORMAT controls the output as in C printf. Interpreted sequences are:\n\
\n\
\\\" double quote\n\
"), stdout);
fputs (_("\
\\\\ backslash\n\
\\a alert (BEL)\n\
\\b backspace\n\
\\c produce no further output\n\
\\e escape\n\
\\f form feed\n\
\\n new line\n\
\\r carriage return\n\
\\t horizontal tab\n\
\\v vertical tab\n\
"), stdout);
/* Numeric and Unicode escapes. */
fputs (_("\
\\NNN byte with octal value NNN (1 to 3 digits)\n\
\\xHH byte with hexadecimal value HH (1 to 2 digits)\n\
\\uHHHH Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
\\UHHHHHHHH Unicode character with hex value HHHHHHHH (8 digits)\n\
"), stdout);
/* Conversion directives. This text goes through fputs, not printf,
so the doubled and single percent signs below are emitted as written. */
fputs (_("\
%% a single %\n\
%b ARGUMENT as a string with '\\' escapes interpreted,\n\
except that octal escapes should have a leading 0 like \\0NNN\n\
%q ARGUMENT is printed in a format that can be reused as shell input,\n\
escaping non-printable characters with the POSIX $'' syntax\
\n\n\
and all C format specifications ending with one of diouxXfeEgGcs, with\n\
ARGUMENTs converted to proper type first. Variable widths are handled.\n\
"), stdout);
printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
emit_ancillary_info (PROGRAM_NAME);
}
exit (status);
}
/* Check the outcome of a numeric conversion of the string S.
   END is the position at which the conversion routine stopped, and
   errno must still hold whatever that routine left in it.

   Three problems are diagnosed, in this order of precedence: nothing
   was converted (END equals S), errno is nonzero, or unconverted text
   remains at END.  Any of them sets exit_status to EXIT_FAILURE; a
   clean, complete conversion leaves everything untouched.  */
static void
verify_numeric (char const *s, char const *end)
{
  /* The common case: something was parsed, no error, nothing left.  */
  if (s != end && !errno && !*end)
    return;

  if (s == end)
    error (0, 0, _("%s: expected a numeric value"), quote (s));
  else if (errno)
    error (0, errno, "%s", quote (s));
  else
    error (0, 0, _("%s: value not completely converted"), quote (s));

  exit_status = EXIT_FAILURE;
}
/* Define a static function FUNC_NAME that converts the argument string
   S to a value of type TYPE and returns it.

   If S starts with a single or double quote followed by at least one
   more byte, the result is the numeric value of the character after
   the quote: the byte itself, or, when MB_CUR_MAX > 1 and further
   bytes follow, the wide character decoded by mbrtowc if that decoding
   yields a positive length.  Bytes left over after the character draw
   the cfcc_msg warning unless posixly_correct is true.

   Otherwise errno is cleared, S is converted by LIB_FUNC_EXPR (an
   expression that reads 's' and stores the end of the parsed text in
   'end'), and verify_numeric diagnoses an empty conversion, a nonzero
   errno, or trailing unconverted text.

   The closing brace of the macro body must NOT be followed by a
   backslash: a trailing line continuation would splice the first
   STRTOX invocation below into the macro definition itself, leaving
   that function undefined.  */
#define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR)				 \
static TYPE								 \
FUNC_NAME (char const *s)						 \
{									 \
  char *end;								 \
  TYPE val;								 \
									 \
  if ((*s == '\"' || *s == '\'') && *(s + 1))				 \
    {									 \
      unsigned char ch = *++s;						 \
      val = ch;								 \
									 \
      if (MB_CUR_MAX > 1 && *(s + 1))					 \
        {								 \
          mbstate_t mbstate; mbszero (&mbstate);			 \
          wchar_t wc;							 \
          size_t slen = strlen (s);					 \
          ssize_t bytes;						 \
          /* Use mbrtowc not mbrtoc32, as per POSIX.  */		 \
          bytes = mbrtowc (&wc, s, slen, &mbstate);			 \
          if (0 < bytes)						 \
            {								 \
              val = wc;							 \
              s += bytes - 1;						 \
            }								 \
        }								 \
									 \
      /* If POSIXLY_CORRECT is not set, then give a warning that there	 \
         are characters following the character constant and that GNU	 \
         printf is ignoring those characters.  If POSIXLY_CORRECT *is*	 \
         set, then don't give the warning.  */				 \
      if (*++s != 0 && !posixly_correct)				 \
        error (0, 0, _(cfcc_msg), s);					 \
    }									 \
  else									 \
    {									 \
      errno = 0;							 \
      val = (LIB_FUNC_EXPR);						 \
      verify_numeric (s, end);						 \
    }									 \
  return val;								 \
}

/* The three converters used by print_direc and print_formatted.  */
STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0))
STRTOX (uintmax_t, vstrtoumax, strtoumax (s, &end, 0))
STRTOX (long double, vstrtold, cl_strtold (s, &end))
/* Output the character denoted by the single-letter escape C, i.e. the
   letter that followed a backslash.

   'c' terminates the program successfully, which cancels the rest of
   the output.  The letters a, b, e, f, n, r, t and v write the
   corresponding control character (BEL, BS, ESC, FF, LF, CR, HT, VT).
   Any other character is written unchanged.  */
static void
print_esc_char (char c)
{
  /* letters[i] is the escape letter whose output is controls[i].  */
  static char const letters[] = "abefnrtv";
  static char const controls[] = "\a\b\x1B\f\n\r\t\v";

  if (c == 'c')
    exit (EXIT_SUCCESS);

  /* strchr would match the terminating NUL of LETTERS, so a NUL
     argument is explicitly routed to the pass-through branch.  */
  char const *hit = strchr (letters, c);
  if (c != '\0' && hit)
    putchar (controls[hit - letters]);
  else
    putchar (c);
}
/* Print the backslash escape sequence that begins at ESCSTART, which
   points at the backslash itself.
   Return how many characters of the sequence follow the backslash, so
   that the caller can skip over them.

   OCTAL_0 selects the octal syntax: when it is true octal escapes are
   written \0ooo, otherwise \ooo, where each o is an octal digit.

   A \x with no hex digit, a \u or \U with too few hex digits, and a
   \u or \U naming a code point in D800..DFFF are fatal errors.  An
   unrecognized escape is printed literally, backslash included.  */
static int
print_esc (char const *escstart, bool octal_0)
{
  char const *cur = escstart + 1;	/* First byte after the backslash.  */

  if (*cur == 'x')
    {
      /* \xhh: one or two hexadecimal digits.  */
      int byte = 0;
      int ndigits = 0;

      cur++;
      while (ndigits < 2 && c_isxdigit (*cur))
        {
          byte = byte * 16 + fromhex (*cur);
          ndigits++;
          cur++;
        }
      if (ndigits == 0)
        error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
      putchar (byte);
    }
  else if (isoct (*cur))
    {
      /* \0ooo when OCTAL_0 is set and a '0' leads, else \ooo.  Plain
         \ooo is also accepted when OCTAL_0 is set but no '0' leads; this
         is an undocumented extension to POSIX that is compatible with
         Bash 2.05b.  */
      int byte = 0;
      int ndigits = 0;

      if (octal_0 && *cur == '0')
        cur++;
      while (ndigits < 3 && isoct (*cur))
        {
          byte = byte * 8 + fromoct (*cur);
          ndigits++;
          cur++;
        }
      putchar (byte);
    }
  else if (*cur && strchr ("\"\\abcefnrtv", *cur))
    {
      print_esc_char (*cur);
      cur++;
    }
  else if (*cur == 'u' || *cur == 'U')
    {
      /* \uHHHH or \UHHHHHHHH: exactly 4 or 8 hexadecimal digits.  */
      char const marker = *cur;
      int const width = (marker == 'u' ? 4 : 8);
      unsigned int code = 0;

      cur++;
      for (int i = 0; i < width; i++, cur++)
        {
          if (! c_isxdigit (*cur))
            error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
          code = code * 16 + fromhex (*cur);
        }

      /* Error for invalid code points 0000D800 through 0000DFFF inclusive.
         Note print_unicode_char() would print the literal \u.. in this
         case.  */
      if (0xd800 <= code && code <= 0xdfff)
        error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"),
               marker, width, code);

      print_unicode_char (stdout, code, 0);
    }
  else
    {
      /* Not an escape we know: echo the backslash and, unless the
         string ends here, the character after it.  */
      putchar ('\\');
      if (*cur)
        {
          putchar (*cur);
          cur++;
        }
    }

  return cur - escstart - 1;
}
/* Write STR to standard output, expanding each backslash escape via
   print_esc with the \0ooo form of octal escapes selected.  */
static void
print_esc_string (char const *str)
{
  char const *s = str;

  while (*s)
    {
      if (*s != '\\')
        putchar (*s);
      else
        /* print_esc reports how many bytes follow the backslash;
           the backslash itself is skipped by the increment below.  */
        s += print_esc (s, true);
      s++;
    }
}
/* Format and print one ARGUMENT according to a conversion directive.

   START is the directive text without its conversion character (the
   '%', flags, and any width and precision, possibly written as '*').
   CONVERSION is the conversion character.  FIELD_WIDTH is passed for a
   '*' width when HAVE_FIELD_WIDTH is true, and PRECISION for a '*'
   precision when HAVE_PRECISION is true.

   ARGUMENT is the operand to convert; a null ARGUMENT is treated as
   zero for numeric conversions, as a NUL byte for %c, and as the empty
   string for %s.  */
static void
print_direc (char const *start, char conversion,
             bool have_field_width, int field_width,
             bool have_precision, int precision,
             char const *argument)
{
  /* Integer conversions are printed as intmax_t/uintmax_t ("j") and
     floating-point ones as long double ("L"); other conversions take
     no length modifier.  */
  char modifier;
  switch (conversion)
    {
    case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
      modifier = 'j';
      break;

    case 'a': case 'A': case 'e': case 'E':
    case 'f': case 'F': case 'g': case 'G':
      modifier = 'L';
      break;

    default:
      modifier = '\0';
      break;
    }

  /* Build "START [modifier] CONVERSION" as a NUL-terminated format.  */
  size_t start_len = strlen (start);
  char *fmt = xmalloc (start_len + (modifier != '\0') + 2);
  char *tail = mempcpy (fmt, start, start_len);
  if (modifier)
    *tail++ = modifier;
  *tail++ = conversion;
  *tail = '\0';

  /* The argument list must match the number of '*' in the format, so
     each conversion family dispatches on which of them are present.  */
  switch (conversion)
    {
    case 'd':
    case 'i':
      {
        intmax_t value = argument ? vstrtoimax (argument) : 0;
        if (have_field_width && have_precision)
          xprintf (fmt, field_width, precision, value);
        else if (have_field_width)
          xprintf (fmt, field_width, value);
        else if (have_precision)
          xprintf (fmt, precision, value);
        else
          xprintf (fmt, value);
      }
      break;

    case 'o':
    case 'u':
    case 'x':
    case 'X':
      {
        uintmax_t value = argument ? vstrtoumax (argument) : 0;
        if (have_field_width && have_precision)
          xprintf (fmt, field_width, precision, value);
        else if (have_field_width)
          xprintf (fmt, field_width, value);
        else if (have_precision)
          xprintf (fmt, precision, value);
        else
          xprintf (fmt, value);
      }
      break;

    case 'a':
    case 'A':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
      {
        long double value = argument ? vstrtold (argument) : 0;
        if (have_field_width && have_precision)
          xprintf (fmt, field_width, precision, value);
        else if (have_field_width)
          xprintf (fmt, field_width, value);
        else if (have_precision)
          xprintf (fmt, precision, value);
        else
          xprintf (fmt, value);
      }
      break;

    case 'c':
      {
        /* Only the first byte of the operand is printed; a precision
           is never passed for this conversion.  */
        char first = argument ? *argument : '\0';
        if (have_field_width)
          xprintf (fmt, field_width, first);
        else
          xprintf (fmt, first);
      }
      break;

    case 's':
      {
        char const *text = argument ? argument : "";
        if (have_field_width && have_precision)
          xprintf (fmt, field_width, precision, text);
        else if (have_field_width)
          xprintf (fmt, field_width, text);
        else if (have_precision)
          xprintf (fmt, precision, text);
        else
          xprintf (fmt, text);
      }
      break;

    default:
      break;
    }

  free (fmt);
}
/* State used to choose which ARGV element each part of a directive
consumes. get_curr_arg () below takes one of these by value and
returns the updated copy:
it sets curr_arg from indexed %i$ or otherwise next in sequence.
Its POS parameter can be 0,1,2,3 corresponding to
[%][width][.precision][conversion] respectively. */
struct arg_cursor
{
char const *f; /* Pointer into 'format'. */
int curr_arg; /* Current offset: index of the argument last selected. */
int curr_s_arg; /* Current sequential offset; incremented before each
sequential use. */
int end_arg; /* End arg processed: highest index selected so far. */
int direc_arg; /* Arg for main directive: index from a leading N$,
or -1 if the directive had none. */
};
/* Select the argument for one part of a directive and return the
   updated cursor.  POS tells which part is being processed:
   0 = just after '%', 1 = width, 2 = precision, 3 = conversion.

   For POS < 3, a decimal number followed by '$' at AC.f is an explicit
   1-based argument index; it is consumed and used (an overflowing
   number is taken as INT_MAX).  Otherwise the next sequential argument
   is used, except that at POS 0 nothing is selected and at POS 3 the
   index recorded at POS 0, if any, is reused.  Whenever an argument is
   selected, curr_arg is set to it and end_arg is raised to cover it.  */
ATTRIBUTE_PURE static struct arg_cursor
get_curr_arg (int pos, struct arg_cursor ac)
{
  /* Convert sequences like "123$" by hand to avoid problems with strtol,
     which might treat "$" as part of the number in some locales.  */
  char const *scan = ac.f;
  int index = 0;		/* 1-based N of "N$", or 0 if absent.  */

  if (pos < 3 && c_isdigit (*scan))
    {
      bool overflow = false;
      int n = *scan - '0';

      scan++;
      while (c_isdigit (*scan))
        {
          overflow |= ckd_mul (&n, n, 10);
          overflow |= ckd_add (&n, n, *scan - '0');
          scan++;
        }

      /* The digits count only when a '$' follows them.  */
      if (*scan == '$')
        index = overflow ? INT_MAX : n;
    }

  int arg;
  if (index > 0)
    {
      /* Indexed %i$ form: step past the '$' and convert to 0-based.  */
      arg = index - 1;
      ac.f = scan + 1;
      if (pos == 0)
        ac.direc_arg = arg;
    }
  else if (pos == 0)
    {
      /* No index on the directive itself: select nothing yet.  */
      ac.direc_arg = -1;
      arg = -1;
    }
  else if (pos < 3 || ac.direc_arg < 0)
    arg = ++ac.curr_s_arg;	/* Next argument in sequence.  */
  else
    arg = ac.direc_arg;		/* Conversion uses the directive's index.  */

  if (arg >= 0)
    {
      ac.curr_arg = arg;
      if (ac.end_arg < arg)
        ac.end_arg = arg;
    }

  return ac;
}
/* Print the text in FORMAT, using ARGV (with ARGC elements) for
arguments to any '%' directives.
Return the number of elements of ARGV used.
Ordinary characters are written as they are, backslash escapes go
through print_esc, and each '%' directive is copied piecewise into a
scratch buffer and then printed by print_direc; %b and %q are handled
directly in this function. A directive whose argument index is not
below ARGC is printed as if its argument were missing. */
static int
print_formatted (char const *format, int argc, char **argv)
{
struct arg_cursor ac;
/* -1 everywhere means that no argument has been selected yet. */
ac.curr_arg = ac.curr_s_arg = ac.end_arg = ac.direc_arg = -1;
char const *direc_start; /* Start of % directive. */
char *direc; /* Generated % directive. */
char *pdirec; /* Pointer to current end of directive. */
bool have_field_width; /* True if FIELD_WIDTH is valid. */
int field_width = 0; /* Arg to first '*'. */
bool have_precision; /* True if PRECISION is valid. */
int precision = 0; /* Arg to second '*'. */
char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */
/* Only bytes taken from FORMAT plus one terminating NUL are stored in
DIREC, so a buffer as long as FORMAT suffices. */
direc = xmalloc (strlen (format) + 1);
for (ac.f = format; *ac.f; ac.f++)
{
switch (*ac.f)
{
case '%':
direc_start = ac.f;
pdirec = direc;
*pdirec++ = *ac.f++;
have_field_width = have_precision = false;
/* "%%" prints a single percent sign and takes no argument. */
if (*ac.f == '%')
{
putchar ('%');
break;
}
/* Consume an optional N$ argument index right after the '%'. */
ac = get_curr_arg (0, ac);
if (*ac.f == 'b')
{
/* FIXME: Field width and precision are not supported
for %b, even though POSIX requires it. */
ac = get_curr_arg (3, ac);
if (ac.curr_arg < argc)
print_esc_string (argv[ac.curr_arg]);
break;
}
if (*ac.f == 'q')
{
ac = get_curr_arg (3, ac);
if (ac.curr_arg < argc)
{
fputs (quotearg_style (shell_escape_quoting_style,
argv[ac.curr_arg]), stdout);
}
break;
}
/* Start with every supported conversion allowed; the flags and the
precision seen below clear the entries of the conversions they
may not be combined with. */
memset (ok, 0, sizeof ok);
ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] =
ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;
/* Copy the flag characters into DIREC. */
for (;; ac.f++)
{
switch (*ac.f)
{
#if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
case 'I':
#endif
case '\'':
ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
break;
case '-': case '+': case ' ':
break;
case '#':
ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
break;
case '0':
ok['c'] = ok['s'] = 0;
break;
default:
goto no_more_flag_characters;
}
*pdirec++ = *ac.f;
}
no_more_flag_characters:
/* Field width: either '*', whose value comes from an argument and
must fit in an int, or literal digits copied as they are. */
if (*ac.f == '*')
{
*pdirec++ = *ac.f++;
ac = get_curr_arg (1, ac);
if (ac.curr_arg < argc)
{
intmax_t width = vstrtoimax (argv[ac.curr_arg]);
if (INT_MIN <= width && width <= INT_MAX)
field_width = width;
else
error (EXIT_FAILURE, 0, _("invalid field width: %s"),
quote (argv[ac.curr_arg]));
}
else
field_width = 0;
have_field_width = true;
}
else
while (c_isdigit (*ac.f))
*pdirec++ = *ac.f++;
/* Precision, handled like the width; any precision rules out %c. */
if (*ac.f == '.')
{
*pdirec++ = *ac.f++;
ok['c'] = 0;
if (*ac.f == '*')
{
*pdirec++ = *ac.f++;
ac = get_curr_arg (2, ac);
if (ac.curr_arg < argc)
{
intmax_t prec = vstrtoimax (argv[ac.curr_arg]);
if (prec < 0)
{
/* A negative precision is taken as if the
precision were omitted, so -1 is safe
here even if prec < INT_MIN. */
precision = -1;
}
else if (INT_MAX < prec)
error (EXIT_FAILURE, 0, _("invalid precision: %s"),
quote (argv[ac.curr_arg]));
else
precision = prec;
}
else
precision = 0;
have_precision = true;
}
else
while (c_isdigit (*ac.f))
*pdirec++ = *ac.f++;
}
/* DIREC now holds '%', the flags, the width and the precision.
print_direc appends its own length modifier and the conversion. */
*pdirec++ = '\0';
/* Skip any length modifiers in FORMAT; they are not copied. */
while (*ac.f == 'l' || *ac.f == 'L' || *ac.f == 'h'
|| *ac.f == 'j' || *ac.f == 't' || *ac.f == 'z')
++ac.f;
{
unsigned char conversion = *ac.f;
/* Length of the directive as written in FORMAT, for the diagnostic. */
int speclen = MIN (ac.f + 1 - direc_start, INT_MAX);
if (! ok[conversion])
error (EXIT_FAILURE, 0,
_("%.*s: invalid conversion specification"),
speclen, direc_start);
}
/* Select the argument to convert; a null pointer is passed when the
selected index is not below ARGC. */
ac = get_curr_arg (3, ac);
print_direc (direc, *ac.f,
have_field_width, field_width,
have_precision, precision,
ac.curr_arg < argc ? argv[ac.curr_arg] : nullptr);
break;
case '\\':
/* print_esc returns the number of bytes after the backslash; the
loop increment then steps over the backslash itself. */
ac.f += print_esc (ac.f, false);
break;
default:
putchar (*ac.f);
}
}
free (direc);
/* end_arg is the highest argument index selected, so end_arg + 1
arguments were referenced; never report more than ARGC. */
return MIN (argc, ac.end_arg + 1);
}
/* Entry point: printf FORMAT [ARGUMENT]...

   After the usual program setup, handle a lone --help or --version,
   skip one leading "--", and then apply FORMAT to the arguments
   repeatedly until they are all consumed or a pass consumes none.
   Arguments still left over at that point draw a warning.  The return
   value is exit_status, which conversion errors set to EXIT_FAILURE.  */
int
main (int argc, char **argv)
{
  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  exit_status = EXIT_SUCCESS;
  posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);

  /* We directly parse options, rather than use parse_long_options, in
     order to avoid accepting abbreviations.  Both options are honored
     only when they are the sole argument.  */
  if (argc == 2 && STREQ (argv[1], "--help"))
    usage (EXIT_SUCCESS);
  if (argc == 2 && STREQ (argv[1], "--version"))
    {
      version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS,
                   (char *) nullptr);
      return EXIT_SUCCESS;
    }

  /* The above handles --help and --version.
     Since there is no other invocation of getopt, handle '--' here.  */
  if (argc > 1 && STREQ (argv[1], "--"))
    {
      argc--;
      argv++;
    }

  if (argc < 2)
    {
      error (0, 0, _("missing operand"));
      usage (EXIT_FAILURE);
    }

  /* Split the command line into FORMAT and its arguments.  */
  char *format = argv[1];
  argc -= 2;
  argv += 2;

  /* FORMAT is always applied once, then again for as long as the
     previous pass consumed something and arguments remain.  */
  for (;;)
    {
      int consumed = print_formatted (format, argc, argv);
      argc -= consumed;
      argv += consumed;
      if (consumed <= 0 || argc <= 0)
        break;
    }

  if (argc > 0)
    error (0, 0,
           _("warning: ignoring excess arguments, starting with %s"),
           quote (argv[0]));

  return exit_status;
}