1 From 176fc6c7ddec8d93468f9b790d39dcab6d41b1a6 Mon Sep 17 00:00:00 2001
2 From: Florian Weimer <fweimer@redhat.com>
3 Date: Mon, 12 May 2014 15:24:12 +0200
4 Subject: [PATCH] _nl_find_locale: Improve handling of crafted locale names [BZ #17137]
6 Prevent directory traversal in locale-related environment variables
9 (cherry picked from commit 4e8f95a0df7c2300b830ec12c0ae1e161bc8a8a3)
17 locale/findlocale.c | 74 +++++++++++++---
18 localedata/ChangeLog | 6 ++
19 localedata/Makefile | 2 +-
20 localedata/tst-setlocale3.c | 203 +++++++++++++++++++++++++++++++++++++++++++
21 6 files changed, 290 insertions(+), 15 deletions(-)
22 create mode 100644 localedata/tst-setlocale3.c
24 diff --git a/locale/findlocale.c b/locale/findlocale.c
25 index 0c42b99..faeee61 100644
26 --- a/locale/findlocale.c
27 +++ b/locale/findlocale.c
29 <http://www.gnu.org/licenses/>. */
36 @@ -57,6 +58,45 @@ struct loaded_l10nfile *_nl_locale_file_list[__LC_LAST];
38 const char _nl_default_locale_path[] attribute_hidden = LOCALEDIR;
40 +/* Checks if the name is actually present, that is, not NULL and not
43 +name_present (const char *name)
45 + return name != NULL && name[0] != '\0';
48 +/* Checks that the locale name is neither extremely long, nor contains a
49 + ".." path component (to prevent directory traversal). */
51 +valid_locale_name (const char *name)
54 + size_t namelen = strlen (name);
55 + /* Name too long. The limit is arbitrary and prevents stack overflow
57 + if (__glibc_unlikely (namelen > 255))
59 + /* Directory traversal attempt. */
60 + static const char slashdot[4] = {'/', '.', '.', '/'};
61 + if (__glibc_unlikely (memmem (name, namelen,
62 + slashdot, sizeof (slashdot)) != NULL))
64 + if (namelen == 2 && __glibc_unlikely (name[0] == '.' && name [1] == '.'))
67 + && __glibc_unlikely (((name[0] == '.'
70 + || (name[namelen - 3] == '/'
71 + && name[namelen - 2] == '.'
72 + && name[namelen - 1] == '.'))))
74 + /* If there is a slash in the name, it must start with one. */
75 + if (__glibc_unlikely (memchr (name, '/', namelen) != NULL) && name[0] != '/')
80 struct __locale_data *
82 @@ -65,7 +105,7 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
85 /* Name of the locale for this category. */
87 + char *loc_name = (char *) *name;
90 const char *territory;
91 @@ -73,31 +113,39 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
92 const char *normalized_codeset;
93 struct loaded_l10nfile *locale_file;
95 - if ((*name)[0] == '\0')
96 + if (loc_name[0] == '\0')
98 /* The user decides which locale to use by setting environment
100 - *name = getenv ("LC_ALL");
101 - if (*name == NULL || (*name)[0] == '\0')
102 - *name = getenv (_nl_category_names.str
103 + loc_name = getenv ("LC_ALL");
104 + if (!name_present (loc_name))
105 + loc_name = getenv (_nl_category_names.str
106 + _nl_category_name_idxs[category]);
107 - if (*name == NULL || (*name)[0] == '\0')
108 - *name = getenv ("LANG");
109 + if (!name_present (loc_name))
110 + loc_name = getenv ("LANG");
111 + if (!name_present (loc_name))
112 + loc_name = (char *) _nl_C_name;
115 - if (*name == NULL || (*name)[0] == '\0'
116 - || (__builtin_expect (__libc_enable_secure, 0)
117 - && strchr (*name, '/') != NULL))
118 - *name = (char *) _nl_C_name;
119 + /* We used to fall back to the C locale if the name contains a slash
120 + character '/', but we now check for directory traversal in
121 + valid_locale_name, so this is no longer necessary. */
123 - if (__builtin_expect (strcmp (*name, _nl_C_name), 1) == 0
124 - || __builtin_expect (strcmp (*name, _nl_POSIX_name), 1) == 0)
125 + if (__builtin_expect (strcmp (loc_name, _nl_C_name), 1) == 0
126 + || __builtin_expect (strcmp (loc_name, _nl_POSIX_name), 1) == 0)
128 /* We need not load anything. The needed data is contained in
129 the library itself. */
130 *name = (char *) _nl_C_name;
131 return _nl_C[category];
133 + else if (!valid_locale_name (loc_name))
135 + __set_errno (EINVAL);
141 /* We really have to load some data. First we try the archive,
142 but only if there was no LOCPATH environment variable specified. */
144 diff --git a/localedata/Makefile b/localedata/Makefile
145 index 7d157bf..9daa470 100644
146 --- a/localedata/Makefile
147 +++ b/localedata/Makefile
148 @@ -77,7 +77,7 @@ locale_test_suite := tst_iswalnum tst_iswalpha tst_iswcntrl \
150 tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
151 tst-leaks tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
152 - tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2
153 + tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3
154 tests-static = bug-setlocale1-static
155 tests += $(tests-static)
156 ifeq (yes,$(build-shared))
157 diff --git a/localedata/tst-setlocale3.c b/localedata/tst-setlocale3.c
159 index 0000000..e3b21a9
161 +++ b/localedata/tst-setlocale3.c
163 +/* Regression test for setlocale invalid environment variable handling.
164 + Copyright (C) 2014 Free Software Foundation, Inc.
165 + This file is part of the GNU C Library.
167 + The GNU C Library is free software; you can redistribute it and/or
168 + modify it under the terms of the GNU Lesser General Public
169 + License as published by the Free Software Foundation; either
170 + version 2.1 of the License, or (at your option) any later version.
172 + The GNU C Library is distributed in the hope that it will be useful,
173 + but WITHOUT ANY WARRANTY; without even the implied warranty of
174 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
175 + Lesser General Public License for more details.
177 + You should have received a copy of the GNU Lesser General Public
178 + License along with the GNU C Library; if not, see
179 + <http://www.gnu.org/licenses/>. */
186 +/* The result of setlocale may be overwritten by subsequent calls, so
187 + this wrapper makes a copy. */
189 +setlocale_copy (int category, const char *locale)
191 + const char *result = setlocale (category, locale);
192 + if (result == NULL)
194 + return strdup (result);
197 +static char *de_locale;
200 +setlocale_fail (const char *envstring)
202 + setenv ("LC_CTYPE", envstring, 1);
203 + if (setlocale (LC_CTYPE, "") != NULL)
205 + printf ("unexpected setlocale success for \"%s\" locale\n", envstring);
208 + const char *newloc = setlocale (LC_CTYPE, NULL);
209 + if (strcmp (newloc, de_locale) != 0)
211 + printf ("failed setlocale call \"%s\" changed locale to \"%s\"\n",
212 + envstring, newloc);
218 +setlocale_success (const char *envstring)
220 + setenv ("LC_CTYPE", envstring, 1);
221 + char *newloc = setlocale_copy (LC_CTYPE, "");
222 + if (newloc == NULL)
224 + printf ("setlocale for \"%s\": %m\n", envstring);
227 + if (strcmp (newloc, de_locale) == 0)
229 + printf ("setlocale with LC_CTYPE=\"%s\" left locale at \"%s\"\n",
230 + envstring, de_locale);
233 + if (setlocale (LC_CTYPE, de_locale) == NULL)
235 + printf ("restoring locale \"%s\" with LC_CTYPE=\"%s\": %m\n",
236 + de_locale, envstring);
239 + char *newloc2 = setlocale_copy (LC_CTYPE, newloc);
240 + if (newloc2 == NULL)
242 + printf ("restoring locale \"%s\" following \"%s\": %m\n",
243 + newloc, envstring);
246 + if (strcmp (newloc, newloc2) != 0)
248 + printf ("representation of locale \"%s\" changed from \"%s\" to \"%s\"",
249 + envstring, newloc, newloc2);
255 + if (setlocale (LC_CTYPE, de_locale) == NULL)
257 + printf ("restoring locale \"%s\" with LC_CTYPE=\"%s\": %m\n",
258 + de_locale, envstring);
263 +/* Checks that a known-good locale still works if LC_ALL contains a
264 + value which should be ignored. */
266 +setlocale_ignore (const char *to_ignore)
268 + const char *fr_locale = "fr_FR.UTF-8";
269 + setenv ("LC_CTYPE", fr_locale, 1);
270 + char *expected_locale = setlocale_copy (LC_CTYPE, "");
271 + if (expected_locale == NULL)
273 + printf ("setlocale with LC_CTYPE=\"%s\" failed: %m\n", fr_locale);
276 + if (setlocale (LC_CTYPE, de_locale) == NULL)
278 + printf ("failed to restore locale: %m\n");
281 + unsetenv ("LC_CTYPE");
283 + setenv ("LC_ALL", to_ignore, 1);
284 + setenv ("LC_CTYPE", fr_locale, 1);
285 + const char *actual_locale = setlocale (LC_CTYPE, "");
286 + if (actual_locale == NULL)
288 + printf ("setlocale with LC_ALL, LC_CTYPE=\"%s\" failed: %m\n",
292 + if (strcmp (actual_locale, expected_locale) != 0)
294 + printf ("setlocale under LC_ALL failed: got \"%s\", expected \"%s\"\n",
295 + actual_locale, expected_locale);
298 + unsetenv ("LC_CTYPE");
299 + setlocale_success (fr_locale);
300 + unsetenv ("LC_ALL");
301 + free (expected_locale);
307 + /* The glibc test harness sets this environment variable
309 + unsetenv ("LC_ALL");
311 + de_locale = setlocale_copy (LC_CTYPE, "de_DE.UTF-8");
312 + if (de_locale == NULL)
314 + printf ("setlocale (LC_CTYPE, \"de_DE.UTF-8\"): %m\n");
317 + setlocale_success ("C");
318 + setlocale_success ("en_US.UTF-8");
319 + setlocale_success ("/en_US.UTF-8");
320 + setlocale_success ("//en_US.UTF-8");
321 + setlocale_ignore ("");
323 + setlocale_fail ("does-not-exist");
324 + setlocale_fail ("/");
325 + setlocale_fail ("/../localedata/en_US.UTF-8");
326 + setlocale_fail ("en_US.UTF-8/");
327 + setlocale_fail ("en_US.UTF-8/..");
328 + setlocale_fail ("en_US.UTF-8/../en_US.UTF-8");
329 + setlocale_fail ("../localedata/en_US.UTF-8");
331 + size_t large_length = 1024;
332 + char *large_name = malloc (large_length + 1);
333 + if (large_name == NULL)
335 + puts ("malloc failure");
338 + memset (large_name, '/', large_length);
339 + const char *suffix = "en_US.UTF-8";
340 + strcpy (large_name + large_length - strlen (suffix), suffix);
341 + setlocale_fail (large_name);
345 + size_t huge_length = 64 * 1024 * 1024;
346 + char *huge_name = malloc (huge_length + 1);
347 + if (huge_name == NULL)
349 + puts ("malloc failure");
352 + memset (huge_name, 'X', huge_length);
353 + huge_name[huge_length] = '\0';
354 + /* Construct a composite locale specification. */
355 + const char *prefix = "LC_CTYPE=de_DE.UTF-8;LC_TIME=";
356 + memcpy (huge_name, prefix, strlen (prefix));
357 + setlocale_fail (huge_name);
364 +#define TEST_FUNCTION do_test ()
365 +#include "../test-skeleton.c"
368 From d07eb371352d67ee4ef931b6956d1e0f28b599dc Mon Sep 17 00:00:00 2001
369 From: Florian Weimer <fweimer@redhat.com>
370 Date: Wed, 28 May 2014 14:05:03 +0200
371 Subject: [PATCH] manual: Update the locale documentation
373 (cherry picked from commit 585367266923156ac6fb789939a923641ba5aaf4)
379 manual/locale.texi | 146 ++++++++++++++++++++++++++++++++++++++++-----------
380 2 files changed, 127 insertions(+), 32 deletions(-)
382 diff --git a/manual/locale.texi b/manual/locale.texi
383 index 8bfd653..ee1c3a1 100644
384 --- a/manual/locale.texi
385 +++ b/manual/locale.texi
386 @@ -29,6 +29,7 @@ will follow the conventions preferred by the user.
387 * Setting the Locale:: How a program specifies the locale
388 with library functions.
389 * Standard Locales:: Locale names available on all systems.
390 +* Locale Names:: Format of system-specific locale names.
391 * Locale Information:: How to access the information for the locale.
392 * Formatting Numbers:: A dedicated function to format numbers.
393 * Yes-or-No Questions:: Check a Response against the locale.
394 @@ -99,14 +100,16 @@ locale named @samp{espana-castellano} to use the standard conventions of
397 The set of locales supported depends on the operating system you are
398 -using, and so do their names. We can't make any promises about what
399 -locales will exist, except for one standard locale called @samp{C} or
400 -@samp{POSIX}. Later we will describe how to construct locales.
401 -@comment (@pxref{Building Locale Files}).
402 +using, and so do their names, except that the standard locale called
403 +@samp{C} or @samp{POSIX} always exists. @xref{Locale Names}.
405 +In order to force the system to always use the default locale, the
406 +user can set the @code{LC_ALL} environment variable to @samp{C}.
408 @cindex combining locales
409 -A user also has the option of specifying different locales for different
410 -purposes---in effect, choosing a mixture of multiple locales.
411 +A user also has the option of specifying different locales for
412 +different purposes---in effect, choosing a mixture of multiple
413 +locales. @xref{Locale Categories}.
415 For example, the user might specify the locale @samp{espana-castellano}
416 for most purposes, but specify the locale @samp{usa-english} for
417 @@ -120,7 +123,7 @@ which locales apply. However, the user can choose to use each locale
418 for a particular subset of those purposes.
420 @node Locale Categories, Setting the Locale, Choosing Locale, Locales
421 -@section Categories of Activities that Locales Affect
422 +@section Locale Categories
423 @cindex categories for locales
424 @cindex locale categories
426 @@ -128,7 +131,11 @@ The purposes that locales serve are grouped into @dfn{categories}, so
427 that a user or a program can choose the locale for each category
428 independently. Here is a table of categories; each name is both an
429 environment variable that a user can set, and a macro name that you can
430 -use as an argument to @code{setlocale}.
431 +use as the first argument to @code{setlocale}.
433 +The contents of the environment variable (or the string in the second
434 +argument to @code{setlocale}) must be a valid locale name.
435 +@xref{Locale Names}.
439 @@ -172,7 +179,7 @@ for affirmative and negative responses.
443 -This is not an environment variable; it is only a macro that you can use
444 +This is not a category; it is only a macro that you can use
445 with @code{setlocale} to set a single locale for all purposes. Setting
446 this environment variable overwrites all selections by the other
447 @code{LC_*} variables or @code{LANG}.
448 @@ -355,13 +362,7 @@ The symbols in this section are defined in the header file @file{locale.h}.
449 @c strndup @ascuheap @acsmem
450 @c strcasecmp_l ok (C locale)
451 The function @code{setlocale} sets the current locale for category
452 -@var{category} to @var{locale}. A list of all the locales the system
453 -provides can be created by running
459 +@var{category} to @var{locale}.
461 If @var{category} is @code{LC_ALL}, this specifies the locale for all
462 purposes. The other possible values of @var{category} specify an
463 @@ -386,10 +387,9 @@ is passed in as @var{locale} parameter.
465 When you read the current locale for category @code{LC_ALL}, the value
466 encodes the entire combination of selected locales for all categories.
467 -In this case, the value is not just a single locale name. In fact, we
468 -don't make any promises about what it looks like. But if you specify
469 -the same ``locale name'' with @code{LC_ALL} in a subsequent call to
470 -@code{setlocale}, it restores the same combination of locale selections.
471 +If you specify the same ``locale name'' with @code{LC_ALL} in a
472 +subsequent call to @code{setlocale}, it restores the same combination
473 +of locale selections.
475 To be sure you can use the returned string encoding the currently selected
476 locale at a later time, you must make a copy of the string. It is not
477 @@ -405,20 +405,15 @@ for @var{category}.
478 If a nonempty string is given for @var{locale}, then the locale of that
479 name is used if possible.
481 +The effective locale name (either the second argument to
482 +@code{setlocale}, or if the argument is an empty string, the name
483 +obtained from the process environment) must be a valid locale name.
484 +@xref{Locale Names}.
486 If you specify an invalid locale name, @code{setlocale} returns a null
487 pointer and leaves the current locale unchanged.
490 -The path used for finding locale data can be set using the
491 -@code{LOCPATH} environment variable. The default path for finding
492 -locale data is system specific. It is computed from the value given
493 -as the prefix while configuring the C library. This value normally is
494 -@file{/usr} or @file{/}. For the former the complete path is:
500 Here is an example showing how you might use @code{setlocale} to
501 temporarily switch to a new locale.
503 @@ -458,7 +453,7 @@ locale categories, and future versions of the library will do so. For
504 portability, assume that any symbol beginning with @samp{LC_} might be
505 defined in @file{locale.h}.
507 -@node Standard Locales, Locale Information, Setting the Locale, Locales
508 +@node Standard Locales, Locale Names, Setting the Locale, Locales
509 @section Standard Locales
511 The only locale names you can count on finding on all operating systems
512 @@ -492,7 +487,94 @@ with the environment, rather than trying to specify some non-standard
513 locale explicitly by name. Remember, different machines might have
514 different sets of locales installed.
516 -@node Locale Information, Formatting Numbers, Standard Locales, Locales
517 +@node Locale Names, Locale Information, Standard Locales, Locales
518 +@section Locale Names
520 +The following command prints a list of locales supported by the
528 +@strong{Portability Note:} With the notable exception of the standard
529 +locale names @samp{C} and @samp{POSIX}, locale names are
532 +Most locale names follow XPG syntax and consist of up to four parts:
535 +@var{language}[_@var{territory}[.@var{codeset}]][@@@var{modifier}]
538 +Except for the first part, all of them are allowed to be missing. If the
539 +fully specified locale is not found, less specific ones are looked for.
540 +The various parts will be stripped off, in the following order:
553 +For example, the locale name @samp{de_AT.iso885915@@euro} denotes a
554 +German-language locale for use in Austria, using the ISO-8859-15
555 +(Latin-9) character set, and with the Euro as the currency symbol.
557 +In addition to locale names which follow XPG syntax, systems may
558 +provide aliases such as @samp{german}. Both categories of names must
559 +not contain the slash character @samp{/}.
561 +If the locale name starts with a slash @samp{/}, it is treated as a
562 +path relative to the configured locale directories; see @code{LOCPATH}
563 +below. The specified path must not contain a component @samp{..}, or
564 +the name is invalid, and @code{setlocale} will fail.
566 +@strong{Portability Note:} POSIX suggests that if a locale name starts
567 +with a slash @samp{/}, it is resolved as an absolute path. However,
568 +@theglibc{} treats it as a relative path under the directories listed
569 +in @code{LOCPATH} (or the default locale directory if @code{LOCPATH}
572 +Locale names which are longer than an implementation-defined limit are
573 +invalid and cause @code{setlocale} to fail.
575 +As a special case, locale names used with @code{LC_ALL} can combine
576 +several locales, reflecting different locale settings for different
577 +categories. For example, you might want to use a U.S. locale with ISO
578 +A4 paper format, so you set @code{LANG} to @samp{en_US.UTF-8}, and
579 +@code{LC_PAPER} to @samp{de_DE.UTF-8}. In this case, the
580 +@code{LC_ALL}-style combined locale name is
583 +LC_CTYPE=en_US.UTF-8;LC_TIME=en_US.UTF-8;LC_PAPER=de_DE.UTF-8;@dots{}
586 +followed by other category settings not shown here.
589 +The path used for finding locale data can be set using the
590 +@code{LOCPATH} environment variable. This variable lists the
591 +directories in which to search for locale definitions, separated by a
594 +The default path for finding locale data is system specific. A typical
595 +value for the @code{LOCPATH} default is:
601 +The value of @code{LOCPATH} is ignored by privileged programs for
602 +security reasons, and only the default directory is used.
604 +@node Locale Information, Formatting Numbers, Locale Names, Locales
605 @section Accessing Locale Information
607 There are several ways to access locale information. The simplest
610 From b76db403426d4978ca2e60998c6dc62668a3f998 Mon Sep 17 00:00:00 2001
611 From: Florian Weimer <fweimer@redhat.com>
612 Date: Wed, 28 May 2014 14:41:52 +0200
613 Subject: [PATCH] setlocale: Use the heap for the copy of the locale argument
615 This avoids alloca calls with potentially large arguments.
617 (cherry picked from commit d183645616b0533b3acee28f1a95570bffbdf50f)
620 locale/setlocale.c | 14 ++++++++++++--
621 2 files changed, 17 insertions(+), 2 deletions(-)
623 diff --git a/locale/setlocale.c b/locale/setlocale.c
624 index b70fa6c..a4c5983 100644
625 --- a/locale/setlocale.c
626 +++ b/locale/setlocale.c
627 @@ -272,6 +272,8 @@ setlocale (int category, const char *locale)
628 of entries of the form `CATEGORY=VALUE'. */
629 const char *newnames[__LC_LAST];
630 struct __locale_data *newdata[__LC_LAST];
631 + /* Copy of the locale argument, for in-place splitting. */
632 + char *locale_copy = NULL;
634 /* Set all name pointers to the argument name. */
635 for (category = 0; category < __LC_LAST; ++category)
636 @@ -281,7 +283,13 @@ setlocale (int category, const char *locale)
637 if (__builtin_expect (strchr (locale, ';') != NULL, 0))
639 /* This is a composite name. Make a copy and split it up. */
640 - char *np = strdupa (locale);
641 + locale_copy = strdup (locale);
642 + if (__glibc_unlikely (locale_copy == NULL))
644 + __libc_rwlock_unlock (__libc_setlocale_lock);
647 + char *np = locale_copy;
651 @@ -299,6 +307,7 @@ setlocale (int category, const char *locale)
654 __libc_rwlock_unlock (__libc_setlocale_lock);
655 + free (locale_copy);
657 /* Bogus category name. */
659 @@ -391,8 +400,9 @@ setlocale (int category, const char *locale)
660 /* Critical section left. */
661 __libc_rwlock_unlock (__libc_setlocale_lock);
663 - /* Free the resources (the locale path variable). */
664 + /* Free the resources. */
666 + free (locale_copy);