| 1 | From e58fa0ba66272c5f28828b15d06c7e42a9882b3b Mon Sep 17 00:00:00 2001 |
| 2 | From: Jungshik Shin <jshin@chromium.org> |
| 3 | Date: Sat, 16 Dec 2017 04:19:27 +0000 |
| 4 | Subject: [PATCH] Use fromUTF8() for UnicodeString construction from UTF-8 |
| 5 | |
| 6 | Chrome's copy of ICU is built with U_CHARSET_IS_UTF8=1 so that a |char *| |
| 7 | buffer is treated as UTF-8 when constructing UnicodeString() regardless |
| 8 | of the default encoding of the current locale on Linux or the non-Unicode |
| 9 | code page on Windows. |
| 10 | |
| 11 | However, some Linux distros do not set U_CHARSET_IS_UTF8=1 when building |
| 12 | ICU, and a Chromium build with system_icu crashes when Chromium is run in |
| 13 | a non-UTF-8 locale (e.g. 'C'). |
| 14 | |
| 15 | To make Chromium work in a non-UTF-8 locale (which is pretty rare these |
| 16 | days), use 'icu::UnicodeString::fromUTF8(StringPiece)' instead of |
| 17 | 'icu::UnicodeString(const char*)'. |
| 18 | |
| 19 | Bug: 772655 |
| 20 | Test: components_unittests --gtest_filter=*IDN* |
| 21 | Test: Chromium built with system_icu does not crash in C locale. |
| 22 | Change-Id: I0daa284ec06b8e83814fc70eb8e9e5c96444ebfa |
| 23 | Reviewed-on: https://chromium-review.googlesource.com/831247 |
| 24 | Reviewed-by: Peter Kasting <pkasting@chromium.org> |
| 25 | Commit-Queue: Jungshik Shin <jshin@chromium.org> |
| 26 | Cr-Commit-Position: refs/heads/master@{#524586} |
| 27 | --- |
| 28 | components/url_formatter/idn_spoof_checker.cc | 10 +++++----- |
| 29 | 1 file changed, 5 insertions(+), 5 deletions(-) |
| 30 | |
| 31 | diff --git a/components/url_formatter/idn_spoof_checker.cc b/components/url_formatter/idn_spoof_checker.cc |
| 32 | index a88c5e8f8331..aee748d8a4d5 100644 |
| 33 | --- a/components/url_formatter/idn_spoof_checker.cc |
| 34 | +++ b/components/url_formatter/idn_spoof_checker.cc |
| 35 | @@ -110,8 +110,8 @@ IDNSpoofChecker::IDNSpoofChecker() { |
| 36 | |
| 37 | // These Cyrillic letters look like Latin. A domain label entirely made of |
| 38 | // these letters is blocked as a simplified whole-script-spoofable. |
| 39 | - cyrillic_letters_latin_alike_ = |
| 40 | - icu::UnicodeSet(icu::UnicodeString("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status); |
| 41 | + cyrillic_letters_latin_alike_ = icu::UnicodeSet( |
| 42 | + icu::UnicodeString::fromUTF8("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status); |
| 43 | cyrillic_letters_latin_alike_.freeze(); |
| 44 | |
| 45 | cyrillic_letters_ = |
| 46 | @@ -141,8 +141,8 @@ IDNSpoofChecker::IDNSpoofChecker() { |
| 47 | UParseError parse_error; |
| 48 | diacritic_remover_.reset(icu::Transliterator::createFromRules( |
| 49 | UNICODE_STRING_SIMPLE("DropAcc"), |
| 50 | - icu::UnicodeString("::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;" |
| 51 | - " ł > l; ø > o; đ > d;"), |
| 52 | + icu::UnicodeString::fromUTF8("::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;" |
| 53 | + " ł > l; ø > o; đ > d;"), |
| 54 | UTRANS_FORWARD, parse_error, status)); |
| 55 | |
| 56 | // Supplement the Unicode confusable list by the following mapping. |
| 57 | @@ -158,7 +158,7 @@ IDNSpoofChecker::IDNSpoofChecker() { |
| 58 | // - U+0D1F (ട) => s |
| 59 | extra_confusable_mapper_.reset(icu::Transliterator::createFromRules( |
| 60 | UNICODE_STRING_SIMPLE("ExtraConf"), |
| 61 | - icu::UnicodeString( |
| 62 | + icu::UnicodeString::fromUTF8( |
| 63 | "ӏ > l; [кĸκ] > k; п > n; [ƅь] > b; в > b; м > m; н > h; " |
| 64 | "т > t; [шщ] > w; ട > s;"), |
| 65 | UTRANS_FORWARD, parse_error, status)); |
| 66 | -- |
| 67 | 2.15.1 |
| 68 | |