From 9dc26af82902c4002a92ba23831b82ba15d823d0 Mon Sep 17 00:00:00 2001
From: wechman <damian.wechman@codepoets.it>
Date: Fri, 25 Feb 2022 11:12:18 +0100
Subject: [PATCH 1/2] Use "C" instead of the user environment locale in solc

---
 Changelog.md  |  2 +-
 solc/main.cpp | 24 ------------------------
 2 files changed, 1 insertion(+), 25 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index 8b9fa530c..99aa6ebb8 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -11,7 +11,7 @@ Compiler Features:
 
 Bugfixes:
  * Yul IR Code Generation: Optimize embedded creation code with correct settings. This fixes potential mismatches between the constructor code of a contract compiled in isolation and the bytecode in ``type(C).creationCode``, resp. the bytecode used for ``new C(...)``.
-
+ * Fix internal error for locales with unusual capitalization rules. Locale set in the environment is now completely ignored.
 
 ### 0.8.12 (2022-02-16)
 
diff --git a/solc/main.cpp b/solc/main.cpp
index ce69d20a7..50a6a8fc5 100644
--- a/solc/main.cpp
+++ b/solc/main.cpp
@@ -27,40 +27,16 @@
 
 #include <boost/exception/all.hpp>
 
-#include <clocale>
 #include <iostream>
 
 using namespace std;
 using namespace solidity;
 
-/*
-The equivalent of setlocale(LC_ALL, "C") is called before any user code is run.
-If the user has an invalid environment setting then it is possible for the call
-to set locale to fail, so there are only two possible actions, the first is to
-throw a runtime exception and cause the program to quit (default behaviour),
-or the second is to modify the environment to something sensible (least
-surprising behaviour).
-
-The follow code produces the least surprising behaviour. It will use the user
-specified default locale if it is valid, and if not then it will modify the
-environment the process is running in to use a sensible default. This also means
-that users do not need to install language packs for their OS.
-*/
-static void setDefaultOrCLocale()
-{
-#if __unix__
-	if (!std::setlocale(LC_ALL, ""))
-	{
-		setenv("LC_ALL", "C", 1);
-	}
-#endif
-}
 
 int main(int argc, char** argv)
 {
 	try
 	{
-		setDefaultOrCLocale();
 		solidity::frontend::CommandLineInterface cli(cin, cout, cerr);
 		return cli.run(argc, argv) ? 0 : 1;
 	}

From 2d834eaf67ad05d4c5d21272cbea3bb51e1b44a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= <kamil.sliwak@codepoets.it>
Date: Fri, 25 Feb 2022 19:21:58 +0100
Subject: [PATCH 2/2] A script and CI job for testing the compiler with
 different locales

---
 .circleci/config.yml | 15 ++++++++
 Changelog.md         |  1 +
 test/localeTest.sh   | 83 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 99 insertions(+)
 create mode 100755 test/localeTest.sh

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 65adf9ee3..bcfc99cd7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1051,6 +1051,20 @@ jobs:
   t_ubu_release_cli: &t_ubu_release_cli
     <<: *t_ubu_cli
 
+  t_ubu_locale:  # Runs test/localeTest.sh against the solc binary from the attached build workspace.
+    <<: *base_ubuntu2004_small
+    steps:
+      - checkout
+      - attach_workspace:
+          at: build
+      - run:  # apt-get instead of apt: apt's command-line interface is not guaranteed stable for scripts.
+          name: Install all locales
+          command: |
+            apt-get update --assume-yes
+            apt-get install locales-all --assume-yes --no-install-recommends
+      - run: test/localeTest.sh build/solc/solc
+      - gitter_notify_failure_unless_pr
+
   t_ubu_asan_cli:
     # Runs slightly faster on medium but we only run it nightly so efficiency matters more.
     <<: *base_ubuntu2004_small
@@ -1435,6 +1449,7 @@ workflows:
       # Ubuntu build and tests
       - b_ubu: *workflow_trigger_on_tags
       - t_ubu_cli: *workflow_ubuntu2004
+      - t_ubu_locale: *workflow_ubuntu2004
       - t_ubu_soltest_all: *workflow_ubuntu2004
       - t_ubu_soltest_enforce_yul: *workflow_ubuntu2004
       - b_ubu_clang: *workflow_trigger_on_tags
diff --git a/Changelog.md b/Changelog.md
index 99aa6ebb8..39d22c88e 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -13,6 +13,7 @@ Bugfixes:
  * Yul IR Code Generation: Optimize embedded creation code with correct settings. This fixes potential mismatches between the constructor code of a contract compiled in isolation and the bytecode in ``type(C).creationCode``, resp. the bytecode used for ``new C(...)``.
  * Fix internal error for locales with unusual capitalization rules. Locale set in the environment is now completely ignored.
 
+
 ### 0.8.12 (2022-02-16)
 
 Language Features:
diff --git a/test/localeTest.sh b/test/localeTest.sh
new file mode 100755
index 000000000..58e2aad4d
--- /dev/null
+++ b/test/localeTest.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+
+#------------------------------------------------------------------------------
+# Script that tests that the compiler produces the same output regardless of the
+# locale setting. Prerequisites: the locales C, tr_TR.utf8 and ja_JP.eucjp must be
+# enabled system-wide, and __invalid_locale__ must not be a valid locale name.
+#
+# Usage:
+#    <script name>.sh <path to solc binary>
+#
+# ------------------------------------------------------------------------------
+# This file is part of solidity.
+#
+# solidity is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# solidity is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with solidity.  If not, see <http://www.gnu.org/licenses/>
+#
+# (c) 2022 solidity contributors.
+#------------------------------------------------------------------------------
+
+set -eo pipefail  # Abort on the first failing command, including failures inside a pipeline.
+
+REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
+# shellcheck source=scripts/common.sh
+source "${REPO_ROOT}/scripts/common.sh"  # Presumably provides fail, printTask, assertFail and diff_values used below - confirm.
+
+solc_binary="$1"  # Path to the solc binary under test.
+(( $# == 1 )) || fail "Expected exactly 1 argument."
+
+# Preconditions: the locales exercised below must be installed and the bogus one must not exist; otherwise the checks prove nothing.
+locale -a | grep -e "^tr_TR\.utf8$" || fail "Locale 'tr_TR.utf8' not available."
+locale -a | grep -e "^ja_JP\.eucjp$" || fail "Locale 'ja_JP.eucjp' not available."
+locale -a | grep -e "^C$" || fail "Locale 'C' not available."
+locale -a | grep -e "^__invalid_locale__$" && fail "'__invalid_locale__' is not supposed to be a valid locale name."
+
+i="i"  # Probe character: upper-casing it with ${i^^} is locale-sensitive, which shows whether each LC_ALL change took effect in this shell.
+
+test_code=$(cat <<'EOF'
+    // SPDX-License-Identifier: GPL-3.0
+    pragma solidity *;
+    library L {}
+EOF
+)
+
+# Baseline: output under whatever locale the calling environment has set. Every later run is compared against it.
+printTask "Testing the default locale..."
+default_locale_output=$(echo "$test_code" | "$solc_binary" - --bin)
+
+# Plain C locale: ASCII-only case mapping, so `i` must upper-case to `I`.
+printTask "Testing the C locale..."
+export LC_ALL=C
+[[ ${i^^} == "I" ]] || assertFail
+c_locale_output=$(echo "$test_code" | "$solc_binary" - --bin)
+diff_values "$default_locale_output" "$c_locale_output"
+
+# Turkish locale, which has capitalization rules (`i` -> `İ` and `I` -> `ı`) that can make identifiers invalid.
+printTask "Testing the Turkish locale..."
+export LC_ALL=tr_TR.utf8
+[[ ${i^^} != "I" ]] || assertFail
+tr_locale_output=$(echo "$test_code" | "$solc_binary" - --bin)
+diff_values "$default_locale_output" "$tr_locale_output"
+
+# A different non-English locale that should not do anything special to ASCII chars, so `i` must still upper-case to `I`.
+printTask "Testing the Japanese locale..."
+export LC_ALL=ja_JP.eucjp
+[[ ${i^^} == "I" ]] || assertFail
+ja_locale_output=$(echo "$test_code" | "$solc_binary" - --bin)
+diff_values "$default_locale_output" "$ja_locale_output"
+
+# The compiler should not crash if the locale is not valid, and its output is still compared against the baseline.
+printTask "Testing an invalid locale..."
+export LC_ALL=__invalid_locale__ || true  # NOTE(review): '|| true' presumably keeps 'set -e' from aborting if the shell objects to the bogus name - confirm.
+invalid_locale_output=$(echo "$test_code" | "$solc_binary" - --bin)
+diff_values "$default_locale_output" "$invalid_locale_output"