From 7d282fbaf5a84b86bea9367a5e11b5449cc69842 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Fri, 2 May 2025 12:40:33 +0200 Subject: [PATCH 1/4] The en_US.UTF-8 locale needs glibc-langpack-en on Oracle Linux 8+ --- doc/user/utf8-locale.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/user/utf8-locale.md b/doc/user/utf8-locale.md index b8e610dced8e..80fd147f680c 100644 --- a/doc/user/utf8-locale.md +++ b/doc/user/utf8-locale.md @@ -21,6 +21,7 @@ Instead, all values should be `"en_US.UTF-8"` or other regions but still `.UTF-8 ### Fedora-based: RHEL, Oracle Linux, etc ```bash +sudo dnf install glibc-langpack-en export LANG=en_US.UTF-8 ``` From a9c66625f58a8ca8142a2ec4391441ae769a9dc3 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Fri, 2 May 2025 13:21:12 +0200 Subject: [PATCH 2/4] Fix include_all_c_header_spec.rb to work in Docker tests --- spec/truffle/include_all_c_header_spec.rb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/spec/truffle/include_all_c_header_spec.rb b/spec/truffle/include_all_c_header_spec.rb index 206816b4255f..2cf1b8e52beb 100644 --- a/spec/truffle/include_all_c_header_spec.rb +++ b/spec/truffle/include_all_c_header_spec.rb @@ -10,8 +10,9 @@ describe 'lib/cext/include/internal_all.h' do it 'includes each *.h file from lib/cext/include/internal/' do - filenames = Dir.glob('internal/**/*.h', base: 'lib/cext/include', sort: true) - content = File.read('lib/cext/include/internal_all.h') + ruby_home = RbConfig::CONFIG['prefix'] + filenames = Dir.glob('internal/**/*.h', base: "#{ruby_home}/lib/cext/include", sort: true) + content = File.read("#{ruby_home}/lib/cext/include/internal_all.h") filenames.should_not be_empty @@ -20,8 +21,9 @@ end it 'includes each *.h file from lib/cext/include/stubs/internal/' do - filenames = Dir.glob('internal/**/*.h', base: 'lib/cext/include/stubs', sort: true) - content = File.read('lib/cext/include/internal_all.h') + ruby_home = RbConfig::CONFIG['prefix'] + filenames = 
Dir.glob('internal/**/*.h', base: "#{ruby_home}/lib/cext/include/stubs", sort: true) + content = File.read("#{ruby_home}/lib/cext/include/internal_all.h") filenames.should_not be_empty From de728530b9ccc87c5eabc47584ac157415b1a0f2 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 14 May 2025 17:04:24 +0200 Subject: [PATCH 3/4] Fixes for LANG=POSIX --- src/main/ruby/truffleruby/core/dir.rb | 2 +- src/main/ruby/truffleruby/core/env.rb | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/ruby/truffleruby/core/dir.rb b/src/main/ruby/truffleruby/core/dir.rb index 66fc503d9959..4cd66d6c345f 100644 --- a/src/main/ruby/truffleruby/core/dir.rb +++ b/src/main/ruby/truffleruby/core/dir.rb @@ -303,7 +303,7 @@ def glob(pattern, flags = 0, base: nil, sort: true, &block) total = matches.size while index < total - matches[index] = matches[index].encode(enc) unless matches[index].encoding == enc + matches[index] = matches[index].force_encoding(enc) unless matches[index].encoding == enc index += 1 end end diff --git a/src/main/ruby/truffleruby/core/env.rb b/src/main/ruby/truffleruby/core/env.rb index 10e7156d872c..a6b609d462ad 100644 --- a/src/main/ruby/truffleruby/core/env.rb +++ b/src/main/ruby/truffleruby/core/env.rb @@ -366,7 +366,11 @@ def set_encoding(value) if Encoding.default_internal && value.ascii_only? value = value.encode Encoding.default_internal, Encoding::LOCALE elsif value.encoding != Encoding::LOCALE - value = value.dup.force_encoding(Encoding::LOCALE) + if Encoding::LOCALE == Encoding::US_ASCII && !value.ascii_only? + value = value.b + else + value = value.dup.force_encoding(Encoding::LOCALE) + end end value.freeze end From f91dd3545d528fcad3e501b234b50c5acc0ad7d8 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Fri, 2 May 2025 12:52:48 +0200 Subject: [PATCH 4/4] No need to setup a locale anymore for truffleruby * Test using LANG=POSIX like the default in Docker images. 
--- CHANGELOG.md | 1 + README.md | 2 -- ci.jsonnet | 23 +++++++++++-------- doc/user/utf8-locale.md | 16 +++++++++---- spec/truffle/launcher_spec.rb | 6 ----- .../core/encoding/EncodingManager.java | 16 ------------- .../java/org/truffleruby/options/Options.java | 5 ---- src/options.yml | 1 - .../shared/options/OptionsCatalog.java | 12 ---------- tool/docker-configs.yaml | 14 ----------- tool/docker.rb | 6 ----- 11 files changed, 25 insertions(+), 77 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c3777a0010e..ed28396534ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Compatibility: * Adjust a `FrozenError`'s message and add a receiver when a frozen module or class is modified (e.g. by defining or undefining an instance method or by defining a nested module (@andrykonchin). * Fix `Kernel#sprintf` and `%p` format specification to produce `"nil"` for `nil` argument (#3846, @andrykonchin). * Reimplement `Data#with` to not call `Data.new` that can be removed or redefined (#3890, @andrykonchin). +* TruffleRuby now supports the `POSIX` locale, the default locale in Docker images (@eregon). Performance: diff --git a/README.md b/README.md index 947714bc8568..e09a66ce067c 100644 --- a/README.md +++ b/README.md @@ -110,8 +110,6 @@ environment, for example, by unmounting system filesystems such as `/dev/shm`. Without these dependencies, many libraries including RubyGems will not work. TruffleRuby will try to print a nice error message if a dependency is missing, but this can only be done on a best effort basis. -You also need to set up a [UTF-8 locale](doc/user/utf8-locale.md) if not already done. - See the [contributor workflow](doc/contributor/workflow.md) document if you wish to build TruffleRuby from source. 
## Current Status diff --git a/ci.jsonnet b/ci.jsonnet index b7bd9b69963c..376c5648e8b2 100644 --- a/ci.jsonnet +++ b/ci.jsonnet @@ -245,38 +245,41 @@ local part_definitions = { platform: { local common_deps = common.deps.truffleruby + common.deps.sulong, + local locale = { + # We want to test with the POSIX locale, which is the default locale for Docker images. + # We need to override all locale-related env vars set by the CI. + environment+: { + LANG: "POSIX", + LC_ALL: "POSIX", + LC_CTYPE: "POSIX", + }, + }, - linux: common.linux_amd64 + common_deps + { + linux: common.linux_amd64 + common_deps + locale + { platform_name:: "LinuxAMD64", "$.cap":: { normal_machine: [], bench_machine: ["x52"] + self.normal_machine + ["no_frequency_scaling"], }, }, - linux_aarch64: common.linux_aarch64 + common_deps + { + linux_aarch64: common.linux_aarch64 + common_deps + locale + { platform_name:: "LinuxAArch64", "$.cap":: { normal_machine: [], }, }, - darwin_amd64: common.darwin_amd64 + common_deps + { + darwin_amd64: common.darwin_amd64 + common_deps + locale + { platform_name:: "DarwinAMD64", "$.cap":: { # GR-45839, GR-46279: exclude macmini_late_2014_8gb, they are too slow, have too little RAM and cause various timeouts normal_machine: ["darwin_bigsur", "!macmini_late_2014_8gb"], }, - environment+: { - LANG: "en_US.UTF-8", - }, }, - darwin_aarch64: common.darwin_aarch64 + common_deps + { + darwin_aarch64: common.darwin_aarch64 + common_deps + locale + { platform_name:: "DarwinAArch64", "$.cap":: { normal_machine: ["darwin_bigsur"], }, - environment+: { - LANG: "en_US.UTF-8", - }, }, }, diff --git a/doc/user/utf8-locale.md b/doc/user/utf8-locale.md index 80fd147f680c..3ed28dd469d5 100644 --- a/doc/user/utf8-locale.md +++ b/doc/user/utf8-locale.md @@ -6,17 +6,23 @@ permalink: /reference-manual/ruby/UTF8Locale/ --- # Setting Up a UTF-8 Locale -You need a UTF-8 locale to run some Ruby applications. -For example, we have found that RubyGems and ruby/spec need such a locale. 
+Since TruffleRuby 25.0, TruffleRuby supports the `POSIX` locale, the default locale in Docker images. +**So there is no need to set up a locale anymore.** -This is not needed if the `$LANG` environment variable is already set and: +Some Ruby applications however require setting up a proper locale (same on CRuby). +The instructions below explain how to do that. + +You can check the current locale using: ```bash locale ``` -shows no `="C"` and no warning. -Instead, all values should be `"en_US.UTF-8"` or other regions but still `.UTF-8`. +If that shows warnings, it probably means `LANG` is set to a locale which is not installed. + +These docs explain how to set up the `en_US.UTF-8` locale. + +As a note, the `C.UTF-8` locale also exists on Linux (but not on macOS) and might be more convenient as it does not require installing extra packages. ### Fedora-based: RHEL, Oracle Linux, etc diff --git a/spec/truffle/launcher_spec.rb b/spec/truffle/launcher_spec.rb index 10474bdeb10b..30974625b16e 100644 --- a/spec/truffle/launcher_spec.rb +++ b/spec/truffle/launcher_spec.rb @@ -404,12 +404,6 @@ def should_print_full_java_command(options, env: {}) end end - it "warns if the locale is not set properly" do - err = ruby_exe("Encoding.find('locale')", args: "2>&1", env: { "LC_ALL" => "C" }) - err.should.include? "[ruby] WARNING: Encoding.find('locale') is US-ASCII (due to nl_langinfo(CODESET) which returned " - err.should.include? 
"), this often indicates that the system locale is not set properly" - end - ['RUBYOPT', 'TRUFFLERUBYOPT'].each do |var| it "should recognize ruby --vm options in #{var}" do out = ruby_exe('print Truffle::System.get_java_property("foo")', env: { var => "#{ENV[var]} --vm.Dfoo=bar" }, args: @redirect) diff --git a/src/main/java/org/truffleruby/core/encoding/EncodingManager.java b/src/main/java/org/truffleruby/core/encoding/EncodingManager.java index ea3c53203d21..c4de78833abe 100644 --- a/src/main/java/org/truffleruby/core/encoding/EncodingManager.java +++ b/src/main/java/org/truffleruby/core/encoding/EncodingManager.java @@ -127,7 +127,6 @@ public void initializeDefaultEncodings(TruffleNFIPlatform nfi, NativeConfigurati private void initializeLocaleEncoding(TruffleNFIPlatform nfi, NativeConfiguration nativeConfiguration) { final String localeEncodingName; - final String detector; if (nfi != null) { final int codeset = (int) nativeConfiguration.get("platform.langinfo.CODESET"); @@ -146,10 +145,8 @@ private void initializeLocaleEncoding(TruffleNFIPlatform nfi, NativeConfiguratio context, InteropLibrary.getUncached(), 0); - detector = "nl_langinfo(CODESET)"; localeEncodingName = new String(bytes, StandardCharsets.US_ASCII); } else { - detector = "Charset.defaultCharset()"; localeEncodingName = Charset.defaultCharset().name(); } @@ -158,19 +155,6 @@ private void initializeLocaleEncoding(TruffleNFIPlatform nfi, NativeConfiguratio rubyEncoding = Encodings.US_ASCII; } - if (context.getOptions().WARN_LOCALE && rubyEncoding == Encodings.US_ASCII) { - String firstLine = "Encoding.find('locale') is US-ASCII (due to " + detector + " which returned " + - localeEncodingName + "), this often indicates that the system locale is not set properly. "; - if ("C".equals(System.getenv("LANG")) && "C".equals(System.getenv("LC_ALL"))) { - // The parent process seems to explicitly want a C locale (e.g. EnvUtil#invoke_ruby in the MRI test harness), so only warn at config level in this case. 
- RubyLanguage.LOGGER.config(firstLine + "Warning at level=CONFIG because LANG=C and LC_ALL=C are set. " + - "Set LANG=en_US.UTF-8 and see https://www.graalvm.org/dev/reference-manual/ruby/UTF8Locale/ for details."); - } else { - RubyLanguage.LOGGER.warning(firstLine + - "Set LANG=en_US.UTF-8 and see https://www.graalvm.org/dev/reference-manual/ruby/UTF8Locale/ for details."); - } - } - localeEncoding = rubyEncoding; } diff --git a/src/main/java/org/truffleruby/options/Options.java b/src/main/java/org/truffleruby/options/Options.java index 03a71a2ee8ce..561665a83821 100644 --- a/src/main/java/org/truffleruby/options/Options.java +++ b/src/main/java/org/truffleruby/options/Options.java @@ -75,8 +75,6 @@ public final class Options { public final boolean VIRTUAL_THREAD_FIBERS; /** --log-subprocess=false */ public final boolean LOG_SUBPROCESS; - /** --warn-locale=true */ - public final boolean WARN_LOCALE; /** --exceptions-store-java=false */ public final boolean EXCEPTIONS_STORE_JAVA; /** --exceptions-print-java=false */ @@ -236,7 +234,6 @@ public Options(Env env, OptionValues options, LanguageOptions languageOptions) { HASHING_DETERMINISTIC = options.get(OptionsCatalog.HASHING_DETERMINISTIC_KEY); VIRTUAL_THREAD_FIBERS = options.get(OptionsCatalog.VIRTUAL_THREAD_FIBERS_KEY); LOG_SUBPROCESS = options.get(OptionsCatalog.LOG_SUBPROCESS_KEY); - WARN_LOCALE = options.get(OptionsCatalog.WARN_LOCALE_KEY); EXCEPTIONS_STORE_JAVA = options.get(OptionsCatalog.EXCEPTIONS_STORE_JAVA_KEY); EXCEPTIONS_PRINT_JAVA = options.get(OptionsCatalog.EXCEPTIONS_PRINT_JAVA_KEY); EXCEPTIONS_PRINT_UNCAUGHT_JAVA = options.get(OptionsCatalog.EXCEPTIONS_PRINT_UNCAUGHT_JAVA_KEY); @@ -357,8 +354,6 @@ public Object fromDescriptor(OptionDescriptor descriptor) { return VIRTUAL_THREAD_FIBERS; case "ruby.log-subprocess": return LOG_SUBPROCESS; - case "ruby.warn-locale": - return WARN_LOCALE; case "ruby.exceptions-store-java": return EXCEPTIONS_STORE_JAVA; case "ruby.exceptions-print-java": diff --git 
a/src/options.yml b/src/options.yml index 82dbfb693d09..9054cbe21c48 100644 --- a/src/options.yml +++ b/src/options.yml @@ -106,7 +106,6 @@ EXPERT: HASHING_DETERMINISTIC: [hashing-deterministic, boolean, false, Produce deterministic hash values] VIRTUAL_THREAD_FIBERS: [virtual-thread-fibers, boolean, false, 'Use VirtualThread for Fibers'] LOG_SUBPROCESS: [log-subprocess, boolean, false, 'Log whenever a subprocess is created'] # Also see --log-process-args - WARN_LOCALE: [warn-locale, boolean, true, 'Warn when the system locale is not set properly'] # Options to tweak backtraces EXCEPTIONS_STORE_JAVA: [exceptions-store-java, boolean, false, Store the Java exception with the Ruby backtrace] diff --git a/src/shared/java/org/truffleruby/shared/options/OptionsCatalog.java b/src/shared/java/org/truffleruby/shared/options/OptionsCatalog.java index adb40077bff8..b402250c5557 100644 --- a/src/shared/java/org/truffleruby/shared/options/OptionsCatalog.java +++ b/src/shared/java/org/truffleruby/shared/options/OptionsCatalog.java @@ -53,7 +53,6 @@ public final class OptionsCatalog { public static final OptionKey HASHING_DETERMINISTIC_KEY = new OptionKey<>(false); public static final OptionKey VIRTUAL_THREAD_FIBERS_KEY = new OptionKey<>(false); public static final OptionKey LOG_SUBPROCESS_KEY = new OptionKey<>(false); - public static final OptionKey WARN_LOCALE_KEY = new OptionKey<>(true); public static final OptionKey EXCEPTIONS_STORE_JAVA_KEY = new OptionKey<>(false); public static final OptionKey EXCEPTIONS_PRINT_JAVA_KEY = new OptionKey<>(false); public static final OptionKey EXCEPTIONS_PRINT_UNCAUGHT_JAVA_KEY = new OptionKey<>(false); @@ -429,14 +428,6 @@ public final class OptionsCatalog { .usageSyntax("") .build(); - public static final OptionDescriptor WARN_LOCALE = OptionDescriptor - .newBuilder(WARN_LOCALE_KEY, "ruby.warn-locale") - .help("Warn when the system locale is not set properly") - .category(OptionCategory.EXPERT) - .stability(OptionStability.EXPERIMENTAL) - 
.usageSyntax("") - .build(); - public static final OptionDescriptor EXCEPTIONS_STORE_JAVA = OptionDescriptor .newBuilder(EXCEPTIONS_STORE_JAVA_KEY, "ruby.exceptions-store-java") .help("Store the Java exception with the Ruby backtrace") @@ -1385,8 +1376,6 @@ public static OptionDescriptor fromName(String name) { return VIRTUAL_THREAD_FIBERS; case "ruby.log-subprocess": return LOG_SUBPROCESS; - case "ruby.warn-locale": - return WARN_LOCALE; case "ruby.exceptions-store-java": return EXCEPTIONS_STORE_JAVA; case "ruby.exceptions-print-java": @@ -1647,7 +1636,6 @@ public static OptionDescriptor[] allDescriptors() { HASHING_DETERMINISTIC, VIRTUAL_THREAD_FIBERS, LOG_SUBPROCESS, - WARN_LOCALE, EXCEPTIONS_STORE_JAVA, EXCEPTIONS_PRINT_JAVA, EXCEPTIONS_PRINT_UNCAUGHT_JAVA, diff --git a/tool/docker-configs.yaml b/tool/docker-configs.yaml index 6f00072150a2..df0a13208800 100644 --- a/tool/docker-configs.yaml +++ b/tool/docker-configs.yaml @@ -6,11 +6,8 @@ rpm: &rpm yaml: libyaml-devel cext: gcc make c++: gcc-c++ - set-locale: - - ENV LANG=en_US.UTF-8 deb: &deb - locale: locales tar: specs: netbase zlib: libz-dev @@ -18,45 +15,34 @@ deb: &deb yaml: libyaml-dev cext: gcc make c++: g++ - set-locale: - # Uncomment the en_US.UTF-8 line in /etc/locale.gen - - RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen - # locale-gen generates locales for all uncommented locales in /etc/locale.gen - - RUN locale-gen - - ENV LANG=en_US.UTF-8 # Too old g++ #ol7: # base: oraclelinux:7-slim # # --enablerepo needed for libyaml-devel # install: RUN yum install --enablerepo=ol7_optional_latest -y -# locale: # <<: *rpm ol8: base: oraclelinux:8-slim # --enablerepo needed for libyaml-devel install: RUN microdnf install --enablerepo=ol8_codeready_builder -y - locale: glibc-langpack-en <<: *rpm ol9: base: oraclelinux:9-slim # --enablerepo needed for libyaml-devel install: RUN microdnf install --enablerepo=ol9_codeready_builder -y - locale: glibc-langpack-en <<: *rpm fedora37: base: fedora:37 install: RUN 
dnf install -y - locale: glibc-langpack-en <<: *rpm fedora38: base: fedora:38 install: RUN dnf install -y - locale: glibc-langpack-en <<: *rpm ubuntu1804: diff --git a/tool/docker.rb b/tool/docker.rb index fcda7a97b700..0092c329927c 100644 --- a/tool/docker.rb +++ b/tool/docker.rb @@ -116,8 +116,6 @@ def docker(*args) run_post_install_hook = rebuild_openssl packages = [] - packages << distro.fetch('locale') - packages << distro.fetch('tar') packages << distro.fetch('specs') if full_test @@ -140,12 +138,8 @@ def docker(*args) "FROM #{distro.fetch('base')}", *proxy_vars, [distro.fetch('install'), *packages.compact].join(' '), - *distro.fetch('set-locale'), ] - # Check the locale is properly generated - lines << 'RUN locale -a | grep en_US.utf8' - lines << 'WORKDIR /test' lines << 'RUN useradd -ms /bin/bash test'