Skip to content

Commit

Permalink
Merge pull request #53 from OpenVicProject/fix/polish-todos
Browse files Browse the repository at this point in the history
Add `\x8F` to `Ę` conversion for Windows-1252
  • Loading branch information
Hop311 authored Aug 2, 2024
2 parents 8472800 + c7c0809 commit f754eb9
Show file tree
Hide file tree
Showing 5 changed files with 284 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ jobs:
with:
platform: ${{ matrix.platform }}
target: ${{ matrix.target }}
sconsflags: arch=${{ matrix.arch }} build_ovdl_library=yes run_ovdl_tests=yes
sconsflags: arch=${{ matrix.arch }} build_ovdl_library=yes run_ovdl_tests=yes ubuntu_gcc_invalid_char_hang_bug=${{ matrix.runner == 'ubuntu-20.04' }}

- name: Delete compilation files
if: ${{ matrix.platform == 'windows' }}
Expand Down
2 changes: 2 additions & 0 deletions SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ opts.Add(BoolVariable("run_ovdl_tests", "Build and run the openvic dataloader te
opts.Add(BoolVariable("build_ovdl_library", "Build the openvic dataloader library.", env.get("build_ovdl_library", not env.is_standalone)))
opts.Add(BoolVariable("build_ovdl_headless", "Build the openvic dataloader headless executable", env.is_standalone))

opts.Add(BoolVariable("ubuntu_gcc_invalid_char_hang_bug", "Skips test section which triggers a hang build for gcc-12 on ubuntu-20", False))

env.FinalizeOptions()

env.exposed_includes = []
Expand Down
110 changes: 66 additions & 44 deletions src/openvic-dataloader/detail/Convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,22 @@
#include "v2script/ParseState.hpp"

namespace ovdl::convert {
/// Holds a `char` code together with a UTF-8 string view.
/// Two `value` codes act as markers: 0 is reported by `is_invalid()`,
/// 1 is reported by `is_pass()`.
struct MappedChar {
	char value;
	std::string_view utf8;

	/// True when `value` holds the "invalid" marker (0).
	constexpr bool is_invalid() const {
		return '\0' == value;
	}

	/// True when `value` holds the "pass" marker (1).
	constexpr bool is_pass() const {
		return '\x01' == value;
	}
};

/// Marker entry for which `is_invalid()` is true; carries an empty string.
constexpr MappedChar invalid_map { '\0', "" };
/// Marker entry for which `is_pass()` is true; carries an empty string.
constexpr MappedChar pass_map { '\x01', "" };

struct map_value {
std::string_view _value;

constexpr map_value() noexcept : _value("") {}
constexpr map_value(std::nullptr_t) noexcept : _value("\0") {}
constexpr map_value(std::nullptr_t) noexcept : _value("\0", 1) {}
constexpr explicit map_value(std::string_view val) noexcept : _value(val) {}

constexpr bool is_invalid() const {
static constexpr map_value invalid_value() noexcept {
return map_value(nullptr);
}

constexpr bool is_invalid() const noexcept {
return !_value.empty() && _value[0] == '\0';
}

constexpr bool is_pass() const {
constexpr bool is_pass() const noexcept {
return _value.empty();
}

Expand Down Expand Up @@ -203,13 +197,19 @@ namespace ovdl::convert {
.map<'\xFC'>("ü")
.map<'\xFD'>("ý")
.map<'\xFE'>("þ")
.map<'\xFF'>("ÿ");
.map<'\xFF'>("ÿ")

// Paradox being special, invalid Windows-1252
// Used for (semantically incorrect) Polish localization TODOs
.map<'\x8F'>("Ę");

/// Tries to match the input at the reader's current position against `map`.
///
/// @param reader Reader to match from; passed on to `map.try_parse`.
/// @return The mapped string wrapped in a `map_value` when `map` matches;
///         `map_value::invalid_value()` when nothing matched and the code unit
///         at the reader's position is negative; otherwise a default-constructed
///         `map_value`.
template<typename Reader>
static constexpr map_value try_parse(Reader& reader) {
	if (auto matched = map.try_parse(reader)) {
		return map_value(map[matched]);
	}

	// NOTE(review): `< 0` only detects non-ASCII bytes when the reader's
	// character type is signed; confirm this holds for every encoding/platform used.
	if (*reader.position() < 0) {
		return map_value::invalid_value();
	}

	return {};
}
Expand Down Expand Up @@ -358,6 +358,8 @@ namespace ovdl::convert {
auto index = map.try_parse(reader);
if (index) {
return map_value(map[index]);
} else if (*reader.position() < 0) {
return map_value::invalid_value();
}
return {};
}
Expand Down Expand Up @@ -405,6 +407,11 @@ namespace ovdl::convert {
break;
// Skip Ascii and Utf8 encoding
default: {
// If within ASCII range
if (c >= CharT {}) {
break;
}

map_value val = {};
CharT char_array[] { c, CharT() };
auto input = lexy::range_input(&char_array[0], &char_array[1]);
Expand Down Expand Up @@ -454,19 +461,24 @@ namespace ovdl::convert {
auto begin = reader.position();
auto last_it = begin;
while (reader.peek() != eof) {
map_value val = try_parse_map(state.encoding(), reader);
// If not within ASCII range
if (*reader.position() < 0) {
map_value val = try_parse_map(state.encoding(), reader);

if (val.is_invalid()) {
Error::on_invalid_character(state, reader);
reader.bump();
continue;
} else if (!val.is_pass()) {
result.append(val._value);
last_it = reader.position();
continue;
}
}

if (val.is_invalid()) {
Error::on_invalid_character(state, reader);
while (reader.peek() != eof && *reader.position() > 0) {
reader.bump();
continue;
} else if (!val.is_pass()) {
result.append(val._value);
last_it = reader.position();
continue;
}

reader.bump();
result.append(last_it, reader.position());
last_it = reader.position();
}
Expand Down Expand Up @@ -503,19 +515,24 @@ namespace ovdl::convert {
auto begin = reader.position();
auto last_it = begin;
while (reader.peek() != eof) {
map_value val = try_parse_map(state.encoding(), reader);
// If not within ASCII range
if (*reader.position() < 0) {
map_value val = try_parse_map(state.encoding(), reader);

if (val.is_invalid()) {
Error::on_invalid_character(state, reader);
reader.bump();
continue;
} else if (!val.is_pass()) {
result.append(val._value);
last_it = reader.position();
continue;
}
}

if (val.is_invalid()) {
Error::on_invalid_character(state, reader);
while (reader.peek() != eof && *reader.position() > 0) {
reader.bump();
continue;
} else if (!val.is_pass()) {
result.append(val._value);
last_it = reader.position();
continue;
}

reader.bump();
result.append(last_it, reader.position());
last_it = reader.position();
}
Expand Down Expand Up @@ -550,19 +567,24 @@ namespace ovdl::convert {
auto begin = reader.position();
auto last_it = begin;
while (reader.peek() != eof) {
map_value val = try_parse_map(state.encoding(), reader);
// If not within ASCII range
if (*reader.position() < 0) {
map_value val = try_parse_map(state.encoding(), reader);

if (val.is_invalid()) {
Error::on_invalid_character(state, reader);
reader.bump();
continue;
} else if (!val.is_pass()) {
result.append(val._value);
last_it = reader.position();
continue;
if (val.is_invalid()) {
Error::on_invalid_character(state, reader);
reader.bump();
continue;
} else if (!val.is_pass()) {
result.append(val._value);
last_it = reader.position();
continue;
}
}

reader.bump();
while (reader.peek() != eof && *reader.position() > 0) {
reader.bump();
}
result.append(last_it, reader.position());
last_it = reader.position();
}
Expand Down
5 changes: 5 additions & 0 deletions tests/SCsub
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ tests_env.tests_sources = env.GlobRecursive("*.cpp", [source_path])

SConscript("deps/SCsub", {"env": tests_env })

# Workaround for Ubuntu 22's GCC-12 distribution:
# a compiler bug makes the build hang if it sees any reference to \x8F in a character
if env["ubuntu_gcc_invalid_char_hang_bug"]:
tests_env.Append(CPPDEFINES=["_OVDL_TEST_UBUNTU_GCC_12_BUG_"])

tests_program = tests_env.UnitTest(
source=tests_env.tests_sources,
target=os.path.join(BINDIR, tests_name),
Expand Down
Loading

0 comments on commit f754eb9

Please sign in to comment.