From d089386a1869c35ebf7a064078e7e6272ebf18f6 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Tue, 19 Nov 2024 23:28:40 +0100 Subject: [PATCH 1/5] Move table creator helpers to the start of Oracle test No real changes, just make it possible to use these helpers in the unit test to be added. This commit is best viewed using Git --color-moved option. --- tests/oracle/test-oracle.cpp | 135 ++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 67 deletions(-) diff --git a/tests/oracle/test-oracle.cpp b/tests/oracle/test-oracle.cpp index d2924cd59..f38ea3451 100644 --- a/tests/oracle/test-oracle.cpp +++ b/tests/oracle/test-oracle.cpp @@ -25,6 +25,74 @@ using namespace soci::tests; std::string connectString; backend_factory const &backEnd = *soci::factory_oracle(); +// Helpers for creating tables for different tests. +struct table_creator_one : public table_creator_base +{ + table_creator_one(soci::session & sql) + : table_creator_base(sql) + { + sql << "create table soci_test(id number(10,0), val number(8,0), c char, " + "str varchar2(20), sh number, ll number, ul number, d number, " + "num76 numeric(7,6), " + "tm date, i1 number, i2 number, i3 number, name varchar2(20))"; + } +}; + +struct table_creator_two : public table_creator_base +{ + table_creator_two(soci::session & sql) + : table_creator_base(sql) + { + sql << "create table soci_test(num_float number, num_int numeric(4,0)," + " name varchar2(20), sometime date, chr char)"; + } +}; + +struct table_creator_three : public table_creator_base +{ + table_creator_three(soci::session & sql) + : table_creator_base(sql) + { + sql << "create table soci_test(name varchar2(100) not null, " + "phone varchar2(15))"; + } +}; + +struct table_creator_four : public table_creator_base +{ + table_creator_four(soci::session & sql) + : table_creator_base(sql) + { + sql << "create table soci_test(val number)"; + } +}; + +struct table_creator_for_xml : table_creator_base +{ + table_creator_for_xml(soci::session& sql) + : table_creator_base(sql) + { + sql << "create table soci_test(id integer, x xmltype)"; + } +}; + +struct table_creator_for_clob : table_creator_base +{ + table_creator_for_clob(soci::session& sql) + : table_creator_base(sql) + { + sql << "create table soci_test(id integer, s clob)"; + } +}; + +struct table_creator_for_blob : public tests::table_creator_base +{ + table_creator_for_blob(soci::session &sql) : tests::table_creator_base(sql) + { + sql << "create table soci_test(id integer, b blob)"; + } +}; + struct table_creator_for_timestamp : public tests::table_creator_base { table_creator_for_timestamp(soci::session &sql) : tests::table_creator_base(sql) @@ -1411,73 +1479,6 @@ TEST_CASE("Bulk iterators", "[oracle][bulkiters]") // Support for soci Common Tests // -struct table_creator_one : public table_creator_base -{ - table_creator_one(soci::session & sql) - : table_creator_base(sql) - { - sql << "create table soci_test(id number(10,0), val number(8,0), c char, " - "str varchar2(20), sh number, ll number, ul number, d number, " - "num76 numeric(7,6), " - "tm date, i1 number, i2 number, i3 number, name varchar2(20))"; - } -}; - -struct table_creator_two : public table_creator_base -{ - table_creator_two(soci::session & sql) - : table_creator_base(sql) - { - sql << "create table soci_test(num_float number, num_int numeric(4,0)," - " name varchar2(20), sometime date, chr char)"; - } -}; - -struct table_creator_three : public table_creator_base -{ - table_creator_three(soci::session & sql) - : table_creator_base(sql) - { - sql << "create table soci_test(name varchar2(100) not null, " - "phone varchar2(15))"; - } -}; - -struct table_creator_four : public table_creator_base -{ - table_creator_four(soci::session & sql) - : table_creator_base(sql) - { - sql << "create table soci_test(val number)"; - } -}; - -struct table_creator_for_xml : table_creator_base -{ - table_creator_for_xml(soci::session& sql) - : table_creator_base(sql) - { - sql << "create table soci_test(id integer, x xmltype)"; - } -}; - -struct table_creator_for_clob : table_creator_base -{ - table_creator_for_clob(soci::session& sql) - : table_creator_base(sql) - { - sql << "create table soci_test(id integer, s clob)"; - } -}; - -struct table_creator_for_blob : public tests::table_creator_base -{ - table_creator_for_blob(soci::session &sql) : tests::table_creator_base(sql) - { - sql << "create table soci_test(id integer, b blob)"; - } -}; - class test_context :public test_context_common { public: From 2b9afba3f5f2c85494d456c572948b4aa0f5a8fa Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Tue, 19 Nov 2024 23:29:46 +0100 Subject: [PATCH 2/5] Make procedure_creator_base helper more flexible and rename it This will allow to use it for creating functions as well as procedures (and also procedures with a name other than "soci_test" if this is ever needed). --- tests/oracle/test-oracle.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/oracle/test-oracle.cpp b/tests/oracle/test-oracle.cpp index f38ea3451..73fe48077 100644 --- a/tests/oracle/test-oracle.cpp +++ b/tests/oracle/test-oracle.cpp @@ -307,27 +307,30 @@ TEST_CASE("Oracle rowid", "[oracle][rowid]") } // Stored procedures -class procedure_creator_base +class creator_base { public: - procedure_creator_base(session& sql) - : msession(sql) { drop(); } + explicit creator_base(session& sql, std::string what = "procedure soci_test") + : msession(sql), what_{std::move(what)} { drop(); } + + ~creator_base() { drop();} - virtual ~procedure_creator_base() { drop();} private: void drop() { - try { msession << "drop procedure soci_test"; } catch (soci_error&) {} + try { msession << "drop " + what_; } catch (soci_error&) {} } session& msession; - SOCI_NOT_COPYABLE(procedure_creator_base) + std::string const what_; + + SOCI_NOT_COPYABLE(creator_base) }; -struct procedure_creator : procedure_creator_base +struct procedure_creator : creator_base { procedure_creator(soci::session & sql) - : procedure_creator_base(sql) + : creator_base(sql) { sql << "create or replace procedure soci_test(output out varchar2," @@ -393,20 +396,20 @@ namespace soci }; } -struct in_out_procedure_creator : public procedure_creator_base +struct in_out_procedure_creator : public creator_base { in_out_procedure_creator(soci::session & sql) - : procedure_creator_base(sql) + : creator_base(sql) { sql << "create or replace procedure soci_test(s in out varchar2)" " as begin s := s || s; end;"; } }; -struct returns_null_procedure_creator : public procedure_creator_base +struct returns_null_procedure_creator : public creator_base { returns_null_procedure_creator(soci::session & sql) - : procedure_creator_base(sql) + : creator_base(sql) { sql << "create or replace procedure soci_test(s in out varchar2)" " as begin s := NULL; end;"; @@ -842,10 +845,10 @@ struct person_table_creator : public table_creator_base } }; -struct times100_procedure_creator : public procedure_creator_base +struct times100_procedure_creator : public creator_base { times100_procedure_creator(soci::session & sql) - : procedure_creator_base(sql) + : creator_base(sql) { sql << "create or replace procedure soci_test(id in out number)" " as begin id := id * 100; end;"; From 5e9f2000013874a10ffbf52a7d45e45874db5400 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Tue, 19 Nov 2024 23:30:26 +0100 Subject: [PATCH 3/5] Read entire Oracle CLOB and not just its length in characters We need to read the entire contents of the CLOB in Oracle backend and not just the number of bytes corresponding to its length in characters as returned by OCILobGetLength() because this may (and will) be strictly less than its full size in bytes for any encoding using multiple bytes per character, such as the de facto standard UTF-8. Also make reading CLOBs more efficient by doing what Oracle documentation suggests and using the LOB chunk size for reading. Finally, add a unit test checking that using non-ASCII strings in UTF-8 (which had to be enabled for the CI) with CLOBs does work. This commit is best viewed ignoring whitespace-only changes. --- scripts/ci/oracle.sh | 3 ++ src/backends/oracle/standard-into-type.cpp | 57 ++++++++++++---------- tests/oracle/test-oracle.cpp | 50 +++++++++++++++++++ 3 files changed, 85 insertions(+), 25 deletions(-) diff --git a/scripts/ci/oracle.sh b/scripts/ci/oracle.sh index 4afba9e9b..e7c0d4653 100644 --- a/scripts/ci/oracle.sh +++ b/scripts/ci/oracle.sh @@ -15,6 +15,9 @@ export ORACLE_SID=XE # Add path to Oracle libraries. export LD_LIBRARY_PATH=$ORACLE_HOME/lib +# We need to tell Oracle to use UTF-8 for the tests using non-ASCII strings. +export NLS_LANG=.AL32UTF8 + # Execute any command in the Oracle container: pass the command with its # arguments directly to the function. oracle_exec() diff --git a/src/backends/oracle/standard-into-type.cpp b/src/backends/oracle/standard-into-type.cpp index 5c0e78f19..8106490c9 100644 --- a/src/backends/oracle/standard-into-type.cpp +++ b/src/backends/oracle/standard-into-type.cpp @@ -235,41 +235,48 @@ void oracle_standard_into_type_backend::pre_fetch() void oracle::read_from_lob(oracle_session_backend& session, OCILobLocator * lobp, std::string & value) { - ub4 len; - - sword res = OCILobGetLength(session.svchp_, session.errhp_, - lobp, &len); + // We can't get the CLOB size in bytes directly, only in characters, which + // is useless for UTF-8 as it doesn't tell us how much memory do we + // actually need for storing it, so we'd have to allocate 4 bytes for every + // character which could be a huge overkill. So instead read the CLOB in + // chunks of its natural size until we get everything. + ub4 len = 0; + sword res = OCILobGetChunkSize(session.svchp_, session.errhp_, lobp, &len); if (res != OCI_SUCCESS) { throw_oracle_soci_error(res, session.errhp_); } - std::vector buf(len); + value.clear(); + + if (!len) + return; - if (len != 0) + // Read the LOB in chunks into the buffer while anything remains to be read. + std::vector buf(len); + ub4 offset = 1; + do { - ub4 lenChunk = len; - ub4 offset = 1; - do + // By setting the input length to 0, we tell Oracle to read as many + // bytes as possible (so called "streaming" mode). + ub4 lenChunk = 0; + res = OCILobRead(session.svchp_, session.errhp_, lobp, + &lenChunk, offset, + &buf[0], len, + 0, 0, 0, 0); + + if (res == OCI_NEED_DATA) { - res = OCILobRead(session.svchp_, session.errhp_, - lobp, &lenChunk, - offset, - reinterpret_cast(&buf[offset - 1]), - len, 0, 0, 0, 0); - if (res == OCI_NEED_DATA) - { - offset += lenChunk; - } - else if (res != OCI_SUCCESS) - { - throw_oracle_soci_error(res, session.errhp_); - } + offset += lenChunk; + } + else if (res != OCI_SUCCESS) + { + throw_oracle_soci_error(res, session.errhp_); } - while (res == OCI_NEED_DATA); - } - value.assign(buf.begin(), buf.end()); + value.append(buf.begin(), buf.begin() + lenChunk); + } + while (res == OCI_NEED_DATA); } void oracle_standard_into_type_backend::post_fetch( diff --git a/tests/oracle/test-oracle.cpp b/tests/oracle/test-oracle.cpp index 73fe48077..df0338f4c 100644 --- a/tests/oracle/test-oracle.cpp +++ b/tests/oracle/test-oracle.cpp @@ -1478,6 +1478,56 @@ TEST_CASE("Bulk iterators", "[oracle][bulkiters]") sql << "drop table t"; } +TEST_CASE ( "Oracle CLOB", "[oracle][clob]" ) +{ + soci::session sql ( backEnd, connectString ); + + // Use a non-ASCII string to test that CLOBs work when byte count differs + // from character count. + // + // Note that this requires some Unicode encoding to be used, e.g. UTF-8 by + // setting NLS_LANG=.AL32UTF8 in the environment. + std::string const test_utf8{"Привет système"}; + + table_creator_for_clob clob_table(sql); + long_string ls; + ls.value = test_utf8; + sql << "insert into soci_test(id, s) values(1, :s)", use(ls); + ls.value.clear(); + sql << "select s from soci_test where id=1", into(ls); + CHECK(ls.value == test_utf8); + + // RAII helper to create the function we use below. + struct soci_repeat : creator_base + { + explicit soci_repeat(session& sql) + : creator_base(sql, "function soci_repeat") + { + sql << R"( +create function soci_repeat(s in string, xCount in integer) return clob as + tmp clob; +begin + for i in 1..xCount loop tmp := tmp || s; end loop; + return tmp; +end; +)" + ; + } + } soci_repeat_func(sql); + + // Append a big number of Unicode chars to the CLOB to test that things + // work with CLOBs larger than a single chunk (which is ~8KiB by default). + // + // Note: Oracle 11 used in the CI tests doesn't seem to handle characters + // outside of the BMP correctly even if current Oracle versions have no + // problems with them, so don't use them here for now. + unsigned xCount = 10000; + sql << "select :s || soci_repeat('Я', :xCount) from dual", + use(ls), use(xCount), into(ls); + + REQUIRE(ls.value.length() == test_utf8.length() + 2*xCount); +} + // // Support for soci Common Tests // From ba202d03f4894043057457c84ab1d5e37c1e4f68 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Wed, 20 Nov 2024 15:13:36 +0100 Subject: [PATCH 4/5] Remove unused offset from OCILobRead() calls On "streaming mode" the offset is ignored, except for the first call, so don't bother updating it. Also restructure the code in a slightly simpler way. --- src/backends/oracle/standard-into-type.cpp | 24 +++++++++++++--------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/backends/oracle/standard-into-type.cpp b/src/backends/oracle/standard-into-type.cpp index 8106490c9..7593d2b96 100644 --- a/src/backends/oracle/standard-into-type.cpp +++ b/src/backends/oracle/standard-into-type.cpp @@ -254,29 +254,33 @@ void oracle::read_from_lob(oracle_session_backend& session, // Read the LOB in chunks into the buffer while anything remains to be read. std::vector buf(len); - ub4 offset = 1; - do + for (bool done = false; !done; ) { // By setting the input length to 0, we tell Oracle to read as many // bytes as possible (so called "streaming" mode). ub4 lenChunk = 0; res = OCILobRead(session.svchp_, session.errhp_, lobp, - &lenChunk, offset, + &lenChunk, + 1, // Only used for the first chunk, ignored later. &buf[0], len, 0, 0, 0, 0); - if (res == OCI_NEED_DATA) + switch (res) { - offset += lenChunk; - } - else if (res != OCI_SUCCESS) - { - throw_oracle_soci_error(res, session.errhp_); + case OCI_NEED_DATA: + // Nothing to do, just continue reading. + break; + + case OCI_SUCCESS: + done = true; + break; + + default: + throw_oracle_soci_error(res, session.errhp_); } value.append(buf.begin(), buf.begin() + lenChunk); } - while (res == OCI_NEED_DATA); } void oracle_standard_into_type_backend::post_fetch( From 0db57970c79ef7612201a8269ae7a7807747365d Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Wed, 20 Nov 2024 15:18:44 +0100 Subject: [PATCH 5/5] Avoid extra copy of the data in oracle::read_from_lob() Read directly into the provided string instead of reading into a temporary buffer and then copying into the string. --- src/backends/oracle/standard-into-type.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/backends/oracle/standard-into-type.cpp b/src/backends/oracle/standard-into-type.cpp index 7593d2b96..366ca8af1 100644 --- a/src/backends/oracle/standard-into-type.cpp +++ b/src/backends/oracle/standard-into-type.cpp @@ -253,16 +253,18 @@ void oracle::read_from_lob(oracle_session_backend& session, return; // Read the LOB in chunks into the buffer while anything remains to be read. - std::vector buf(len); for (bool done = false; !done; ) { + auto const prevSize = value.size(); + value.resize(prevSize + len); + // By setting the input length to 0, we tell Oracle to read as many // bytes as possible (so called "streaming" mode). ub4 lenChunk = 0; res = OCILobRead(session.svchp_, session.errhp_, lobp, &lenChunk, 1, // Only used for the first chunk, ignored later. - &buf[0], len, + const_cast(value.data()) + prevSize, len, 0, 0, 0, 0); switch (res) @@ -279,8 +281,12 @@ void oracle::read_from_lob(oracle_session_backend& session, throw_oracle_soci_error(res, session.errhp_); } - value.append(buf.begin(), buf.begin() + lenChunk); + value.resize(prevSize + lenChunk); } + + // We may have over-allocated the string, especially when a big chunk size + // is used, so release the unused memory. + value.shrink_to_fit(); } void oracle_standard_into_type_backend::post_fetch(