Skip to content

Commit 0367525

Browse files
committed
Remove GSL in favor of stdlib
We mostly used GSL for its `span` class. After moving to the C++20 standard, we now have `std::span` available to us. It is more explicit in how it works because it's driven by the standard, and it allows us not to rely on a third-party library.

`std::span` performs no bounds checking, so indexed access had to be guarded with explicit checks where necessary. Some helper functions were introduced, including an equality operator.

`Expect` macros were replaced with `if` statements that throw `std::invalid_argument` if the contract is not upheld.

Changelog-changed: GSL is removed as dependency
Changelog-changed: gsl::span is replaced with std::span
Signed-off-by: Michal Siedlaczek <[email protected]>
1 parent d645b4e commit 0367525

36 files changed

+236
-171
lines changed

CMakeLists.txt

-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ target_link_libraries(pisa
127127
Boost::boost
128128
mio
129129
mio_base
130-
GSL
131130
spdlog
132131
fmt::fmt
133132
range-v3

external/CMakeLists.txt

-3
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,6 @@ add_library(simdcomp STATIC ${CMAKE_CURRENT_SOURCE_DIR}/simdcomp/src/simdbitpack
4747
${CMAKE_CURRENT_SOURCE_DIR}/simdcomp/src/simdcomputil.c
4848
)
4949

50-
# Add GSL
51-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/GSL EXCLUDE_FROM_ALL)
52-
5350
# Add Boost
5451
if (NOT PISA_SYSTEM_BOOST)
5552
add_subdirectory(boost-cmake)

include/pisa/codec/block_codec_registry.hpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#pragma once
22

3+
#include <algorithm>
34
#include <memory>
5+
#include <span>
46
#include <string_view>
57

68
#include <fmt/format.h>
7-
#include <gsl/span>
89

910
#include "codec/block_codec.hpp"
1011

@@ -44,6 +45,6 @@ struct BlockCodecRegistry {
4445
/**
4546
* Lists the names of all known block codecs.
4647
*/
47-
[[nodiscard]] constexpr auto get_block_codec_names() -> gsl::span<std::string_view const>;
48+
[[nodiscard]] constexpr auto get_block_codec_names() -> std::span<std::string_view const>;
4849

4950
} // namespace pisa

include/pisa/concepts/container.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
// Copyright 2024 PISA developers
32
//
43
// Licensed under the Apache License, Version 2.0 (the "License");

include/pisa/invert.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
#pragma once
22

33
#include <optional>
4+
#include <span>
45
#include <thread>
56
#include <vector>
67

7-
#include <gsl/span>
88
#include <range/v3/view/iota.hpp>
99
#include <tbb/blocked_range.h>
1010

@@ -16,7 +16,7 @@ namespace pisa { namespace invert {
1616
using PostingIterator = typename std::vector<Posting>::iterator;
1717
using Documents = std::unordered_map<Term_Id, std::vector<Document_Id>>;
1818
using Frequencies = std::unordered_map<Term_Id, std::vector<Frequency>>;
19-
using DocumentRange = gsl::span<gsl::span<Term_Id const>>;
19+
using DocumentRange = std::span<std::span<Term_Id const>>;
2020

2121
/// Inverted index abstraction used internally in the inverting process.
2222
///
@@ -42,7 +42,7 @@ namespace pisa { namespace invert {
4242

4343
/// A single slice view over a chunk of a forward index.
4444
struct ForwardIndexSlice {
45-
gsl::span<gsl::span<Term_Id const>> documents;
45+
std::span<std::span<Term_Id const>> documents;
4646
ranges::iota_view<Document_Id, Document_Id> document_ids;
4747
};
4848

include/pisa/io.hpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@
33
#include <exception>
44
#include <filesystem>
55
#include <iostream>
6+
#include <span>
67
#include <string>
78
#include <vector>
89

9-
#include <gsl/span>
10-
1110
namespace pisa::io {
1211

1312
/// Indicates that a file was not found.
@@ -45,6 +44,6 @@ void for_each_line(std::istream& is, Function fn) {
4544
[[nodiscard]] auto load_data(std::string const& data_file) -> std::vector<char>;
4645

4746
/// Writes bytes to a file.
48-
void write_data(std::string const& data_file, gsl::span<std::byte const> bytes);
47+
void write_data(std::string const& data_file, std::span<std::byte const> bytes);
4948

5049
} // namespace pisa::io

include/pisa/linear_quantizer.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#include <cstdint>
44

55
#include <fmt/format.h>
6-
#include <gsl/gsl_assert>
76

87
namespace pisa {
98

include/pisa/memory_source.hpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
#include <filesystem>
44
#include <memory>
5+
#include <span>
56
#include <vector>
67

7-
#include <gsl/span>
88
#include <mio/mmap.hpp>
99

1010
namespace pisa {
@@ -29,7 +29,7 @@ class MemorySource {
2929
/// Constructs a memory source from a vector.
3030
///
3131
/// NOTE: This is non-owning source, so tread carefully!
32-
[[nodiscard]] static auto from_span(gsl::span<char> span) -> MemorySource;
32+
[[nodiscard]] static auto from_span(std::span<char> span) -> MemorySource;
3333

3434
/// Constructs a memory source using a memory mapped file.
3535
///
@@ -65,13 +65,13 @@ class MemorySource {
6565
[[nodiscard]] auto size() const -> size_type;
6666

6767
/// Full span over memory.
68-
[[nodiscard]] auto span() const -> gsl::span<value_type const>;
68+
[[nodiscard]] auto span() const -> std::span<value_type const>;
6969

7070
/// Subspan of memory.
7171
///
7272
/// \throws std::out_of_range if offset + size is out of bounds
73-
[[nodiscard]] auto subspan(size_type offset, size_type size = gsl::dynamic_extent) const
74-
-> gsl::span<value_type const>;
73+
[[nodiscard]] auto subspan(size_type offset, size_type size = std::dynamic_extent) const
74+
-> std::span<value_type const>;
7575

7676
/// Type erasure interface. Any type implementing it is supported as a memory source.
7777
struct Interface {

include/pisa/payload_vector.hpp

+20-20
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
#pragma once
22

3+
#include <algorithm>
34
#include <filesystem>
45
#include <fstream>
56
#include <iostream>
67
#include <iterator>
78
#include <optional>
9+
#include <span>
810
#include <string_view>
911
#include <vector>
1012

1113
#include <fmt/format.h>
12-
#include <gsl/gsl_assert>
13-
#include <gsl/span>
1414

1515
namespace pisa {
1616

@@ -24,8 +24,8 @@ namespace detail {
2424
using value_type = Payload_View;
2525
using difference_type = std::make_signed_t<size_type>;
2626

27-
typename gsl::span<size_type const>::iterator offset_iter;
28-
typename gsl::span<std::byte const>::iterator payload_iter;
27+
typename std::span<size_type const>::iterator offset_iter;
28+
typename std::span<std::byte const>::iterator payload_iter;
2929

3030
constexpr auto operator++() -> Payload_Vector_Iterator& {
3131
++offset_iter;
@@ -194,7 +194,7 @@ auto encode_payload_vector(InputIterator first, InputIterator last, PayloadEncod
194194
}
195195

196196
template <typename Payload, typename PayloadEncodingFn>
197-
auto encode_payload_vector(gsl::span<Payload const> values, PayloadEncodingFn encoding_fn) {
197+
auto encode_payload_vector(std::span<Payload const> values, PayloadEncodingFn encoding_fn) {
198198
return encode_payload_vector(values.begin(), values.end(), encoding_fn);
199199
}
200200

@@ -207,13 +207,13 @@ auto encode_payload_vector(InputIterator first, InputIterator last) {
207207
});
208208
}
209209

210-
inline auto encode_payload_vector(gsl::span<std::string const> values) {
210+
inline auto encode_payload_vector(std::span<std::string const> values) {
211211
return encode_payload_vector(values.begin(), values.end());
212212
}
213213

214214
template <typename... T>
215-
constexpr auto unpack_head(gsl::span<std::byte const> mem)
216-
-> std::tuple<T..., gsl::span<std::byte const>> {
215+
constexpr auto unpack_head(std::span<std::byte const> mem)
216+
-> std::tuple<T..., std::span<std::byte const>> {
217217
static_assert(detail::all_pod<T...>::value);
218218
auto offset = detail::sizeofs<T...>::value;
219219
if (offset > mem.size()) {
@@ -223,10 +223,10 @@ constexpr auto unpack_head(gsl::span<std::byte const> mem)
223223
}
224224
auto tail = mem.subspan(offset);
225225
auto head = detail::unpack<T...>(mem.data());
226-
return std::tuple_cat(head, std::tuple<gsl::span<std::byte const>>(tail));
226+
return std::tuple_cat(head, std::tuple<std::span<std::byte const>>(tail));
227227
}
228228

229-
[[nodiscard]] inline auto split(gsl::span<std::byte const> mem, std::size_t offset) {
229+
[[nodiscard]] inline auto split(std::span<std::byte const> mem, std::size_t offset) {
230230
if (offset > mem.size()) {
231231
throw std::runtime_error(
232232
fmt::format("Cannot split span of size {} at position {}", mem.size(), offset)
@@ -236,14 +236,14 @@ constexpr auto unpack_head(gsl::span<std::byte const> mem)
236236
}
237237

238238
template <typename T>
239-
[[nodiscard]] auto cast_span(gsl::span<std::byte const> mem) -> gsl::span<T const> {
239+
[[nodiscard]] auto cast_span(std::span<std::byte const> mem) -> std::span<T const> {
240240
auto type_size = sizeof(T);
241241
if (mem.size() % type_size != 0) {
242242
throw std::runtime_error(
243243
fmt::format("Failed to cast byte-span to span of T of size {}", type_size)
244244
);
245245
}
246-
return gsl::make_span(reinterpret_cast<T const*>(mem.data()), mem.size() / type_size);
246+
return std::span(reinterpret_cast<T const*>(mem.data()), mem.size() / type_size);
247247
}
248248

249249
template <typename Payload_View = std::string_view>
@@ -257,17 +257,17 @@ class Payload_Vector {
257257
explicit Payload_Vector(Payload_Vector_Buffer const& container)
258258
: offsets_(container.offsets), payloads_(container.payloads) {}
259259

260-
Payload_Vector(gsl::span<size_type const> offsets, gsl::span<std::byte const> payloads)
260+
Payload_Vector(std::span<size_type const> offsets, std::span<std::byte const> payloads)
261261
: offsets_(offsets), payloads_(payloads) {}
262262

263263
template <typename ContiguousContainer>
264264
[[nodiscard]] constexpr static auto from(ContiguousContainer&& mem) -> Payload_Vector {
265-
return from(gsl::make_span(reinterpret_cast<std::byte const*>(mem.data()), mem.size()));
265+
return from(std::span(reinterpret_cast<std::byte const*>(mem.data()), mem.size()));
266266
}
267267

268-
[[nodiscard]] static auto from(gsl::span<std::byte const> mem) -> Payload_Vector {
268+
[[nodiscard]] static auto from(std::span<std::byte const> mem) -> Payload_Vector {
269269
size_type length;
270-
gsl::span<std::byte const> tail;
270+
std::span<std::byte const> tail;
271271
try {
272272
std::tie(length, tail) = unpack_head<size_type>(mem);
273273
} catch (std::runtime_error const& err) {
@@ -276,7 +276,7 @@ class Payload_Vector {
276276
);
277277
}
278278

279-
gsl::span<std::byte const> offsets, payloads;
279+
std::span<std::byte const> offsets, payloads;
280280
try {
281281
std::tie(offsets, payloads) = split(tail, (length + 1U) * sizeof(size_type));
282282
} catch (std::runtime_error const& err) {
@@ -314,8 +314,8 @@ class Payload_Vector {
314314
}
315315

316316
private:
317-
gsl::span<size_type const> offsets_;
318-
gsl::span<std::byte const> payloads_;
317+
std::span<size_type const> offsets_;
318+
std::span<std::byte const> payloads_;
319319
};
320320

321321
/// Find the position of `value` in a sorted range.
@@ -339,7 +339,7 @@ auto binary_search(Iter begin, Iter end, T value, Compare cmp = std::less<>{})
339339
/// It calls the function overload that takes iterators. See that overload's documentation for more
340340
/// information.
341341
template <typename T, typename Compare = std::less<T>>
342-
auto binary_search(gsl::span<std::add_const_t<T>> range, T value, Compare cmp = std::less<T>{})
342+
auto binary_search(std::span<std::add_const_t<T>> range, T value, Compare cmp = std::less<T>{})
343343
-> std::optional<std::ptrdiff_t> {
344344
return pisa::binary_search(range.begin(), range.end(), value, cmp);
345345
}

include/pisa/reorder_docids.hpp

+13-9
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
#include <cstdint>
55
#include <fstream>
66
#include <random>
7+
#include <span>
78
#include <string>
89
#include <vector>
910

10-
#include <gsl/span>
1111
#include <spdlog/spdlog.h>
1212

1313
#include "binary_freq_collection.hpp"
1414
#include "payload_vector.hpp"
1515
#include "recursive_graph_bisection.hpp"
16+
#include "span.hpp"
1617
#include "util/inverted_index_utils.hpp"
1718
#include "util/progress.hpp"
1819

@@ -80,8 +81,8 @@ namespace detail {
8081
forward_index fwd = options.input_fwd
8182
? forward_index::read(*options.input_fwd)
8283
: forward_index::from_inverted_index(
83-
options.input_basename, options.min_length, options.compress_fwd
84-
);
84+
options.input_basename, options.min_length, options.compress_fwd
85+
);
8586

8687
if (options.output_fwd) {
8788
forward_index::write(fwd, *options.output_fwd);
@@ -137,7 +138,7 @@ struct ReorderOptions {
137138
inline auto reorder_postings(
138139
binary_freq_collection const& input,
139140
std::string_view output_basename,
140-
gsl::span<std::uint32_t const> mapping
141+
std::span<std::uint32_t const> mapping
141142
) {
142143
pisa::progress progress("Reassigning IDs in posting lists", input.size());
143144

@@ -149,7 +150,7 @@ inline auto reorder_postings(
149150
std::vector<std::pair<std::uint32_t, std::uint32_t>> posting_list;
150151
for (const auto& seq: input) {
151152
for (size_t i = 0; i < seq.docs.size(); ++i) {
152-
posting_list.emplace_back(mapping[seq.docs.begin()[i]], seq.freqs.begin()[i]);
153+
posting_list.emplace_back(pisa::at(mapping, seq.docs.begin()[i]), seq.freqs.begin()[i]);
153154
}
154155

155156
std::sort(posting_list.begin(), posting_list.end());
@@ -169,7 +170,7 @@ inline auto reorder_postings(
169170
inline auto reorder_lexicon(
170171
std::string const& input_lexicon,
171172
std::string const& output_lexicon,
172-
gsl::span<std::uint32_t const> mapping
173+
std::span<std::uint32_t const> mapping
173174
)
174175

175176
{
@@ -187,16 +188,19 @@ inline auto reorder_lexicon(
187188
inline auto reorder_sizes(
188189
binary_collection const& input_sizes,
189190
std::uint64_t num_docs,
190-
gsl::span<std::uint32_t const> mapping,
191+
std::span<std::uint32_t const> mapping,
191192
std::string_view output_basename
192193
) {
193194
pisa::progress progress("Reordering document sizes", num_docs);
194195
auto sizes = *input_sizes.begin();
195196
if (sizes.size() != num_docs) {
196197
throw std::invalid_argument("Invalid sizes file");
197198
}
199+
if (mapping.size() != num_docs) {
200+
throw std::invalid_argument("Invalid mapping size");
201+
}
198202

199-
auto size_sequence = gsl::span(sizes.begin(), sizes.size());
203+
auto size_sequence = std::span(sizes.begin(), sizes.size());
200204
std::vector<std::uint32_t> new_sizes(num_docs);
201205
for (size_t i = 0; i < num_docs; ++i) {
202206
new_sizes[mapping[i]] = size_sequence[i];
@@ -212,7 +216,7 @@ inline void reorder_from_mapping(
212216
binary_freq_collection const& input_collection,
213217
binary_collection const& input_sizes,
214218
ReorderOptions const& options,
215-
gsl::span<std::uint32_t const> mapping
219+
std::span<std::uint32_t const> mapping
216220
) {
217221
auto num_docs = input_collection.num_docs();
218222
reorder_sizes(input_sizes, num_docs, mapping, options.output_basename);

include/pisa/sharding.hpp

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
#pragma once
22

33
#include <optional>
4+
#include <span>
45

5-
#include <gsl/span>
66
#include <spdlog/spdlog.h>
77

8-
#include "io.hpp"
98
#include "type_safe.hpp"
109
#include "vec_map.hpp"
1110

@@ -18,10 +17,10 @@ format_shard(std::string_view basename, Shard_Id shard, std::string_view suffix
1817

1918
auto resolve_shards(std::string_view basename, std::string_view suffix = {}) -> std::vector<Shard_Id>;
2019

21-
auto mapping_from_files(std::istream* full_titles, gsl::span<std::istream*> shard_titles)
20+
auto mapping_from_files(std::istream* full_titles, std::span<std::istream*> shard_titles)
2221
-> VecMap<Document_Id, Shard_Id>;
2322

24-
auto mapping_from_files(std::string const& full_titles, gsl::span<std::string const> shard_titles)
23+
auto mapping_from_files(std::string const& full_titles, std::span<std::string const> shard_titles)
2524
-> VecMap<Document_Id, Shard_Id>;
2625

2726
auto create_random_mapping(

0 commit comments

Comments
 (0)