Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Storage prefixes always #954

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions libgraph/include/katana/PropertyGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,17 +250,31 @@ class KATANA_EXPORT PropertyGraph {
const katana::RDGLoadOptions& opts = katana::RDGLoadOptions());

/// Make a property graph from topology
// [deprecated("please provide a storage prefix")]
static Result<std::unique_ptr<PropertyGraph>> Make(
const URI& rdg_dir, GraphTopology&& topo_to_assign);

/// Make a property graph from topology and associate it with an ephemeral
/// storage prefix. This is approximates an in-memory graph.
static Result<std::unique_ptr<PropertyGraph>> MakeEphemeral(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I support Tyler's suggestion of keeping the name Make

GraphTopology&& topo_to_assign);

/// Make a property graph from topology
static Result<std::unique_ptr<PropertyGraph>> Make(
const URI& rdg_dir, GraphTopology&& topo_to_assign);
[[deprecated("please provide a storage prefix")]] static Result<
std::unique_ptr<PropertyGraph>>
Make(GraphTopology&& topo_to_assign);

/// Make a property graph from topology and type arrays and associate it with
/// an ephemeral storage prefix. This approximates an in-memory graph.
static Result<std::unique_ptr<PropertyGraph>> MakeEphemeral(
GraphTopology&& topo_to_assign, EntityTypeIDArray&& node_entity_type_ids,
EntityTypeIDArray&& edge_entity_type_ids,
EntityTypeManager&& node_type_manager,
EntityTypeManager&& edge_type_manager);

/// Make a property graph from topology and type arrays
// [deprecated("please provide a storage prefix")]
static Result<std::unique_ptr<PropertyGraph>> Make(
[[deprecated("please provide a storage prefix")]] static Result<
std::unique_ptr<PropertyGraph>>
Make(
GraphTopology&& topo_to_assign, EntityTypeIDArray&& node_entity_type_ids,
EntityTypeIDArray&& edge_entity_type_ids,
EntityTypeManager&& node_type_manager,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,9 @@ struct ClusteringImplementationBase {
// Thus any property index indirection of the original topology has to be dropped.
katana::GraphTopology topo_copy =
GraphTopology::CopyWithoutPropertyIndexes(pfg_from.topology());
return katana::PropertyGraph::Make(std::move(topo_copy));
// PR question: is this okay? My guess is that this will be written to
// rarely if ever?
return katana::PropertyGraph::MakeEphemeral(std::move(topo_copy));
}

/**
Expand Down Expand Up @@ -681,7 +683,8 @@ struct ClusteringImplementationBase {

GraphTopology topo_next{
std::move(prefix_edges_count), std::move(out_dests_next)};
auto pfg_next_res = katana::PropertyGraph::Make(std::move(topo_next));
auto pfg_next_res =
katana::PropertyGraph::MakeEphemeral(std::move(topo_next));

if (!pfg_next_res) {
return pfg_next_res.error();
Expand Down
3 changes: 2 additions & 1 deletion libgraph/src/BuildGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1834,7 +1834,8 @@ ImportData::ValueFromArrowScalar(std::shared_ptr<arrow::Scalar> scalar) {
katana::Result<std::unique_ptr<katana::PropertyGraph>>
katana::ConvertToPropertyGraph(
katana::GraphComponents&& graph_comps, katana::TxnContext* txn_ctx) {
auto pg_result = katana::PropertyGraph::Make(std::move(graph_comps.topology));
auto pg_result =
katana::PropertyGraph::MakeEphemeral(std::move(graph_comps.topology));
if (!pg_result) {
return pg_result.error().WithContext("adding topology");
}
Expand Down
78 changes: 51 additions & 27 deletions libgraph/src/PropertyGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,25 @@ katana::PropertyGraph::Make(
std::move(rdg_file), std::move(rdg), txn_ctx);
}

katana::Result<std::unique_ptr<katana::PropertyGraph>>
katana::PropertyGraph::Make(
const katana::URI& rdg_dir, katana::GraphTopology&& topo_to_assign) {
return Make(
rdg_dir, std::move(topo_to_assign),
MakeDefaultEntityTypeIDArray(topo_to_assign.NumNodes()),
MakeDefaultEntityTypeIDArray(topo_to_assign.NumEdges()),
EntityTypeManager{}, EntityTypeManager{});
}

katana::Result<std::unique_ptr<katana::PropertyGraph>>
katana::PropertyGraph::MakeEphemeral(katana::GraphTopology&& topo_to_assign) {
return MakeEphemeral(
std::move(topo_to_assign),
MakeDefaultEntityTypeIDArray(topo_to_assign.NumNodes()),
MakeDefaultEntityTypeIDArray(topo_to_assign.NumEdges()),
EntityTypeManager{}, EntityTypeManager{});
}

katana::Result<std::unique_ptr<katana::PropertyGraph>>
katana::PropertyGraph::Make(katana::GraphTopology&& topo_to_assign) {
return std::make_unique<katana::PropertyGraph>(
Expand All @@ -245,12 +264,36 @@ katana::PropertyGraph::Make(katana::GraphTopology&& topo_to_assign) {

katana::Result<std::unique_ptr<katana::PropertyGraph>>
katana::PropertyGraph::Make(
const katana::URI& rdg_dir, katana::GraphTopology&& topo_to_assign) {
return Make(
rdg_dir, std::move(topo_to_assign),
MakeDefaultEntityTypeIDArray(topo_to_assign.NumNodes()),
MakeDefaultEntityTypeIDArray(topo_to_assign.NumEdges()),
EntityTypeManager{}, EntityTypeManager{});
const katana::URI& rdg_dir, katana::GraphTopology&& topo_to_assign,
NUMAArray<EntityTypeID>&& node_entity_type_ids,
NUMAArray<EntityTypeID>&& edge_entity_type_ids,
EntityTypeManager&& node_type_manager,
EntityTypeManager&& edge_type_manager) {
auto retval = std::make_unique<katana::PropertyGraph>(
std::unique_ptr<katana::RDGFile>(), katana::RDG{},
std::move(topo_to_assign), std::move(node_entity_type_ids),
std::move(edge_entity_type_ids), std::move(node_type_manager),
std::move(edge_type_manager));
// It doesn't make sense to pass a RDGFile to the constructor because this
// PropertyGraph wasn't loaded from a file. But all PropertyGraphs have an
// associated storage location, so set one here.
retval->rdg().set_rdg_dir(rdg_dir);
return retval;
}

katana::Result<std::unique_ptr<katana::PropertyGraph>>
katana::PropertyGraph::MakeEphemeral(
katana::GraphTopology&& topo_to_assign,
NUMAArray<EntityTypeID>&& node_entity_type_ids,
NUMAArray<EntityTypeID>&& edge_entity_type_ids,
EntityTypeManager&& node_type_manager,
EntityTypeManager&& edge_type_manager) {
auto rdg = KATANA_CHECKED(RDG::MakeEphemeral());
return std::make_unique<katana::PropertyGraph>(
std::unique_ptr<katana::RDGFile>(), std::move(rdg),
std::move(topo_to_assign), std::move(node_entity_type_ids),
std::move(edge_entity_type_ids), std::move(node_type_manager),
std::move(edge_type_manager));
}

katana::Result<std::unique_ptr<katana::PropertyGraph>>
Expand Down Expand Up @@ -323,25 +366,6 @@ katana::PropertyGraph::MakeEmptyProjectedGraph(
pg, 0, nodes_bitset, std::move(original_to_projected_nodes_mapping), {});
}

katana::Result<std::unique_ptr<katana::PropertyGraph>>
katana::PropertyGraph::Make(
const katana::URI& rdg_dir, katana::GraphTopology&& topo_to_assign,
NUMAArray<EntityTypeID>&& node_entity_type_ids,
NUMAArray<EntityTypeID>&& edge_entity_type_ids,
EntityTypeManager&& node_type_manager,
EntityTypeManager&& edge_type_manager) {
auto retval = std::make_unique<katana::PropertyGraph>(
std::unique_ptr<katana::RDGFile>(), katana::RDG{},
std::move(topo_to_assign), std::move(node_entity_type_ids),
std::move(edge_entity_type_ids), std::move(node_type_manager),
std::move(edge_type_manager));
// It doesn't make sense to pass a RDGFile to the constructor because this
// PropertyGraph wasn't loaded from a file. But all PropertyGraphs have an
// associated storage location, so set one here.
retval->rdg().set_rdg_dir(rdg_dir);
return retval;
}

katana::Result<std::unique_ptr<katana::PropertyGraph>>
katana::PropertyGraph::Copy(katana::TxnContext* txn_ctx) const {
return Copy(
Expand Down Expand Up @@ -1593,7 +1617,7 @@ katana::CreateSymmetricGraph(katana::PropertyGraph* pg) {
katana::no_stats());

GraphTopology sym_topo(std::move(out_indices), std::move(out_dests));
return katana::PropertyGraph::Make(std::move(sym_topo));
return katana::PropertyGraph::MakeEphemeral(std::move(sym_topo));
}

katana::Result<std::unique_ptr<katana::PropertyGraph>>
Expand Down Expand Up @@ -1660,7 +1684,7 @@ katana::CreateTransposeGraphTopology(const GraphTopology& topology) {
katana::no_stats());

GraphTopology transpose_topo{std::move(out_indices), std::move(out_dests)};
return katana::PropertyGraph::Make(std::move(transpose_topo));
return katana::PropertyGraph::MakeEphemeral(std::move(transpose_topo));
}

bool
Expand Down
2 changes: 1 addition & 1 deletion libgraph/src/TopologyGeneration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ MakeTopologyImpl(F builder_fun) {
builder_fun(builder);

katana::GraphTopology topo = builder.ConvertToCSR();
auto res = katana::PropertyGraph::Make(std::move(topo));
auto res = katana::PropertyGraph::MakeEphemeral(std::move(topo));
KATANA_LOG_ASSERT(res);
return std::move(res.value());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ SubGraphNodeSet(

katana::GraphTopology sub_g_topo{
std::move(out_indices), std::move(out_dests)};
auto sub_g_res = katana::PropertyGraph::Make(std::move(sub_g_topo));
auto sub_g_res = katana::PropertyGraph::MakeEphemeral(std::move(sub_g_topo));

return sub_g_res;
}
Expand Down
2 changes: 1 addition & 1 deletion libgraph/test/TestTypedPropertyGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ MakeFileGraph(
katana::GraphTopology topo{
indices.data(), indices.size(), dests.data(), dests.size()};

auto g_res = katana::PropertyGraph::Make(std::move(topo));
auto g_res = katana::PropertyGraph::MakeEphemeral(std::move(topo));
KATANA_LOG_ASSERT(g_res);

std::unique_ptr<katana::PropertyGraph> g = std::move(g_res.value());
Expand Down
6 changes: 4 additions & 2 deletions libgraph/test/property-graph-transposed-view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@ TestTransposedView() {
builder_tr.AddEdge(n2, n1);
}

auto pg = KATANA_CHECKED(PropertyGraph::Make(builder.ConvertToCSR()));
auto pg =
KATANA_CHECKED(PropertyGraph::MakeEphemeral(builder.ConvertToCSR()));
TransposedGraphView pg_tr_view = pg->BuildView<TransposedGraphView>();

auto pg_tr = KATANA_CHECKED(PropertyGraph::Make(builder_tr.ConvertToCSR()));
auto pg_tr =
KATANA_CHECKED(PropertyGraph::MakeEphemeral(builder_tr.ConvertToCSR()));

for (Edge e : pg_tr_view.OutEdges()) {
KATANA_LOG_VASSERT(
Expand Down
4 changes: 2 additions & 2 deletions libkatana_python_native/src/ImportData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ katana::python::InitImportData(py::module& m) {
[](py::array_t<PropertyGraph::Edge> edge_indices,
py::array_t<PropertyGraph::Node> edge_destinations)
-> Result<std::shared_ptr<PropertyGraph>> {
return KATANA_CHECKED(katana::PropertyGraph::Make(
return KATANA_CHECKED(katana::PropertyGraph::MakeEphemeral(
TopologyFromCSR(edge_indices, edge_destinations)));
Comment on lines +42 to 43
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A reasonable usage of this python function is to call it and then call write on the result to store it to a specific location. Will that work correctly? It relies on writing the RDG to a different location compared to where it started (in the tmp dir).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought a lot about that use case when I was writing this so it should work correctly. But I need to check to see if we are testing it currently.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure I test it from Python.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you point me in the right direction to short-circuit my search a little?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pytest.mark.required_env("KATANA_TEST_DATASETS")

Through the power of searching for write then looking at the tests for importing.

},
R"""(
Expand Down Expand Up @@ -75,7 +75,7 @@ katana::python::InitImportData(py::module& m) {
edge_types_owned.begin());
EntityTypeManager node_type_manager_owned = node_type_manager;
EntityTypeManager edge_type_manager_owned = edge_type_manager;
return KATANA_CHECKED(katana::PropertyGraph::Make(
return KATANA_CHECKED(katana::PropertyGraph::MakeEphemeral(
TopologyFromCSR(edge_indices, edge_destinations),
std::move(node_types_owned), std::move(edge_types_owned),
std::move(node_type_manager_owned),
Expand Down
2 changes: 1 addition & 1 deletion libsupport/include/katana/Env.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ KATANA_EXPORT bool GetEnv(const std::string& var_name);
/// \param var_name name of the variable
/// \param[out] ret where to store the value of environment variable
/// \return true if environment variable set and value was successfully parsed;
/// false otherwise
/// false otherwise (with no change to value pointed to by \p ret)
KATANA_EXPORT bool GetEnv(const std::string& var_name, bool* ret);
KATANA_EXPORT bool GetEnv(const std::string& var_name, int* ret);
KATANA_EXPORT bool GetEnv(const std::string& var_name, double* ret);
Expand Down
21 changes: 21 additions & 0 deletions libsupport/include/katana/URI.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,16 @@ class KATANA_EXPORT URI {
static Result<URI> MakeFromFile(const std::string& str);
/// Append a '-' and then a random string to input
static Result<URI> MakeRand(const std::string& str);
/// Make a URI for a local user-configurable temporary directory
/// The URI will encode "/tmp" unless one of the following environment
/// variables is set (later list entries overrule previous entries):
/// 1. TMP
/// 2. TMPDIR
/// 3. KATANA_TMPDIR
/// The contents of the "most senior" set variable from this list will be used
/// if any are set.
// TODO (scober): Add a signal handler to clear this directory
static Result<URI> MakeTempDir();

static std::string JoinPath(const std::string& dir, const std::string& file);

Expand Down Expand Up @@ -60,6 +70,17 @@ class KATANA_EXPORT URI {
/// generate a new uri that is this uri with `prefix-XXXXX` appended where
/// XXXX is a random alpha numeric string
URI RandFile(std::string_view prefix) const;
/// An overload of RandFile provided for clarity at call sites
URI RandSubdir(std::string_view prefix) const { return RandFile(prefix); }

bool IsPrefixOf(const URI& other) const {
return scheme_ == other.scheme_ &&
other.path_.compare(0, path_.size(), path_) == 0;
}
bool HasAsPrefix(const URI& other) const {
return scheme_ == other.scheme_ &&
path_.compare(0, other.path_.size(), other.path_) == 0;
}

URI operator+(char rhs) const;
URI operator+(const std::string& rhs) const;
Expand Down
15 changes: 15 additions & 0 deletions libsupport/src/URI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <fmt/format.h>

#include "katana/Env.h"
#include "katana/ErrorCode.h"
#include "katana/Logging.h"
#include "katana/Random.h"
Expand Down Expand Up @@ -271,6 +272,20 @@ katana::URI::MakeRand(const std::string& str) {
return res.value();
}

katana::Result<katana::URI>
katana::URI::MakeTempDir() {
std::string tmp_dir = "/tmp";

// NB: these are intended to be ordered by increasing specificity
GetEnv("TMP", &tmp_dir);
GetEnv("TMPDIR", &tmp_dir);
GetEnv("KATANA_TMPDIR", &tmp_dir);

auto tmp_uri = KATANA_CHECKED(Make(tmp_dir));

return tmp_uri.Join("katana-tmp");
}

std::string
katana::URI::JoinPath(const std::string& dir, const std::string& file) {
return DoJoinPath(dir, file);
Expand Down
18 changes: 18 additions & 0 deletions libsupport/test/uri.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,22 @@ TestDecode() {
katana::URI::Decode("host%3A8020/path") == "host:8020/path");
}

void
TestPrefix() {
katana::URI full = Str2Uri("abc/def/ghi");
katana::URI prefix = Str2Uri("abc/d");
katana::URI not_prefix = Str2Uri("jkl/mn");
katana::URI other_not_prefix = Str2Uri("s3://abc/def");

KATANA_LOG_ASSERT(prefix.IsPrefixOf(full));
KATANA_LOG_ASSERT(full.HasAsPrefix(prefix));

KATANA_LOG_ASSERT(!not_prefix.IsPrefixOf(full));
KATANA_LOG_ASSERT(!full.HasAsPrefix(not_prefix));
KATANA_LOG_ASSERT(!other_not_prefix.IsPrefixOf(full));
KATANA_LOG_ASSERT(!full.HasAsPrefix(other_not_prefix));
}

} // namespace

int
Expand All @@ -87,5 +103,7 @@ main() {

TestDecode();

TestPrefix();

return 0;
}
Loading