Skip to content

Commit 267d682

Browse files
authored
Merge pull request #567 from kirilg/r1.3
TensorFlow Serving r1.3
2 parents 1a88c25 + 2c5b4e1 commit 267d682

28 files changed

+490
-99
lines changed

tensorflow

Submodule tensorflow updated 777 files

tensorflow_serving/apis/BUILD

-31
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ filegroup(
2121
load("//tensorflow_serving:serving.bzl", "serving_proto_library")
2222
load("//tensorflow_serving:serving.bzl", "serving_proto_library_py")
2323
load("//tensorflow_serving:serving.bzl", "serving_go_grpc_library")
24-
load("@org_tensorflow//tensorflow/core:platform/default/build_config.bzl", "tf_pyclif_proto_library")
2524

2625
serving_proto_library(
2726
name = "get_model_metadata_proto",
@@ -67,12 +66,6 @@ serving_proto_library_py(
6766
],
6867
)
6968

70-
tf_pyclif_proto_library(
71-
name = "input_pyclif",
72-
proto_lib = ":input_proto",
73-
proto_srcfile = "input.proto",
74-
)
75-
7669
serving_proto_library(
7770
name = "model_proto",
7871
srcs = ["model.proto"],
@@ -91,12 +84,6 @@ serving_proto_library_py(
9184
deps = [],
9285
)
9386

94-
tf_pyclif_proto_library(
95-
name = "model_pyclif",
96-
proto_lib = ":model_proto",
97-
proto_srcfile = "model.proto",
98-
)
99-
10087
serving_proto_library(
10188
name = "predict_proto",
10289
srcs = ["predict.proto"],
@@ -178,12 +165,6 @@ serving_proto_library_py(
178165
],
179166
)
180167

181-
tf_pyclif_proto_library(
182-
name = "classification_pyclif",
183-
proto_lib = ":classification_proto",
184-
proto_srcfile = "classification.proto",
185-
)
186-
187168
serving_proto_library(
188169
name = "inference_proto",
189170
srcs = ["inference.proto"],
@@ -210,12 +191,6 @@ serving_proto_library_py(
210191
],
211192
)
212193

213-
tf_pyclif_proto_library(
214-
name = "inference_pyclif",
215-
proto_lib = ":inference_proto",
216-
proto_srcfile = "inference.proto",
217-
)
218-
219194
serving_proto_library(
220195
name = "regression_proto",
221196
srcs = ["regression.proto"],
@@ -239,12 +214,6 @@ serving_proto_library_py(
239214
],
240215
)
241216

242-
tf_pyclif_proto_library(
243-
name = "regression_pyclif",
244-
proto_lib = ":regression_proto",
245-
proto_srcfile = "regression.proto",
246-
)
247-
248217
cc_library(
249218
name = "classifier",
250219
hdrs = ["classifier.h"],

tensorflow_serving/config/model_server_config.proto

+1-5
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,7 @@ message ModelConfig {
3838
// (This cannot be changed once a model is in serving.)
3939
string model_platform = 4;
4040

41-
// DEPRECATED: This field is deprecated. For now it's still obeyed as long as
42-
// 'model_version_policy' is not set. If 'model_version_policy' is set, then
43-
// the value of this field is ignored.
44-
FileSystemStoragePathSourceConfig.VersionPolicy version_policy = 5
45-
[deprecated = true];
41+
reserved 5;
4642

4743
// Version policy for the model indicating how many versions of the model to
4844
// be served at the same time.

tensorflow_serving/core/BUILD

+1
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ cc_library(
8787
deps = [
8888
":loader",
8989
":source_adapter",
90+
"//tensorflow_serving/resources:resource_util",
9091
"//tensorflow_serving/resources:resource_values",
9192
"//tensorflow_serving/util:any_ptr",
9293
"//tensorflow_serving/util:optional",

tensorflow_serving/core/loader.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,10 @@ class Loader {
7171
/// the estimate must specify the instance to which each resource is
7272
/// bound.
7373
/// 4. The estimate must be monotonically non-increasing, i.e. it cannot
74-
/// increase over time.
74+
/// increase over time. Reasons to have it potentially decrease over time
75+
/// include: (a) replace conservative estimate with actual measurement
76+
/// once loaded in memory; (b) load process consumes extra transient
77+
/// memory that is not used in steady-state after the load completes.
7578
///
7679
/// @return an estimate of the resources the servable will consume once
7780
/// loaded. If the servable has already been loaded, returns an estimate of

tensorflow_serving/core/simple_loader.h

+80-12
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ limitations under the License.
2626
#include "tensorflow/core/platform/types.h"
2727
#include "tensorflow_serving/core/loader.h"
2828
#include "tensorflow_serving/core/source_adapter.h"
29+
#include "tensorflow_serving/resources/resource_util.h"
2930
#include "tensorflow_serving/resources/resource_values.h"
3031
#include "tensorflow_serving/util/any_ptr.h"
3132
#include "tensorflow_serving/util/optional.h"
@@ -62,6 +63,9 @@ namespace serving {
6263
// };
6364
// std::unique_ptr<Loader> loader(new SimpleLoader<time_t>(
6465
// servable_creator, resource_estimator));
66+
//
67+
// This class is not thread-safe. Synchronization is assumed to be done by the
68+
// caller.
6569
template <typename ServableType>
6670
class SimpleLoader : public Loader {
6771
public:
@@ -80,7 +84,19 @@ class SimpleLoader : public Loader {
8084
// and hence the serving system cannot enforce resource safety.
8185
static ResourceEstimator EstimateNoResources();
8286

87+
// Constructor that takes a single resource estimator, to use for estimating
88+
// the resources needed during load as well as post-load.
8389
SimpleLoader(Creator creator, ResourceEstimator resource_estimator);
90+
91+
// Constructor that takes two resource estimators: one to use for estimating
92+
// the resources needed during load, as well as a second one that gives a
93+
// different estimate after loading has finished. See the documentation on
94+
// Loader::EstimateResources() for (a) potential reasons the estimate might
95+
// decrease, and (b) correctness constraints on how the estimate is allowed to
96+
// change over time.
97+
SimpleLoader(Creator creator, ResourceEstimator resource_estimator,
98+
ResourceEstimator post_load_resource_estimator);
99+
84100
~SimpleLoader() override = default;
85101

86102
Status EstimateResources(ResourceAllocation* estimate) const override;
@@ -94,11 +110,20 @@ class SimpleLoader : public Loader {
94110
private:
95111
Creator creator_;
96112

113+
// A function that estimates the resources needed to load the servable.
97114
ResourceEstimator resource_estimator_;
98115

99-
// The memoized estimated resource requirement of the session bundle servable.
116+
// An optional function that estimates the resources needed for the servable
117+
// after it has been loaded. (If omitted, 'resource_estimator_' should be used
118+
// for all estimates, i.e. before, during and after load.)
119+
optional<ResourceEstimator> post_load_resource_estimator_;
120+
121+
// The memoized estimated resource requirement of the servable.
100122
mutable optional<ResourceAllocation> memoized_resource_estimate_;
101123

124+
std::unique_ptr<ResourceUtil> resource_util_;
125+
Resource ram_resource_;
126+
102127
std::unique_ptr<ServableType> servable_;
103128

104129
TF_DISALLOW_COPY_AND_ASSIGN(SimpleLoader);
@@ -180,7 +205,23 @@ SimpleLoader<ServableType>::EstimateNoResources() {
180205
template <typename ServableType>
181206
SimpleLoader<ServableType>::SimpleLoader(Creator creator,
182207
ResourceEstimator resource_estimator)
183-
: creator_(creator), resource_estimator_(resource_estimator) {}
208+
: creator_(creator), resource_estimator_(resource_estimator) {
209+
ResourceUtil::Options resource_util_options;
210+
resource_util_options.devices = {{device_types::kMain, 1}};
211+
resource_util_ =
212+
std::unique_ptr<ResourceUtil>(new ResourceUtil(resource_util_options));
213+
214+
ram_resource_ = resource_util_->CreateBoundResource(
215+
device_types::kMain, resource_kinds::kRamBytes);
216+
}
217+
218+
template <typename ServableType>
219+
SimpleLoader<ServableType>::SimpleLoader(
220+
Creator creator, ResourceEstimator resource_estimator,
221+
ResourceEstimator post_load_resource_estimator)
222+
: SimpleLoader(creator, resource_estimator) {
223+
post_load_resource_estimator_ = post_load_resource_estimator;
224+
}
184225

185226
template <typename ServableType>
186227
Status SimpleLoader<ServableType>::EstimateResources(
@@ -198,8 +239,36 @@ Status SimpleLoader<ServableType>::EstimateResources(
198239

199240
template <typename ServableType>
200241
Status SimpleLoader<ServableType>::Load() {
201-
const Status status = creator_(&servable_);
202-
return status;
242+
TF_RETURN_IF_ERROR(creator_(&servable_));
243+
244+
if (post_load_resource_estimator_) {
245+
// Save the during-load estimate (may be able to use the memoized value).
246+
ResourceAllocation during_load_resource_estimate;
247+
TF_RETURN_IF_ERROR(EstimateResources(&during_load_resource_estimate));
248+
249+
// Obtain the post-load estimate, and store it as the memoized value.
250+
ResourceAllocation post_load_resource_estimate;
251+
TF_RETURN_IF_ERROR(
252+
(*post_load_resource_estimator_)(&post_load_resource_estimate));
253+
memoized_resource_estimate_ = post_load_resource_estimate;
254+
255+
// Release any transient memory used only during load to the OS.
256+
const uint64 during_load_ram_estimate = resource_util_->GetQuantity(
257+
ram_resource_, during_load_resource_estimate);
258+
const uint64 post_load_ram_estimate =
259+
resource_util_->GetQuantity(ram_resource_, post_load_resource_estimate);
260+
if (post_load_ram_estimate < during_load_ram_estimate) {
261+
const uint64 transient_ram_estimate =
262+
during_load_ram_estimate - post_load_ram_estimate;
263+
LOG(INFO) << "Calling MallocExtension_ReleaseToSystem() after servable "
264+
"load with "
265+
<< transient_ram_estimate;
266+
::tensorflow::port::MallocExtension_ReleaseToSystem(
267+
transient_ram_estimate);
268+
}
269+
}
270+
271+
return Status::OK();
203272
}
204273

205274
template <typename ServableType>
@@ -219,14 +288,13 @@ void SimpleLoader<ServableType>::Unload() {
219288

220289
// If we have a main-memory footprint estimate, release that amount of memory
221290
// to the OS.
222-
for (const ResourceAllocation::Entry& entry :
223-
resource_estimate.resource_quantities()) {
224-
if (entry.resource().device() == device_types::kMain &&
225-
entry.resource().kind() == resource_kinds::kRamBytes) {
226-
LOG(INFO) << "Calling MallocExtension_ReleaseToSystem() with "
227-
<< entry.quantity();
228-
::tensorflow::port::MallocExtension_ReleaseToSystem(entry.quantity());
229-
}
291+
const uint64 memory_estimate =
292+
resource_util_->GetQuantity(ram_resource_, resource_estimate);
293+
if (memory_estimate > 0) {
294+
LOG(INFO) << "Calling MallocExtension_ReleaseToSystem() after servable "
295+
"unload with "
296+
<< memory_estimate;
297+
::tensorflow::port::MallocExtension_ReleaseToSystem(memory_estimate);
230298
}
231299
}
232300

tensorflow_serving/core/simple_loader_test.cc

+57-1
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,69 @@ TEST(SimpleLoaderTest, ResourceEstimation) {
9696
*estimate = want;
9797
return Status::OK();
9898
}));
99-
for (int i = 0; i < 2; ++i) {
99+
100+
{
101+
ResourceAllocation got;
102+
TF_ASSERT_OK(loader->EstimateResources(&got));
103+
EXPECT_THAT(got, EqualsProto(want));
104+
}
105+
106+
// The estimate should remain the same after load.
107+
TF_ASSERT_OK(loader->Load());
108+
{
100109
ResourceAllocation got;
101110
TF_ASSERT_OK(loader->EstimateResources(&got));
102111
EXPECT_THAT(got, EqualsProto(want));
103112
}
104113
}
105114

115+
TEST(SimpleLoaderTest, ResourceEstimationWithPostLoadRelease) {
116+
const auto pre_load_resources = CreateProto<ResourceAllocation>(
117+
"resource_quantities { "
118+
" resource { "
119+
" device: 'main' "
120+
" kind: 'processing' "
121+
" } "
122+
" quantity: 42 "
123+
"} ");
124+
const auto post_load_resources = CreateProto<ResourceAllocation>(
125+
"resource_quantities { "
126+
" resource { "
127+
" device: 'main' "
128+
" kind: 'processing' "
129+
" } "
130+
" quantity: 17 "
131+
"} ");
132+
std::unique_ptr<Loader> loader(new SimpleLoader<int>(
133+
[](std::unique_ptr<int>* servable) {
134+
servable->reset(new int);
135+
return Status::OK();
136+
},
137+
[&pre_load_resources](ResourceAllocation* estimate) {
138+
*estimate = pre_load_resources;
139+
return Status::OK();
140+
},
141+
[&post_load_resources](ResourceAllocation* estimate) {
142+
*estimate = post_load_resources;
143+
return Status::OK();
144+
}));
145+
146+
// Run it twice, to exercise memoization.
147+
for (int i = 0; i < 2; ++i) {
148+
ResourceAllocation got;
149+
TF_ASSERT_OK(loader->EstimateResources(&got));
150+
EXPECT_THAT(got, EqualsProto(pre_load_resources));
151+
}
152+
153+
// The estimate should switch to the post-load one after load.
154+
TF_ASSERT_OK(loader->Load());
155+
{
156+
ResourceAllocation got;
157+
TF_ASSERT_OK(loader->EstimateResources(&got));
158+
EXPECT_THAT(got, EqualsProto(post_load_resources));
159+
}
160+
}
161+
106162
// Verify that the error returned by the Creator is propagated back through
107163
// Load.
108164
TEST(SimpleLoaderTest, LoadError) {

tensorflow_serving/g3doc/METADATA

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: "TensorFlow Serving"
22
g3doc: {
33
include: "/learning/serving/g3doc/METADATA"
4-
sitemap_file: "/learning/serving/g3doc/users/sitemap.md"
4+
sitemap_file: "/learning/serving/g3doc/sitemap.md"
55
}
66

tensorflow_serving/g3doc/setup.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ in the documentation, you can add the flags `-c opt --copt=-msse4.1
182182
subset of these flags). For example:
183183

184184
```shell
185-
bazel build -c opt --config=mkl --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-mavx2 --copt=-mfma --copt=-O3 tensorflow_serving/...
185+
bazel build -c opt --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-mavx2 --copt=-mfma --copt=-O3 tensorflow_serving/...
186186
```
187187

188188
Note: These instruction sets are not available on all machines, especially with

tensorflow_serving/g3doc/signature_defs.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ constants. Specifically:
5454
C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/signature_constants.h).
5555

5656
In addition, SavedModel provides a
57-
[util](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/utils.py)
57+
[util](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/signature_def_utils.py)
5858
to help build a signature-def.
5959

6060
## Sample structures

0 commit comments

Comments
 (0)