diff --git a/clients/ruby/client/.rspec b/clients/ruby/client/.rspec new file mode 100644 index 0000000..83e16f8 --- /dev/null +++ b/clients/ruby/client/.rspec @@ -0,0 +1,2 @@ +--color +--require spec_helper diff --git a/clients/ruby/client/lib/relevanced_client.rb b/clients/ruby/client/lib/relevanced_client.rb index fe24f5d..cb106fc 100644 --- a/clients/ruby/client/lib/relevanced_client.rb +++ b/clients/ruby/client/lib/relevanced_client.rb @@ -21,32 +21,30 @@ def initialize(host, port) @thrift_transport.open() end - def ping() - @thrift_client.ping() + def add_document_to_centroid(centroid_id, document_id, ignore_already_in_centroid=false) + add_documents_to_centroid(centroid_id, [document_id], ignore_already_in_centroid) end - def get_server_metadata() - @thrift_client.getServerMetadata() + def add_documents_to_centroid(centroid_id, document_ids, ignore_already_in_centroid=false) + request = AddDocumentsToCentroidRequest.new + request.centroidId = centroid_id + request.documentIds = document_ids + request.ignoreAlreadyInCentroid = ignore_already_in_centroid + @thrift_client.addDocumentsToCentroid(request) end - def get_document_similarity(centroid_id, document_id) - @thrift_client.getDocumentSimilarity(centroid_id, document_id) - end - - def multi_get_text_similarity(centroid_id_list, text, lang=Language::EN) - @thrift_client.multiGetTextSimilarity( - centroid_id_list, text, lang - ) + def create_document(text, lang=Language::EN) + @thrift_client.createDocument(text, lang) end - def multi_get_document_similarity(centroid_id_list, document_id) - @thrift_client.multiGetDocumentSimilarity( - centroid_id_list, document_id + def create_document_with_id(document_id, text, lang=Language::EN) + @thrift_client.createDocumentWithID( + document_id, text, lang ) end - def get_text_similarity(centroid_id, text, lang=Language::EN) - @thrift_client.getTextSimilarity(centroid_id, text, lang) + def get_server_metadata() + @thrift_client.getServerMetadata() end def 
get_centroid_similarity(centroid_1_id, centroid_2_id) @@ -55,18 +53,12 @@ def get_centroid_similarity(centroid_1_id, centroid_2_id) ) end - def create_document(text, lang=Language::EN) - @thrift_client.createDocument(text, lang) - end - - def create_document_with_id(document_id, text, lang=Language::EN) - @thrift_client.createDocumentWithID( - document_id, text, lang - ) + def get_document_similarity(centroid_id, document_id) + @thrift_client.getDocumentSimilarity(centroid_id, document_id) end - def delete_document(document_id) - @thrift_client.deleteDocument(document_id) + def get_text_similarity(centroid_id, text, lang=Language::EN) + @thrift_client.getTextSimilarity(centroid_id, text, lang) end def create_centroid(centroid_id, ignore_existing=false) @@ -76,42 +68,86 @@ def create_centroid(centroid_id, ignore_existing=false) @thrift_client.createCentroid(request) end - def delete_centroid(centroid_id) - @thrift_client.deleteCentroid(centroid_id) + def delete_centroid(centroid_id, ignore_missing=false) + request = DeleteCentroidRequest.new + request.id = centroid_id + request.ignoreMissing = ignore_missing + @thrift_client.deleteCentroid(request) end - def list_all_documents_for_centroid(centroid_id) - @thrift_client.listAllDocumentsForCentroid(centroid_id) + def delete_document(document_id, ignore_missing=false) + request = DeleteDocumentRequest.new + request.id = document_id + request.ignoreMissing = ignore_missing + @thrift_client.deleteDocument(request) end - def list_centroid_document_range(centroid_id, offset, count) - @thrift_client.listCentroidDocumentRange(centroid_id, offset, count) + def join_centroid(centroid_id) + request = JoinCentroidRequest.new + request.id = centroid_id + @thrift_client.joinCentroid(request) end - def list_centroid_document_range_from_id(centroid_id, document_id, count) - @thrift_client.listCentroidDocumentRangeFromID(centroid_id, document_id, count) + def multi_create_centroids(centroid_ids, ignore_existing=false) + request = 
MultiCreateCentroidsRequest.new + request.ids = centroid_ids + request.ignoreExisting = ignore_existing + @thrift_client.multiCreateCentroids(request) + end + + def multi_delete_centroids(centroid_ids, ignore_missing=false) + request = MultiDeleteCentroidsRequest.new + request.ids = centroid_ids + request.ignoreMissing = ignore_missing + @thrift_client.multiDeleteCentroids(request) + end + + def multi_delete_documents(document_ids, ignore_missing=false) + request = MultiDeleteDocumentsRequest.new + request.ids = document_ids + request.ignoreMissing = ignore_missing + @thrift_client.multiDeleteDocuments(request) end - def add_document_to_centroid(centroid_id, document_id) - @thrift_client.addDocumentToCentroid( - centroid_id, document_id + def multi_get_document_similarity(centroid_id_list, document_id) + @thrift_client.multiGetDocumentSimilarity( + centroid_id_list, document_id ) end - def remove_document_from_centroid(centroid_id, document_id) - @thrift_client.removeDocumentFromCentroid( - centroid_id, document_id + def multi_get_text_similarity(centroid_id_list, text, lang=Language::EN) + @thrift_client.multiGetTextSimilarity( + centroid_id_list, text, lang ) end - def join_centroid(centroid_id) - @thrift_client.joinCentroid(centroid_id) + def multi_join_centroids(centroid_ids) + request = MultiJoinCentroidsRequest.new + request.ids = centroid_ids + @thrift_client.multiJoinCentroids(request) end def list_all_centroids() @thrift_client.listAllCentroids() end + def list_all_documents() + @thrift_client.listAllDocuments() + end + + def list_all_documents_for_centroid(centroid_id) + @thrift_client.listAllDocumentsForCentroid(centroid_id) + end + + def list_centroid_document_range(centroid_id, offset, count) + @thrift_client.listCentroidDocumentRange(centroid_id, offset, count) + end + + def list_centroid_document_range_from_id(centroid_id, document_id, count) + @thrift_client.listCentroidDocumentRangeFromID(centroid_id, document_id, count) + end + + def 
list_centroid_range(offset, count) @thrift_client.listCentroidRange(offset, count) end @@ -120,10 +156,6 @@ def list_centroid_range_from_id(centroid_id, count) @thrift_client.listCentroidRangeFromID(centroid_id, count) end - def list_all_documents() - @thrift_client.listAllDocuments() - end - def list_document_range(offset, count) @thrift_client.listDocumentRange(offset, count) end @@ -135,5 +167,22 @@ def list_document_range_from_id(document_id, count) def list_unused_documents(limit) @thrift_client.listUnusedDocuments(limit) end + + def ping() + @thrift_client.ping() + end + + def remove_document_from_centroid(centroid_id, document_id, ignore_not_in_centroid=false) + remove_documents_from_centroid(centroid_id, [document_id], ignore_not_in_centroid) + end + + def remove_documents_from_centroid(centroid_id, document_ids, ignore_not_in_centroid=false) + request = RemoveDocumentsFromCentroidRequest.new + request.centroidId = centroid_id + request.documentIds = document_ids + request.ignoreNotInCentroid = ignore_not_in_centroid + @thrift_client.removeDocumentsFromCentroid(request) + end + end end diff --git a/clients/ruby/client/spec/client_spec.rb b/clients/ruby/client/spec/client_spec.rb new file mode 100644 index 0000000..3a28138 --- /dev/null +++ b/clients/ruby/client/spec/client_spec.rb @@ -0,0 +1,201 @@ +require './lib/relevanced_client' + +describe 'relevanced_client' do + before(:all) do + @client = RelevancedClient::Client.new('localhost', 8097) + end + before(:each) do + existing_docs = @client.list_all_documents.documents + existing_centroids = @client.list_all_centroids.centroids + if existing_docs + @client.multi_delete_documents(existing_docs) + end + if existing_centroids + @client.multi_delete_centroids(existing_centroids) + end + end + describe 'centroid CRUD' do + it 'works' do + @client.create_centroid('c-1') + centroids = @client.list_all_centroids.centroids + expect(centroids).to eq(['c-1']) + + @client.multi_create_centroids(['c-2', 'c-3']) + 
centroids = @client.list_all_centroids.centroids + expect(centroids).to eq(['c-1', 'c-2', 'c-3']) + + @client.multi_create_centroids(['c-4', 'c-5', 'c-6']) + centroids = @client.list_all_centroids.centroids + expect(centroids).to eq(['c-1', 'c-2', 'c-3', 'c-4', 'c-5', 'c-6']) + + @client.delete_centroid('c-2') + centroids = @client.list_all_centroids.centroids + expect(centroids).to eq(['c-1', 'c-3', 'c-4', 'c-5', 'c-6']) + + @client.multi_delete_centroids(['c-4', 'c-6']) + centroids = @client.list_all_centroids.centroids + expect(centroids).to eq(['c-1', 'c-3', 'c-5']) + end + end + describe 'document CRUD' do + it 'works' do + ['d1', 'd2', 'd3', 'd4', 'd5'].each do |doc| + @client.create_document_with_id(doc, 'some text') + end + docs = @client.list_all_documents.documents + expect(docs).to eq(['d1', 'd2', 'd3', 'd4', 'd5']) + + @client.delete_document('d2') + docs = @client.list_all_documents.documents + expect(docs).to eq(['d1', 'd3', 'd4', 'd5']) + + @client.multi_delete_documents(['d3', 'd5']) + docs = @client.list_all_documents.documents + expect(docs).to eq(['d1', 'd4']) + end + end + describe 'centroid-document CRUD' do + before(:each) do + ['d1', 'd2', 'd3', 'd4', 'd5'].each do |doc| + @client.create_document_with_id(doc, 'some text') + end + @client.multi_create_centroids(['c1', 'c2', 'c3']) + end + it 'works' do + docs = [ + @client.list_all_documents_for_centroid('c1').documents, + @client.list_all_documents_for_centroid('c2').documents, + @client.list_all_documents_for_centroid('c3').documents, + ] + expect(docs).to eq([[], [], []]) + @client.add_document_to_centroid('c1', 'd1') + @client.add_document_to_centroid('c2', 'd3') + @client.add_document_to_centroid('c2', 'd4') + @client.add_document_to_centroid('c2', 'd5') + + + docs = [ + @client.list_all_documents_for_centroid('c1').documents, + @client.list_all_documents_for_centroid('c2').documents, + @client.list_all_documents_for_centroid('c3').documents, + ] + expect(docs).to eq([['d1'], ['d3', 'd4', 
'd5'], []]) + + @client.add_documents_to_centroid('c3', ['d3', 'd4']) + + docs = [ + @client.list_all_documents_for_centroid('c1').documents, + @client.list_all_documents_for_centroid('c2').documents, + @client.list_all_documents_for_centroid('c3').documents, + ] + expect(docs).to eq([['d1'], ['d3', 'd4', 'd5'], ['d3', 'd4']]) + + @client.remove_documents_from_centroid('c2', ['d3', 'd5']) + + docs = [ + @client.list_all_documents_for_centroid('c1').documents, + @client.list_all_documents_for_centroid('c2').documents, + @client.list_all_documents_for_centroid('c3').documents, + ] + expect(docs).to eq([['d1'], ['d4'], ['d3', 'd4']]) + + @client.remove_document_from_centroid('c1', 'd1') + docs = [ + @client.list_all_documents_for_centroid('c1').documents, + @client.list_all_documents_for_centroid('c2').documents, + @client.list_all_documents_for_centroid('c3').documents, + ] + expect(docs).to eq([[], ['d4'], ['d3', 'd4']]) + + end + end + describe 'joining' do + before(:each) do + ['d1', 'd2', 'd3', 'd4', 'd5'].each do |doc| + @client.create_document_with_id(doc, 'some text') + end + @client.multi_create_centroids(['c1', 'c2', 'c3']) + end + it 'works - single' do + @client.add_documents_to_centroid('c2', ['d2', 'd3']) + res = @client.join_centroid('c2') + expect(res.id).to eq('c2') + end + it 'works - multi' do + @client.add_documents_to_centroid('c2', ['d2', 'd3']) + @client.add_documents_to_centroid('c3', ['d1']) + res = @client.multi_join_centroids(['c2', 'c3']) + expect(res.ids).to eq(['c2', 'c3']) + end + end + describe 'similarity requests' do + before(:each) do + data = { + "monkeys" => { + "d_monkeys_1" => 'monkey gorilla ape banana cat', + "d_monkeys_2" => 'gorilla gorilla gorilla fish banana', + "d_monkeys_3" => 'ape lemur gorilla monkey grill' + }, + "cars" => { + "d_cars_1" => 'engine motor wheel gear clutch', + "d_cars_2" => 'motor motor nascar left wheel', + "d_cars_3" => 'mudflap column engine engine engine' + } + } + 
@client.multi_create_centroids(data.keys) + data.each do |centroid_id, docs| + docs.each do |k, v| + @client.create_document_with_id(k, v) + end + @client.add_documents_to_centroid(centroid_id, docs.keys) + end + @client.multi_join_centroids(data.keys) + end + it 'works - single doc' do + res1 = @client.get_document_similarity('monkeys', 'd_monkeys_1') + res2 = @client.get_document_similarity('cars', 'd_monkeys_1') + expect(res1).to be > res2 + expect(res1).to be > 0 + expect(res1).to be < 1.0001 + expect(res2).to be >= 0 + expect(res2).to be < 1.0001 + end + it 'works - multi doc' do + text = 'monkey lemur ape ape gear' + @client.create_document_with_id('some-doc', text) + res = @client.multi_get_document_similarity(['monkeys', 'cars'], 'some-doc') + scores = res.scores + expect(scores.keys).to eq(['cars', 'monkeys']) + res1 = scores['monkeys'] + res2 = scores['cars'] + expect(res1).to be > res2 + expect(res1).to be > 0 + expect(res1).to be < 1.0001 + expect(res2).to be > 0 + expect(res2).to be < 1.001 + end + it 'works - single text' do + text = 'monkey lemur ape ape gear' + res1 = @client.get_text_similarity('monkeys', text) + res2 = @client.get_text_similarity('cars', text) + expect(res1).to be > res2 + expect(res1).to be > 0 + expect(res1).to be < 1.0001 + expect(res2).to be > 0 + expect(res2).to be < 1.001 + end + it 'works - multi text' do + text = 'monkey lemur ape ape gear' + res = @client.multi_get_text_similarity(['monkeys', 'cars'], text) + scores = res.scores + expect(scores.keys).to eq(['cars', 'monkeys']) + res1 = scores['monkeys'] + res2 = scores['cars'] + expect(res1).to be > res2 + expect(res1).to be > 0 + expect(res1).to be < 1.0001 + expect(res2).to be > 0 + expect(res2).to be < 1.001 + end + end +end diff --git a/clients/ruby/client/spec/spec_helper.rb b/clients/ruby/client/spec/spec_helper.rb new file mode 100644 index 0000000..25dc80a --- /dev/null +++ b/clients/ruby/client/spec/spec_helper.rb @@ -0,0 +1,96 @@ +# This file was generated by 
the `rspec --init` command. Conventionally, all +# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. +# The generated `.rspec` file contains `--require spec_helper` which will cause +# this file to always be loaded, without a need to explicitly require it in any +# files. +# +# Given that it is always loaded, you are encouraged to keep this file as +# light-weight as possible. Requiring heavyweight dependencies from this file +# will add to the boot time of your test suite on EVERY test run, even for an +# individual file that may not need all of that loaded. Instead, consider making +# a separate helper file that requires the additional dependencies and performs +# the additional setup, and require it from the spec files that actually need +# it. +# +# The `.rspec` file also contains a few flags that are not defaults but that +# users commonly want. +# +# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration +RSpec.configure do |config| + # rspec-expectations config goes here. You can use an alternate + # assertion/expectation library such as wrong or the stdlib/minitest + # assertions if you prefer. + config.expect_with :rspec do |expectations| + # This option will default to `true` in RSpec 4. It makes the `description` + # and `failure_message` of custom matchers include text for helper methods + # defined using `chain`, e.g.: + # be_bigger_than(2).and_smaller_than(4).description + # # => "be bigger than 2 and smaller than 4" + # ...rather than: + # # => "be bigger than 2" + expectations.include_chain_clauses_in_custom_matcher_descriptions = true + end + + # rspec-mocks config goes here. You can use an alternate test double + # library (such as bogus or mocha) by changing the `mock_with` option here. + config.mock_with :rspec do |mocks| + # Prevents you from mocking or stubbing a method that does not exist on + # a real object. This is generally recommended, and will default to + # `true` in RSpec 4. 
+ mocks.verify_partial_doubles = true + end + +# The settings below are suggested to provide a good initial experience +# with RSpec, but feel free to customize to your heart's content. +=begin + # These two settings work together to allow you to limit a spec run + # to individual examples or groups you care about by tagging them with + # `:focus` metadata. When nothing is tagged with `:focus`, all examples + # get run. + config.filter_run :focus + config.run_all_when_everything_filtered = true + + # Allows RSpec to persist some state between runs in order to support + # the `--only-failures` and `--next-failure` CLI options. We recommend + # you configure your source control system to ignore this file. + config.example_status_persistence_file_path = "spec/examples.txt" + + # Limits the available syntax to the non-monkey patched syntax that is + # recommended. For more details, see: + # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/ + # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/ + # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode + config.disable_monkey_patching! + + # This setting enables warnings. It's recommended, but in some cases may + # be too noisy due to issues in dependencies. + config.warnings = true + + # Many RSpec users commonly either run the entire suite or an individual + # file, and it's useful to allow more verbose output when running an + # individual spec file. + if config.files_to_run.one? + # Use the documentation formatter for detailed output, + # unless a formatter has already been configured + # (e.g. via a command-line flag). + config.default_formatter = 'doc' + end + + # Print the 10 slowest examples and example groups at the + # end of the spec run, to help surface which specs are running + # particularly slow. + config.profile_examples = 10 + + # Run specs in random order to surface order dependencies. 
If you find an + # order dependency and want to debug it, you can fix the order by providing + # the seed, which is printed after each run. + # --seed 1234 + config.order = :random + + # Seed global randomization in this process using the `--seed` CLI option. + # Setting this allows you to use `--seed` to deterministically reproduce + # test failures related to randomization by passing the same `--seed` value + # as the one that triggered the failure. + Kernel.srand config.seed +=end +end diff --git a/src/RelevancedProtocol.thrift b/src/RelevancedProtocol.thrift index 18347b7..ca1a65f 100644 --- a/src/RelevancedProtocol.thrift +++ b/src/RelevancedProtocol.thrift @@ -109,7 +109,7 @@ struct MultiDeleteDocumentsResponse { struct MultiDeleteCentroidsRequest { 1: required list<string> ids; - 2: required bool ignoreMissing; + 2: optional bool ignoreMissing; } struct MultiDeleteCentroidsResponse { @@ -122,12 +122,12 @@ struct CreateCentroidResponse { struct CreateCentroidRequest { 1: required string id; - 2: required bool ignoreExisting; + 2: optional bool ignoreExisting; } struct MultiCreateCentroidsRequest { 1: required list<string> ids; - 2: required bool ignoreExisting; + 2: optional bool ignoreExisting; } struct MultiCreateCentroidsResponse { @@ -136,7 +136,7 @@ struct MultiCreateCentroidsResponse { struct DeleteCentroidRequest { 1: required string id; - 2: required bool ignoreMissing; + 2: optional bool ignoreMissing; } struct DeleteCentroidResponse { @@ -173,7 +173,7 @@ struct RemoveDocumentsFromCentroidResponse { struct MultiJoinCentroidsRequest { 1: required list<string> ids; - 2: required bool ignoreMissing; + 2: optional bool ignoreMissing; } struct MultiJoinCentroidsResponse {