diff --git a/.gitignore b/.gitignore index 5e1422c..b57c652 100644 --- a/.gitignore +++ b/.gitignore @@ -42,9 +42,11 @@ build-iPhoneSimulator/ # for a library or gem, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: -# Gemfile.lock -# .ruby-version -# .ruby-gemset +Gemfile.lock +.ruby-version +.ruby-gemset # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: .rvmrc + +/.byebug_history diff --git a/.rspec b/.rspec new file mode 100644 index 0000000..8c18f1a --- /dev/null +++ b/.rspec @@ -0,0 +1,2 @@ +--format documentation +--color diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..d7b3785 --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,217 @@ +AllCops: + TargetRubyVersion: 2.3 + Include: + - 'Rakefile' + +Metrics/LineLength: + Max: 140 + +# Removes the requirement for using double quotes only for string interpolation. +Style/StringLiterals: + Enabled: true + +# These complexity and length metrics tend to require a bunch of high-touch refactoring +# in existing projects. Leaving them high for now, and we can slowly lower them to standard +# levels in the near future. +Metrics/ModuleLength: + Max: 200 + +Metrics/ClassLength: + Max: 230 + +Metrics/MethodLength: + Max: 50 + +Metrics/AbcSize: + Max: 75 + +Metrics/CyclomaticComplexity: + Max: 20 + +Metrics/PerceivedComplexity: + Max: 20 + +# Allow long keyword parameter lists +Metrics/ParameterLists: + Max: 15 + CountKeywordArgs: false + +# This enforces bad style and can break things. 
+# See: https://github.com/bbatsov/rubocop/issues/2614 +Performance/Casecmp: + Enabled: false + +# This requires the use of alias rather than alias_method, which seems totally arbitrary +Style/Alias: + Enabled: false + +# This cop enforces that submodules/subclasses be defined like this: +# +# class Foo::Bar +# +# rather than like this: +# +# module Foo +# class Bar +# +# This is actually semantically different, and there are valid reasons for wanting to use the latter +# form because of the way the former does funky stuff to the namespace. +Style/ClassAndModuleChildren: + Enabled: false + +# This forces you to use class instance variables rather than class variables, which seems pretty +# situation-specific +Style/ClassVars: + Enabled: false + +# This makes you do things like this: +# variable = if test +# 'abc-123' +# else +# 'def-456' +# end +# +# I think this is harder to read than assigning the variable within the conditional. +Style/ConditionalAssignment: + Enabled: false + +# This cop forces you to put a return at the beginning of a block of code rather than having an if statement +# whose body carries to the end of the function. For example: +# +# def foo +# ... +# if test +# ... +# end +# end +# +# would be considered bad, and the cop would force you to put a `return if !test` before that block and +# then remove the if. The problem is that this hides intent, since the if test does have a purpose in +# readability, and it could also be easier for future changes to miss the return statement and add code +# after it expecting it to be executed. +Style/GuardClause: + Enabled: false + +# This is pretty much the same thing as the one above. Inside a loop, it forces you to use next to skip +# iteration rather than using an if block that runs to the end of the loop, and it suffers from the same +# problems as above. 
+Style/Next: + Enabled: false + +Style/IndentArray: + EnforcedStyle: consistent + +# This forces you to change simple if/unless blocks to the conditional form like: `return 2 if badness`. +# Unfortunately there are a number of cases where it makes sense to use the block form even for simple statements, +# and the modifier form can be easy to miss when scanning code. +Style/IfUnlessModifier: + Enabled: false + +# This requires you to implement respond_to_missing? anywhere that you implement method_missing, but I think that +# is a lot of a pain. +Style/MethodMissing: + Enabled: false + +# This cop forces the use of unless in all negated if statements. Since unless is a source of so many arguments +# and there seems to be no purpose in enforcing its use, disable it. +Style/NegatedIf: + Enabled: false + +# This will force you to use methods like .positive? and .zero? rather than > 0 and == 0. But why? +Style/NumericPredicate: + Enabled: false + +# This one enforces that functions with names like has_value? be renamed to value?. There are many cases where +# doing so would make the code more difficult to parse. +Style/PredicateName: + Enabled: false + +# By default this will force you to use specific names for arguments for enumerable and other methods, +# which I don't understand even a little bit. +Style/SingleLineBlockParams: + Methods: [] + +# This rule disallows you from parenthesizing the test in ternary operations, so that: +# (p == 100) ? 'success' : '' +# must be written as: +# p == 100 ? 'success' : '' +# I can't possibly be the only one who finds the latter a bit harder to read, can I? +Style/TernaryParentheses: + Enabled: false + +# Allow trivial methods that have ? at the end. +Style/TrivialAccessors: + AllowPredicates: true + +# It's ok to make a small array of words without using a %w +Style/WordArray: + MinSize: 5 + +# Some people really like to put lines at the beginning and end of class bodies, while other people +# really don't.
It doesn't really seem to matter. +Style/EmptyLinesAroundClassBody: + Enabled: false + +# This forces you to put a comment like this at the top of every single file: +# frozen_string_literal: true +# In Ruby 3, string literals will be frozen by default, so doing so future-proofs +# the code, but in the meantime it's a huge pain in the ass. +Style/FrozenStringLiteralComment: + Enabled: false + +# this forces you to use the lambda keyword rather than -> for multiline lambdas, which seems totally arbitrary +Style/Lambda: + Enabled: false + +# Force indentation for multi-line expressions and method calls +Style/MultilineOperationIndentation: + EnforcedStyle: indented + +Style/MultilineMethodCallIndentation: + EnforcedStyle: indented + +# This disallows the use of $1, $2 from regular expressions, which seems to make no sense whatsoever +Style/PerlBackrefs: + Enabled: false + +# This enforces that multi-line array literals do not end in a comma. For example: +# +# foo = [ +# 1, +# 2 +# ] +Style/TrailingCommaInLiteral: + EnforcedStyleForMultiline: no_comma + +# Same as above but for method arguments rather than array entries. +Style/TrailingCommaInArguments: + EnforcedStyleForMultiline: no_comma + +# This forces you to replace things like: `[1, 2, 3].length == 0` with `[1,2,3].empty?`. The problem is that +# not all things that implement length also implement empty? so you will get errors that cannot be resolved, +# and the cop will encourage you to do things that are incorrect. +Style/ZeroLengthPredicate: + Enabled: false + +# Enforce alignment of multi-line assignments to be like this: +# variable = if test +# ... +# end +Lint/EndAlignment: + AlignWith: variable + +# This cop will require you to replace or prefix method arguments that go unused with underscores. The problem +# is that while seeming to solve no problem this could easily cause issues where someone editing the code to +# begin using the variable forgets to remove the underscore.
Also, if you replace the argument with _, then +# information about the meaning of that argument is lost. +Lint/UnusedMethodArgument: + Enabled: false + +# Same as above but with block arguments. +Lint/UnusedBlockArgument: + Enabled: false + +# This cop forces all rescue blocks to do something with the exception. Sometimes you just have an exception +# you want to rescue but do nothing about. +Lint/HandleExceptions: + Enabled: false diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..0ae240f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,5 @@ +sudo: false +language: ruby +rvm: + - 2.3.1 +before_install: gem install bundler -v 1.13.1 diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..dd81c42 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +# Specify your gem's dependencies in breakers.gemspec +gemspec diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 264c55d..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2016 Department of Veterans Affairs - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..19fa33d --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,31 @@ +As a work of the United States Government, this project is in the +public domain within the United States. + +Additionally, we waive copyright and related rights in the work +worldwide through the CC0 1.0 Universal public domain dedication. + +## CC0 1.0 Universal Summary + +This is a human-readable summary of the [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode). + +### No Copyright + +The person who associated a work with this deed has dedicated the work to +the public domain by waiving all of his or her rights to the work worldwide +under copyright law, including all related and neighboring rights, to the +extent allowed by law. + +You can copy, modify, distribute and perform the work, even for commercial +purposes, all without asking permission. + +### Other Information + +In no way are the patent or trademark rights of any person affected by CC0, +nor are the rights that other persons may have in the work or in how the +work is used, such as publicity or privacy rights. + +Unless expressly stated otherwise, the person who associated a work with +this deed makes no warranties about the work, and disclaims liability for +all uses of the work, to the fullest extent permitted by applicable law. +When using or citing the work, you should not imply endorsement by the +author or the affirmer. 
diff --git a/README.md b/README.md index dc5229e..2a536ab 100644 --- a/README.md +++ b/README.md @@ -1 +1,163 @@ -# breakers \ No newline at end of file +# Breakers + +Breakers is a Ruby gem that implements the circuit breaker pattern for Ruby using a Faraday middleware. It is designed to handle the case +where your app communicates with one or more backend services over HTTP and those services could possibly go down. Data about the success +and failure of requests is recorded in Redis, and the gem uses this to determine when an outage occurs. While a service is marked as down, +requests will continue to flow through occasionally to check if it has returned to being alive. + +## Installation + +Add this line to your application's Gemfile: + +```ruby +gem 'breakers' +``` + +And then execute: + + $ bundle + +Or install it yourself as: + + $ gem install breakers + +## Quick Start + +```ruby +service = Breakers::Service.new( + name: 'messaging', + request_matcher: proc { |request_env| request_env.url.host =~ /.*messaging\.va\.gov/ } +) + +client = Breakers::Client.new(redis_connection: redis, services: [service]) + +Breakers.set_client(client) + +connection = Faraday.new do |conn| + conn.use :breakers + conn.adapter Faraday.default_adapter +end + +response = connection.get 'http://messaging.va.gov/query' +``` + +This will track all requests to messaging.va.gov and will stop sending requests to it for one minute when the error rate reaches 50% over a +two minute period. + +## Usage + +For more advanced usage and an explanation of the code above, keep reading. + +### Services + +In an application where you rely on a number of backend services with different endpoints, outage characteristics, and levels of reliability, +breakers lets you configure each of those services globally and then apply a Faraday middleware that uses them to track changes. 
Services +are defined like this: + +```ruby +service = Breakers::Service.new( + name: 'messaging', + request_matcher: proc { |request_env| request_env.url.host =~ /.*messaging\.va\.gov/ }, + seconds_before_retry: 60, + error_threshold: 50 +) +``` + +The name parameter is used for logging and reporting only. On each request, the block will be called with the request's environment, and +the block should return true if the service applies to it. + +Each service can be further configured with the following: + +* `seconds_before_retry` - The number of seconds to wait before sending a new request when an outage is reported. Every N seconds, a new request will be sent, and if it succeeds the outage will be ended. Defaults to 60. +* `error_threshold` - The percentage of errors over which an outage will be reported. Defaults to 50. +* `data_retention_seconds` - The number of seconds for which data will be stored in Redis for successful and unsuccessful request counts. See below for information on the structure of data within Redis. Defaults to 30 days. + +### Client + +A Breakers::Client is the data structure that contains all of the information needed to operate the system, and it provides a query API for +accessing the current state. It is initialized with a redis connection and one or more services, with options for a set of plugins and a logger: + +```ruby +client = Breakers::Client.new( + redis_connection: redis, + services: [service], + logger: logger, + plugins: [plugin] +) +``` + +The logger should conform to Ruby's Logger API. See more information on plugins below. + +### Global Configuration + +The client can be configured globally with: + +```ruby +Breakers.set_client(client) +``` + +In a Rails app, it makes sense to create the services and client in an initializer and then apply them with this call. 
If you would like to +namespace the data in Redis with a prefix, you can make that happen with: + +```ruby +Breakers.redis_prefix = 'custom-' +``` + +The default prefix is brk-. + +### Using the Middleware + +Once the global configuration is in place, use the middleware as you would normally in Faraday: + +```ruby +Faraday.new('http://va.gov') do |conn| + conn.use :breakers + conn.adapter Faraday.default_adapter +end +``` + +### Logging + +The client takes an optional `logger:` argument that can accept an object that conforms to Ruby's Logger interface. If provided, it will +log on request errors and outage beginnings and endings. + +### Plugins + +If you would like to track events in another way, you can also pass plugins to the client with the `plugins:` argument. Plugins should +be instances that implement the following interface: + +```ruby +class ExamplePlugin + def on_outage_begin(outage); end + + def on_outage_end(outage); end + + def on_error(service, request_env, response_env); end + + def on_success(service, request_env, response_env); end +end +``` + +It's ok for your plugin to implement only part of this interface. + +### Redis Data Structure + +Data is stored in Redis with the following structure: + +* {prefix}{service_name}-errors-{unix_timestamp} - A set of keys that store the number of errors by service for each minute. By default these are kept for one month, but you can customize that retention period with the `data_retention_seconds` argument when creating a service. +* {prefix}{service_name}-successes-{unix_timestamp} - Same as above but counts for successful requests. +* {prefix}{service_name}-outages - A sorted set that stores the actual outages. The sort value is the unix timestamp at which the outage occurred, and each entry stores a JSON document containing the start and end times for the outage. + +## Development + +After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests.
You can also run `bin/console` for an interactive prompt that will allow you to experiment. + +To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). + +## Contributing + +Bug reports and pull requests are welcome on GitHub at https://github.com/department-of-veterans-affairs/breakers. + +## License + +The gem is available as open source under the terms of the Creative Commons Zero 1.0 Universal License. diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..4c774a2 --- /dev/null +++ b/Rakefile @@ -0,0 +1,6 @@ +require 'bundler/gem_tasks' +require 'rspec/core/rake_task' + +RSpec::Core::RakeTask.new(:spec) + +task default: :spec diff --git a/bin/console b/bin/console new file mode 100755 index 0000000..0c373da --- /dev/null +++ b/bin/console @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby + +require 'bundler/setup' +require 'breakers' + +# You can add fixtures and/or initialization code here to make experimenting +# with your gem easier. You can also use a different console, if you like. + +# (If you use this, don't forget to add pry to your Gemfile!) 
+# require "pry" +# Pry.start + +require 'irb' +IRB.start diff --git a/bin/setup b/bin/setup new file mode 100755 index 0000000..dce67d8 --- /dev/null +++ b/bin/setup @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +IFS=$'\n\t' +set -vx + +bundle install + +# Do any other automated setup that you need to do here diff --git a/breakers.gemspec b/breakers.gemspec new file mode 100644 index 0000000..cce0b4b --- /dev/null +++ b/breakers.gemspec @@ -0,0 +1,45 @@ +# coding: utf-8 +lib = File.expand_path('../lib', __FILE__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'breakers/version' + +Gem::Specification.new do |spec| + spec.name = 'breakers' + spec.version = Breakers::VERSION + spec.authors = ['Aubrey Holland'] + spec.email = ['aubrey@adhocteam.us'] + + spec.summary = 'Handle outages to backend systems with a Faraday middleware' + spec.description = 'This is a Faraday middleware that detects backend outages and reacts to them' + spec.homepage = 'https://github.com/department-of-veterans-affairs/breakers' + spec.license = 'CC0 1.0 Universal' + + # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host' + # to allow pushing to a single host or delete this section to allow pushing to any host. + if spec.respond_to?(:metadata) + spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'" + else + raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.' 
+ end + + spec.files = `git ls-files -z`.split("\x0").reject do |f| + f.match(%r{^(test|spec|features)/}) + end + spec.bindir = 'exe' + spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.require_paths = ['lib'] + + spec.add_dependency 'faraday' + spec.add_dependency 'multi_json' + spec.add_dependency 'sinatra' + + spec.add_development_dependency 'bundler' + spec.add_development_dependency 'byebug' + spec.add_development_dependency 'fakeredis' + spec.add_development_dependency 'rake' + spec.add_development_dependency 'rspec' + spec.add_development_dependency 'rubocop' + spec.add_development_dependency 'simplecov' + spec.add_development_dependency 'timecop' + spec.add_development_dependency 'webmock' +end diff --git a/lib/breakers.rb b/lib/breakers.rb new file mode 100644 index 0000000..7d160b5 --- /dev/null +++ b/lib/breakers.rb @@ -0,0 +1,29 @@ +require 'breakers/client' +require 'breakers/outage' +require 'breakers/service' +require 'breakers/uptime_middleware' +require 'breakers/version' + +require 'faraday' + +module Breakers + Faraday::Middleware.register_middleware(breakers: lambda { UptimeMiddleware }) + + # rubocop:disable Style/AccessorMethodName + def self.set_client(client) + @client = client + end + # rubocop:enable Style/AccessorMethodName + + def self.client + @client + end + + def self.redis_prefix=(prefix) + @redis_prefix = prefix + end + + def self.redis_prefix + @redis_prefix || 'brk-' + end +end diff --git a/lib/breakers/client.rb b/lib/breakers/client.rb new file mode 100644 index 0000000..9fb9690 --- /dev/null +++ b/lib/breakers/client.rb @@ -0,0 +1,21 @@ +module Breakers + class Client + attr_reader :services + attr_reader :plugins + attr_reader :redis_connection + attr_reader :logger + + def initialize(redis_connection:, services:, plugins: nil, logger: nil) + @redis_connection = redis_connection + @services = Array(services) + @plugins = Array(plugins) + @logger = logger + end + + def 
service_for_request(request_env:) + @services.find do |service| + service.handles_request?(request_env) + end + end + end +end diff --git a/lib/breakers/outage.rb b/lib/breakers/outage.rb new file mode 100644 index 0000000..30d05a2 --- /dev/null +++ b/lib/breakers/outage.rb @@ -0,0 +1,93 @@ +require 'multi_json' + +module Breakers + class Outage + attr_reader :service + attr_reader :body + + def self.find_last(service:) + data = Breakers.client.redis_connection.zrange(outages_key(service: service), -1, -1)[0] + data && new(service: service, data: data) + end + + def self.in_range(service:, start_time:, end_time:) + data = Breakers.client.redis_connection.zrangebyscore( + outages_key(service: service), + start_time.to_i, + end_time.to_i + ) + data.map { |item| new(service: service, data: item) } + end + + def self.create(service:) + data = MultiJson.dump(start_time: Time.now.utc.to_i) + Breakers.client.redis_connection.zadd(outages_key(service: service), Time.now.utc.to_i, data) + + Breakers.client.logger&.error(msg: 'Breakers outage beginning', service: service.name) + + Breakers.client.plugins.each do |plugin| + plugin.on_outage_begin(Outage.new(service: service, data: data)) if plugin.respond_to?(:on_outage_begin) + end + end + + def self.outages_key(service:) + "#{Breakers.redis_prefix}#{service.name}-outages" + end + + def initialize(service:, data:) + @body = MultiJson.load(data) + @service = service + end + + def ended? + @body.key?('end_time') + end + + def end! 
+ new_body = @body.dup + new_body['end_time'] = Time.now.utc.to_i + replace_body(body: new_body) + + Breakers.client.logger&.info(msg: 'Breakers outage ending', service: @service.name) + Breakers.client.plugins.each do |plugin| + plugin.on_outage_end(self) if plugin.respond_to?(:on_outage_end) + end + end + + def start_time + @body['start_time'] && Time.at(@body['start_time']).utc + end + + def end_time + @body['end_time'] && Time.at(@body['end_time']).utc + end + + def last_test_time + (@body['last_test_time'] && Time.at(@body['last_test_time']).utc) || start_time + end + + def update_last_test_time! + new_body = @body.dup + new_body['last_test_time'] = Time.now.utc.to_i + replace_body(body: new_body) + end + + def ready_for_retest?(wait_seconds:) + (Time.now.utc - last_test_time) > wait_seconds + end + + protected + + def key + "#{Breakers.redis_prefix}#{@service.name}-outages" + end + + def replace_body(body:) + Breakers.client.redis_connection.multi do + Breakers.client.redis_connection.zrem(key, MultiJson.dump(@body)) + Breakers.client.redis_connection.zadd(key, start_time.to_i, MultiJson.dump(body)) + end + @body = body + end + end +end diff --git a/lib/breakers/service.rb b/lib/breakers/service.rb new file mode 100644 index 0000000..714966a --- /dev/null +++ b/lib/breakers/service.rb @@ -0,0 +1,116 @@ +module Breakers + class Service + DEFAULT_OPTS = { + seconds_before_retry: 60, + error_threshold: 50, + data_retention_seconds: 60 * 60 * 24 * 30 + }.freeze + + def initialize(opts) + @configuration = DEFAULT_OPTS.merge(opts) + end + + def name + @configuration[:name] + end + + def handles_request?(request_env) + @configuration[:request_matcher].call(request_env) + end + + def seconds_before_retry + @configuration[:seconds_before_retry] + end + + def add_error + increment_key(key: errors_key) + maybe_create_outage + end + + def add_success + increment_key(key: successes_key) + end + + def last_outage + Outage.find_last(service: self) + end + + def
outages_in_range(start_time:, end_time:) + Outage.in_range( + service: self, + start_time: start_time, + end_time: end_time + ) + end + + def successes_in_range(start_time:, end_time:, sample_seconds: 3600) + values_in_range(start_time: start_time, end_time: end_time, type: :successes, sample_seconds: sample_seconds) + end + + def errors_in_range(start_time:, end_time:, sample_seconds: 3600) + values_in_range(start_time: start_time, end_time: end_time, type: :errors, sample_seconds: sample_seconds) + end + + protected + + def errors_key(time: nil) + "#{Breakers.redis_prefix}#{name}-errors-#{align_time_on_minute(time: time).to_i}" + end + + def successes_key(time: nil) + "#{Breakers.redis_prefix}#{name}-successes-#{align_time_on_minute(time: time).to_i}" + end + + def values_in_range(start_time:, end_time:, type:, sample_seconds:) + start_time = align_time_on_minute(time: start_time) + end_time = align_time_on_minute(time: end_time) + keys = [] + times = [] + while start_time <= end_time + times << start_time + if type == :errors + keys << errors_key(time: start_time) + elsif type == :successes + keys << successes_key(time: start_time) + end + start_time += sample_seconds + end + Breakers.client.redis_connection.mget(keys).each_with_index.map do |value, idx| + { count: value.to_i, time: times[idx] } + end + end + + def increment_key(key:) + Breakers.client.redis_connection.multi do + Breakers.client.redis_connection.incr(key) + Breakers.client.redis_connection.expire(key, @configuration[:data_retention_seconds]) + end + end + + # Take the current or given time and round it down to the nearest minute + def align_time_on_minute(time: nil) + time = (time || Time.now.utc).to_i + time - (time % 60) + end + + def maybe_create_outage + data = Breakers.client.redis_connection.multi do + Breakers.client.redis_connection.get(errors_key(time: Time.now.utc)) + Breakers.client.redis_connection.get(errors_key(time: Time.now.utc - 60)) + 
Breakers.client.redis_connection.get(successes_key(time: Time.now.utc)) + Breakers.client.redis_connection.get(successes_key(time: Time.now.utc - 60)) + end + failure_count = data[0].to_i + data[1].to_i + success_count = data[2].to_i + data[3].to_i + + if failure_count > 0 && success_count == 0 + Outage.create(service: self) + else + failure_rate = failure_count / (failure_count + success_count).to_f + if failure_rate >= @configuration[:error_threshold] / 100.0 + Outage.create(service: self) + end + end + end + end +end diff --git a/lib/breakers/uptime_middleware.rb b/lib/breakers/uptime_middleware.rb new file mode 100644 index 0000000..78fdf21 --- /dev/null +++ b/lib/breakers/uptime_middleware.rb @@ -0,0 +1,87 @@ +require 'faraday' +require 'multi_json' + +module Breakers + class UptimeMiddleware < Faraday::Middleware + def initialize(app) + super(app) + end + + def call(request_env) + service = Breakers.client.service_for_request(request_env: request_env) + + if !service + return @app.call(request_env) + end + + last_outage = service.last_outage + + if last_outage && !last_outage.ended? 
+ if last_outage.ready_for_retest?(wait_seconds: service.seconds_before_retry) + handle_request(service: service, request_env: request_env, current_outage: last_outage) + else + outage_response(outage: last_outage, service: service) + end + else + handle_request(service: service, request_env: request_env) + end + end + + protected + + def outage_response(outage:, service:) + Faraday::Response.new.tap do |response| + response.finish( + status: 503, + body: "Outage detected on #{service.name} beginning at #{outage.start_time.to_i}", + response_headers: {} + ) + end + end + + def handle_request(service:, request_env:, current_outage: nil) + return @app.call(request_env).on_complete do |response_env| + if response_env.status >= 500 + handle_error( + service: service, + request_env: request_env, + response_env: response_env, + error: response_env.status, + current_outage: current_outage + ) + else + service.add_success + current_outage&.end! + + Breakers.client.plugins.each do |plugin| + plugin.on_success(service, request_env, response_env) if plugin.respond_to?(:on_success) + end + end + end + rescue => e + handle_error( + service: service, + request_env: request_env, + response_env: nil, + error: "#{e.class.name} - #{e.message}", + current_outage: current_outage + ) + raise + end + + def handle_error(service:, request_env:, response_env:, error:, current_outage: nil) + service.add_error + current_outage&.update_last_test_time! 
+ + Breakers.client.logger&.warn( + msg: 'Breakers failed request', + service: service.name, + url: request_env.url.to_s, + error: error + ) + Breakers.client.plugins.each do |plugin| + plugin.on_error(service, request_env, response_env) if plugin.respond_to?(:on_error) + end + end + end +end diff --git a/lib/breakers/version.rb b/lib/breakers/version.rb new file mode 100644 index 0000000..87e91c2 --- /dev/null +++ b/lib/breakers/version.rb @@ -0,0 +1,3 @@ +module Breakers + VERSION = '0.1.0'.freeze +end diff --git a/spec/breakers_spec.rb b/spec/breakers_spec.rb new file mode 100644 index 0000000..fd988c6 --- /dev/null +++ b/spec/breakers_spec.rb @@ -0,0 +1,7 @@ +require 'spec_helper' + +describe Breakers do + it 'has a version number' do + expect(Breakers::VERSION).not_to be nil + end +end diff --git a/spec/example_plugin.rb b/spec/example_plugin.rb new file mode 100644 index 0000000..5f5e7db --- /dev/null +++ b/spec/example_plugin.rb @@ -0,0 +1,13 @@ +class ExamplePlugin + def on_outage_begin(outage) + end + + def on_outage_end(outage) + end + + def on_error(service, request_env, response_env) + end + + def on_success(service, request_env, response_env) + end +end diff --git a/spec/integration_spec.rb b/spec/integration_spec.rb new file mode 100644 index 0000000..91d7fa0 --- /dev/null +++ b/spec/integration_spec.rb @@ -0,0 +1,342 @@ +require 'logger' +require 'spec_helper' + +describe 'integration suite' do + let(:redis) { Redis.new } + let(:service) do + Breakers::Service.new( + name: 'VA', + request_matcher: proc { |request_env| request_env.url.host =~ /.*va.gov/ }, + seconds_before_retry: 60, + error_threshold: 50 + ) + end + let(:logger) { Logger.new(nil) } + let(:plugin) { ExamplePlugin.new } + let(:client) do + Breakers::Client.new( + redis_connection: redis, + services: [service], + logger: logger, + plugins: [plugin] + ) + end + let(:connection) do + Faraday.new('http://va.gov') do |conn| + conn.use :breakers + conn.adapter Faraday.default_adapter + end + 
end + + before do + Breakers.set_client(client) + end + + context 'with a 500' do + let(:now) { Time.now.utc } + + before do + Timecop.freeze(now) + stub_request(:get, 'va.gov').to_return(status: 500) + end + + it 'adds a failure to redis' do + connection.get '/' + rounded_time = now.to_i - (now.to_i % 60) + expect(redis.get("brk-VA-errors-#{rounded_time.to_i}").to_i).to eq(1) + end + + it 'creates an outage' do + connection.get '/' + expect(service.last_outage).to be + end + + it 'logs the error' do + expect(logger).to receive(:warn).with( + msg: 'Breakers failed request', service: 'VA', url: 'http://va.gov/', error: 500 + ) + connection.get '/' + end + + it 'tells plugins about the error' do + expect(plugin).to receive(:on_error).with(service, instance_of(Faraday::Env), instance_of(Faraday::Env)) + connection.get '/' + end + + it 'logs the outage' do + expect(logger).to receive(:error).with(msg: 'Breakers outage beginning', service: 'VA') + connection.get '/' + end + + it 'tells plugins about the outage' do + expect(plugin).to receive(:on_outage_begin).with(instance_of(Breakers::Outage)) + connection.get '/' + end + + it 'lets me query for errors in a time range' do + connection.get '/' + counts = service.errors_in_range(start_time: now - 120, end_time: now, sample_seconds: 60) + count = counts.map { |c| c[:count] }.inject(0) { |a, b| a + b } + expect(count).to eq(1) + end + end + + context 'with a timeout' do + let(:now) { Time.now.utc } + + before do + Timecop.freeze(now) + stub_request(:get, 'va.gov').to_timeout + end + + it 'adds a failure to redis' do + begin + connection.get '/' + rescue Faraday::TimeoutError + end + rounded_time = now.to_i - (now.to_i % 60) + expect(redis.get("brk-VA-errors-#{rounded_time.to_i}").to_i).to eq(1) + end + + it 'raises the exception' do + expect { connection.get '/' }.to raise_error(Faraday::TimeoutError) + end + + it 'logs the error' do + expect(logger).to receive(:warn).with( + msg: 'Breakers failed request', service: 'VA', 
url: 'http://va.gov/', error: 'Faraday::TimeoutError - execution expired' + ) + + begin + connection.get '/' + rescue Faraday::TimeoutError + end + end + + it 'tells plugins about the timeout' do + expect(plugin).to receive(:on_error).with(service, instance_of(Faraday::Env), nil) + begin + connection.get '/' + rescue Faraday::TimeoutError + end + end + end + + context 'with some other error' do + let(:now) { Time.now.utc } + + before do + Timecop.freeze(now) + stub_request(:get, 'va.gov').to_raise('bogus error') + end + + it 'adds a failure to redis' do + begin + connection.get '/' + rescue + end + rounded_time = now.to_i - (now.to_i % 60) + expect(redis.get("brk-VA-errors-#{rounded_time}").to_i).to eq(1) + end + + it 'raises the exception' do + expect { connection.get '/' }.to raise_error(StandardError) + end + + it 'logs the error' do + expect(logger).to receive(:warn).with( + msg: 'Breakers failed request', service: 'VA', url: 'http://va.gov/', error: 'StandardError - bogus error' + ) + + begin + connection.get '/' + rescue + end + end + + it 'tells plugins about the error' do + expect(plugin).to receive(:on_error).with(service, instance_of(Faraday::Env), nil) + begin + connection.get '/' + rescue + end + end + end + + context 'there is an outage that started less than a minute ago' do + let(:start_time) { Time.now.utc - 30 } + let(:now) { Time.now.utc } + before do + Timecop.freeze(now) + redis.zadd('brk-VA-outages', start_time.to_i, MultiJson.dump(start_time: start_time.to_i)) + end + + it 'should return a 503' do + response = connection.get '/' + expect(response.status).to eq(503) + end + + it 'should include information about the outage in the body' do + response = connection.get '/' + expect(response.body).to eq("Outage detected on VA beginning at #{start_time.to_i}") + end + end + + context 'there is a completed outage' do + let(:start_time) { Time.now.utc - (60 * 60) } + let(:end_time) { Time.now.utc - 60 } + let(:now_time) { Time.now.utc } + 
before do + Timecop.freeze(now_time) + redis.zadd('brk-VA-outages', start_time.to_i, MultiJson.dump(start_time: start_time.to_i, end_time: end_time.to_i)) + stub_request(:get, 'va.gov').to_return(status: 200) + end + + it 'makes the request' do + response = connection.get '/' + expect(response.status).to eq(200) + end + + it 'adds a success to redis' do + connection.get '/' + rounded_time = now_time.to_i - (now_time.to_i % 60) + count = redis.get("brk-VA-successes-#{rounded_time}") + expect(count).to eq('1') + end + + it 'informs the plugin about the success' do + expect(plugin).to receive(:on_success).with(service, instance_of(Faraday::Env), instance_of(Faraday::Env)) + connection.get '/' + end + end + + context 'there is an outage that started over a minute ago' do + let(:start_time) { Time.now.utc - 120 } + let(:now) { Time.now.utc } + before do + Timecop.freeze(now) + redis.zadd('brk-VA-outages', start_time.to_i, MultiJson.dump(start_time: start_time.to_i)) + end + + it 'lets me query for the outage by time range' do + outages = service.outages_in_range(start_time: start_time, end_time: now) + expect(outages.count).to eq(1) + expect(outages.first.start_time.to_i).to eq(start_time.to_i) + expect(outages.first.end_time).to be_nil + end + + context 'and the new request is successful' do + before do + stub_request(:get, 'va.gov').to_return(status: 200, body: 'abcdef') + end + + it 'should make the request' do + connection.get '/' + expect(WebMock).to have_requested(:get, 'va.gov') + end + + it 'returns the data from the response' do + response = connection.get '/' + expect(response.body).to eq('abcdef') + expect(response.status).to eq(200) + end + + it 'calls off the outage' do + connection.get '/' + expect(service.last_outage).to be_ended + end + + it 'logs the end of the outage' do + expect(logger).to receive(:info).with(msg: 'Breakers outage ending', service: 'VA') + connection.get '/' + end + + it 'tells the plugin about the end of the outage' do + expect(plugin).to 
receive(:on_outage_end).with(instance_of(Breakers::Outage)) + connection.get '/' + end + + it 'records the end time in the outage' do + connection.get '/' + expect(service.last_outage.end_time.to_i).to eq(now.to_i) + end + end + + context 'and the new request is not successful' do + before do + stub_request(:get, 'va.gov').to_return(status: 500, body: 'abcdef') + end + + it 'should make the request' do + connection.get '/' + expect(WebMock).to have_requested(:get, 'va.gov') + end + + it 'returns a 500' do + response = connection.get '/' + expect(response.status).to eq(500) + end + + it 'updates the last_test_time in the outage' do + connection.get '/' + expect(service.last_outage.last_test_time.to_i).to eq(now.to_i) + end + + it 'gets a 503 when making another request' do + connection.get '/' + response = connection.get '/' + expect(response.status).to eq(503) + end + end + end + + context 'on a request to a non-service' do + before do + stub_request(:get, 'http://whitehouse.gov').to_return(status: 200, body: 'POTUS') + end + + it 'returns the status and body from the response' do + response = connection.get('http://whitehouse.gov') + expect(response.status).to eq(200) + expect(response.body).to eq('POTUS') + end + end + + context 'with a bunch of successes over the last few minutes' do + let(:now) { Time.now.utc } + + before do + Timecop.freeze(now - 90) + stub_request(:get, 'va.gov').to_return(status: 200, body: 'abcdef') + 60.times { connection.get '/' } + + Timecop.freeze(now - 30) + stub_request(:get, 'va.gov').to_return(status: 200, body: 'abcdef') + 40.times { connection.get '/' } + end + + it 'does not record an outage on a single failure' do + stub_request(:get, 'va.gov').to_return(status: 500) + connection.get '/' + expect(service.last_outage).not_to be + end + + it 'does not record an outage after 99 errors' do + stub_request(:get, 'va.gov').to_return(status: 500) + 99.times { connection.get '/' } + expect(service.last_outage).not_to be + end + + it 
'records an outage after 100 errors' do + stub_request(:get, 'va.gov').to_return(status: 500) + 100.times { connection.get '/' } + expect(service.last_outage).to be + end + + it 'lets me query for successes in a time range' do + counts = service.successes_in_range(start_time: now - 120, end_time: now, sample_seconds: 60) + count = counts.map { |c| c[:count] }.inject(0) { |a, b| a + b } + expect(count).to eq(100) + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 0000000..1a2d765 --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,19 @@ +$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) + +require 'byebug' +require 'fakeredis/rspec' +require 'faraday' +require 'rspec' +require 'simplecov' +require 'timecop' +require 'webmock/rspec' + +SimpleCov.start do + minimum_coverage 95 +end + +WebMock.disable_net_connect!(allow: '127.0.0.1') + +require 'breakers' + +require_relative 'example_plugin'