diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..2d53567 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +# Specify your gem's dependencies in histogram.gemspec +gemspec diff --git a/README.rdoc b/README.rdoc index ce8b877..371d7d8 100644 --- a/README.rdoc +++ b/README.rdoc @@ -16,7 +16,7 @@ and the wikipedia {histogram article}[http://en.wikipedia.org/wiki/Histogram]. # by default, uses Freedman-Diaconis method to calculate optimal number of bins # and the bin values are midpoints between the bin edges (bins, freqs) = data.histogram - # equivalent to: data.histogram(:fd, :tp => :avg) + # equivalent to: data.histogram(:fd, :bin_boundary => :avg) === Multiple types of binning behavior: @@ -26,7 +26,7 @@ and the wikipedia {histogram article}[http://en.wikipedia.org/wiki/Histogram]. (bins, freqs) = data.histogram([-3,-1,4,5,6]) # custom bins # bins are midpoints, but can be set as minima - (bins, freqs) = data.histogram([-3,-1,4,5,6], :tp => :min) # custom bins with :min + (bins, freqs) = data.histogram([-3,-1,4,5,6], :bin_boundary => :min) # custom bins with :min # can also set the bin_width (which interpolates between the min and max of the set) (bins, freqs) = data.histogram(:bin_width => 0.5) @@ -37,13 +37,12 @@ Sometimes, we want to create histograms where the bins are calculated based on all the data sets. That way, the resulting frequencies will all line up: # returns [bins, freq1, freq2 ...] 
- (bins, *freqs) = data.histogram(30, :other_sets => [[3,3,4,4,5], [-1,0,0,3,3,6]]) + (bins, *freqs) = set1.histogram(30, :other_sets => [[3,3,4,4,5], [-1,0,0,3,3,6]]) === Histograms with weights/fractions: - # histogramming with weights (uses the second array for weights) - w_heights = [data, [3,3,8,8,9,9,3,3,3,3]] - w_heights.histogram(20) + # histogramming with weights + data.histogram(20, :weights => [3,3,8,8,9,9,3,3,3,3]) === Works great with {NArray}[http://narray.rubyforge.org/], too: diff --git a/histogram.gemspec b/histogram.gemspec index 22f281f..588caec 100644 --- a/histogram.gemspec +++ b/histogram.gemspec @@ -23,6 +23,7 @@ Gem::Specification.new do |spec| "rake ~> 10.1.0", "simplecov ~> 0.7.1", "rspec ~> 2.13.0", + "narray", ].each do |argline| spec.add_development_dependency *argline.split(' ', 2).compact end diff --git a/lib/histogram.rb b/lib/histogram.rb index 1d0305d..8bce6d1 100644 --- a/lib/histogram.rb +++ b/lib/histogram.rb @@ -117,9 +117,8 @@ def number_bins(methd=:fd) # (bins, *freqs) = ar.histogram(30, :bin_boundary => :avg, :other_sets => [3,3,4,4,5], [-1,0,0,3,3,6]) # (ar_freqs, other1, other2) = freqs # - # # histogramming with heights (uses the second array for heights) - # w_heights = [ar, [3,3,8,8,9,9,3,3,3,3]] - # w_heights.histogram(20) + # # histogramming with weights + # ar.histogram(20, :weights => [3,3,8,8,9,9,3,3,3,3]) # # # with NArray # require 'histogram/narray' @@ -140,10 +139,8 @@ def number_bins(methd=:fd) # It is useful if you just want a certain number of bins and for the sets # to share the exact same bins. In this case returns [bins, freqs(caller), # freqs1, freqs2 ...] - # * Can also deal with parallel arrays where the first array is the x values - # to histogram and the next array is the y values (or intensities) to be - # applied in the histogram. (checks for !first_value.is_a?(Numeric)) - # * Return value + # * Can also deal with weights. 
:weights should provide parallel arrays to + # the caller and any :other_sets provided. def histogram(*args) make_freqs_proc = lambda do |obj, len| if obj.is_a?(Array) @@ -186,20 +183,24 @@ def histogram(*args) bins = number_bins(bins) end - have_frac_freqs = !self[0].is_a?(Numeric) + weights = + if opts[:weights] + have_frac_freqs = true + opts[:weights][0].is_a?(Numeric) ? [ opts[:weights] ] : opts[:weights] + else + [] + end # we need to know the limits of the bins if we need to define our own bins if opts[:bin_width] || !bins_array_like - (xvals, yvals) = have_frac_freqs ? [self[0], self[1]] : [self, nil] - _min = opts[:min] || xvals.min - _max = opts[:max] || xvals.max - other_sets.each do |vec| - (xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil] - v_min = opts[:min] || xvals.min - v_max = opts[:max] || xvals.max - if v_min < _min ; _min = v_min end - if v_max > _max ; _max = v_max end - end + calc_min, calc_max = + unless opts[:min] && opts[:max] + (mins, maxs) = all.map(&:minmax).transpose + [mins.min, maxs.max] + end + _min = opts[:min] || calc_min + _max = opts[:max] || calc_max + if opts[:bin_width] bins = [] _min.step(_max, opts[:bin_width]) {|v| bins << v } @@ -220,9 +221,7 @@ def histogram(*args) end case bin_boundary when :avg - freqs_ar = all.map do |vec| - - (xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil] + freqs_ar = all.zip(weights).map do |xvals, yvals| _freqs = make_freqs_proc.call(xvals, bins.size) @@ -251,9 +250,7 @@ def histogram(*args) _freqs end when :min - freqs_ar = all.map do |vec| - - (xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil] + freqs_ar = all.zip(weights).map do |xvals, yvals| #_freqs = VecI.new(bins.size, 0) _freqs = make_freqs_proc.call(xvals, bins.size) @@ -290,9 +287,7 @@ def histogram(*args) NArray.float(bins) end - freqs_ar = all.map do |vec| - - (xvals, yvals) = have_frac_freqs ? 
[vec[0], vec[1]] : [vec, nil] + freqs_ar = all.zip(weights).map do |xvals, yvals| # initialize arrays _freqs = make_freqs_proc.call(xvals, bins) diff --git a/spec/histogram_spec.rb b/spec/histogram_spec.rb index f95c0f4..0664354 100644 --- a/spec/histogram_spec.rb +++ b/spec/histogram_spec.rb @@ -2,141 +2,133 @@ require 'histogram' -class Array - def to_f - self.map {|v| v.to_f } - end - - def round(n=nil) - self.map {|v| v.to_f.round(n) } +RSpec::Matchers.define :be_within_rounding_error_of do |expected| + match do |actual| + (act, exp) = [actual, expected].map {|ar| ar.map {|v| v.to_f.round(8) } } + act.should == exp end end shared_examples 'something that can histogram' do it 'makes histograms with the specified number of bins' do (bins, freqs) = obj0.histogram(5) - bins.should be_a(obj0.class) - freqs.should be_a(obj0.class) - bins.round(8).should == [1,3,5,7,9].round(8) - freqs.round(8).should == [2,2,2,2,3].round(8) + [bins, freqs].each {|ar| ar.should be_a(obj0.class) } + [bins,freqs].zip( [ [1,3,5,7,9], [2,2,2,2,3] ] ).each do |ar, exp| + ar.should be_within_rounding_error_of exp + end end it 'returns bins as the min boundary if given that option' do (bins, freqs) = obj0.histogram(5, :bin_boundary => :min) - bins.round(8).should == [0,2,4,6,8].round(8) - freqs.round(8).should == [2,2,2,2,3].round(8) + [bins, freqs].zip( [ [0,2,4,6,8], [2,2,2,2,3] ] ) do |ar, exp| + ar.should be_within_rounding_error_of exp + end end it 'makes histograms when given the bins' do - bins, freqs = obj1.histogram([1,3,5,7,9], :bin_boundary => :avg) - bins.round(8).should == [1,3,5,7,9].round(8) - freqs.round(8).should == [3,1,1,2,3].round(8) + bins, freqs = obj1.histogram([1,3,5,7,9]) + [bins, freqs].zip( [ [1,3,5,7,9], [3,1,1,2,3] ] ) do |ar, exp| + ar.should be_within_rounding_error_of exp + end end it 'interprets bins as the min boundary when given the bin_boundary option' do bins, freqs = obj2.histogram([1,3,5,7,9], :bin_boundary => :min) - bins.round(8).should == 
[1,3,5,7,9].round(8) - freqs.round(8).should == [3,0,2,2,3].round(8) + [bins, freqs].zip( [ [1,3,5,7,9], [3,0,2,2,3] ] ) do |ar, exp| + ar.should be_within_rounding_error_of exp + end end -# it 'can histogram multiple sets' do - #(bins, freq1, freq2, freq3) = @obj4.histogram([1,2,3,4], :tp => :avg, :other_sets => [@obj5, @obj5]) - #bins.enums [1,2,3,4].to_f - #freq1.enums [2.0, 2.0, 2.0, 3.0] - #freq2.enums [0.0, 5.0, 0.0, 1.0] - #freq3.enums freq2 - #end + it 'can histogram multiple sets' do + (bins, freq1, freq2, freq3) = obj3.histogram([1,2,3,4], :other_sets => [obj4, obj4]) + bins.should be_within_rounding_error_of [1,2,3,4] + freq1.should be_within_rounding_error_of [2.0, 2.0, 2.0, 3.0] + freq2.should be_within_rounding_error_of [0.0, 5.0, 0.0, 1.0] + freq3.should be_within_rounding_error_of freq2 + end + + it 'works with a given min val' do + (bins, freqs) = obj5.histogram(4, :min => 2, :bin_boundary => :min) + [bins, freqs].zip( [ [2.0, 3.5, 5.0, 6.5], [4.0, 1.0, 2.0, 3.0] ] ) do |ar, exp| + ar.should be_within_rounding_error_of exp + end + end + + it 'works with a given max val' do + (bins, freqs) = obj5.histogram(4, :max => 7, :bin_boundary => :min) + [bins, freqs].zip( [ [1.0, 2.5, 4.0, 5.5] ,[2.0, 3.0, 2.0, 3.0] ] ) do |ar, exp| + ar.should be_within_rounding_error_of exp + end + end + + it 'works with given min/max vals' do + (bins, freqs) = obj5.histogram(4, :min => 2, :max => 7, :bin_boundary => :min) + [bins, freqs].zip( [ [2.0, 3.25, 4.5, 5.75], [4.0, 1.0, 1.0, 4.0] ] ) do |ar, exp| + ar.should be_within_rounding_error_of exp + end + end + + it 'can use equal weights' do + weights = Array.new(obj1.size, 3) + bins, freqs = obj1.histogram([1,3,5,7,9], :weights => weights) + [bins, freqs].zip( [ [1,3,5,7,9], [3,1,1,2,3].map {|v| v * 3} ] ) do |ar, exp| + ar.should be_within_rounding_error_of exp + end + end + + it 'can use unequal weights' do + weights = [10, 0, 0, 0, 50, 0, 0, 0, 0.2, 0.2] + (bins, freqs) = obj1.histogram([1,3,5,7,9], :weights => 
weights) + [bins, freqs].zip( [ [1,3,5,7,9], [10, 0, 50, 0, 0.4] ] ) do |ar, exp| + ar.should be_within_rounding_error_of exp + end + end end describe Histogram do - let(:data) do - [ (0..10).to_a, - [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9], - [-1, 0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9, 10], - ].to_f + tmp = { + obj0: (0..10).to_a, + obj1: [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9], + obj2: [-1, 0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9, 10], + obj3: [1, 1, 2, 2, 3, 3, 4, 4, 4], + obj4: [2, 2, 2, 2, 2, 4], + obj5: [1,2,3,3,3,4,5,6,7,8], + } + data = tmp.each {|k,v| [k, v.map(&:to_f).extend(Histogram)] } + + let(:data) { data } + + data.each do |obj, ar| + let(obj) { ar.map(&:to_f).extend(Histogram) } end describe Array do - it_behaves_like 'something that can histogram' do - [:obj0, :obj1, :obj2].each_with_index do |obj,i| - let(obj) { data[i].dup.extend(Histogram) } + it_behaves_like 'something that can histogram' + end + + begin + describe NArray do + data.each do |obj, ar| + let(obj) { NArray.to_na(ar).to_f.extend(Histogram) } end + it_behaves_like 'something that can histogram' end + rescue + puts "" + puts "YOU NEED NArray installed to run NArray tests!" 
+ puts "" end -end + describe 'calculating bins' do + it 'calculates :sturges, :scott, :fd, or :middle' do + answers = [6,3,4,4] + [:sturges, :scott, :fd, :middle].zip(answers) do |mth, answ| + ar = [0,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,5,5,9,9,10,20,15,15,15,16,17].extend(Histogram) + # these are merely frozen, not checked to see if correct + ar.number_bins(mth).should == answ + end + end + end +end - #it 'can take height values' do - #obj2 = [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9] - #heights = Array.new(obj2.size, 3) - #obj = [obj2, heights] - #bins, freqs = obj.histogram([1,3,5,7,9], :tp => :avg) - #bins.enums [1,3,5,7,9].to_f - #freqs.enums [3,1,1,2,3].map {|v| v * 3} - - #obj2 = [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9] - #heights = [10, 0, 0, 0, 50, 0, 0, 0, 0.2, 0.2] - #obj = [obj2, heights] - #(bins, freqs) = obj.histogram([1,3,5,7,9], :tp => :avg) - #bins.enums [1,3,5,7,9].to_f - #freqs.enums [10, 0, 50, 0, 0.4] - #end - - #it 'works with given min and max vals' do - #[1,2,3,3,3,4,5,6,7,8].histogram(4, :min => 2, :tp => :min).first.first.is 2.0 - #[1,2,3,3,3,4,5,6,7,8].histogram(4, :max => 7, :tp => :min).first.last.is 5.5 # since the bin-width is 1.5 - #bs = [1,2,3,3,3,4,5,6,7,8].histogram(4, :min => 2, :max => 7, :tp => :min) - #bs.first.first.is 2.0 - #bs.first.last.is 5.75 # bin-width of 1.25 - #end - - - - - - -#TestArrays = [[0,1,2,3,4,5,6,7,8,9,10], [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9], - #[-1, 0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9, 10], [1, 1, 2, 2, 3, 3, 4, 4, 4], - #[2, 2, 2, 2, 2, 4]] - -#require 'histogram/array' -#class LilClass < Array - #include Histogram -#end - -#describe 'calculating bins' do - #it 'calculates :sturges, :scott, :fd, or :middle' do - #answers = [6,3,4,4] - #[:sturges, :scott, :fd, :middle].zip(answers) do |mth, answ| - #ar = LilClass.new([0,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,5,5,9,9,10,20,15,15,15,16,17]) - ## these are merely frozen, not checked to see if correct - #ar.number_bins(mth).is answ - #end - #end -#end - -#describe 
'histogramming an Array' do - #before do - #TestArrays.each_with_index do |ar,i| - #instance_variable_set("@obj#{i+1}", ar) - #end - #end - #behaves_like 'a histogram' -#end - -#begin - #require 'histogram/narray' - #describe 'histogramming an NArray' do - #before do - #TestArrays.each_with_index do |ar,i| - #instance_variable_set("@obj#{i+1}", NArray.to_na(ar).to_f) - #end - #end - #behaves_like 'a histogram' - #end -#rescue LoadError - #puts "" - #puts "YOU NEED NArray installed to run NArray tests!" - #puts "" -#end