Skip to content

Commit

Permalink
working on getting narray working again
Browse files Browse the repository at this point in the history
  • Loading branch information
jtprince committed Aug 27, 2013
1 parent b63bb5b commit 3c18391
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 140 deletions.
4 changes: 4 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
source 'https://rubygems.org'

# Specify your gem's dependencies in histogram.gemspec
gemspec
11 changes: 5 additions & 6 deletions README.rdoc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ and the wikipedia {histogram article}[http://en.wikipedia.org/wiki/Histogram].
# by default, uses Freedman-Diaconis method to calculate optimal number of bins
# and the bin values are midpoints between the bin edges
(bins, freqs) = data.histogram
# equivalent to: data.histogram(:fd, :tp => :avg)
# equivalent to: data.histogram(:fd, :bin_boundary => :avg)

=== Multiple types of binning behavior:

Expand All @@ -26,7 +26,7 @@ and the wikipedia {histogram article}[http://en.wikipedia.org/wiki/Histogram].
(bins, freqs) = data.histogram([-3,-1,4,5,6]) # custom bins

# bins are midpoints, but can be set as minima
(bins, freqs) = data.histogram([-3,-1,4,5,6], :tp => :min) # custom bins with :min
(bins, freqs) = data.histogram([-3,-1,4,5,6], :bin_boundary => :min) # custom bins with :min

# can also set the bin_width (which interpolates between the min and max of the set)
(bins, freqs) = data.histogram(:bin_width => 0.5)
Expand All @@ -37,13 +37,12 @@ Sometimes, we want to create histograms where the bins are calculated based on
all the data sets. That way, the resulting frequencies will all line up:

# returns [bins, freq1, freq2 ...]
(bins, *freqs) = data.histogram(30, :other_sets => [[3,3,4,4,5], [-1,0,0,3,3,6]])
(bins, *freqs) = set1.histogram(30, :other_sets => [[3,3,4,4,5], [-1,0,0,3,3,6]])

=== Histograms with weights/fractions:

# histogramming with weights (uses the second array for weights)
w_heights = [data, [3,3,8,8,9,9,3,3,3,3]]
w_heights.histogram(20)
# histogramming with weights
data.histogram(20, :weights => [3,3,8,8,9,9,3,3,3,3])

=== Works great with {NArray}[http://narray.rubyforge.org/], too:

Expand Down
1 change: 1 addition & 0 deletions histogram.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Gem::Specification.new do |spec|
"rake ~> 10.1.0",
"simplecov ~> 0.7.1",
"rspec ~> 2.13.0",
"narray",
].each do |argline|
spec.add_development_dependency *argline.split(' ', 2).compact
end
Expand Down
49 changes: 22 additions & 27 deletions lib/histogram.rb
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,8 @@ def number_bins(methd=:fd)
# (bins, *freqs) = ar.histogram(30, :bin_boundary => :avg, :other_sets => [[3,3,4,4,5], [-1,0,0,3,3,6]])
# (ar_freqs, other1, other2) = freqs
#
# # histogramming with heights (uses the second array for heights)
# w_heights = [ar, [3,3,8,8,9,9,3,3,3,3]]
# w_heights.histogram(20)
# # histogramming with weights
# ar.histogram(20, :weights => [3,3,8,8,9,9,3,3,3,3])
#
# # with NArray
# require 'histogram/narray'
Expand All @@ -140,10 +139,8 @@ def number_bins(methd=:fd)
# It is useful if you just want a certain number of bins and for the sets
# to share the exact same bins. In this case returns [bins, freqs(caller),
# freqs1, freqs2 ...]
# * Can also deal with parallel arrays where the first array is the x values
# to histogram and the next array is the y values (or intensities) to be
# applied in the histogram. (checks for !first_value.is_a?(Numeric))
# * Return value
# * Can also deal with weights. :weights should be either a single array
# parallel to the caller, or (when :other_sets are given) an array of
# arrays parallel to the caller and each of the :other_sets.
def histogram(*args)
make_freqs_proc = lambda do |obj, len|
if obj.is_a?(Array)
Expand Down Expand Up @@ -186,20 +183,24 @@ def histogram(*args)
bins = number_bins(bins)
end

have_frac_freqs = !self[0].is_a?(Numeric)
weights =
if opts[:weights]
have_frac_freqs = true
opts[:weights][0].is_a?(Numeric) ? [ opts[:weights] ] : opts[:weights]
else
[]
end

# we need to know the limits of the bins if we need to define our own bins
if opts[:bin_width] || !bins_array_like
(xvals, yvals) = have_frac_freqs ? [self[0], self[1]] : [self, nil]
_min = opts[:min] || xvals.min
_max = opts[:max] || xvals.max
other_sets.each do |vec|
(xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil]
v_min = opts[:min] || xvals.min
v_max = opts[:max] || xvals.max
if v_min < _min ; _min = v_min end
if v_max > _max ; _max = v_max end
end
calc_min, calc_max =
unless opts[:min] && opts[:max]
(mins, maxs) = all.map(&:minmax).transpose
[mins.min, maxs.max]
end
_min = opts[:min] || calc_min
_max = opts[:max] || calc_max

if opts[:bin_width]
bins = []
_min.step(_max, opts[:bin_width]) {|v| bins << v }
Expand All @@ -220,9 +221,7 @@ def histogram(*args)
end
case bin_boundary
when :avg
freqs_ar = all.map do |vec|

(xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil]
freqs_ar = all.zip(weights).map do |xvals, yvals|

_freqs = make_freqs_proc.call(xvals, bins.size)

Expand Down Expand Up @@ -251,9 +250,7 @@ def histogram(*args)
_freqs
end
when :min
freqs_ar = all.map do |vec|

(xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil]
freqs_ar = all.zip(weights).map do |xvals, yvals|

#_freqs = VecI.new(bins.size, 0)
_freqs = make_freqs_proc.call(xvals, bins.size)
Expand Down Expand Up @@ -290,9 +287,7 @@ def histogram(*args)
NArray.float(bins)
end

freqs_ar = all.map do |vec|

(xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil]
freqs_ar = all.zip(weights).map do |xvals, yvals|

# initialize arrays
_freqs = make_freqs_proc.call(xvals, bins)
Expand Down
206 changes: 99 additions & 107 deletions spec/histogram_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,141 +2,133 @@

require 'histogram'

class Array
def to_f
self.map {|v| v.to_f }
end

def round(n=nil)
self.map {|v| v.to_f.round(n) }
RSpec::Matchers.define :be_within_rounding_error_of do |expected|
match do |actual|
(act, exp) = [actual, expected].map {|ar| ar.map {|v| v.to_f.round(8) } }
act.should == exp
end
end

shared_examples 'something that can histogram' do
it 'makes histograms with the specified number of bins' do
(bins, freqs) = obj0.histogram(5)
bins.should be_a(obj0.class)
freqs.should be_a(obj0.class)
bins.round(8).should == [1,3,5,7,9].round(8)
freqs.round(8).should == [2,2,2,2,3].round(8)
[bins, freqs].each {|ar| ar.should be_a(obj0.class) }
[bins,freqs].zip( [ [1,3,5,7,9], [2,2,2,2,3] ] ).each do |ar, exp|
ar.should be_within_rounding_error_of exp
end
end

it 'returns bins as the min boundary if given that option' do
(bins, freqs) = obj0.histogram(5, :bin_boundary => :min)
bins.round(8).should == [0,2,4,6,8].round(8)
freqs.round(8).should == [2,2,2,2,3].round(8)
[bins, freqs].zip( [ [0,2,4,6,8], [2,2,2,2,3] ] ) do |ar, exp|
ar.should be_within_rounding_error_of exp
end
end

it 'makes histograms when given the bins' do
bins, freqs = obj1.histogram([1,3,5,7,9], :bin_boundary => :avg)
bins.round(8).should == [1,3,5,7,9].round(8)
freqs.round(8).should == [3,1,1,2,3].round(8)
bins, freqs = obj1.histogram([1,3,5,7,9])
[bins, freqs].zip( [ [1,3,5,7,9], [3,1,1,2,3] ] ) do |ar, exp|
ar.should be_within_rounding_error_of exp
end
end

it 'interprets bins as the min boundary when given the bin_boundary option' do
bins, freqs = obj2.histogram([1,3,5,7,9], :bin_boundary => :min)
bins.round(8).should == [1,3,5,7,9].round(8)
freqs.round(8).should == [3,0,2,2,3].round(8)
[bins, freqs].zip( [ [1,3,5,7,9], [3,0,2,2,3] ] ) do |ar, exp|
ar.should be_within_rounding_error_of exp
end
end

# it 'can histogram multiple sets' do
#(bins, freq1, freq2, freq3) = @obj4.histogram([1,2,3,4], :tp => :avg, :other_sets => [@obj5, @obj5])
#bins.enums [1,2,3,4].to_f
#freq1.enums [2.0, 2.0, 2.0, 3.0]
#freq2.enums [0.0, 5.0, 0.0, 1.0]
#freq3.enums freq2
#end
it 'can histogram multiple sets' do
(bins, freq1, freq2, freq3) = obj3.histogram([1,2,3,4], :other_sets => [obj4, obj4])
bins.should be_within_rounding_error_of [1,2,3,4]
freq1.should be_within_rounding_error_of [2.0, 2.0, 2.0, 3.0]
freq2.should be_within_rounding_error_of [0.0, 5.0, 0.0, 1.0]
freq3.should be_within_rounding_error_of freq2
end

it 'works with a given min val' do
(bins, freqs) = obj5.histogram(4, :min => 2, :bin_boundary => :min)
[bins, freqs].zip( [ [2.0, 3.5, 5.0, 6.5], [4.0, 1.0, 2.0, 3.0] ] ) do |ar, exp|
ar.should be_within_rounding_error_of exp
end
end

it 'works with a given max val' do
(bins, freqs) = obj5.histogram(4, :max => 7, :bin_boundary => :min)
[bins, freqs].zip( [ [1.0, 2.5, 4.0, 5.5] ,[2.0, 3.0, 2.0, 3.0] ] ) do |ar, exp|
ar.should be_within_rounding_error_of exp
end
end

it 'works with given min/max vals' do
(bins, freqs) = obj5.histogram(4, :min => 2, :max => 7, :bin_boundary => :min)
[bins, freqs].zip( [ [2.0, 3.25, 4.5, 5.75], [4.0, 1.0, 1.0, 4.0] ] ) do |ar, exp|
ar.should be_within_rounding_error_of exp
end
end

it 'can use equal weights' do
weights = Array.new(obj1.size, 3)
bins, freqs = obj1.histogram([1,3,5,7,9], :weights => weights)
[bins, freqs].zip( [ [1,3,5,7,9], [3,1,1,2,3].map {|v| v * 3} ] ) do |ar, exp|
ar.should be_within_rounding_error_of exp
end
end

it 'can use unequal weights' do
weights = [10, 0, 0, 0, 50, 0, 0, 0, 0.2, 0.2]
(bins, freqs) = obj1.histogram([1,3,5,7,9], :weights => weights)
[bins, freqs].zip( [ [1,3,5,7,9], [10, 0, 50, 0, 0.4] ] ) do |ar, exp|
ar.should be_within_rounding_error_of exp
end
end

end

describe Histogram do
let(:data) do
[ (0..10).to_a,
[0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9],
[-1, 0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9, 10],
].to_f
tmp = {
obj0: (0..10).to_a,
obj1: [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9],
obj2: [-1, 0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9, 10],
obj3: [1, 1, 2, 2, 3, 3, 4, 4, 4],
obj4: [2, 2, 2, 2, 2, 4],
obj5: [1,2,3,3,3,4,5,6,7,8],
}
data = tmp.each {|k,v| [k, v.map(&:to_f).extend(Histogram)] }

let(:data) { data }

data.each do |obj, ar|
let(obj) { ar.map(&:to_f).extend(Histogram) }
end

describe Array do
it_behaves_like 'something that can histogram' do
[:obj0, :obj1, :obj2].each_with_index do |obj,i|
let(obj) { data[i].dup.extend(Histogram) }
it_behaves_like 'something that can histogram'
end

begin
describe NArray do
data.each do |obj, ar|
let(obj) { NArray.to_na(ar).to_f.extend(Histogram) }
end
it_behaves_like 'something that can histogram'
end
rescue
puts ""
puts "YOU NEED NArray installed to run NArray tests!"
puts ""
end
end

describe 'calculating bins' do
it 'calculates :sturges, :scott, :fd, or :middle' do
answers = [6,3,4,4]
[:sturges, :scott, :fd, :middle].zip(answers) do |mth, answ|
ar = [0,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,5,5,9,9,10,20,15,15,15,16,17].extend(Histogram)
# these are merely frozen, not checked to see if correct
ar.number_bins(mth).should == answ
end
end
end
end

#it 'can take height values' do
#obj2 = [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9]
#heights = Array.new(obj2.size, 3)
#obj = [obj2, heights]
#bins, freqs = obj.histogram([1,3,5,7,9], :tp => :avg)
#bins.enums [1,3,5,7,9].to_f
#freqs.enums [3,1,1,2,3].map {|v| v * 3}

#obj2 = [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9]
#heights = [10, 0, 0, 0, 50, 0, 0, 0, 0.2, 0.2]
#obj = [obj2, heights]
#(bins, freqs) = obj.histogram([1,3,5,7,9], :tp => :avg)
#bins.enums [1,3,5,7,9].to_f
#freqs.enums [10, 0, 50, 0, 0.4]
#end

#it 'works with given min and max vals' do
#[1,2,3,3,3,4,5,6,7,8].histogram(4, :min => 2, :tp => :min).first.first.is 2.0
#[1,2,3,3,3,4,5,6,7,8].histogram(4, :max => 7, :tp => :min).first.last.is 5.5 # since the bin-width is 1.5
#bs = [1,2,3,3,3,4,5,6,7,8].histogram(4, :min => 2, :max => 7, :tp => :min)
#bs.first.first.is 2.0
#bs.first.last.is 5.75 # bin-width of 1.25
#end






#TestArrays = [[0,1,2,3,4,5,6,7,8,9,10], [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9],
#[-1, 0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9, 10], [1, 1, 2, 2, 3, 3, 4, 4, 4],
#[2, 2, 2, 2, 2, 4]]

#require 'histogram/array'
#class LilClass < Array
#include Histogram
#end

#describe 'calculating bins' do
#it 'calculates :sturges, :scott, :fd, or :middle' do
#answers = [6,3,4,4]
#[:sturges, :scott, :fd, :middle].zip(answers) do |mth, answ|
#ar = LilClass.new([0,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,5,5,9,9,10,20,15,15,15,16,17])
## these are merely frozen, not checked to see if correct
#ar.number_bins(mth).is answ
#end
#end
#end

#describe 'histogramming an Array' do
#before do
#TestArrays.each_with_index do |ar,i|
#instance_variable_set("@obj#{i+1}", ar)
#end
#end
#behaves_like 'a histogram'
#end

#begin
#require 'histogram/narray'
#describe 'histogramming an NArray' do
#before do
#TestArrays.each_with_index do |ar,i|
#instance_variable_set("@obj#{i+1}", NArray.to_na(ar).to_f)
#end
#end
#behaves_like 'a histogram'
#end
#rescue LoadError
#puts ""
#puts "YOU NEED NArray installed to run NArray tests!"
#puts ""
#end

0 comments on commit 3c18391

Please sign in to comment.