Skip to content

Commit

Permalink
moved over to more liberal options and dynamic finding of num bins
Browse files Browse the repository at this point in the history
  • Loading branch information
jtprince committed Jan 27, 2010
1 parent ba19c13 commit 629ffa7
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 47 deletions.
3 changes: 3 additions & 0 deletions bin/histogram
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/ruby


raise NotImplementedError, "this executable still needs some work"

require 'optparse'
require 'rubygems'
require 'histogram'
Expand Down
98 changes: 66 additions & 32 deletions lib/histogram.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@

begin
require 'narray'
rescue loaderror
class narray
rescue LoadError
class NArray
end
end

module histogram
module Histogram

# returns (min, max)
def self.min_max(obj)
Expand All @@ -31,7 +31,7 @@ def self.sample_stats(obj)
end
std_dev = _sum_sq - ((_sum * _sum)/_len)
std_dev /= ( _len > 1 ? _len-1 : 1 )
[_sum.to_f/_len, math.sqrt(std_dev)]
[_sum.to_f/_len, Math.sqrt(std_dev)]
end

def self.iqrange(obj)
Expand Down Expand Up @@ -69,18 +69,21 @@ def number_bins(methd=:fd)
case methd
when :scott
(mean, stddev) = Histogram.sample_stats(self)
range / ( 3.5*stddev*(ar.size**(-1.0/3)) )
range / ( 3.5*stddev*(self.size**(-1.0/3)) )
when :sturges
(Math::log(self.size)/Math::log(2)) + 1
when :fd
range / ( 2*Histogram.iqrange(self)*ar.size**(-1.0/3) )
range / ( 2*Histogram.iqrange(self)*self.size**(-1.0/3) )
end
nbins = 1 if num <= 0
nbins = 1 if nbins <= 0
nbins.ceil.to_i
end
end

# Returns [bins, freqs]
#
# histogram(bins, opts)
# histogram(opts)
#
# Options:
#
Expand All @@ -95,17 +98,19 @@ def number_bins(methd=:fd)
# :min bins specify the minima for binning
#
# :bin_width => <float> width of a bin (overrides :bins)
#
# :other_sets => an array of other sets to histogram
#
# Examples
#
# require 'histogram/array'
# ar = [-2,1,2,3,3,3,4,5,6,6]
# # these return: [bins, frequencies]
# ar.histogram(20) # use 20 bins
# ar.histogram([-3,-1,4,5,6], :avg) # custom bins
# ar.histogram([-3,-1,4,5,6], :tp => :avg) # custom bins
#
# # returns [bins, freq1, freq2 ...]
# (bins, *freqs) = ar.histogram(30, :avg, [3,3,4,4,5], [-1,0,0,3,3,6])
# (bins, *freqs) = ar.histogram(30, :tp => :avg, :other_sets => [[3,3,4,4,5], [-1,0,0,3,3,6]])
# (ar_freqs, other1, other2) = freqs
#
# # histogramming with heights (uses the second array for heights)
Expand Down Expand Up @@ -135,26 +140,64 @@ def number_bins(methd=:fd)
# to histogram and the next array is the y values (or intensities) to be
# applied in the histogram. (checks for !first_value.is_a?(Numeric))
# * Return value
def histogram(opts={})
DEFAULT_OPTS = {
:bins => nil,
:tp => :avg,
:other_sets => []
}

make_freqs = lambda do |obj, len|
def histogram(*args)
make_freqs_proc = lambda do |obj, len|
if obj.is_a?(Array)
Array.new(len, 0.0)
elsif obj.is_a?(NArray)
NArray.float(len)
end
end

if args.size == 2
(bins, opts) = args
elsif args.size == 1
arg = args.shift
if arg.is_a?(Hash)
opts = arg
puts "ITs a hash!"
else
bins = arg
opts = {}
end
end

opts = ({ :tp => :avg, :other_sets => [] }).merge(opts)

bins = opts[:bins] if bins.equal?(opts)
bins = opts[:bins] if opts[:bins]

tp = opts[:tp]
other_sets = opts[:other_sets]

bins_array_like = bins.kind_of?(Array) || bins.kind_of?(NArray) || opts[:bin_width]
all = [self] + other_sets

if bins.is_a?(Symbol)
bins = number_bins(bins)
end

have_frac_freqs = !self[0].is_a?(Numeric)

# we need to know the limits of the bins if we need to define our own bins
if opts[:bin_width] || !bins_array_like
(xvals, yvals) = have_frac_freqs ? [self[0], self[1]] : [self, nil]
_min, _max = Histogram.min_max(xvals)
other_sets.each do |vec|
(xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil]
v_min, v_max = Histogram.min_max(xvals)
if v_min < _min ; _min = v_min end
if v_max > _max ; _max = v_max end
end
if opts[:bin_width]
bins = []
_min.step(_max, opts[:bin_width]) {|v| bins << v }
end
end

_bins = nil
_freqs = nil
have_frac_freqs = !self[0].is_a?(Numeric)
if bins.kind_of?(Array) || bins.kind_of?(NArray)
if bins_array_like
########################################################
# ARRAY BINS:
########################################################
Expand All @@ -170,7 +213,7 @@ def histogram(opts={})

(xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil]

_freqs = make_freqs.call(xvals, bins.size)
_freqs = make_freqs_proc.call(xvals, bins.size)

break_points = []
(0...(bins.size)).each do |i|
Expand Down Expand Up @@ -202,7 +245,7 @@ def histogram(opts={})
(xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil]

#_freqs = VecI.new(bins.size, 0)
_freqs = make_freqs.call(xvals, bins.size)
_freqs = make_freqs_proc.call(xvals, bins.size)
(0...(xvals.size)).each do |i|
val = xvals[i]
height = have_frac_freqs ? yvals[i] : 1
Expand All @@ -220,21 +263,12 @@ def histogram(opts={})
_freqs
end
end
else
########################################################
# NUMBER OF BINS:
########################################################
else
# Create the scaling factor

(xvals, yvals) = have_frac_freqs ? [self[0], self[1]] : [self, nil]
_min, _max = Histogram.min_max(xvals)
other_sets.each do |vec|
(xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil]
v_min, v_max = Histogram.min_max(xvals)
if v_min < _min ; _min = v_min end
if v_max > _max ; _max = v_max end
end

dmin = _min.to_f
conv = bins.to_f/(_max - _min)

Expand All @@ -250,7 +284,7 @@ def histogram(opts={})
(xvals, yvals) = have_frac_freqs ? [vec[0], vec[1]] : [vec, nil]

# initialize arrays
_freqs = make_freqs.call(xvals, bins)
_freqs = make_freqs_proc.call(xvals, bins)
_len = size

# Create the histogram:
Expand Down
36 changes: 21 additions & 15 deletions spec/histogram_spec.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
require File.expand_path(File.dirname(__FILE__) + "/spec_helper.rb")


class Array
def to_f
self.map {|v| v.to_f }
Expand All @@ -10,8 +9,8 @@ def to_f
shared 'a histogram' do

before do
# @obj1 #(0..10)
# obj2 = [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9]
#@obj1 = (0..10).to_a
#@obj2 = [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9]
end

it 'can make histograms (bins created on the fly)' do
Expand All @@ -22,23 +21,23 @@ def to_f
bins.enums [1,3,5,7,9].to_f
freqs.enums [2,2,2,2,3].to_f

bins,freqs = @obj1.histogram(5, :min)
bins,freqs = @obj1.histogram(5, :tp => :min)
bins.enums [0,2,4,6,8].to_f
freqs.enums [2,2,2,2,3].to_f
end

it 'can make histograms (given bins)' do
# Test with given bins:
bins, freqs = @obj2.histogram([1,3,5,7,9], :avg)
bins, freqs = @obj2.histogram([1,3,5,7,9], :tp => :avg)
bins.enums [1,3,5,7,9].to_f
freqs.enums [3,1,1,2,3].to_f
bins, freqs = @obj3.histogram([1,3,5,7,9], :min)
bins, freqs = @obj3.histogram([1,3,5,7,9], :tp => :min)
bins.enums [1,3,5,7,9].to_f
freqs.enums [3,0,2,2,3].to_f
end

it 'can histogram multiple sets' do
(bins, freq1, freq2, freq3) = @obj4.histogram([1,2,3,4], :avg, @obj5, @obj5)
(bins, freq1, freq2, freq3) = @obj4.histogram([1,2,3,4], :tp => :avg, :other_sets => [@obj5, @obj5])
bins.enums [1,2,3,4].to_f
freq1.enums [2.0, 2.0, 2.0, 3.0]
freq2.enums [0.0, 5.0, 0.0, 1.0]
Expand All @@ -49,14 +48,14 @@ def to_f
obj2 = [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9]
heights = Array.new(obj2.size, 3)
obj = [obj2, heights]
bins, freqs = obj.histogram([1,3,5,7,9], :avg)
bins, freqs = obj.histogram([1,3,5,7,9], :tp => :avg)
bins.enums [1,3,5,7,9].to_f
freqs.enums [3,1,1,2,3].map {|v| v * 3}

obj2 = [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9]
heights = [10, 0, 0, 0, 50, 0, 0, 0, 0.2, 0.2]
obj = [obj2, heights]
(bins, freqs) = obj.histogram([1,3,5,7,9], :avg)
(bins, freqs) = obj.histogram([1,3,5,7,9], :tp => :avg)
bins.enums [1,3,5,7,9].to_f
freqs.enums [10, 0, 50, 0, 0.4]
end
Expand All @@ -66,15 +65,22 @@ def to_f
[-1, 0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9, 10], [1, 1, 2, 2, 3, 3, 4, 4, 4],
[2, 2, 2, 2, 2, 4]]

require 'histogram/array'
# Minimal Array subclass that mixes in Histogram, so the spec below can call
# number_bins on a plain array-like object.
class LilClass < Array
include Histogram
end

describe 'calculating bins' do
it 'calculates :sturges, :scott, or :fd' do

it 'calculates :sturges, :scott, :fd, or :middle' do
answers = [6,3,4,4]
[:sturges, :scott, :fd, :middle].zip(answers) do |mth, answ|
ar = LilClass.new([0,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,5,5,9,9,10,20,15,15,15,16,17])
# these are merely frozen, not checked to see if correct
ar.number_bins(mth).is answ
end
end
end



require 'histogram/array'
describe 'histogramming an Array' do
before do
TestArrays.each_with_index do |ar,i|
Expand All @@ -86,7 +92,7 @@ def to_f

begin
require 'histogram/narray'
describe 'histogramming an NArray' do
xdescribe 'histogramming an NArray' do
before do
TestArrays.each_with_index do |ar,i|
instance_variable_set("@obj#{i+1}", NArray.to_na(ar).to_f)
Expand Down

0 comments on commit 629ffa7

Please sign in to comment.