From b9eb2f163852af7fd5c2773142f31a30b9d5b160 Mon Sep 17 00:00:00 2001 From: John Prince Date: Tue, 27 Aug 2013 15:40:11 -0600 Subject: [PATCH] version bump and clean up readme --- LICENSE | 5 +- README.rdoc => README.md | 36 +++++++------- VERSION | 1 - lib/histogram.rb | 105 ++++++++++++++++++++------------------- lib/histogram/array.rb | 1 - lib/histogram/narray.rb | 1 - lib/histogram/version.rb | 2 +- spec/histogram_spec.rb | 2 +- 8 files changed, 75 insertions(+), 78 deletions(-) rename README.rdoc => README.md (63%) delete mode 100644 VERSION diff --git a/LICENSE b/LICENSE index 7b877fe..d6ac0d1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,11 +1,10 @@ Copyright (c) 2006, 2007, 2008 The University of Texas at Austin Copyright (c) 2009, University of Colorado at Boulder and Howard Hughes -Medical Institute +Copyright (c) 2013, Brigham Young University The above copyright holders are collectively designated "COPYRIGHT HOLDER" -Software by John T. Prince under the direction of Edward M. Marcotte and -Natalie Ahn. +Software by John T. Prince By using this software the USER indicates that he or she has read, understood and will comply with the following: diff --git a/README.rdoc b/README.md similarity index 63% rename from README.rdoc rename to README.md index 371d7d8..c5d78de 100644 --- a/README.rdoc +++ b/README.md @@ -1,14 +1,12 @@ -= histogram +# histogram -Generates histograms similar to R's hist and numpy's histogram functions. The -interface is relatively stable and a decent test/spec suite is in place, but -please consider this alpha software. Inspired by {Richard Cottons's matlab -implementation}[http://www.mathworks.com/matlabcentral/fileexchange/21033-calculate-number-of-bins-for-histogram] -and the wikipedia {histogram article}[http://en.wikipedia.org/wiki/Histogram]. +Generates histograms similar to R's hist and numpy's histogram functions. +Inspired somewhat by [Richard Cottons's matlab implementation](http://www.mathworks.com/matlabcentral/fileexchange/21033-calculate-number-of-bins-for-histogram) +and the wikipedia [histogram article](http://en.wikipedia.org/wiki/Histogram). -== Examples +## Examples -=== Typical usage: +### Typical usage: require 'histogram/array' # enables Array#histogram @@ -18,20 +16,22 @@ and the wikipedia {histogram article}[http://en.wikipedia.org/wiki/Histogram]. (bins, freqs) = data.histogram # equivalent to: data.histogram(:fd, :bin_boundary => :avg) -=== Multiple types of binning behavior: +### Multiple types of binning behavior: # :fd, :sturges, :scott, or :middle (median value between the three methods) data.histogram(:middle) (bins, freqs) = data.histogram(20) # use 20 bins (bins, freqs) = data.histogram([-3,-1,4,5,6]) # custom bins + (bins, freqs) = data.histogram(10, :min => 2, :max => 12) # 10 bins with set min and max + # bins are midpoints, but can be set as minima (bins, freqs) = data.histogram([-3,-1,4,5,6], :bin_boundary => :min) # custom bins with :min # can also set the bin_width (which interpolates between the min and max of the set) (bins, freqs) = data.histogram(:bin_width => 0.5) -=== Multiple Datasets: +### Multiple Datasets: Sometimes, we want to create histograms where the bins are calculated based on all the data sets. That way, the resulting frequencies will all line up: @@ -39,22 +39,22 @@ all the data sets. That way, the resulting frequencies will all line up: # returns [bins, freq1, freq2 ...] (bins, *freqs) = set1.histogram(30, :other_sets => [[3,3,4,4,5], [-1,0,0,3,3,6]]) -=== Histograms with weights/fractions: +### Histograms with weights/fractions: # histogramming with weights data.histogram(20, :weights => [3,3,8,8,9,9,3,3,3,3]) -=== Works great with {NArray}[http://narray.rubyforge.org/], too: +### Works with NArray objects - require 'histogram/narray' # enables NArray#histogram + require 'histogram/narray' # enables NArray#histogram # if the calling object is an NArray, the output is two NArrays: - NArray.float(20).random!(3).histogram(20) - # => [bins, freqs] # are both NArray.float objects + (bins, freqs) = NArray.float(20).random!(3).histogram(20) + # bins and freqs are both NArray.float objects -== Installation +## Installation gem install histogram -== See Also +## See Also -aggregate[http://github.com/josephruscio/aggregate], rsruby[http://github.com/alexgutteridge/rsruby] +aggregate(http://github.com/josephruscio/aggregate), rsruby(http://github.com/alexgutteridge/rsruby) diff --git a/VERSION b/VERSION deleted file mode 100644 index 429d94a..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.9 \ No newline at end of file diff --git a/lib/histogram.rb b/lib/histogram.rb index 7da93fa..6d68cfa 100644 --- a/lib/histogram.rb +++ b/lib/histogram.rb @@ -4,54 +4,56 @@ class NArray module Histogram - # returns (min, max) - def self.minmax(obj) - if obj.is_a?(Array) - obj.minmax - else - mn = obj[0] - mx = obj[0] - obj.each do |val| - if val < mn then mn = val end - if val > mx then mx = val end + class << self + # returns (min, max) + def minmax(obj) + if obj.is_a?(Array) + obj.minmax + else + mn = obj[0] + mx = obj[0] + obj.each do |val| + if val < mn then mn = val end + if val > mx then mx = val end + end + [mn, mx] end - [mn, mx] end - end - # returns (mean, standard_dev) - # if size == 0 returns [nil, nil] - def self.sample_stats(obj) - _len = obj.size - return [nil, nil] if _len == 0 - _sum = 0.0 ; _sum_sq = 0.0 - obj.each do |val| - _sum += val - _sum_sq += val * val + # returns (mean, standard_dev) + # if size == 0 returns [nil, nil] + def sample_stats(obj) + _len = obj.size + return [nil, nil] if _len == 0 + _sum = 0.0 ; _sum_sq = 0.0 + obj.each do |val| + _sum += val + _sum_sq += val * val + end + std_dev = _sum_sq - ((_sum * _sum)/_len) + std_dev /= ( _len > 1 ? _len-1 : 1 ) + [_sum.to_f/_len, Math.sqrt(std_dev)] end - std_dev = _sum_sq - ((_sum * _sum)/_len) - std_dev /= ( _len > 1 ? _len-1 : 1 ) - [_sum.to_f/_len, Math.sqrt(std_dev)] - end - # still need to spec this method! - def self.iqrange(obj) - srted = obj.sort - sz = srted.size - if sz % 2 == 0 - median_index_hi = sz / 2 - median_index_lo = (sz / 2) - 1 - # need to check this line for accuracy: - dist = median_index_hi / 2 - fq = srted[median_index_hi + dist] - tq = srted[median_index_lo - dist] - else - median_index = sz / 2 - dist = (median_index + 1) / 2 - fq = srted[median_index - dist] - tq = srted[median_index + dist] + # still need to spec this method! + def iqrange(obj) + srted = obj.sort + sz = srted.size + if sz % 2 == 0 + median_index_hi = sz / 2 + median_index_lo = (sz / 2) - 1 + # need to check this line for accuracy: + dist = median_index_hi / 2 + fq = srted[median_index_hi + dist] + tq = srted[median_index_lo - dist] + else + median_index = sz / 2 + dist = (median_index + 1) / 2 + fq = srted[median_index - dist] + tq = srted[median_index + dist] + end + (tq - fq).to_f end - (tq - fq).to_f end # returns(integer) takes :scott|:sturges|:fd|:middle @@ -62,9 +64,9 @@ def self.iqrange(obj) # implementation}[http://www.mathworks.com/matlabcentral/fileexchange/21033-calculate-number-of-bins-for-histogram] # and the {histogram page on # wikipedia}[http://en.wikipedia.org/wiki/Histogram] - def number_bins(methd=:fd) + def number_of_bins(methd=:fd) if methd == :middle - [:scott, :sturges, :fd].map {|v| number_bins(v) }.sort[1] + [:scott, :sturges, :fd].map {|v| number_of_bins(v) }.sort[1] else range = (self.max - self.min).to_f nbins = @@ -180,7 +182,7 @@ def histogram(*args) all = [self] + other_sets if bins.is_a?(Symbol) - bins = number_bins(bins) + bins = number_of_bins(bins) end weights = @@ -196,16 +198,15 @@ def histogram(*args) calc_min, calc_max = unless opts[:min] && opts[:max] (mins, maxs) = all.map {|ar| Histogram.minmax(ar) }.transpose - end [mins.min, maxs.max] end - _min = opts[:min] || calc_min - _max = opts[:max] || calc_max + end + _min = opts[:min] || calc_min + _max = opts[:max] || calc_max - if opts[:bin_width] - bins = [] - _min.step(_max, opts[:bin_width]) {|v| bins << v } - end + if opts[:bin_width] + bins = [] + _min.step(_max, opts[:bin_width]) {|v| bins << v } end _bins = nil diff --git a/lib/histogram/array.rb b/lib/histogram/array.rb index bb6c033..81e5db7 100644 --- a/lib/histogram/array.rb +++ b/lib/histogram/array.rb @@ -1,4 +1,3 @@ - require 'histogram' class Array diff --git a/lib/histogram/narray.rb b/lib/histogram/narray.rb index d2a2049..a9d5c66 100644 --- a/lib/histogram/narray.rb +++ b/lib/histogram/narray.rb @@ -1,4 +1,3 @@ - require 'histogram' class NArray diff --git a/lib/histogram/version.rb b/lib/histogram/version.rb index b2d65e7..f78958b 100644 --- a/lib/histogram/version.rb +++ b/lib/histogram/version.rb @@ -1,3 +1,3 @@ module Histogram - VERSION = "0.0.10" + VERSION = "0.1.0" end diff --git a/spec/histogram_spec.rb b/spec/histogram_spec.rb index 4180f46..b6604b1 100644 --- a/spec/histogram_spec.rb +++ b/spec/histogram_spec.rb @@ -129,7 +129,7 @@ [:sturges, :scott, :fd, :middle].zip(answers) do |mth, answ| ar = [0,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,5,5,9,9,10,20,15,15,15,16,17].extend(Histogram) # these are merely frozen, not checked to see if correct - ar.number_bins(mth).should == answ + ar.number_of_bins(mth).should == answ end end end