Skip to content

Commit

Permalink
almost have searching complete
Browse files Browse the repository at this point in the history
  • Loading branch information
jtprince committed Feb 7, 2012
1 parent e3cc22b commit 29c5014
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 50 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ pkg

.RData
.Rhistory

*.dataset
138 changes: 88 additions & 50 deletions lib/ms/lipid/search.rb
Original file line number Diff line number Diff line change
@@ -1,48 +1,92 @@
require 'ms/spectrum'
require 'rserve/simpler' # TODO: move to integrated interface with rserve when available
require 'core_ext/array/in_groups'
require 'ms/lipid/search/hit'

module MS
class Lipid
class Search

# A Search::Bin is a range that contains the *entire* query spectrum
# (not just the portion covered by the range). the query spectrum, and
# an EVD that describes the probability that a peak's delta to nearest
# peak is that small by chance.
class Bin < Range
# the intensity value of the query spectrum should be a query
attr_accessor :query_spectrum
attr_accessor :evd

def initialize(range_obj, query_spectrum)
super(range_obj.begin, range_obj.end, range_obj.exclude_end?)
@query_spectrum = query_spectrum
end

# returns the nearest num_hits MS::Lipid::Search::Hits sorted by delta
# [with tie going to the lower m/z]
def nearest_hits(mz, num_hits=1)
mzs = @query_spectrum.mzs
queries = @query_spectrum.intensities
index = @query_spectrum.find_nearest_index(mz)
_min = index - (num_nearest-1)
(_min >= 0) || (_min = 0)
_max = index + (num_nearest-1)
(_max < @query_spectrum.size) || (_max = @query_spectrum - 1)
delta_index_pairs = (_min.._max).map {|i| [mz.-(mzs[i]).abs, i] }.sort[0, num_to_return]
# this could be improved by updating the evd if it happens to jump
# to the next major Search::Bin
# of course, this has the advantage that the next nearest peaks will
# have the same probability distribution as the nearest peak.

delta_index_pairs.map do |delta, index|
hit = Hit.new( :query => queries[index], :observed_mz => mz)
dev = (evd.type == :ppm) ? hit.ppm : hit.delta.abs
hit.pvalue = evd.pvalue(dev)
hit
end
end

def to_range
Range.new( self.begin, self.end, self.exclude_end? )
end
end

class EVD
DEFAULT_TYPE = :ppm
EVD_R = Rserve::Simpler.new
attr_accessor :location, :scale, :shape
# takes location, scale and shape parameters
def initialize(location, scale, shape)
attr_accessor :location, :scale, :shape
# type is :ppm or :amu
attr_accessor :type
def initialize(location, scale, shape, type=DEFAULT_TYPE)
@location, @scale, @shape = location, scale, shape
@type = type
end
# takes a ppm and returns the pvalue
# note that it will take the
def pvalue(ppm)
EVD_R.converse "pgev(log(#{ppm}), #{@location}, #{@scale}, #{@shape})"

# takes a deviation and returns the pvalue
def pvalue(dev)
EVD_R.converse "pgev(log(#{dev}), #{@location}, #{@scale}, #{@shape})"
end

# returns an EVD object
def self.ppms_to_evd(ppms)
%w(ismev evd).each do |lib|
reply = EVD_R.converse "library(#{lib})"
puts "REAPLY: #{reply}"
unless reply.size > 4 # ~roughly
$stderr.puts "The libraries ismev and evd must be installed in your R env!"
$stderr.puts "From within R:"
$stderr.puts %Q{install.packages("ismev") ; install.packages("evd")}
raise "must have R (rserve) and ismev and evd installed!"
end
def self.require_r_library(lib)
reply = EVD_R.converse "library(#{lib})"
puts "REAPLY: #{reply}"
unless reply.size > 4 # ~roughly
$stderr.puts "The libraries ismev and evd must be installed in your R env!"
$stderr.puts "From within R (works best if R is started with sudo or root for installing):"
$stderr.puts %Q{install.packages("ismev") ; install.packages("evd")}
raise "must have R (rserve) and ismev and evd installed!"
end
#ppmsl = ppms.map {|v| Math::log(v) }
#p EVD_R.converse("cor(c(1,2,3), c(4,4,6))", :ppmsl_r => ppmsl )
#params = EVD_R.converse("hist(ppms_r, 50)", :ppms_r => ppms )
params = EVD_R.converse("m <- gev.fit(log(ppms_r)); c(m$mle[1], m$mle[2], m$mle[3])", :ppms_r => ppms )
#EVD_R.command("svggev.diag(m)")
#EVD_R.pause
EVD.new(*params)
end
#ppmsl = log(ppms)
#gev.fit(ppmsl)
# pgev(log10(#{TEST_PVAL}), model$mle[1], model$mle[2], model$mle[3])

# returns an EVD object
def self.deviations_to_evd(type, devs)
%w(ismev evd).each {|lib| require_r_library(lib) }
params = EVD_R.converse("m <- gev.fit(log(devs_r))\n c(m$mle[1], m$mle[2], m$mle[3])", :devs_r => devs )
EVD.new(*params, type)
end
end


STANDARD_MODIFICATIONS = {
:proton => [1,2],
:ammonium => [1],
Expand Down Expand Up @@ -101,48 +145,42 @@ def create_search_spectrum(queries)
MS::Spectrum.new([mzs, query_groups])
end



def create_probability_function(queries, opts={})
opts = STANDARD_SEARCH.merge( opts )

spec = create_search_spectrum(queries)
query_spectrum = create_search_spectrum(queries)

# make sure we get the right bin size based on the input
ss = spec.mzs.size ; optimal_num_groups = 1
ss = query_spectrum.mzs.size ; optimal_num_groups = 1
(1..ss).each do |divisions|
if (ss.to_f / divisions) >= opts[:prob_min_bincnt]
optimal_num_groups = divisions
else ; break
end
end

rng = Random.new
# create a mock spectrum of intensity 1
mz_ranges = []
evds = spec.peaks.in_groups(optimal_num_groups,false).map do |peaks|
mz_range = Range.new( peaks.first.first, peaks.last.first )
mz_ranges << mz_range
random_mzs = opts[:num_rand_samples_per_bin].times.map { rng.rand(mz_range) }
puts "*****************************"
puts "MZRANGE: "
p mz_range
last_peaks_group = nil
search_bins = query_spectrum.peaks.in_groups(optimal_num_groups,false).each_cons(2).map do |peaks1, peaks2|
bin = MS::Lipid::Search::Bin.new( Range.new(peaks1.first.first, peaks2.first.first, true), query_spectrum )
last_peaks_group = peaks2
bin
end
_range = Range.new(last_peaks_group.first.first, last_peaks_group.last.first)
search_bins << MS::Lipid::Search::Bin.new(_range, query_spectrum) # inclusive

search_bins.map do |search_bin|
rng = Random.new
random_mzs = opts[:num_rand_samples_per_bin].times.map { rng.rand(search_bin.to_range) }
# find the deltas
diffs = random_mzs.map do |random_mz|
nearest_random_mz = spec.find_all_nearest(random_mz).first
nearest_random_mz = query_spectrum.find_nearest(random_mz)
delta = (random_mz - nearest_random_mz).abs
opts[:ppm] ? delta./(nearest_random_mz).*(1e6) : delta
end
#File.write("data_#{mz_range}.dataset", "deltas\n" + diffs.join("\n"))
EVD.ppms_to_evd(diffs)
end
File.open("plot_evd_params_ppm_#{!!opt[:ppm]}.data",'w') do |out|
out.puts %w(mzrange loc scale shape).join("\t")
evds.zip(mz_ranges) do |evd, range|
out.puts [range, evd.location, evd.scale, evd.shape].join("\t")
end
EVD.deviations_to_evd(diffs, (opts[:ppm] ? :ppm : :amu))
end
end

end
end
end
Expand Down

0 comments on commit 29c5014

Please sign in to comment.