diff --git a/README.md b/README.md index 3dc99b9..5a81f9a 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,15 @@ There are some nifty options you can supply to switch things up: -m sets the method used to sample the source audio file, it can either be 'peak' or 'rms'. 'peak' is probably what you want because it looks cooler, but 'rms' is closer to what you actually hear. + -s sets the method used to retrieve samples of the audio file, it can either be + 'read' or an array of points within the -1..1 range. 'read' is the default and samples + from the audio file provided as indicated by the method given in -m. + -t sets the type of waveform to render, it can either be 'audio' or 'phonocardiogram'. + 'audio' is the default and is commonly seen on sites that play music (example of an audio wave: http://www.bza.biz/indexhibit/files/gimgs/waveform.gif) + 'phonocardiogram' is specific to heartbeats (example: http://www.stethographics.com/newimages/products/phono/murmur.jpg) + The core difference between the two is that audio plots the absolute value of a sample point + and mirrors it, while the phonocardiogram maintains the original value and does not mirror. 
+ There are also some less-nifty options: diff --git a/bin/waveform b/bin/waveform index 512cb96..feaab61 100755 --- a/bin/waveform +++ b/bin/waveform @@ -39,6 +39,14 @@ optparse = OptionParser.new do |o| options[:method] = method.to_sym end + o.on("-t", "--type TYPE", "Type of waveform generated (can be 'audio' or 'phonocardiogram') -- Default '#{Waveform::DefaultOptions[:type]}'.") do |type| + options[:type] = type.to_sym + end + + o.on("-s", "--samples SAMPLES", "Origin of samples (can be 'read' or an array of floats -1..1) -- Default '#{Waveform::DefaultOptions[:samples]}'.") do |samples| + options[:samples] = samples.to_sym + end + options[:logger] = $stdout o.on("-q", "--quiet", "Don't print anything out when generating waveform") do options[:logger] = nil diff --git a/lib/waveform.rb b/lib/waveform.rb index 7619cd0..e9e89a9 100644 --- a/lib/waveform.rb +++ b/lib/waveform.rb @@ -15,7 +15,9 @@ class Waveform :background_color => "#666666", :color => "#00ccff", :force => false, - :logger => nil + :logger => nil, + :type => :audio, + :samples => :read } TransparencyMask = "#00ff00" @@ -67,6 +69,15 @@ class << self # # :logger => IOStream to log progress to. # + # :type => form of waveform + # Can be :audio or :phonocardiogram + # Default is traditional audio waveform which includes plotting mirrored absolute values of points + # + # :samples => origin of sample data + # Can be array of samples or :read + # Default is :read which means the audio's samples will be created by the gem + # When array of samples is provided, assumption is each float will be between -1 and 1 + # # Example: # Waveform.generate("Kickstart My Heart.wav", "Kickstart My Heart.png") # Waveform.generate("Kickstart My Heart.wav", "Kickstart My Heart.png", :method => :rms) @@ -95,9 +106,7 @@ def generate(source, filename, options={}) # frames are very wide (i.e. 
the image width is very small) -- I *think* # the larger the frames are, the more "peaky" the waveform should get, # perhaps to the point of inaccurately reflecting the actual sound. - samples = frames(source, options[:width], options[:method]).collect do |frame| - frame.inject(0.0) { |sum, peak| sum + peak } / frame.size - end + samples = retrieve_samples(source, options) @log.timed("\nDrawing...") do # Don't remove the file even if force is true until we're sure the @@ -116,10 +125,20 @@ def generate(source, filename, options={}) private + def retrieve_samples(source, options) + if options[:samples] == :read + samples = frames(source, options[:width], options[:method], options[:type]).collect do |frame| + frame.inject(0.0) { |sum, peak| sum + peak } / frame.size + end + elsif options[:samples].class == Array + samples = options[:samples] + end + end + # Returns a sampling of frames from the given RubyAudio::Sound using the # given method the sample size is determined by the given pixel width -- # we want one sample frame per horizontal pixel. - def frames(source, width, method = :peak) + def frames(source, width, method = :peak, type = :audio) raise ArgumentError.new("Unknown sampling method #{method}") unless [ :peak, :rms ].include?(method) frames = [] @@ -128,10 +147,9 @@ def frames(source, width, method = :peak) frames_read = 0 frames_per_sample = (audio.info.frames.to_f / width.to_f).to_i sample = RubyAudio::Buffer.new("float", frames_per_sample, audio.info.channels) - @log.timed("Sampling #{frames_per_sample} frames per sample: ") do while(frames_read = audio.read(sample)) > 0 - frames << send(method, sample, audio.info.channels) + frames << send(method, sample, audio.info.channels, type) @log.out(".") end end @@ -160,6 +178,22 @@ def draw(samples, options) color = ChunkyPNG::Color.from_hex(options[:color]) end + options[:type] == :audio ? 
image = drawAudio(samples, image, options, color) : image = drawPhonocardiogram(samples, image, options, color); + + # Simple transparency masking, it just loops over every pixel and makes + # ones which match the transparency mask color completely clear. + if transparent + (0..image.width - 1).each do |x| + (0..image.height - 1).each do |y| + image[x, y] = ChunkyPNG::Color.rgba(0, 0, 0, 0) if image[x, y] == transparent + end + end + end + + image + end + + def drawAudio(samples, image, options, color) # Calling "zero" the middle of the waveform, like there's positive and # negative amplitude zero = options[:height] / 2.0 @@ -171,34 +205,40 @@ def draw(samples, options) # go haywire. image.line(x, (zero - amplitude).round, x, (zero + amplitude).round, color) end + image + end - # Simple transparency masking, it just loops over every pixel and makes - # ones which match the transparency mask color completely clear. - if transparent - (0..image.width - 1).each do |x| - (0..image.height - 1).each do |y| - image[x, y] = ChunkyPNG::Color.rgba(0, 0, 0, 0) if image[x, y] == transparent - end - end - end + def drawPhonocardiogram(samples, image, options, color) + #generally follows drawAudio with minor adjustments to remove mirroring and graph points with negative values (had to channel peaks in order to retain negative values in samples) + + zero = options[:height] / 2.0 + #establish starting point of first line in graph + starting_point = [0, (zero - (samples[0] * options[:height].to_f/2.0).round)] + samples.each_with_index do |sample, x| + amplitude = sample * options[:height].to_f / 2.0 + #connect end of last line with current point in sample data + image.line(starting_point[0], starting_point[1], x, (zero - amplitude).round, color) + #update last point data so next line will begin from correct point + starting_point.replace([x, (zero - amplitude).round]) + end image end # Returns an array of the peak of each channel for the given collection of # frames -- the peak is 
individual to the channel, and the returned collection # of peaks are not (necessarily) from the same frame(s). - def peak(frames, channels=1) + def peak(frames, channels=1, type) peak_frame = [] (0..channels-1).each do |channel| - peak_frame << channel_peak(frames, channel) + peak_frame << channel_peak(frames, channel, type) end peak_frame end # Returns an array of rms values for the given frameset where each rms value is # the rms value for that channel. - def rms(frames, channels=1) + def rms(frames, channels=1, type) rms_frame = [] (0..channels-1).each do |channel| rms_frame << channel_rms(frames, channel) @@ -213,12 +253,16 @@ def rms(frames, channels=1) # likely still generate the same waveform as the waveform is so comparitively # low resolution to the original input (in most cases), and would increase # the analyzation speed (maybe). - def channel_peak(frames, channel=0) + def channel_peak(frames, channel=0, type) peak = 0.0 frames.each do |frame| next if frame.nil? frame = Array(frame) - peak = frame[channel].abs if frame[channel].abs > peak + if type == :audio + peak = frame[channel].abs if frame[channel].abs > peak + else + peak = frame[channel] + end end peak end diff --git a/lib/waveform/version.rb b/lib/waveform/version.rb index 1b5eb74..e27bf75 100644 --- a/lib/waveform/version.rb +++ b/lib/waveform/version.rb @@ -1,3 +1,3 @@ class Waveform - VERSION = "0.1.2" + VERSION = "0.1.3" end diff --git a/test/waveform_test.rb b/test/waveform_test.rb index 6202917..627d22d 100644 --- a/test/waveform_test.rb +++ b/test/waveform_test.rb @@ -54,6 +54,24 @@ def test_generates_waveform_from_mono_audio_source_via_rms assert_equal ChunkyPNG::Color.from_hex(Waveform::DefaultOptions[:background_color]), image[0, 0] end + def test_generates_phonocardiogram_waveform + Waveform.generate(fixture("sample.wav"), output("phonocardiogram_sample.png"), :type => :phonocardiogram) + assert File.exists?(output("phonocardiogram_sample.png")) + + image = 
open_png(output("phonocardiogram_sample.png")) + assert_not_equal ChunkyPNG::Color.from_hex(Waveform::DefaultOptions[:color]), image[60, 120] + assert_equal ChunkyPNG::Color.from_hex(Waveform::DefaultOptions[:background_color]), image[0, 0] + end + + def test_generates_phonocardiogram_waveform_via_passed_data + Waveform.generate(fixture("sample.wav"), output("phonocardiogram_array_sample.png"), :type => :phonocardiogram, :array => [-0.052887, -0.074229, -0.094981, -0.100566, -0.090027, -0.084483, -0.088877, -0.088816, -0.089976, -0.090607, -0.085347, -0.075551, -0.056081, -0.033030, -0.011159, 0.008809, 0.016886, 0.017008]) + assert File.exists?(output("phonocardiogram_array_sample.png")) + + image = open_png(output("phonocardiogram_array_sample.png")) + assert_not_equal ChunkyPNG::Color.from_hex(Waveform::DefaultOptions[:color]), image[60, 120] + assert_equal ChunkyPNG::Color.from_hex(Waveform::DefaultOptions[:background_color]), image[0, 0] + end + def test_logs_to_given_io File.open(output("waveform.log"), "w") do |io| Waveform.generate(fixture("sample.wav"), output("logged.png"), :logger => io)