From bc8d03e9467fd20ad13505682619f8e7d621ce97 Mon Sep 17 00:00:00 2001 From: Grzegorz Mrukwa Date: Thu, 4 Jan 2018 01:15:59 +0100 Subject: [PATCH 1/4] Remove all MATLAB files --- src/MATLAB/DiviK/demo.m | 9 - src/MATLAB/DiviK/libs/GMM_library/EM_iter.m | 86 --- .../DiviK/libs/GMM_library/Hubert_out.m | 102 --- .../DiviK/libs/GMM_library/anal_g_merge.m | 104 --- .../DiviK/libs/GMM_library/default_gmm_opts.m | 32 - .../DiviK/libs/GMM_library/dyn_pr_split_w.m | 44 -- .../libs/GMM_library/dyn_pr_split_w_aux.m | 16 - src/MATLAB/DiviK/libs/GMM_library/find_ic.m | 90 --- .../libs/GMM_library/g_mix_est_fast_lik.m | 77 -- src/MATLAB/DiviK/libs/GMM_library/gauss_rem.m | 118 --- .../DiviK/libs/GMM_library/gaussian_mixture.m | 101 --- .../GMM_library/gaussian_mixture_simple.m | 62 -- .../DiviK/libs/GMM_library/inv_cdf_init.m | 26 - .../DiviK/libs/GMM_library/my_qu_ix_w.m | 17 - .../libs/GMM_library/outliers_non_gaussian.m | 64 -- src/MATLAB/DiviK/libs/GMM_library/par_vec.m | 18 - .../DiviK/libs/GMM_library/parfor_progress.m | 81 -- .../DiviK/libs/GMM_library/peaks_part.m | 27 - .../libs/GMM_library/plot_spect_vs_decomp.m | 29 - src/MATLAB/DiviK/libs/GMM_library/post_proc.m | 73 -- src/MATLAB/DiviK/libs/GMM_library/proc_gmm.m | 31 - .../DiviK/libs/GMM_library/proc_peakdetect.m | 91 --- src/MATLAB/DiviK/libs/GMM_library/warp_down.m | 187 ----- src/MATLAB/DiviK/libs/divik_library/divik.m | 251 ------ src/MATLAB/DiviK/libs/divik_library/dunn.m | 67 -- src/MATLAB/DiviK/libs/divik_library/k_means.m | 713 ------------------ .../libs/divik_library/merge_partitions.m | 32 - .../libs/divik_library/optimize_partition.m | 41 - src/MATLAB/DiviK/readme.txt | 34 - src/MATLAB/GMM/Run_gmm_model.m | 16 - src/MATLAB/GMM/ms_gmm/Bruffaerts_out.m | 77 -- src/MATLAB/GMM/ms_gmm/bindata.m | 32 - src/MATLAB/GMM/ms_gmm/components_merging.m | 41 - src/MATLAB/GMM/ms_gmm/dyn_pr_split_w1.m | 40 - src/MATLAB/GMM/ms_gmm/dyn_pr_split_w_aux1.m | 16 - src/MATLAB/GMM/ms_gmm/emcor.m | 116 --- src/MATLAB/GMM/ms_gmm/f_par_mcv.m | 26 - src/MATLAB/GMM/ms_gmm/fill_red.m | 13 - src/MATLAB/GMM/ms_gmm/find_ranges.m | 14 - src/MATLAB/GMM/ms_gmm/find_segment.m | 109 --- src/MATLAB/GMM/ms_gmm/find_split_peaks.m | 59 -- src/MATLAB/GMM/ms_gmm/find_split_segment.m | 51 -- src/MATLAB/GMM/ms_gmm/gmm_decomp_segment1.m | 171 ----- .../GMM/ms_gmm/gmm_decomp_split_segment.m | 174 ----- src/MATLAB/GMM/ms_gmm/ms_gmm1.m | 142 ---- src/MATLAB/GMM/ms_gmm/ms_gmm_params.m | 107 --- src/MATLAB/GMM/ms_gmm/ms_gmm_run1.m | 39 - src/MATLAB/GMM/ms_gmm/my_EM_iter.m | 102 --- src/MATLAB/GMM/ms_gmm/my_qu_ix_w1.m | 15 - src/MATLAB/GMM/ms_gmm/plot_gmm.m | 24 - src/MATLAB/GMM/ms_gmm/plot_res_new_scale.m | 32 - src/MATLAB/GMM/ms_gmm/post_proc1.m | 84 --- src/MATLAB/GMM/ms_gmm/post_proc_gmm.m | 326 -------- src/MATLAB/GMM/ms_gmm/qua_scal.m | 15 - .../ROI_approximation/approximate_by_dice.m | 61 -- .../approximate_region_binding.m | 31 - src/MATLAB/ROI_approximation/exroi.m | 45 -- .../summarize_similarity_classes_binding.m | 114 --- .../wrapped_dice_approximation_binding.m | 5 - .../ROI_approximation/wrapped_exroi_binding.m | 27 - src/MATLAB/adaptive_PAFFT.m | 180 ----- src/MATLAB/algorithms.prj | 189 ----- src/MATLAB/apply_gmm.m | 21 - src/MATLAB/dependencies/DataHash.m | 484 ------------ src/MATLAB/dependencies/any_dist.m | 42 -- src/MATLAB/dependencies/apply_user_configs.m | 20 - src/MATLAB/dependencies/cacher.m | 160 ---- src/MATLAB/dependencies/capture_diary.m | 35 - src/MATLAB/dependencies/cell_settings.m | 8 - src/MATLAB/dependencies/dice.m | 8 - .../fetch_centroids_from_partition.m | 17 - src/MATLAB/dependencies/fetch_thresholds.m | 150 ---- src/MATLAB/dependencies/filthy_merge.m | 24 - src/MATLAB/dependencies/gmm_segmentation.m | 67 -- src/MATLAB/dependencies/gmm_sum_fun.m | 22 - src/MATLAB/dependencies/gmm_uborder_fun.m | 22 - src/MATLAB/dependencies/invisible_figure.m | 8 - src/MATLAB/dependencies/is_unimodal.m | 22 - src/MATLAB/dependencies/list2img.m | 51 -- src/MATLAB/dependencies/mkfiledir.m | 13 - src/MATLAB/dependencies/partition_plotter.m | 48 -- src/MATLAB/dependencies/ppv.m | 7 - src/MATLAB/dependencies/resize.m | 40 - src/MATLAB/dependencies/rows_in.m | 7 - .../dependencies/translate_min_to_zero.m | 8 - src/MATLAB/estimate_gmm.m | 19 - src/MATLAB/load_divik_result.m | 8 - src/MATLAB/macierz_splot.m | 19 - src/MATLAB/merge_gmm_model_components.m | 10 - src/MATLAB/pafft.m | 10 - src/MATLAB/preprocessing_wstepny.m | 64 -- src/MATLAB/reduce_gmm_by_component_area.m | 9 - src/MATLAB/reduce_gmm_by_component_height.m | 9 - src/MATLAB/remove_baseline.m | 14 - src/MATLAB/ticnorm.m | 26 - 95 files changed, 6588 deletions(-) delete mode 100644 src/MATLAB/DiviK/demo.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/EM_iter.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/Hubert_out.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/anal_g_merge.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/default_gmm_opts.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/dyn_pr_split_w.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/dyn_pr_split_w_aux.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/find_ic.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/g_mix_est_fast_lik.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/gauss_rem.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/gaussian_mixture.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/gaussian_mixture_simple.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/inv_cdf_init.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/my_qu_ix_w.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/outliers_non_gaussian.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/par_vec.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/parfor_progress.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/peaks_part.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/plot_spect_vs_decomp.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/post_proc.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/proc_gmm.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/proc_peakdetect.m delete mode 100644 src/MATLAB/DiviK/libs/GMM_library/warp_down.m delete mode 100644 src/MATLAB/DiviK/libs/divik_library/divik.m delete mode 100644 src/MATLAB/DiviK/libs/divik_library/dunn.m delete mode 100644 src/MATLAB/DiviK/libs/divik_library/k_means.m delete mode 100644 src/MATLAB/DiviK/libs/divik_library/merge_partitions.m delete mode 100644 src/MATLAB/DiviK/libs/divik_library/optimize_partition.m delete mode 100644 src/MATLAB/DiviK/readme.txt delete mode 100644 src/MATLAB/GMM/Run_gmm_model.m delete mode 100644 src/MATLAB/GMM/ms_gmm/Bruffaerts_out.m delete mode 100644 src/MATLAB/GMM/ms_gmm/bindata.m delete mode 100644 src/MATLAB/GMM/ms_gmm/components_merging.m delete mode 100644 src/MATLAB/GMM/ms_gmm/dyn_pr_split_w1.m delete mode 100644 src/MATLAB/GMM/ms_gmm/dyn_pr_split_w_aux1.m delete mode 100644 src/MATLAB/GMM/ms_gmm/emcor.m delete mode 100644 src/MATLAB/GMM/ms_gmm/f_par_mcv.m delete mode 100644 src/MATLAB/GMM/ms_gmm/fill_red.m delete mode 100644 src/MATLAB/GMM/ms_gmm/find_ranges.m delete mode 100644 src/MATLAB/GMM/ms_gmm/find_segment.m delete mode 100644 src/MATLAB/GMM/ms_gmm/find_split_peaks.m delete mode 100644 src/MATLAB/GMM/ms_gmm/find_split_segment.m delete mode 100644 src/MATLAB/GMM/ms_gmm/gmm_decomp_segment1.m delete mode 100644 src/MATLAB/GMM/ms_gmm/gmm_decomp_split_segment.m delete mode 100644 src/MATLAB/GMM/ms_gmm/ms_gmm1.m delete mode 100644 src/MATLAB/GMM/ms_gmm/ms_gmm_params.m delete mode 100644 src/MATLAB/GMM/ms_gmm/ms_gmm_run1.m delete mode 100644 src/MATLAB/GMM/ms_gmm/my_EM_iter.m delete mode 100644 src/MATLAB/GMM/ms_gmm/my_qu_ix_w1.m delete mode 100644 src/MATLAB/GMM/ms_gmm/plot_gmm.m delete mode 100644 src/MATLAB/GMM/ms_gmm/plot_res_new_scale.m delete mode 100644 src/MATLAB/GMM/ms_gmm/post_proc1.m delete mode 100644 src/MATLAB/GMM/ms_gmm/post_proc_gmm.m delete mode 100644 src/MATLAB/GMM/ms_gmm/qua_scal.m delete mode 100644 src/MATLAB/ROI_approximation/approximate_by_dice.m delete mode 100644 src/MATLAB/ROI_approximation/approximate_region_binding.m delete mode 100644 src/MATLAB/ROI_approximation/exroi.m delete mode 100644 src/MATLAB/ROI_approximation/summarize_similarity_classes_binding.m delete mode 100644 src/MATLAB/ROI_approximation/wrapped_dice_approximation_binding.m delete mode 100644 src/MATLAB/ROI_approximation/wrapped_exroi_binding.m delete mode 100644 src/MATLAB/adaptive_PAFFT.m delete mode 100644 src/MATLAB/algorithms.prj delete mode 100644 src/MATLAB/apply_gmm.m delete mode 100644 src/MATLAB/dependencies/DataHash.m delete mode 100644 src/MATLAB/dependencies/any_dist.m delete mode 100644 src/MATLAB/dependencies/apply_user_configs.m delete mode 100644 src/MATLAB/dependencies/cacher.m delete mode 100644 src/MATLAB/dependencies/capture_diary.m delete mode 100644 src/MATLAB/dependencies/cell_settings.m delete mode 100644 src/MATLAB/dependencies/dice.m delete mode 100644 src/MATLAB/dependencies/fetch_centroids_from_partition.m delete mode 100644 src/MATLAB/dependencies/fetch_thresholds.m delete mode 100644 src/MATLAB/dependencies/filthy_merge.m delete mode 100644 src/MATLAB/dependencies/gmm_segmentation.m delete mode 100644 src/MATLAB/dependencies/gmm_sum_fun.m delete mode 100644 src/MATLAB/dependencies/gmm_uborder_fun.m delete mode 100644 src/MATLAB/dependencies/invisible_figure.m delete mode 100644 src/MATLAB/dependencies/is_unimodal.m delete mode 100644 src/MATLAB/dependencies/list2img.m delete mode 100644 src/MATLAB/dependencies/mkfiledir.m delete mode 100644 src/MATLAB/dependencies/partition_plotter.m delete mode 100644 src/MATLAB/dependencies/ppv.m delete mode 100644 src/MATLAB/dependencies/resize.m delete mode 100644 src/MATLAB/dependencies/rows_in.m delete mode 100644 src/MATLAB/dependencies/translate_min_to_zero.m delete mode 100644 src/MATLAB/estimate_gmm.m delete mode 100644 src/MATLAB/load_divik_result.m delete mode 100644 src/MATLAB/macierz_splot.m delete mode 100644 src/MATLAB/merge_gmm_model_components.m delete mode 100644 src/MATLAB/pafft.m delete mode 100644 src/MATLAB/preprocessing_wstepny.m delete mode 100644 src/MATLAB/reduce_gmm_by_component_area.m delete mode 100644 src/MATLAB/reduce_gmm_by_component_height.m delete mode 100644 src/MATLAB/remove_baseline.m delete mode 100644 src/MATLAB/ticnorm.m diff --git a/src/MATLAB/DiviK/demo.m b/src/MATLAB/DiviK/demo.m deleted file mode 100644 index a1137da..0000000 --- a/src/MATLAB/DiviK/demo.m +++ /dev/null @@ -1,9 +0,0 @@ -load sample_data.mat - -addpath(genpath('./libs')); - -% data has observations in rows and gmm components in columns -% xy has observations in rows, x in first column, y in second -partition = divik(data, xy, 'Level', 2, ... - 'PlotPartitions', true, ... - 'PlotRecursively', true); \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/EM_iter.m b/src/MATLAB/DiviK/libs/GMM_library/EM_iter.m deleted file mode 100644 index 2ace76e..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/EM_iter.m +++ /dev/null @@ -1,86 +0,0 @@ -function [pp_est,mu_est,sig_est,TIC,logL,IC] = EM_iter(x,y,alpha,mu,sig,draw,gmm_opts) -% EM_ITER(X,Y,PP_INI,MU_INI,SIG_INI) -% EM algorithm for Gaussian mixture model. -% Input: -% x - m/z values [1,n] -% y - corresponding ion counts [1,n] -% alpha - vector of initial weights (probabilities) -% mu - vector of initial component means -% sig - vector of initial standard deviations of components -% draw- if draw results - -if nargin<5; error('Not enough input arguments.'); end -if nargin<6; draw = 0; end -if nargin<7; gmm_opts = default_gmm_opts(); end - -x = x(:); y = y(:)'; alpha = alpha(:)'; mu = mu(:)'; sig = sig(:)'; -[x,ind] = sort(x); y = y(1,ind); -N = length(y); % data length - -if gmm_opts.unif_corr - % correction for nonunifom density - dens = [(x(2:N)-x(1:N-1));(x(N)-x(N-1))]'; - y = y.*dens; -end - -sig2 = sig.^2; -change = 1e20; -count = 1; -TIC = sum(y); -KS = length(alpha); - -if draw; fprintf(1,'EM iter. no. 1. Change:infinity'); end -% MAIN LOOP -while change > gmm_opts.eps_change && count < 10000; - - %removing small and wide or thin components - ind = alpha < gmm_opts.thr_alpha | sig2 <= gmm_opts.thr_sig2; - alpha(ind) = []; mu(ind) = []; sig2(ind) = []; - KS = length(alpha); - old_alpha = alpha; - old_sig2 = sig2; - - f = norm_pdf(repmat(x,1,KS),repmat(mu,N,1),repmat(sqrt(sig2),N,1))'; - px = alpha * f; - px(isnan(px) | px==0) = 5e-324; - for a=1:KS - pk = ((alpha(a)*f(a,:)).*y)./px; - denom = sum(pk); - mu(a) = (pk*x)/denom; - sig2num = sum(pk*((x-mu(a)).^2)); - sig2(a) = gmm_opts.SW + sig2num/denom; - alpha(a) = denom/TIC; - end - - change = sum(abs(alpha-old_alpha)) + sum(((abs(sig2-old_sig2))./sig2))/(length(alpha)); - if draw - tmp = ceil(log10(count)) + 18; - for a = 1:tmp; fprintf(1,'\b'); end - fprintf(1,'%d. Change: %8.2g',count,change); - end - count = count+1; -end -if draw; fprintf(1,'\n'); end - -% RETURN RESULTS -logL = sum(log(px).*y); -[mu_est,ind] = sort(mu); -sig_est = sqrt(sig2(ind)); -pp_est = alpha(ind); -switch gmm_opts.crit - case 'BIC' - IC = -2*logL + (3*KS-1)*log(TIC); - case 'ICL-BIC' - pk(isnan(pk) | pk==0) = 5e-324; - EN = -sum(sum(pk.*log(pk))); - IC = -2*logL + 2*EN + (3*KS-1)*log(TIC); - case 'AIC' - IC = -2*logL + 2*(3*KS-1); - case 'AICc' - IC = -2*logL + 2*(3*KS-1)*(TIC/(TIC-(3*KS-1)-1)); - otherwise - error('Wrong model selection criterion.') -end - -function y = norm_pdf(x,mu,sigma) -y = exp(-0.5*(((x - mu)./sigma).^2)) ./(2.506628274631 * sigma); \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/Hubert_out.m b/src/MATLAB/DiviK/libs/GMM_library/Hubert_out.m deleted file mode 100644 index ebf1394..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/Hubert_out.m +++ /dev/null @@ -1,102 +0,0 @@ -function idx=Hubert_out(A) -%outlier detection using Hubert's method - -n=length(A); -medn=median(A); -MC = medcouple(A); -Q1=prctile(A,25); -Q3=prctile(A,75); - -if MC>0 - low_lim=Q1-1.5*exp(-4*MC)*(Q3-Q1); - up_lim=Q3+1.5*exp(3*MC)*(Q3-Q1); -else - low_lim=Q1-1.5*exp(-3*MC)*(Q3-Q1); - up_lim=Q3+1.5*exp(4*MC)*(Q3-Q1); -end - -w1=min(A>low_lim); -w2=max(Amedn, - AO(ii)=(A(ii)-medn)/(w2-medn); - else - AO(ii)=(medn-A(ii))/(medn-w1); - end -end - -jj=find(AO < prctile(AO,25) - 1.5*exp(-4*medcouple(AO))*iqr(AO)); -ii=find(AO > prctile(AO,75) + 1.5*exp(3*medcouple(AO))*iqr(AO)); - -idx = zeros(1,n); -idx([jj,ii]) = 1; -idx = logical(idx); - -function [mc] = medcouple(x) -% 'medcouple' computes the medcouple measure, a robust measure of skewness -% for a skewed distribution. It takes into account cases where the -% observations are equal to the median of the series. -% -% Data in 'x' are organized so that columns are the time series and rows -% are the time intervals. All series contain the same number of -% observations. -% -% [mc] = medcouple(x) returns the following: -% mc - vector with the medcouple measure of the data series -% -% Created by Francisco Augusto Alcaraz Garcia -% alcaraz_garcia@yahoo.com -% -% References: -% -% 1) G. Brys; M. Hubert; A. Struyf (2004). A Robust Measure of Skewness. -% Journal of Computational and Graphical Statistics 13(4), 996-1017. - -x = x(:); -[n, c] = size(x); -[s_x,~] = sort(x); -x_med = median(x); -z = s_x - repmat(x_med,n,1); - -mc = zeros(1,c); -for w = 1:c, - [ip,~] = find(z(:,w)>=0); % These are the positions in z of z+ - [im,~] = find(z(:,w)<=0); % These are the positions in z of z- - - p = size(ip,1); - q = size(im,1); - - [mi, mj] = ind2sub([p,q],1:p*q); % Positions of all combinations of z+ - % and z- as elements in a pxq matrix - - zp = z(ip,w); % z+ repeated to account for all cells in the matrix - zm = z(im,w); % z- repeated to account for all cells in the matrix - - h = (zp(mi)+zm(mj))./(zp(mi)-zm(mj)); % same size as mi, mj - - [ipz,~]= find(zp==0); % row numbers of z+ = 0, i.e., x_{i} = median(x) - [imz,~]= find(zm==0); % row numbers of z- = 0, i.e., x_{i} = median(x) - piz = ismember(mi,ipz); % positions in mi where z+=0 - pjz = ismember(mj,imz); % positions in mi where z-=0 - zmember = piz+pjz; % same size as mi, mj - pijz = find(zmember == 2); % positions where z+ = z- = 0, i.e., x_{i} = - % = x_{j} = median(x) - [indi,indj] = ind2sub([p,q],pijz); % pxq matrix position of the zero entries - indi = indi - min(indi) + 1; % row position of the zero entries as if they - % were in a separated matrix - indj = indj - min(indj) + 1; % column position of the zero entries as if they - % were in a separated matrix - - for i=1:size(pijz,2), - if (indi(i) + indj(i) - 1) > size(find(z==0),1), - h(pijz(i)) = 1; - elseif (indi(i) + indj(i) - 1) < size(find(z==0),1), - h(pijz(i)) = -1; - else - h(pijz(i)) = 0; - end - end - mc(w) = median(h); -end \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/anal_g_merge.m b/src/MATLAB/DiviK/libs/GMM_library/anal_g_merge.m deleted file mode 100644 index 7b543e5..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/anal_g_merge.m +++ /dev/null @@ -1,104 +0,0 @@ -function [pp_new,mu_new,sig_new]=anal_g_merge(pp_est,mu_est,sig_est,tol_s,tol_d) -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% anal_g_merge -% auxiliary function -% merging similar components - -pp_new=pp_est; -mu_new=mu_est; -sig_new=sig_est; - -kk=0; -while 1 - kk=kk+1; - if kk+1 > length(pp_new) - break - end -% if kk==11; -% disp('stop');end - cmp=com_mer_v(pp_new(kk:kk+1),mu_new(kk:kk+1),sig_new(kk:kk+1),tol_s,tol_d); - if cmp==1 - [pp_e,mu_e,sig_e]=mer_v(pp_new(kk:kk+1),mu_new(kk:kk+1),sig_new(kk:kk+1)); - pp_new=[pp_new(1:kk-1) pp_e pp_new(kk+2:length(pp_new))]; - mu_new=[mu_new(1:kk-1) mu_e mu_new(kk+2:length(mu_new))]; - sig_new=[sig_new(1:kk-1) sig_e sig_new(kk+2:length(sig_new))]; - end -end - -kk=0; -while 1 - kk=kk+1; - if kk+2 > length(pp_new) - break - end - cmp=com_mer_v(pp_new(kk:kk+2),mu_new(kk:kk+2),sig_new(kk:kk+2),tol_s,tol_d); - if cmp==1 - [pp_e,mu_e,sig_e]=mer_v(pp_new(kk:kk+2),mu_new(kk:kk+2),sig_new(kk:kk+2)); - pp_new=[pp_new(1:kk-1) pp_e pp_new(kk+3:length(pp_new))]; - mu_new=[mu_new(1:kk-1) mu_e mu_new(kk+3:length(mu_new))]; - sig_new=[sig_new(1:kk-1) sig_e sig_new(kk+3:length(sig_new))]; - end -end - -kk=0; -while 1 - kk=kk+1; - if kk+3 > length(pp_new) - break - end - cmp=com_mer_v(pp_new(kk:kk+3),mu_new(kk:kk+3),sig_new(kk:kk+3),tol_s,tol_d); - if cmp==1 - [pp_e,mu_e,sig_e]=mer_v(pp_new(kk:kk+3),mu_new(kk:kk+3),sig_new(kk:kk+3)); - pp_new=[pp_new(1:kk-1) pp_e pp_new(kk+4:length(pp_new))]; - mu_new=[mu_new(1:kk-1) mu_e mu_new(kk+4:length(mu_new))]; - sig_new=[sig_new(1:kk-1) sig_e sig_new(kk+4:length(sig_new))]; - end -end - - - -% -% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% com_mer_v -% auxiliary function -% compare - merge -function m_y_n=com_mer_v(pp_v,mu_v,sig_v,tol_s,tol_d) -pp_e=pp_v/sum(pp_v); -mu_e=sum(pp_e.*mu_v); -sig_e=sqrt(sum(pp_e.*(mu_v.^2+sig_v.^2))-mu_e^2); -if sig_e == 0; - m_y_n = 0; -else - dlt_x=sig_e/100; - zakr_x=mu_e-3*sig_e:dlt_x:mu_e+3*sig_e; - praw_x=0*zakr_x; - for kk=1:length(pp_v) - praw_x=praw_x+pp_e(kk)*normpdf(zakr_x,mu_v(kk),sig_v(kk)); - end - apr_x=normpdf(zakr_x,mu_e,sig_e); - - praw_dx=0*zakr_x; - for kk=1:length(pp_v) - praw_dx=praw_dx+(pp_e(kk)*(zakr_x-mu_v(kk)*ones(size(zakr_x))).*normpdf(zakr_x,mu_v(kk),sig_v(kk))/(sig_v(kk)^2)); - end - apr_dx=(zakr_x-mu_e*ones(size(zakr_x))).*normpdf(zakr_x,mu_e,sig_e)/(sig_e^2); - - if max(abs(apr_x-praw_x))/max(apr_x)= N-h_se; - seg_min = seg_min(1:seg_nb-1); - seg_nb = seg_nb-1; -end - -if draw - disp('Coarse partition completed, result shown in figure 1') - disp([num2str(seg_nb) ' segments found.']) - figure(1) - plot(xx,yy,'k') - hold on - mmm = max(yy); - for a=1:length(seg_min) - plot([xx(seg_min(a)),xx(seg_min(a))],[0 mmm],'r') - end - title('Coarse partition of m/z axis'); xlabel('m/z') - disp('Phase 2: fine allocation of Gaussian components by segments analysis and BIC') -end - -seg_min = [1,seg_min,N+1]; -x_temp = cell(1,seg_nb); y_temp = x_temp; -for a=1:seg_nb - x_temp{a} = xx(seg_min(a):seg_min(a+1)-1); - y_temp{a} = yy(seg_min(a):seg_min(a+1)-1); -end - -gmm_opts.eps_change = 1e-4; -pp_list = cell(1,seg_nb); mu_list = pp_list; sig_list = pp_list; K_list = zeros(1,seg_nb); -parfor a=1:seg_nb - gmm_opts_tmp = gmm_opts; - % warp down - [x_temp_new,y_temp_new] = warp_down(x_temp{a},y_temp{a}); - - cmp = 20; stop = 1; b = 1; - D = nan(1,cmp); fit = D; logL = D; - alpha = cell(1,cmp); mu = alpha; sigma = alpha; - while stop - [pp_ini,mu_ini,sig_ini] = inv_cdf_init(x_temp_new,y_temp_new,b); - [alpha{b},mu{b},sigma{b},~,logL(b),fit(b)] = EM_iter(x_temp_new,y_temp_new,pp_ini,mu_ini,sig_ini,0,gmm_opts_tmp); - - if b > 1 && (strcmp(gmm_opts_tmp.crit,'BIC') || strcmp(gmm_opts_tmp.crit,'ICL-BIC')) - D(b) = -2*logL(b-1) + 2*logL(b); - if 1 - chi2cdf(D(b),3) > 0.05 - stop = 0; - end - end - b = b+1; - end - [~,cmp_nb] = min(fit); - [pp_est,mu_est,sig_est] = EM_iter(x_temp_new,y_temp_new,alpha{cmp_nb},mu{cmp_nb},sigma{cmp_nb},0,gmm_opts_tmp); - - % trim and sort - i_trim = find(mu_est > min(x_temp{a}) & mu_est < max(x_temp{a})); - mu_est = mu_est(i_trim); - pp_est = pp_est(i_trim); - sig_est = sig_est(i_trim); - [mu_list{a},ind] = sort(mu_est); - pp_list{a} = pp_est(ind); - sig_list{a} = sig_est(ind); - K_list(a) = length(pp_list{a}); -end - -K = 0; count = 1; -for a=1:seg_nb; K = K + K_list(a); end -pp_est = zeros(1,K); mu_est = pp_est; sig_est = pp_est; -for a=1:seg_nb - if K_list(a) > 0 - pp_est(count:count+K_list(a)-1) = pp_list{a}; - mu_est(count:count+K_list(a)-1) = mu_list{a}; - sig_est(count:count+K_list(a)-1) = sig_list{a}; - count = count + K_list(a); - end -end -pp_est = pp_est/sum(pp_est); \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/g_mix_est_fast_lik.m b/src/MATLAB/DiviK/libs/GMM_library/g_mix_est_fast_lik.m deleted file mode 100644 index d6b29e0..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/g_mix_est_fast_lik.m +++ /dev/null @@ -1,77 +0,0 @@ -% EM iterations -function [ppsort,musort,sigsort,l_lik] = g_mix_est_fast_lik(sample,KS,muv,sigv,pp) -% input data: -% sample - vector of observations -% muv,sigv,pp - initial values of mixture parameters -% output: -% musort,sigsort,ppsort - estimated mixture parameters -% l_lik - log likeliood - -min_sg=0.1; - -mivoc=muv; -sigvoc=max(sigv.^2, min_sg^2); -ppoc=pp; - -% make vectors verticsl -if size(sample,1) > 1 - sample=sample'; -end - -if size(mivoc,2) > 1 - mivoc=mivoc'; -end - -if size(sigvoc,2) > 1 - sigvoc=sigvoc'; -end - -if size(ppoc,2) > 1 - ppoc=ppoc'; -end - - - -N=length(sample); - -% OK oceniamy iteracyjnie wg wzorow z artykulu Bilmsa -pssmac=zeros(KS,N); -change=1; -while change > 1.5e-4; - oldppoc=ppoc; - oldsigvoc=sigvoc; - - % lower limit for component weights - ppoc=max(ppoc,0.001); - - for kskla=1:KS - pssmac(kskla,:)=ppoc(kskla)*normpdf(sample,mivoc(kskla),sqrt(sigvoc(kskla))); - end - psummac=ones(KS,1)*sum(pssmac,1); - pskmac=pssmac./psummac; - ppp=sum(pskmac,2); - ppoc=ppp/N; - mivoc=pskmac*sample'; - mivoc=mivoc./ppp; - sigmac=(ones(KS,1)*sample-mivoc*ones(1,N)).*((ones(KS,1)*sample-mivoc*ones(1,N))); - for kkk=1:KS - % lower limit for component variances - sigvoc(kkk)=max([pskmac(kkk,:)*sigmac(kkk,:)' min_sg^2]); - end - sigvoc=sigvoc./ppp; - - % - change=sum(abs(ppoc-oldppoc))+sum(abs(sigvoc-oldsigvoc))/KS; -end - - -% compute likelihood -l_lik=sum(log(sum(pssmac,1))); - -% sort estimates -[musort,isort]=sort(mivoc); -sigsort=sqrt(sigvoc(isort)); -ppsort=ppoc(isort); - - - diff --git a/src/MATLAB/DiviK/libs/GMM_library/gauss_rem.m b/src/MATLAB/DiviK/libs/GMM_library/gauss_rem.m deleted file mode 100644 index 81aac61..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/gauss_rem.m +++ /dev/null @@ -1,118 +0,0 @@ -function [thr,IC,stats] = gauss_rem(xvec, K,K_noise,draw) -%GAUSS_REM Estimating noise components using Gaussian mixture model -% Input: -% xvec - vector of data (1xn) -% K - number of Gaussian components -% K_noise - number of noise components from left -% draw - if draw results -% Output: -% thr - noise threshold -% bic - Bayesian Information Criterion for estimated model -% stats - additional statistics -% -% Author: Michal Marczyk -% Michal.Marczyk@polsl.pl - -if nargin < 3; error('Insufficient number of arguments.'); end -if nargin < 4; draw = 0; end -if K_noise > K - thr = max(xvec); IC = 0; - return -end -if K_noise < 0 || K < 1 - thr = min(xvec); IC = 0; - return -end - -xvec = sort(xvec(:)); -N = length(xvec); %nb of measurements - -%initial conditions -alpha = rand(1,K); alpha = alpha/sum(alpha); -mi = min(xvec) + range(xvec)*rand(1,K); -mi = sort(mi); -temp = diff([min(xvec) mi max(xvec)]); -sigma = zeros(1,K); -for a=1:K; sigma(a) = max([temp(a) temp(a+1)]); end - -if draw - disp('Starting values') - disp(num2str([alpha; mi; sigma])) -end - -%histogram of input data -[n,x] = hist(xvec,round(sqrt(N))); - -%EM parameters -gmm_opts = default_gmm_opts(); -gmm_opts.unif_corr = 0; -gmm_opts.SW = (min(sigma)^2)/1000; -gmm_opts.thr_sig2 = 0; -gmm_opts.thr_alpha = 0; - -%main loop -xx = unique(xvec); -y = zeros(1,length(xx)); -for a=1:length(xx); y(a) = sum(eq(xvec,xx(a))); end -[alpha,mi,sigma,~,logL,IC] = EM_iter(xx,y,alpha,mi,sigma,draw,gmm_opts); - -%finding threshold -[mi,ind] = sort(mi); alpha = alpha(ind); sigma = sigma(ind); -f_temp = zeros(1e5,K); x_temp = linspace(min(xvec),max(xvec),1e5)'; -for k=1:K; f_temp(:,k) = alpha(k)*normpdf(x_temp,mi(k),sigma(k)); end -temp2 = nan(1,K-1); -for a=1:K-1 - [~,ind]= max(f_temp(:,a:a+1),[],2); - temp = x_temp(diff(ind)==1); - if length(temp) > 1; temp = temp(temp>mi(a) & temp1 || min(size(y))>1; error('x and y must be vectors.'); end -if sum(y)<1; error('Wrong intensity(y) values.'); end -if min(y)<0; error('y must be non negative.'); end -N=length(x); -if min((x(2:N)-x(1:N-1)))<=0; error('Improperly spaced vector x.'); end - -% INPUT CORRECTION IF NECESSARY -x = x(:); y = y(:); - -if draw - disp('Input control..... OK') - disp('Estimating initial condition for EM algorithm.') -end - -% poszukiwanie warunkow poczatkowych -[pp_v, mu_v, sig_v] = find_ic(x,y,draw,gmm_opts); - -KS=length(pp_v); -if draw - disp(['Estimation of initial values completed: ' num2str(KS) ' components detected']) - disp('Gaussian mixture decomposition by EM algoritm') -end - -if gmm_opts.extra_comp - extra_nb = round(0.1 * length(pp_v)); - pp_v = [pp_v mean(pp_v)*ones(1,extra_nb)]; - pp_v = pp_v/sum(pp_v); - mu_v_tmp = linspace(min(x),max(x),extra_nb+2); - mu_v = [mu_v mu_v_tmp(2:end-1)]; - sig_v = [sig_v mean(diff(mu_v_tmp))*ones(1,extra_nb)]; - [mu_v,ind] = sort(mu_v); - pp_v = pp_v(ind); sig_v = sig_v(ind); -end -gmm_opts.SW = 0; -[pp_est,mu_est,sig_est,~,logL,bic] = EM_iter(x,y,pp_v,mu_v,sig_v,draw,gmm_opts); -orig.alpha = pp_est; orig.mass = mu_est; orig.sig = sig_est; -orig.logL = logL; orig.bic = bic; -if draw; - disp(['EM iterations completed: ' num2str(length(pp_est)) ' components']); - figure(3) - plot_spect_vs_decomp(x,y,pp_est,mu_est,sig_est,1); - title(['Original model. No of components detected: ' num2str(length(pp_est))]) - xlabel('M/Z'); ylabel('Intensity') -end - -if gmm_opts.merge - if draw; disp('Merging Gaussian components based on L-inf similarity');end - - [tol_s_tmp,tol_d_tmp,stats] = par_vec(gmm_opts.tol_s,gmm_opts.tol_d); - pp = cell(1,stats.iter); mu = pp; sig = pp; logL = pp; bic = pp; - if draw; parfor_progress(stats.iter); end; - parfor a=1:stats.iter - [pp_tmp,mu_tmp,sig_tmp] = merging(pp_est,mu_est,sig_est,tol_s_tmp(a),tol_d_tmp(a)); - [pp{a},mu{a},sig{a},~,logL{a},bic{a}] = EM_iter(x,y,pp_tmp,mu_tmp,sig_tmp,0,gmm_opts); - if draw; parfor_progress; end - end - - merged = cell(1,stats.iter); - for a=1:stats.iter - merged{a}.alpha = pp{a}; - merged{a}.mass = mu{a}; - merged{a}.sig = sig{a}; - merged{a}.logL = logL{a}; - merged{a}.bic = bic{a}; - end - merged = reshape(merged,stats.len_row,stats.len_col); - if draw && length(merged)==1; disp([num2str(length(orig.alpha)-length(merged{1}.alpha)) ' components merged.']); end -else - merged = []; -end - -function [pp_est,mu_est,sig_est] = merging(pp_est,mu_est,sig_est,tol_s,tol_d) - -[pp_est,mu_est,sig_est]=anal_g_merge(pp_est,mu_est,sig_est,tol_s,tol_d); -while 1 - tmp = length(pp_est); - [pp_est,mu_est,sig_est] = anal_g_merge(pp_est,mu_est,sig_est,tol_s,tol_d); - if length(pp_est) == tmp; break; end -end \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/gaussian_mixture_simple.m b/src/MATLAB/DiviK/libs/GMM_library/gaussian_mixture_simple.m deleted file mode 100644 index 94a8fd8..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/gaussian_mixture_simple.m +++ /dev/null @@ -1,62 +0,0 @@ -% FUNCTION: -% gaussian_mixture - -% PURPOSE: -% computes Gaussian mixture decomposition -% the signal to be decomposed is the mass spectrum - -% INPUT: -% measurements are specified by vector x -% KS - number of Gaussian components - -% METHOD: -% EM iteriations with initial conditions for means defined by inverting the -% empirical cumulative distribution, iterations for standard deviations stabilized -% by using prior Wishart distribution - -% OUTPUT: -% pp_est - vector of esimated weights -% mu_est - vector of estimated component means -% sig_est - vector of estimated standard deviations of components -% l_lik - log likelihood of the estimated decomposition - -% AUTHOR: -% Andrzej Polanski -% email: andrzej.polanski@polsl.pl - -function [pp_est,mu_est,sig_est,TIC,l_lik]=gaussian_mixture_simple(x,counts,KS) - -TIC=sum(x.*counts); -if KS==1, - mu_est=nanmean(x); - sig_est=std(x); - pp_est=1; - l_lik=NaN; -else - Nb=floor(sqrt(length(x))); - [y_out,mz_out]=hist(x,Nb); - - aux_mx=dyn_pr_split_w_aux(mz_out,y_out); - - [Q,opt_part]=dyn_pr_split_w(mz_out,y_out,KS-1,aux_mx); - part_cl=[1 opt_part Nb+1]; - - % set initial cond - pp_ini=zeros(1,KS); - mu_ini=zeros(1,KS); - sig_ini=zeros(1,KS); - for kkps=1:KS - invec=mz_out(part_cl(kkps):part_cl(kkps+1)-1); - yinwec=y_out(part_cl(kkps):part_cl(kkps+1)-1); - wwec=yinwec/(sum(yinwec)); - pp_ini(kkps)=sum(yinwec)/sum(y_out); - mu_ini(kkps)=sum(invec.*wwec); - %sig_ini(kkps)=sqrt(sum(((invec-sum(invec.*wwec')).^2).*wwec')); - sig_ini(kkps)=0.5*(max(invec)-min(invec)); - end - % - [pp_est,mu_est,sig_est,l_lik] = g_mix_est_fast_lik(x,KS,mu_ini,sig_ini,pp_ini); -end - - - diff --git a/src/MATLAB/DiviK/libs/GMM_library/inv_cdf_init.m b/src/MATLAB/DiviK/libs/GMM_library/inv_cdf_init.m deleted file mode 100644 index 53a0527..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/inv_cdf_init.m +++ /dev/null @@ -1,26 +0,0 @@ -function [alpha,mu,sigma] = inv_cdf_init(x,y,K) -%Initialization of GMM parameters by inverse CDF method - -mu = zeros(1,K); -sigma = zeros(1,K); -N = length(x); -cump = zeros(1,N); -for a=1:N - cump(a)=sum(y(1:a))/sum(y); -end -cump(1) = 0; - -for a=1:K - see = (a-0.5)/K; - ind = find(cump<=see, 1, 'last' ); - if ind < N - mu(a) = x(ind)+(see-cump(ind))*(x(ind+1)-x(ind)); - else - mu(a) = x(ind); - end -end -poz=[x(1) mu x(end)]; -for a=1:K - sigma(a) = poz(a+1) - poz(a); -end -alpha = ones(1,K)/K; %uniform distribution \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/my_qu_ix_w.m b/src/MATLAB/DiviK/libs/GMM_library/my_qu_ix_w.m deleted file mode 100644 index 53cb862..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/my_qu_ix_w.m +++ /dev/null @@ -1,17 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% quality index -% -% -function wyn=my_qu_ix_w(invec,yinwec) -PAR=1; -PAR_sig_min=0.1; -invec=invec(:); -yinwec=yinwec(:); -if (invec(length(invec))-invec(1))<=PAR_sig_min || sum(yinwec)<=1.0e-3 - wyn=inf; -else - wwec=yinwec/(sum(yinwec)); - wyn1=(PAR+sqrt(sum(((invec-sum(invec.*wwec)).^2).*wwec)))/(max(invec)-min(invec)); - wyn=wyn1; -end \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/outliers_non_gaussian.m b/src/MATLAB/DiviK/libs/GMM_library/outliers_non_gaussian.m deleted file mode 100644 index ae08ef7..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/outliers_non_gaussian.m +++ /dev/null @@ -1,64 +0,0 @@ -function [B,indx]=outliers_non_gaussian(A,lambda) -% 1 - lagodnie odstajace -% >1 - ekstremalnie odstajace -% detekcja wielkosci odstajacych dla rozkładów niesymetrycznych, -% odbiegajacych od rozkładu normalnego - -% M.Huberta and S van der Veeken, Journal of Chemometrics, 2008, 22:235-246 - -%medn=median(A); -%madn=1.483*median(abs(A-medn)); -%SDO=abs(A-medn)/madn; % Stahel and Donoho outlyingness - -medn=median(A); nA=length(A); -n=(length(A)+1)/2; h=[]; -jk1=find([1:nA]<=n); X1=A(jk1); nx1=length(X1); -jk1=find([1:nA]>=n); X2=A(jk1); nx2=length(X2); -for ii=1:nx1 - for jj=1:nx2 - if (X1(ii)~=X2(jj)) - h=[h;((X2(jj)-medn)-(medn-X1(ii)))/(X2(jj)-X1(ii))]; - elseif ii==jj, - h=[h;0]; - else - h=[h;-1]; - end - end -end - -MC=median(h) % medcouple -Q1=prctile(A,25); -Q3=prctile(A,75); -if MC>0 - low_lim=Q1-lambda*exp(-4*MC)*(Q3-Q1) - up_lim=Q3+lambda*exp(3*MC)*(Q3-Q1) -else - low_lim=Q1-lambda*exp(-3*MC)*(Q3-Q1) - up_lim=Q3+lambda*exp(4*MC)*(Q3-Q1) -end - -jk1=find(A>up_lim); -jk2=find(Alow_lim); -% w2=max(Amedn, -% AO(ii)=(A(ii)-medn)/(w2-medn); -% else -% AO(ii)=(medn-A(ii))/(medn-w1); -% end -% end -% -% jj=find((AO<-4)==1); -% ii=find((AO>+4)==1); -% B=A; -% B(jj)=NaN; -% B(ii)=NaN; -% indx=[jj;ii]; -% diff --git a/src/MATLAB/DiviK/libs/GMM_library/par_vec.m b/src/MATLAB/DiviK/libs/GMM_library/par_vec.m deleted file mode 100644 index e20aa8f..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/par_vec.m +++ /dev/null @@ -1,18 +0,0 @@ -function [var1_out,var2_out,stats] = par_vec(var1,var2) -%[var1_out,var2_out] = par_vec(var1,var2) -%Function for vectorizing 2 variables to work in parfor loop - -stats = struct; -len_col = length(var2); -len_row = length(var1); -par_iter = len_row*len_col; -var1_out = nan(1,par_iter); -var2_out = var1_out; -for a=1:len_col - var2_out((a-1)*len_row + 1:a*len_row) = var2(a)*ones(1,len_row); - var1_out((a-1)*len_row + 1:a*len_row) = var1; -end - -stats.len_col = len_col; -stats.len_row = len_row; -stats.iter = par_iter; \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/parfor_progress.m b/src/MATLAB/DiviK/libs/GMM_library/parfor_progress.m deleted file mode 100644 index 1183147..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/parfor_progress.m +++ /dev/null @@ -1,81 +0,0 @@ -function percent = parfor_progress(N) -%PARFOR_PROGRESS Progress monitor (progress bar) that works with parfor. -% PARFOR_PROGRESS works by creating a file called parfor_progress.txt in -% your working directory, and then keeping track of the parfor loop's -% progress within that file. This workaround is necessary because parfor -% workers cannot communicate with one another so there is no simple way -% to know which iterations have finished and which haven't. -% -% PARFOR_PROGRESS(N) initializes the progress monitor for a set of N -% upcoming calculations. -% -% PARFOR_PROGRESS updates the progress inside your parfor loop and -% displays an updated progress bar. -% -% PARFOR_PROGRESS(0) deletes parfor_progress.txt and finalizes progress -% bar. -% -% To suppress output from any of these functions, just ask for a return -% variable from the function calls, like PERCENT = PARFOR_PROGRESS which -% returns the percentage of completion. -% -% Example: -% -% N = 100; -% parfor_progress(N); -% parfor i=1:N -% pause(rand); % Replace with real code -% parfor_progress; -% end -% parfor_progress(0); -% -% See also PARFOR. - -% By Jeremy Scheff - jdscheff@gmail.com - http://www.jeremyscheff.com/ - -narginchk(0, 1); - -if nargin < 1 - N = -1; -end - -percent = 0; -w = 50; % Width of progress bar - -if N > 0 - f = fopen('parfor_progress.txt', 'w'); - if f<0 - error('Do you have write permissions for %s?', pwd); - end - fprintf(f, '%d\n', N); % Save N at the top of progress.txt - fclose(f); - - if nargout == 0 - disp([' 0%[>', repmat(' ', 1, w), ']']); - end -elseif N == 0 - delete('parfor_progress.txt'); - percent = 100; - - if nargout == 0 - disp([repmat(char(8), 1, (w+9)), char(10), '100%[', repmat('=', 1, w+1), ']']); - end -else - if ~exist('parfor_progress.txt', 'file') - error('parfor_progress.txt not found. Run PARFOR_PROGRESS(N) before PARFOR_PROGRESS to initialize parfor_progress.txt.'); - end - - f = fopen('parfor_progress.txt', 'a'); - fprintf(f, '1\n'); - fclose(f); - - f = fopen('parfor_progress.txt', 'r'); - progress = fscanf(f, '%d'); - fclose(f); - percent = (length(progress)-1)/progress(1)*100; - - if nargout == 0 - perc = sprintf('%3.0f%%', percent); % 4 characters wide, percentage - disp([repmat(char(8), 1, (w+9)), char(10), perc, '[', repmat('=', 1, round(percent*w/100)), '>', repmat(' ', 1, w - round(percent*w/100)), ']']); - end -end \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/peaks_part.m b/src/MATLAB/DiviK/libs/GMM_library/peaks_part.m deleted file mode 100644 index e85521d..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/peaks_part.m +++ /dev/null @@ -1,27 +0,0 @@ -function seg_min = peaks_part(mz,y,gmm_opts,draw) - -if nargin<3; gmm_opts = default_gmm_opts(); end - -if strcmp(gmm_opts.EM_init,'Peaks') - if draw; disp(['Phase 1: coarse partition by peaks detection using ' gmm_opts.EM_init ' algorithm.']); end - mass = proc_peakdetect(mz,y,gmm_opts.thr,gmm_opts.cond1,gmm_opts.cond2); -elseif strcmp(gmm_opts.EM_init,'External') - if draw; disp(['Phase 1: coarse partition using external peaks.']); end - mass = gmm_opts.mass; -else - error('Wrong type of peak detection.') -end - -n = length(mass); -ind = zeros(1,n); -for a=1:n; ind(a) = find(eq(mz,mass(a))); end - -seg_min = zeros(1,n-1); -for a=1:n-1 - [~,y_ind] = min(y(ind(a):ind(a+1))); - seg_min(a) = ind(a)+y_ind-1; -end -diff_seg = [seg_min(1) diff(seg_min)]; -seg_min(diff_seg < 9) = []; - - diff --git a/src/MATLAB/DiviK/libs/GMM_library/plot_spect_vs_decomp.m b/src/MATLAB/DiviK/libs/GMM_library/plot_spect_vs_decomp.m deleted file mode 100644 index 7b07454..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/plot_spect_vs_decomp.m +++ /dev/null @@ -1,29 +0,0 @@ -% auxiliary function -% plot spestrum versus decomposition -% spectral signal is drawn in black -% Gaussian decomposition model is drawn in red -% Individual components are drawn in green - -function y_est=plot_spect_vs_decomp(xx,y_a,ppoc,mivoc,sigvoc,draw) - -ploty=0*xx; -KS=length(ppoc); -for kks=1:KS - ploty=ploty+ppoc(kks)*normpdf(xx,mivoc(kks),sigvoc(kks)); -end - -scale=sum(y_a)/sum(ploty); -if draw - plot(xx,y_a,'b',xx,scale*ploty,'r','MarkerSize',10,'LineWidth',2); - hold on - for kks=1:KS - plot(xx,scale*ppoc(kks)*normpdf(xx,mivoc(kks),sigvoc(kks)),'k','MarkerSize',10,'LineWidth',2); - end - % 'norm' - norm(y_a/scale-ploty); - grid on; - xlabel('M/Z'); ylabel('Intensity') - legend({'Spectrum','GMM model','Components'}) -end - -y_est=scale*ploty; \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/post_proc.m b/src/MATLAB/DiviK/libs/GMM_library/post_proc.m deleted file mode 100644 index a656194..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/post_proc.m +++ /dev/null @@ -1,73 +0,0 @@ -function [del_cv,stats] = post_proc(gmm,draw) -% components post-processing - -x = 100 * gmm.sig./gmm.mass; - -% outlier detection -idx = Hubert_out(x); stats.out_cv = sum(idx); - -%filtering -thr = inf(1,5); bic = thr; -K_noise = thr; alpha = cell(1,10); -x_tmp = x(~idx); -for a=1:5 - for b=1:50 - [thr(a,b),bic(a,b),stats_tmp] = gauss_rem(x_tmp, a,0,0); - K_noise(a,b) = stats_tmp.K_noise; - alpha{a,b} = stats_tmp.alpha; - end -end -[~,stats.cmp_nb_cv] = min(nanmedian(bic,2)); -[~,ind] = sort(bic(stats.cmp_nb_cv,:)); - -%removing small alpha components -alpha_tmp = alpha{stats.cmp_nb_cv,ind(1)}; -a=2; -while sum(alpha_tmp < 1e-3/stats.cmp_nb_cv) - a = a+1; - if a>10 - if stats.cmp_nb_cv > 1 - stats.cmp_nb_cv = stats.cmp_nb_cv - 1; - [~,ind] = sort(bic(stats.cmp_nb_cv,:)); - alpha_tmp = alpha{stats.cmp_nb_cv,ind(1)}; - a=2; - else - [~,ind] = sort(bic(stats.cmp_nb_cv,:)); - ind = ind(5); - break; - end - else - alpha_tmp = alpha{stats.cmp_nb_cv,ind(a)}; - end -end -ind = ind(a); - -stats.cmp_nb_cv_noise = K_noise(stats.cmp_nb_cv,ind); - -thr = thr(stats.cmp_nb_cv,ind); -if thr > quantile(x,0.5) - if sum(idx) > 0 && thr > max(x(idx)) && max(x(idx)) > quantile(x,0.25) - thr = max(x(idx)); - end -else - thr = Inf; -end -ind = x > thr; - -stats.thr_cv = thr; -stats.del_cv = sum(ind); -del_cv = del_gmm(gmm,ind); - -if draw - disp([num2str(stats.del_cv) ' wide components filtered.']) - figure; hold on; box on; - hist(x,sqrt(length(x))); - if ~eq(thr,Inf); lim = ylim; plot([thr,thr],[0,lim(2)],'k','LineWidth',2); end - xlabel('100*sigma/mean'); ylabel('Counts'); title([num2str(stats.del_cv) ' components filtered']) -end - - -function gmm = del_gmm(gmm,ind) -gmm.alpha = gmm.alpha(~ind); -gmm.mass = gmm.mass(~ind); -gmm.sig = gmm.sig(~ind); \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/proc_gmm.m b/src/MATLAB/DiviK/libs/GMM_library/proc_gmm.m deleted file mode 100644 index 0bd94b5..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/proc_gmm.m +++ /dev/null @@ -1,31 +0,0 @@ -function [proc,merged,orig,stats] = proc_gmm(mz,y,draw,gmm_opts) - -if nargin<3; draw = 0; end -if nargin<4; gmm_opts = default_gmm_opts(); end - -mz = mz(:); y = y(:); - -%baseline correction -if gmm_opts.base - f = @(mz) gmm_opts.a + gmm_opts.b .* mz; - y = msbackadj(mz,y,'WINDOWSIZE',f,'STEPSIZE',f); -end -y(y<0) = 0; - -% estimatiting signal components using GMM -[merged,orig] = gaussian_mixture(mz',y,draw,gmm_opts); - -% post processing -if draw; disp('Filtering Gaussian component based on CV'); end -proc = cell(length(gmm_opts.tol_s),length(gmm_opts.tol_d)); -stats = proc; -for a=1:length(gmm_opts.tol_s) - for b=1:length(gmm_opts.tol_d) - [proc{a,b},stats{a,b}] = post_proc(merged{a,b},draw); - end -end -if draw && length(proc)==1 - figure; plot_spect_vs_decomp(mz,y,proc{1}.alpha,proc{1}.mass,proc{1}.sig,1); - title(['Processed model. No of components detected: ' num2str(length(proc{1}.alpha))]) - xlabel('M/Z'); ylabel('Intensity') -end \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/proc_peakdetect.m b/src/MATLAB/DiviK/libs/GMM_library/proc_peakdetect.m deleted file mode 100644 index 6e25961..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/proc_peakdetect.m +++ /dev/null @@ -1,91 +0,0 @@ -function [peaksmz peaksy minima] = proc_peakdetect(mz,y,thr,cond1,cond2) -% PEAKDETECT(MZ,Y,THR,COND1,COND2) -% -% This function founds peaks in a mass spectrum spectrum -% using first derivative and then reduces its number using -% small peaks reduction, low-intensity peaks reduction and/ -% noise peaks reduction. -% -% IN: -% mz - m/z ratios [nx1] -% y - spectrum intensity [nx1] -% thr - intensity threshold value [0.01] -% cond1 - conditon for small peaks reduction [1.25] -% cond2 - condition for noise peaks reduction [0.001] -% -% Author: -% Michal Marczyk -% michal_marczyk@op.pl - -if nargin < 2; help peakdetect; return; end - -if nargin < 5 - disp('Default parameter values will be used.') - thr = 0.01; - cond1 = 1.3; - cond2 = 0.001; -end - -mz = mz(:); -y = y(:); -thr = thr * max(y); % threshold for low intensities peaks removal -sign = diff(y)>0; % finds all extrema -maxima = find(diff(sign)<0) + 1; %all maxima -minima = [find(diff(sign)>0)+1 ;length(y)]; %all minima -if minima(1) > maxima(1) - minima = [1 ;minima]; -end - -% small peaks removal by slopes -ys = y; -ys(ys<1) = 1; -count = 1; -peak = zeros(1,length(maxima)); -for i=1:length(maxima) - y_check = y(maxima(i)); - if abs(y_check/ys(minima(i)))>cond1 || abs(y_check/ys(minima(i+1)))>cond1 - peak(count) = maxima(i); - count = count + 1; - end -end -peak(count:end) = []; - -% low-intensity peaks removal -peak = peak(y(peak) > thr); - -i = 1; -%noise peaks removal -while i 0 - sign = diff(y(peak(startpos)-1:peak(endpos)+1))>0; - maxima = find(diff(sign)<0) + 1; - [~, ind] = max(y(peak(startpos)+maxima(:)-2)); - peak(endpos) = peak(startpos) - 2 + maxima(ind); - peak(startpos:endpos-1) = 0; - end - i = endpos + 1; -end -peak(peak == 0) = []; - -peaksmz = mz(peak); -peaksy = y(peak); - -function match = check(tempmz,tempint,cond1,cond2) -% match test under 2 conditions -match = 0; -if tempint(1)/tempint(2) < cond1 && tempint(1)/tempint(2) > 1/cond1 - if abs(tempmz(1) - tempmz(2)) < cond2*tempmz(1) - match = 1; - end -end \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/GMM_library/warp_down.m b/src/MATLAB/DiviK/libs/GMM_library/warp_down.m deleted file mode 100644 index f17c3fb..0000000 --- a/src/MATLAB/DiviK/libs/GMM_library/warp_down.m +++ /dev/null @@ -1,187 +0,0 @@ -% warp_down function for warping down a piece of a spectrum -function [mz_out,y_out] = warp_down(mz,y,idx_start) -% warning('off','stats:nlinfit:IllConditionedJacobian') -% warning('off','stats:nlinfit:Overparameterized') -% warning('off','stats:nlinfit:IterationLimitExceeded') -warning('off','stats:statrobustfit:IterationLimit'); -warning('off','MATLAB:nearlySingularMatrix'); -warning('off','MATLAB:polyfit:RepeatedPointsOrRescale') - -mz = mz(:)'; y = y(:)'; -% y = y-min(y); %warp y to 0 -x_diff = mean(diff(mz)); -start = mz(1); -n = length(mz); -mz = mz - start + n; -check = max(floor(.5/x_diff),5); %0.5 Da to look ahead in fitting or 5 points -check(check > n/2) = round(n/2); -l_end_point = 1; %ceil(idx/2)+1; %how much modeled borders should we left -r_end_point = l_end_point; - -if nargin < 3 - l_idx_start = 5; - r_idx_start = 5; - y_diff = [0 100*diff(y)/max(abs(diff(y)))]; - tmp = find(y_diff(1:round(.05*n)) > 80); - if ~isempty(tmp); l_idx_start = tmp(1); end - tmp = find(y_diff(end-round(.05*n)+1:end) < -80); - if ~isempty(tmp); r_idx_start = round(.05*n)-tmp(end)+1; end -else - l_idx_start = idx_start; - r_idx_start = idx_start; -end - -l_idx_start(l_idx_start<4 || l_idx_start>.8*n) = 4; -r_idx_start(r_idx_start<4 || r_idx_start>.8*n) = 4; - -% left side -[l_res,l_idx] = line_fit(mz,y,l_idx_start,check); -if eq(l_res,1); l_end_point = l_idx; end -[l_mz,l_y] = curve_model(mz,y,l_idx,l_res,l_end_point); - -% right side -[r_res,r_idx] = line_fit(mz,fliplr(y),r_idx_start,check); -if eq(r_res,1); r_end_point = r_idx; end -[r_mz,r_y] = curve_model(fliplr(mz),fliplr(y),r_idx,r_res,r_end_point); - -r_mz = fliplr(r_mz); r_y = fliplr(r_y); -%merging output -ind = (l_idx-l_end_point+1:n-r_idx+r_end_point); -mz_out = [l_mz mz(ind) r_mz]'; -mz_out = mz_out + start - n; -y_out = [l_y y(ind) r_y]'; - -function [res,idx] = line_fit(mz,y,idx,check) -%fitting line to signal and check the slope -stop = 1; -[~,stats] = robustfit(mz(1:idx),y(1:idx)); -mse_old = stats.s; -while stop - idx = idx + 1; - if idx > length(mz)/2; idx = 3; break; end - - [~,stats] = robustfit(mz(1:idx),y(1:idx)); - mse = stats.s; - if mse > mse_old - mse_tmp = inf(1,check); - if idx+check > length(mz)/2; check = round(length(mz)/2) - idx; end - for a=1:check - [~,stats] = robustfit(mz(1:idx+a),y(1:idx+a)); - mse_tmp(a) = stats.s; - end - if sum(mse_tmp < mse_old) == 0 - idx = idx - 1; - stop = 0; - elseif mse_tmp(check) < mse_old && mse_tmp(check) < min(mse_tmp) - idx = idx + check; - mse_old = mse_tmp(check); - else - [~,ind] = min(mse_tmp); - idx = idx - 1 + ind; - stop = 0; - end - else - mse_old = mse; - end -end - -[beta,stats] = robustfit(mz(1:idx),y(1:idx)); -if stats.p(2) > 0.1 || (beta(2)>-1 && beta(2)<0) - res = 1; %no peaks, just noise or improper shape -elseif beta(2) > 0; - res = 3; %proper shape -else - res = 2; %improper shape -end - -function [mz_out,y_out] = curve_model(mz,y,idx,res,end_point) - -x_diff = mean(diff(mz)); -if eq(res,1) - x_pred = mz(1)-floor(y(1))*x_diff:x_diff:mz(1)-x_diff; - if length(x_pred) > length(mz) - zero_point = x_pred(length(x_pred) - length(mz)); - x_pred = mz(1)-round((mz(1)-zero_point)/x_diff)*x_diff:x_diff:mz(1)-x_diff; - end - if isempty(x_pred); x_pred = mz(1)-x_diff; end - y_pred = linspace(0,y(1)-1,length(x_pred)); -else - if eq(res,2) - beta = robustfit(mz(1:idx),fliplr(y(1:idx))); - else - beta = robustfit(mz(1:idx),y(1:idx)); - end - zero_point = fzero(@(x)fun(x,beta),mz(1)); - x_pred = mz(1)-round((mz(1)-zero_point)/x_diff)*x_diff:x_diff:mz(1)-x_diff; - if length(x_pred) > length(mz) - zero_point = x_pred(length(x_pred) - length(mz)); - beta = robustfit([mz(1:idx) zero_point],[y(1:idx) 0]); - x_pred = mz(1)-round((mz(1)-zero_point)/x_diff)*x_diff:x_diff:mz(1)-x_diff; - end - if isempty(x_pred); x_pred = mz(1)-x_diff; end - y_pred = beta(1)+beta(2)*x_pred; -end - -if eq(res,3) -% mdl = @(a,x)(a(1) + a(2)*x + a(3)*x.^2); - beta = [0;1;0]; - deg = 2; -else -% mdl = @(a,x)(a(1) + a(2)*x + a(3)*x.^2 + a(4)*x.^3); - beta = [0;1;0;0]; - deg = 3; -end -z_len = round(max(idx,length(x_pred))/2); -x_mod = [x_pred(1)-z_len*x_diff:x_diff:x_pred(1)-x_diff x_pred mz(1:idx)]; -noise = .1*mean(y_pred).*randn(1,length(y_pred)); -y_mod = [zeros(1,z_len) y_pred+noise y(1:idx)]; -y_mod(y_mod<0) = 0; - -mz_out = x_mod(1:end-end_point); - -% nonlinear regression -% options = statset('nlinfit'); -% options.MaxIter = 1000; -% options.Robust = 'on'; -% [beta,r,~,covv] = nlinfit(x_mod,y_mod,mdl,beta,options); -% y_out = nlpredci(mdl,mz_out,beta,r,'covar',covv); - -% polynomial fit -% beta = polyfit(x_mod,y_mod,deg); -% y_out = polyval(beta,mz_out); - -% lsq constraint fit -y_fit = fit_constr(x_mod,y_mod,deg,mz_out(end),y_mod(end-end_point),beta); -y_out = y_fit(1:end-end_point); - -ind = find(y_out < 0); -if ~isempty(ind); - if length(ind) > 1 - mz_out(1:ind(end-1)) = []; - y_out(1:ind(end-1)) = []; - end - y_out(1) = 0; -end -mz_out = mz_out(:)'; y_out = y_out(:)'; - -function y_fit = fit_constr(x,y,n,x0,y0,beta) -%n - degree of polynomial to fit - -options = optimset('lsqlin'); -options = optimset(options,'Display','off','MaxIter',500,'LargeScale','off'); - -x = x(:); y = y(:); - -V(:,n+1) = ones(length(x),1,class(x)); -for j = n:-1:1; V(:,j) = x.*V(:,j+1); end - -% We use linear equality constraints to force the curve to hit the required point. In -% this case, 'Aeq' is the Vandermoonde matrix for 'x0' -Aeq = x0.^(n:-1:0); -% and 'beq' is the value the curve should take at that point -beq = y0; -p = lsqlin(V,y,[],[],Aeq,beq,[],[],beta,options); -y_fit = polyval( p, x ); - -function y = fun(x,a) -y = a(1) + a(2)*x; \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/divik_library/divik.m b/src/MATLAB/DiviK/libs/divik_library/divik.m deleted file mode 100644 index 7ec9519..0000000 --- a/src/MATLAB/DiviK/libs/divik_library/divik.m +++ /dev/null @@ -1,251 +0,0 @@ -function [ partition, res_struct ] = divik( data, xy, varargin ) -%DIVIK Performs filtering & clustering recursively. -% partition = DIVIK( data, xy ) -% [partition, res_struct] = DIVIK( ___ ) -% partition = DIVIK( ___, 'Name', 'Value' ) - allows to specify additional -% parameters: -% - MaxK - maximal number of clusters considered (default 10) -% - Level - maximal depth of splitting (default 3) -% - UseLevels - states, whether to use levels of depth or size criterion for -% stop (default true, uses depth of recurence) -% - AmplitudeFiltration - states whether to use filtration of low abundance -% features at top level (default true) -% - VarianceFiltration - states whether to use adaptive filtration of -% non-informative features with respect to variance for every region (default -% true) -% - PercentSizeLimit - the percentage of inital size used for stopping the -% algorithm when size criterion is applied (default 0.001 for 0.1% of initial -% size) -% - FeaturePreservationLimit - the percentage of features preserved for sure -% during filtration procedures (default 0.05 for at least 5% of features\ -% preserved) -% - Metric - metric used in clustering (default 'pearson') -% - PlotPartitions - states whether to plot partitions at top level (default -% false) -% - PlotRecursively - states whether to plot partitions at all levels, works -% only if PlotPartitions is set true (default false) -% - DecompositionPlots - states whether to plot decompositions of amplitude -% and variance at top level (default false) -% - DecompositionPlotsRecursively - states whether to make decomposition plots -% at all levels, works only if DecompositionPlots is set true (default false) -% - MaxComponentsForDecomposition - sets the maximal considered number of -% components used for decomposition of amplitude and variance (default 10) -% - OutPath - sets the path to directory where all outputs may be stored -% (default '.') -% - CachePath - sets the path to directory where cache may be stored (default -% '.') -% Cache - enables caching feature (default true) -% Verbose - sets verbosity (default false) -% KmeansOpts - additional options passed to k-means algorithm (see K_MEANS) -% (default {}) - - -% invisible: -% settings.TransposeData = true; % has to be changed in recursive calls -% settings.PrimaryObjectsNumber = NaN; % has to be updated in top call - - if iscell(data) - data = unpack_doublecell(data); - end - if iscell(xy) - xy = unpack_doublecell(xy); - end - - settings = apply_user_configs(@get_defaults,varargin); - - if settings.TransposeData - data = data'; - xy = xy'; - end - - if isnan(settings.PrimaryObjectsNumber) - settings.PrimaryObjectsNumber = size(data,1); - end - - settings.PlotRecursively = settings.PlotRecursively ... - & settings.PlotPartitions; - settings.DecompositionPlotsRecursively = ... - settings.DecompositionPlotsRecursively & settings.DecompositionPlots; - - settings.KmeansOpts = { 'EnableCache', settings.Cache, ... - 'CachePath', settings.CachePath, ... - 'Verbose', settings.Verbose, ... - 'Metric', settings.Metric, ... - 'MaxIter', settings.KmeansMaxIters, ... - settings.KmeansOpts{:} }; - - percent_size_limit = settings.PercentSizeLimit; - percent_of_features_limit = settings.FeaturePreservationLimit; - out_path = settings.OutPath; - perform_denoising = settings.AmplitudeFiltration; - cacher_path = settings.CachePath; - max_K = settings.MaxK; - level = settings.Level; - primary_objects_number = settings.PrimaryObjectsNumber; - metric_name = settings.Metric; - k_means_opts = settings.KmeansOpts; - - [features_number,~] = size(data); - - variance_filtering_threshold = 0; - amplitude_filtering_threshold = 0; - - if perform_denoising - if settings.DecompositionPlots - fig_path = [out_path '\amplitude_decomposition.png']; - else - fig_path = ''; - end - amplitudes = mean(data,2); - thr = fetch_thresholds(log2(amplitudes), ... - 'Threshold', amplitude_filtering_threshold, ... - 'CachePath',cacher_path, ... - 'FigurePath',fig_path, ... - 'MaxComponents',settings.MaxComponentsForDecomposition, ... - 'Cache', settings.Cache); - thr = thr{1}; - if length(thr)<1 || sum(log2(amplitudes)>thr(1))/length(amplitudes) ... - thr(1); - res_struct.amp_thr = thr(1); - end - data = data(amp_filter,:); - - res_struct.amp_filter = amp_filter; - - features_number = sum(amp_filter); - - end - - if settings.VarianceFiltration - if settings.DecompositionPlots - fig_path = [out_path '\variance_decomposition.png']; - else - fig_path = ''; - end - %var filters fetching - variances = log2(var(data,1,2)); - thr = fetch_thresholds(variances, ... - 'Threshold',variance_filtering_threshold, ... - 'CachePath',cacher_path, ... - 'FigurePath',fig_path, ... - 'MaxComponents',settings.MaxComponentsForDecomposition, ... - 'Cache', settings.Cache); - thr = thr{1}; - if length(thr)<1 - var_filter = true(features_number,1); - res_struct.var_thr = -Inf; - else - cur = length(thr); - var_filter = false(features_number,1); - while sum(var_filter)/length(var_filter) ... - < percent_of_features_limit && cur>0 - var_filter = variances>thr(cur); - res_struct.var_thr = thr(cur); - cur = cur - 1; - end - if cur==0 - var_filter = true(features_number,1); - res_struct.var_thr = -Inf; - end - end - res_struct.var_filter = var_filter; - else - var_filter = true(features_number,1); - end - - %%clusterization - [partition,index] = cacher(@optimize_partition,... - { - data(var_filter,:),... - max_K,... - metric_name,... - k_means_opts - },... - 'CachePath',cacher_path, ... - 'Enabled', settings.Cache); - res_struct.partition = partition; - res_struct.index = index; - res_struct.centroids = fetch_centroids_from_partition(data,partition); - if index==-Inf - % rmdir(out_path,'s'); - partition = []; - res_struct = struct(); - return; - end - - %%results saving - if settings.PlotPartitions - partition_plotter(xy, partition, index, out_path, ... - ['primary_' num2str(sum(var_filter)) '_features' ], true); - end - best_K = max(partition); - - if level>1 || ~settings.UseLevels - passed = cell_settings(settings); - merged = partition; - res_struct.subregions = cell(1,best_K); - for i = 1:best_K - if sum(partition==i)>(primary_objects_number*percent_size_limit) - [partitions, r] = divik( ... - data(:,partition==i), ... - xy(:,partition==i), ... - passed{:}, ... %setting copy - 'Level', level-1, ... %override - 'OutPath', [out_path '\' num2str(i)], ... %override - 'AmplitudeFiltration', false, ... %override - 'PlotPartitions', settings.PlotRecursively, ... %override - 'DecompositionPlots', ... - settings.DecompositionPlotsRecursively, ... %override - 'TransposeData', false); %override - - if ~isempty(fieldnames(r)) - res_struct.subregions{i} = r; - end - merged = filthy_merge(merged,partitions,i); - end - end - res_struct.merged = merged; - partition = merged; - - if settings.PlotPartitions - partition_plotter(xy, partition, index, out_path, ... - 'downmerged', false); - end - end - -end - -function settings = get_defaults() - settings = struct(); - settings.MaxK = 10; - settings.Level = 3; - settings.UseLevels = true; - settings.AmplitudeFiltration = true; - settings.VarianceFiltration = true; - settings.PercentSizeLimit = 0.001; - settings.FeaturePreservationLimit = 0.05; - settings.Metric = 'pearson'; - settings.PlotPartitions = false; - settings.PlotRecursively = false; - settings.DecompositionPlots = false; - settings.DecompositionPlotsRecursively = false; - settings.MaxComponentsForDecomposition = 10; - settings.OutPath = '.'; - settings.CachePath = '.'; - settings.Cache = true; - settings.Verbose = false; - settings.KmeansOpts = {}; - settings.KmeansMaxIters = 100; - %invisibles - settings.TransposeData = true; - settings.PrimaryObjectsNumber = NaN; -end - -function unpacked = unpack_doublecell(doublecell) - unpacked = cell2mat(cellfun(@cell2mat,doublecell, ... - 'UniformOutput',false)'); -end diff --git a/src/MATLAB/DiviK/libs/divik_library/dunn.m b/src/MATLAB/DiviK/libs/divik_library/dunn.m deleted file mode 100644 index d87e988..0000000 --- a/src/MATLAB/DiviK/libs/divik_library/dunn.m +++ /dev/null @@ -1,67 +0,0 @@ -function value = dunn( data, partition, metric_name ) -%DUNN Calculates Dunn's index for set partition -% value = DUNN( data, partition, metric_name ) -% AWFUL VERSION - - [features_number,~] = size(data); - K = max(partition); - centers = NaN(features_number,K); - for i=1:K - centers(:,i) = mean(data(:,partition==i),2); - end - intracluster_metric = 'MeanRadius'; - intercluster_metric = ''; - - clusters = cell(1,K); - indexes = cell(1,K); - for i=1:K - part = partition==i; - if sum(part)<2 - value = -Inf; - return; - end - indexes{i} = find(part); - clusters{i} = data(:,indexes{i}); - end - - % %INTRACLUSTER DISTANCE - - %DIAMETER - intracluster_distances = -Inf(1,K); - if strcmp(intracluster_metric,'Diameter') - cache = any_dist(metric_name,data,data); - ccache = cell(K,1); - for i=1:K - query = partition==i; - ccache{i} = cache(query,query); - end - parfor c_num=1:K - intracluster_distances(c_num) = max(max(ccache{c_num})); - end - %RADIUS - elseif strcmp(intracluster_metric,'Radius') - parfor c_num=1:K - intracluster_distances(c_num) = max(any_dist(metric_name,clusters{c_num},centers(:,c_num))); - end - %MEAN RADIUS - elseif strcmp(intracluster_metric,'MeanRadius') - parfor c_num=1:K - cluster = clusters{c_num}; - center = centers(:,c_num); - intracluster_distances(c_num) = mean(any_dist(metric_name,cluster,center)); - end - else - error('Unknown intracluster distance measure for Dunn''s index calculation.'); - end - - - max_intracluster_distance = max(intracluster_distances); - - % % INTERCLUSTER DISTANCE - intercluster_distances = any_dist(metric_name,centers,centers); - intercluster_distances = max(Inf*eye(size(intercluster_distances)),intercluster_distances); - min_intercluster_distance = min(min(intercluster_distances)); - - value = min_intercluster_distance / max_intracluster_distance; - -end \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/divik_library/k_means.m b/src/MATLAB/DiviK/libs/divik_library/k_means.m deleted file mode 100644 index 3c6a6e6..0000000 --- a/src/MATLAB/DiviK/libs/divik_library/k_means.m +++ /dev/null @@ -1,713 +0,0 @@ -function [ partition, centers ] = k_means( data, K, varargin) -%k_means Performs k-means algorithm -% [ partition, centers ] = K_MEANS( data, K ) - Performs k-means -% algorithm with specified distance metrics and specified mean. Returns -% assignment of the points to specific clusters and centers of the -% clusters. Data points should be organized in columns, features in rows. -% K - number of clusters into which the data has to be split. Can be a -% vector to support multiple number of clusters at once. -% [ partition, centers ] = K_MEANS( data, K, Name, Value ) - performs the -% computations with additional parameters. Parameters available: -% - 'Metric' - name of the metric used during calculations: -% - 'pearson' - Pearson correletive distance (1-s, where s is Pearson -% correlation coefficient) -% - 'spearman' - same as above, however uses Spearman correlation -% coefficient -% - 'euclidean' - Euclidean distance -% - 'cityblock' - see pdist MATLAB function specification -% - 'minkowski' - see pdist MATLAB function specification -% - 'chebychev' - see pdist MATLAB function specification -% - 'mahalanobis' - see pdist MATLAB function specification -% - 'cosine' - see pdist MATLAB function specification -% - 'hamming' - see pdist MATLAB function specification -% - 'jaccard' - see pdist MATLAB function specification -% - 'Init' - initialization technique: -% - 'furthest' - builds the linear model of the data. Picks an object -% with highest error as a first center. The next K-1 points are -% chosen sequentially in such a way that the lowest distance to -% existing centers should be maximal. Distance is chosen with respect -% to the metric given or the additionally specified. -% - 'random' - picks K objects at random to be the cluster centers -% - 'StartsNum' - number of starts, makes sense only for random -% initializations -% - 'StopCrit' - cell array containing all the stop criterions chosen -% (default is empty): -% - 'AssignmentPercentChanging' - stops when less than small -% percentage of all objects changed their assignment -% - 'IndicePercentChanging' - stops when the percentage change of the -% separation indice is small -% Moreover, stops if following conditions are found to be true: -% - assignments cycle is detected -% - no assignment was changed -% - reached maximal number of iterations -% - 'MaxIters' - sets maximal number of iterations to be performed -% (default 100) -% - 'AssignmentChangeThreshold' - sets threshold for -% 'AssignmentPercentChanging' stop criterion (default 0.1% of items which -% changed its assignment) -% - 'IndiceChangeThreshold' - sets threshold for 'IndicePercentChanging' -% stop criterion (default 0.00001%) -% - 'Indice' - sets the indice used for 'IndicePercentChanging' stop -% criterion (default 'dunn'): -% - 'dunn' -% - 'silhouette' (not implemented yet!) -% - 'DunnInter' - sets intercluster distance calculation method from the -% following (default 'Centers'): -% - 'Centers' - the distance between the centers of the clusters -% - 'DunnIntra' - sets intracluster distance calculation method from the -% following (default 'MeanRadius'): -% - 'Diameter' - the distance between two furthest points in the -% cluster -% - 'Radius' - the distance between the center and the furthest point -% in the cluster -% - 'MeanRadius' - the mean distance between the cluster center and -% points assigned -% - 'InitializationMetric' - allows to pick different metric for -% initialization. Default is the same as clustering one. Supports all of -% the metrics specified for clusterization. -% - 'Normalization' - normalization of features is used during -% clusterization process. Since several types are available, therefore -% some of them were implemented (default 'none'): -% - 'none' -% - 'z-score' -% - 'scaling' - scales features dividing them by magnitude -% - 'InitNormalization' - normalization of features during initialization -% is used. Since several types are available, therefore some of them were -% implemented (default 'none'): -% - 'none' -% - 'z-score' -% - 'scaling' - scales features dividing them by magnitude -% - 'Verbose' - allows to enable current status printing (default true). -% - 'CachePath' - allows to use specific location for caching (default -% is current directory) -% - 'EnableCache' - allows to enable caching feature (default true) -% -% Future extensions: -% - possibility of disabling cache -% - cache optimization -% - custom initialization centers -% - custom metrics -% - custom indices - - %settings initialization - settings = set_defaults(); - - %user-settings reading - settings = apply_user_configs(settings,varargin); - - %checking cache for existing results - [cached_centers, partitions, iter_num, centers_known] = fetch_result_from_cache(data,K,settings); - - %centers initialization - [centers, prepared_data] = internal_init(K,... - data,... - settings,... - cached_centers,... - centers_known); - - [features_number,objects_number] = size(prepared_data); - % we store the indexes, because user may want to end when convergence - % is low and the index changes very slightly - n_indexes = indexes_needed(settings); - if n_indexes>0 - indexes = -Inf(settings.MaxIters,n_indexes); - else - indexes = NaN; - end - - if settings.Verbose - fprintf('Initialized.\n') - fprintf('Iteration: 00000\n') - end - - % we store all of the partitions obtained, because user may want to end - % when convergence is low or just to detect assignment cycles - - stop = false; - - while ~stop - % we find new partition (stored for every iteration because we have - % to be able to find loops in partitioning) and new centers - [partitions(iter_num,:),centers{iter_num+1}] = internal_repartition(K,... - prepared_data,... - centers{iter_num},... - settings.Metric); - % we calculate index specified by user in the settings if any is - % specified - if n_indexes>0 - indexes(iter_num,:) = internal_indexes(K,... - partitions,... - centers{iter_num+1},... - prepared_data,... - settings); - end - % we check stop condition - stop = check_stop_conditions(partitions,... - indexes,... - iter_num,... - settings); - iter_num = iter_num+1; - if settings.Verbose - fprintf('\b\b\b\b\b\b%05d\n',iter_num) - end - end - - check_and_save(data,partitions,centers,K,settings); - - partition = partitions(iter_num-1,:); - centers = internal_denormalize(centers{iter_num},... - data,... - settings.Normalization); - - if settings.Verbose - fprintf('Clusterization of %d objects described by %d features into %d partitions done in %d iterations.\n', ... - objects_number,... - features_number,... - K,... - iter_num-1); - end -end - -function settings = set_defaults() -%SET_DEFAULTS Sets default settings of the algorithm. - - settings.Metric = 'pearson'; - settings.Init = 'furthest'; - settings.InitializationMetric = ''; - settings.StartsNum = 1; - settings.StopCrit = {}; - settings.Indice = 'dunn'; - settings.IndiceChangeThreshold = 0.00001; - settings.Normalization = 'none'; - settings.InitNormalization = 'none'; - settings.Verbose = true; - settings.MaxIters = 100; - settings.AssignmentChangeThreshold = 0.1; - settings.DunnInter = 'Centers'; - settings.DunnIntra = 'MeanRadius'; - settings.CachePath = '.'; - settings.EnableCache = true; - -end - -function settings = apply_user_configs(settings,configs) -%APPLY_USER_CONFIGS Applies all of the features specified by the user. -% settings = APPLY_USER_CONFIGS(settings,configs) - - nvararg = length(configs); - - for i = 1:nvararg - if mod(i,2)==1 - property_name = configs{i}; - else - settings.(property_name) = configs{i}; - end - end - - settings.IndiceChangeThreshold = settings.IndiceChangeThreshold/100; - settings.AssignmentChangeThreshold = settings.AssignmentChangeThreshold/100; - if strcmp(settings.InitializationMetric,'') - settings.InitializationMetric = settings.Metric; - end - -end - -%to be extended -function [centers, partitions, iter_num, centers_known] = fetch_result_from_cache(data,K,settings) -%FETCH_RESULT_FROM_CACHE Seeks cache for ready results -% [centers, partitions, iter_num, centers_known] = fetch_result_from_cache(data,K,settings) - - if ~settings.EnableCache - %same as for absent cache - [~,objects_number] = size(data); - centers = NaN*ones(1,K); - partitions = NaN*ones(settings.MaxIters,objects_number); - iter_num = 1; - centers_known = 0; - return; - end - - try - data_hash = DataHash(data); - catch - warning('K-means: Input data hash calculation failed. Skipping any partial results checks.'); - %same as for absent cache - [~,objects_number] = size(data); - centers = NaN*ones(1,K); - partitions = NaN*ones(settings.MaxIters,objects_number); - iter_num = 1; - centers_known = 0; - return; - end - - path = [settings.CachePath '\kmeans_cache']; - %init path construction - path = [path '\' data_hash]; - path = [path '\' settings.Init]; - path = [path '\' settings.InitializationMetric]; - path = [path '\' settings.InitNormalization]; - init_path = [path '\init.mat']; - %result path construction - path = [path '\' settings.Metric]; - path = [path '\' settings.Normalization]; - path = [path '\' num2str(K)]; - result_path = [path '\result.mat']; - - result_exist = exist(result_path,'file'); - init_exist = exist(init_path,'file'); - - if result_exist~=0 - load(result_path); - % if the results were cached yet and are surely enough - if settings.MaxIters= settings.MaxIters - stop = true; - return; - end - - % we check if the change comparison is necessary and if it is possible - % to check it - if indexes_needed(settings) && iter_num>1 - % we compare change in the index - if abs((indexes(iter_num)-indexes(iter_num-1)))/indexes(iter_num-1)1 - if sum(partitions(iter_num-1,:) == partitions(iter_num,:)) >= length(partitions(iter_num,:))*(1-settings.AssignmentChangeThreshold); - stop = true; - return; - end - end - - % we check for the partition loop - for i=(iter_num-1):-1:1 - if sum(partitions(i,:) == partitions(iter_num,:)) == length(partitions(iter_num,:)) - stop = true; - return; - end - end - -end - -function centers_idx = internal_init_regression_furthest( K, data, metric_name, centers, centers_known ) -%internal_init_regression_furthest Initialized k-means algorithm using -%linear regression -% centers_idx = internal_init_regression_furthest( K, data, metric_name, centers, centers_known ) -% Finds the point which is the furthest from the approximation of -% dataset, then finds next points taking the points with maximal minimal -% distance to the chosen yet. - - [features_number,objects_number] = size(data); - - centers_idx = NaN(1,K); - centers_idx(1:centers_known) = centers(1:centers_known); - centers = NaN(features_number,K); - centers(:,1:centers_known) = data(:,centers_idx(1:centers_known)); - - if centers_known<1 - b = regress(transpose(data(1,:)),[ones(objects_number,1) transpose(data(2:end,:))]); - guesses = transpose([ones(objects_number,1) transpose(data(2:end,:))]*b); - differences_between_prediction_and_guess = abs(guesses - data(1,:)); - - maxindex = find(differences_between_prediction_and_guess(:)==max(differences_between_prediction_and_guess(:))); - maxindex = maxindex(1); - centers(:,1) = data(:,maxindex); - centers_idx(1) = maxindex; - centers_known = 1; - end - - if centers_known<1 - error('Furthest init failed: no initial center found.'); - else - dst = any_dist(metric_name,data(:,centers_idx(~isnan(centers_idx))),data); - minimal_distances = min(dst,[],1); - end - - for i = (centers_known+1):K - max_ind = find(minimal_distances == max(minimal_distances), 1); - centers(:,i) = data(:,max_ind); - centers_idx(i) = max_ind; - dst = any_dist(metric_name,data(:,max_ind),data); - minimal_distances = min([minimal_distances; dst],[],1); - end - -end - -function n = indexes_needed(settings) -%INDEXES_NEEDED Checks, how many indexes have to be calculated -% n = INDEXES_NEEDED(settings) - - n = 0; - for i=1:length(settings.StopCrit) - n = n + strcmp(settings.StopCrit,'IndicePercentChanging'); - end - -end - -%to be extended -function [ centers, prepared_data ] = internal_init(K,data,settings,centers,centers_known) -%INTERNAL_INIT Initializes starting parameters for k-means algorithm -% [ centers, prepared_data ] = INTERNAL_INIT(K,data,settings,centers,centers_known) - - init_type = settings.Init; - init_normalization_type = settings.InitNormalization; - normalization_type = settings.Normalization; - init_metric_name = settings.InitializationMetric; - - -% [features_number,objects_number] = size(data); - [~,objects_number] = size(data); - - %clustering data normalization - if strcmp(normalization_type,'none') - prepared_data = data; - elseif strcmp(normalization_type,'z-score') - prepared_data = zscore(data,1,2); - elseif strcmp(normalization_type,'scaling') - prepared_data = bsxfun(@rdivide,data,max(data,[],2)); - else - error('Unknown data normalization type (for clustering process).'); - end - - if centers_known>=K - if ~iscell(centers) - centers = {prepared_data(:,centers)}; - end - return; - end - - %initialization data normalization - if strcmp(init_normalization_type,'none') - init_data = data; - elseif strcmp(init_normalization_type,'z-score') - init_data = zscore(data,1,2); - elseif strcmp(init_normalization_type,'scaling') - init_data = bsxfun(@rdivide,data,max(data,[],2)); - -% % ANOTHER VERSION - whole data is scaled by the same number -% init_data = data/max(max(abs(data))); - else - error('Unknown data normalization type (for initialization process).'); - end - - %centers finding - if strcmp(init_type,'random') - tmp = 1:objects_number; - tmp(centers) = 0; - tmp = tmp(tmp~=0); - idx = [centers, randsample(tmp,K - centers_known)]; - elseif strcmp(init_type,'furthest') - idx = internal_init_regression_furthest(K,init_data,init_metric_name,centers,centers_known); - else - error('Unknown initialization type.'); - end - - check_init_and_save(data,idx,settings); - - centers = {prepared_data(:,idx)}; -end - -function [partition, centers] = internal_repartition(K,data,old_centers,metric_name) -%INTERNAL_REPARTITION Calculates new partitioning of the points along the clusters -% [partition, centers] = INTERNAL_REPARTITION(K,data,old_centers,metric_name) -% Uses specified metric to assign each data point to one of the clusters. - - [~,objects_number] = size(data); - partition = NaN*zeros(1,objects_number); - - dists = any_dist(metric_name,old_centers,data); - mins = min(dists); - for i = 1:K - partition(dists(i,:)==mins) = i; - end - - centers = internal_centers(data,partition,K,'arithmetic'); -end - -function [ centers ] = internal_centers( data, partition, K, mean_name ) -%INTERNAL_CENTERS Calculates centers for given data and its partitioning -% [ centers ] = INTERNAL_CENTERS( data, partition, K, mean_name ) -% Calculates centers of the clusters basing on input data, its -% partitioning, using specified mean. Now uses only arithmetic or -% harmonic mean. Should consider case when clusters disappear. - - [features_number,~] = size(data); - centers = NaN*zeros(features_number,K); - - for i = 1:K - if strcmp(mean_name,'arithmetic') - centers(:,i) = mean(data(:,partition==i),2); - elseif strcmp(mean_name,'harmonic') - centers(:,i) = harmmean(data(:,partition==i),2); - elseif strcmp(mean_name,'median') - centers(:,i) = median(data(:,partition==i),2); - else - error('Unknown center calculation method.'); - end - end - -end - -%to rework -function [ value ] = internal_dunn_index( K, partition, centers, data, metric_name, intercluster_metric, intracluster_metric ) %#ok -%INTERNAL_DUNN_INDEX Calculates Dunn's index -% value = INTERNAL_DUNN_INDEX( K, partition, centers, data, metric_name, intercluster_metric, intracluster_metric ) -% Calculates Dunn's index for current partition of given dataset. As -% intracluster distance the mean distance between any point and the -% center is chosen (or other specified), as intercluster distance, the -% distance between the centers is chosen. -% TO REWORK. (WHEN LOWEST RAM USAGE SPECIFIED, IT USES FULL RAM + CREATES -% CACHE EVERY TIME) - - clusters = cell(1,K); - indexes = cell(1,K); - for i=1:K - part = partition==i; - if sum(part)<2 - value = -Inf; - return; - end - indexes{i} = find(part); - clusters{i} = data(:,indexes{i}); - end - - % %INTRACLUSTER DISTANCE - - %DIAMETER - intracluster_distances = -Inf(1,K); - if strcmp(intracluster_metric,'Diameter') - cache = any_dist(metric_name,data,data); - ccache = cell(K,1); - for i=1:K - query = partition==i; - ccache{i} = cache(query,query); - end - parfor c_num=1:K - intracluster_distances(c_num) = max(max(ccache{c_num})); - end - %RADIUS - elseif strcmp(intracluster_metric,'Radius') - parfor c_num=1:K - intracluster_distances(c_num) = max(any_dist(metric_name,clusters{c_num},centers(:,c_num))); - end - %MEAN RADIUS - elseif strcmp(intracluster_metric,'MeanRadius') - parfor c_num=1:K - cluster = clusters{c_num}; - center = centers(:,c_num); - intracluster_distances(c_num) = mean(any_dist(metric_name,cluster,center)); - end - else - error('Unknown intracluster distance measure for Dunn''s index calculation.'); - end - - - max_intracluster_distance = max(intracluster_distances); - - % % INTERCLUSTER DISTANCE - intercluster_distances = any_dist(metric_name,centers,centers); - intercluster_distances = max(Inf*eye(size(intercluster_distances)),intercluster_distances); - min_intercluster_distance = min(min(intercluster_distances)); - - value = min_intercluster_distance / max_intracluster_distance; -end - -%to be extended -function indexes = internal_indexes(K,partitions,centers,prepared_data,settings) - - warning('IndicePercentChanging feature is implemented only partially. It is recommended to not to use this feature yet.'); - n_stops = length(settings.StopCrit); - any_index_needed = false; - for i = 1:n_stops - any_index_needed = any_index_needed || strcmp(settings.StopCrit,'IndicePercentChanging'); - end - - if any_index_needed - if strcmp(settings.Indice,'dunn') - indexes(1) = internal_dunn_index( K, partitions(end,:), centers, prepared_data, settings.Metric, settings.DunnIntra, '' ); - elseif strcmp(settings.Indice,'silhouette') - error('Not implemented yet.'); - else - error('Unknown indice.'); - end - end - -end - -function centers = internal_denormalize(centers,data,normalization_type) -%INTERNAL_DENORMALIZE Performs denormalization procedure for the centers -%found. -% centers = INTERNAL_DENORMALIZE(centers,data,normalization_type) - - %clustering data normalization - if strcmp(normalization_type,'none') - return; - elseif strcmp(normalization_type,'z-score') - centers = bsxfun(@plus,bsxfun(@rdivide,centers,1./std(data,1,2)),mean(data,2)); - elseif strcmp(normalization_type,'scaling') - centers = bsxfun(@rdivide,centers,1./max(data,[],2)); - else - error('Unknown data normalization type (for results denormalization process).'); - end - -end - -function check_init_and_save(data,idx,settings) -%CHECK_INIT_AND_SAVE -% CHECK_INIT_AND_SAVE(data,idx,settings) - - if ~settings.EnableCache - return - end - - try - data_hash = DataHash(data); - catch - %skipping any partial saves for init - warning('K-means: Skipping any partial saves for init.'); - return; - end - - path = [settings.CachePath '\kmeans_cache']; - %init path construction - path = [path '\' data_hash]; - path = [path '\' settings.Init]; - path = [path '\' settings.InitializationMetric]; - path = [path '\' settings.InitNormalization]; - - if exist(path,'dir')==0 - mkdir(path); - end - - init_path = [path '\init.mat']; - - init_exist = exist(init_path,'file'); - - if init_exist~=0 - load(init_path); - if length(inits.centers) < length(idx) %#ok - inits.centers = idx; - save(init_path,'inits'); - end - else - inits.centers = idx; %#ok - save(init_path,'inits'); - end - -end - -function check_and_save(data,partitions,centers,K,settings) -%CHECK_AND_SAVE -% CHECK_AND_SAVE(data,partitions,centers,K,settings) - - if ~settings.EnableCache - return; - end - - try - data_hash = DataHash(data); - catch - warning('K-means: Skipping partial result save.'); - return; - end - - path = [settings.CachePath '\kmeans_cache']; - %result path construction - path = [path '\' data_hash]; - path = [path '\' settings.Init]; - path = [path '\' settings.InitializationMetric]; - path = [path '\' settings.InitNormalization]; - path = [path '\' settings.Metric]; - path = [path '\' settings.Normalization]; - path = [path '\' num2str(K)]; - - if exist(path,'dir')==0 - mkdir(path); - end - - result_path = [path '\result.mat']; - - result_exist = exist(result_path,'file'); - - if result_exist~=0 - load(result_path); - if length(results.terminal) < length(centers)-1 %#ok - results.centers = centers; - results.partitions = partitions; - results.terminal = false(length(centers)-1,1); - save(result_path,'results'); - end - else - results.centers = centers; - results.partitions = partitions; - results.terminal = false(length(centers)-1,1); - save(result_path,'results'); - end -end \ No newline at end of file diff --git a/src/MATLAB/DiviK/libs/divik_library/merge_partitions.m b/src/MATLAB/DiviK/libs/divik_library/merge_partitions.m deleted file mode 100644 index 8235198..0000000 --- a/src/MATLAB/DiviK/libs/divik_library/merge_partitions.m +++ /dev/null @@ -1,32 +0,0 @@ -function [ merged, st_trans_table, nd_trans_table ] = merge_partitions( primary, secondary, replaced ) -%MERGE_PARTITIONS Merges the vectors containing data partition and -%subpartition of one part. -% merged = MERGE_PARTITIONS(primary,secondary,repl) - returns the vector -% containing cluster specified by repl from vector primary split -% according to vector secondary. -% [merged,st,nd] = MERGE_PARTITIONS(primary,secondary,repl) - also -% describes to which indexes old indexes are binded. -% Works only for integer indexes. - - merged = NaN*primary; - - min_primary = min(primary); - max_primary = max(primary); - min_secondary = min(secondary); - max_secondary = max(secondary); - - st_trans_table = NaN*zeros(max_primary-min_primary+1,1); - st_trans_table(1:(replaced - min_primary),1) = transpose(1:(replaced - min_primary)); - st_trans_table((replaced - min_primary + 2):end,1) = transpose((replaced - min_primary + 2):(max_primary-min_primary+1)) + max_secondary - min_secondary; - nd_trans_table = NaN*zeros(max_secondary-min_secondary+1,1); - nd_trans_table(1:(max_secondary-min_secondary+1),1) = transpose(1:(max_secondary-min_secondary+1)) + replaced - 1; - - primary = primary - min_primary + 1; - secondary = secondary - min_secondary + 1; - replaced = replaced - min_primary + 1; - - merged(primary>replaced) = primary(primary>replaced) + max_secondary - min_secondary; - merged(primary==replaced) = secondary + replaced - 1; - -end - diff --git a/src/MATLAB/DiviK/libs/divik_library/optimize_partition.m b/src/MATLAB/DiviK/libs/divik_library/optimize_partition.m deleted file mode 100644 index b9270cf..0000000 --- a/src/MATLAB/DiviK/libs/divik_library/optimize_partition.m +++ /dev/null @@ -1,41 +0,0 @@ -function [partition, index] = optimize_partition(data, max_K, metric_name, k_means_opts) -%OPTIMIZE_PARTITION -% [partition, index] = OPTIMIZE_PARTITION(data, max_K, metric_name, k_means_opts) - - optimum = -Inf; - - if are_distances_unimodal(data, metric_name) - partition = ones(1, size(data, 2)); - index = -Inf; - return; - end - - for K=max_K:-1:2 - %INDEX TO BE FOUND IN NEW VERSION - %CENTERS TO BE FOUND IN FULL SPACE - - partition = k_means(data, K, k_means_opts{:}, 'Metric', metric_name); - - index = dunn(data,partition,metric_name); - - if (index > optimum && index~=Inf) || K==max_K || (K==2 && optimum == -Inf) - optimum = index; - optimal_partition = partition; - end - end - - partition = optimal_partition; - index = optimum; -end - -function unimodal = are_distances_unimodal(data, metric_name) - - if strcmp(metric_name, 'pearson') - distances = pdist(data, 'correlation'); - else - distances = pdist(data, metric_name); - end - - unimodal = is_unimodal(distances); - -end diff --git a/src/MATLAB/DiviK/readme.txt b/src/MATLAB/DiviK/readme.txt deleted file mode 100644 index b95feee..0000000 --- a/src/MATLAB/DiviK/readme.txt +++ /dev/null @@ -1,34 +0,0 @@ -This directory contains demo of DiviK algorithm. - -Content: - -- 'libs' directory contains all libraries used for DiviK algorithm. -- 'sample_data.mat' is data file with data after GMM modelling of a single preparation (available here: https://1drv.ms/u/s!AvaKDFGHXrvCjPlfSAGzQuCxs5_NrA). -- 'demo.m' is the demo script file. - -How to run: - -- Copy whole directory onto your computer, into a writable location -- Open MATLAB in that location -- Run script 'demo.m' - -What will be the result: - -- file 'downmerged_mono.png' showing merged partitions from two levels of DiviK -- file 'primary_(...)_mono.png' showing partition at top level -- directory '1' with file 'primary_(...)_mono.png' showing partition of first subregion found -- directory '2' with file 'primary_(...)_mono.png' showing partition of second subregion found -- directory 'cache' which contains disk cache of functions used internally by DiviK. - It speeds up repeated calculations by using partial results in the case analysis was interrupted - and should be resumed. This feature can be disabled. -- directory 'kmeans_cache' with similar purpose as above, but designed separately for own k-means - algorithm implementation. - -How to apply DiviK to another data? - -Implementation contains documentation comment, which describes all capabilities. It is sufficient -to add 'libs' directory to MATLAB path and then just write 'help divik' in MATLAB command line. -All input and output parameters are described, as well as named parameters which allow to apply -DiviK for segmentation or compression. - -Included sample has been tested on MATLAB R2016a, R2016b. \ No newline at end of file diff --git a/src/MATLAB/GMM/Run_gmm_model.m b/src/MATLAB/GMM/Run_gmm_model.m deleted file mode 100644 index 9618a12..0000000 --- a/src/MATLAB/GMM/Run_gmm_model.m +++ /dev/null @@ -1,16 +0,0 @@ -clc; clear all; close all; -addpath('ms_gmm') - -% step 1 - build GMM model -%GMM parameters -opts.base = 1; %if baseline correction -opts.draw = 1; %if draw results -opts.mz_thr = 0.3; %M/Z threshold for merging -opts.if_merge = 0; %if merge components -opts.if_rem = 0; %if remove additional components - -%load data -load('ms_data_1.mat') -%mz - nx1, y - nx1 - -mdl = ms_gmm_run(mz,y,opts); diff --git a/src/MATLAB/GMM/ms_gmm/Bruffaerts_out.m b/src/MATLAB/GMM/ms_gmm/Bruffaerts_out.m deleted file mode 100644 index ff9d1c0..0000000 --- a/src/MATLAB/GMM/ms_gmm/Bruffaerts_out.m +++ /dev/null @@ -1,77 +0,0 @@ -function [del,x_corr,left,right] = Bruffaerts_out(x,p,alpha,draw) -% GEN_BOX(X) -% Outlier detection for skewed and heavy-tailed distributions -% -% Input: -% x - data sample [1xn] -% p - robustness of the method with respect to outliers [0.75-0.99, default:0.9] -% alpha - desired detection rate of atypical values in the absence of -% contamination with outliers [default: 0.007] -% draw - if draw results [default: 0] -% -% Output: -% del - indices of outlier measurement -% x_corr - corrected sample -% left - indices of left tail outliers -% right - indices of right tail outliers -% -% Implemented by: -% Michal Marczyk, Michal.Marczyk@polsl.pl -% -% Authors: -% C. Bruffaerts, V. Verardi, C. Vermandele, A generalized boxplot for -% skewed and heavy-tailed distributions, Statistics & Probability Letters, -% Volume 95, Pages 110-117 - -%check inputs -if nargin < 4 - draw = 0; - if nargin < 3 - alpha = 0.007; - if nargin < 2 - p = 0.9; - end - end -end - -x_s = (x-median(x))/iqr(x); %standardize the data -r = x_s - min(x_s) + 0.1; %shift dataset -r_d = r/(min(r)+max(r)); %standardize r to (0,1) scale -w = norminv(r_d); %consider inverse normal (probit) -w_s = (w-median(w))/(iqr(w)/1.3426); %standardize w - -%adjust values by Tukey g-and-h distribution -z_p = norminv(p); -q_p1 = quantile(w_s,p); -q_p2 = quantile(w_s,1-p); -g = (1/z_p)*log(-q_p1/q_p2); -h = (2*log(-g*(q_p1*q_p2)/(q_p1+q_p2)))/(z_p^2); - -% w_sadj = (1/g)*(exp(g.*w_s)-1).*exp(h*(w_s.^2)/2); -% hist(w_s,0.7*sqrt(length(w_sadj))); - -%calculate quantiles of adj. values -q_a(1) = quantile(w_s,alpha/2); -q_a(2) = quantile(w_s,1-alpha/2); -ksi = (1/g)*(exp(g.*q_a)-1).*exp(h*(q_a.^2)/2); - -%calculate final extremities -B = (normcdf(median(w)+iqr(w)*ksi/1.3426)*(min(r)+max(r))+min(x_s)-0.1)*iqr(x)+median(x); - -%find outliers -left = xB(2); - -del = left | right; -x_corr = x(~del); - -if draw - disp([num2str(sum(left)) ' left tail outliers removed.']) - disp([num2str(sum(right)) ' right tail outliers removed.']) - figure; hist(x,sqrt(length(x))); hold on; - plot([B(1),B(1)],ylim,'r'); plot([B(2),B(2)],ylim,'r') - xlabel('Data'); ylabel('Counts'); title([num2str(sum(del)) ' outliers removed']) -end - -function y=iqr(x) -y = quantile(x,0.75) - quantile(x,0.25); \ No newline at end of file diff --git a/src/MATLAB/GMM/ms_gmm/bindata.m b/src/MATLAB/GMM/ms_gmm/bindata.m deleted file mode 100644 index 2ca6f2b..0000000 --- a/src/MATLAB/GMM/ms_gmm/bindata.m +++ /dev/null @@ -1,32 +0,0 @@ -% this function was taken from Dr. Patrick Mineault webpage -% -function [ym,yb] = bindata(y,x,xrg) - %function [ym,yb] = bindata(y,x,xrg) - %Computes ym(ii) = mean(y(x>=xrg(ii) & x < xrg(ii+1)) for every ii - %using a fast algorithm which uses no looping - %If a bin is empty it returns nan for that bin - %Also returns yb, the approximation of y using binning (useful for r^2 - %calculations). Example: - % - %x = randn(100,1); - %y = x.^2 + randn(100,1); - %xrg = linspace(-3,3,10)'; - %[ym,yb] = bindata(y,x,xrg); - %X = [xrg(1:end-1),xrg(2:end)]'; - %Y = [ym,ym]' - %plot(x,y,'.',X(:),Y(:),'r-'); - % - %By Patrick Mineault - %Refs: http://xcorr.net/?p=3326 - % http://www-pord.ucsd.edu/~matlab/bin.htm - % xcorr is the blog of Dr. Patrick Mineault. - % A postdoc at the Ringach lab at UCLA studying vision. - - [~,whichedge] = histc(x,xrg(:)'); - - bins = min(max(whichedge,1),length(xrg)-1); - xpos = ones(size(bins,1),1); - ns = sparse(bins,xpos,1); - ysum = sparse(bins,xpos,y); - ym = full(ysum)./(full(ns)); - yb = ym(bins); diff --git a/src/MATLAB/GMM/ms_gmm/components_merging.m b/src/MATLAB/GMM/ms_gmm/components_merging.m deleted file mode 100644 index 22c9690..0000000 --- a/src/MATLAB/GMM/ms_gmm/components_merging.m +++ /dev/null @@ -1,41 +0,0 @@ -function mdl = components_merging(mdl,thr_mu,thr_sig,thr_pp) -%Function for merging components in GMM using method of moments. - -pp_est = mdl.w; -mu_est = mdl.mu; -sig_est = mdl.sig; - -i=1; -while i 0 - %method of moments from Richardson and Green weighted with w - pp_temp = sum(pp_est(s:e)); - mu_temp = sum(pp_est(s:e).*mu_est(s:e))/pp_temp; - sig_est(s) = sqrt(sum(pp_est(s:e).*(mu_est(s:e).^2 + sig_est(s:e).^2))/pp_temp - mu_temp^2); - mu_est(s) = mu_temp; mu_est(s+1:e) = NaN; pp_est(s) = pp_temp; - end - i = e + 1; -end - -del_mu = isnan(mu_est); -pp_est(del_mu) = []; -sig_est(del_mu) = []; -mu_est(del_mu) = []; - -del_pp = pp_est <= thr_pp; -mdl.w = pp_est(~del_pp); -mdl.mu = mu_est(~del_pp); -mdl.sig = sig_est(~del_pp); -mdl.KS = length(pp_est); \ No newline at end of file diff --git a/src/MATLAB/GMM/ms_gmm/dyn_pr_split_w1.m b/src/MATLAB/GMM/ms_gmm/dyn_pr_split_w1.m deleted file mode 100644 index e07980c..0000000 --- a/src/MATLAB/GMM/ms_gmm/dyn_pr_split_w1.m +++ /dev/null @@ -1,40 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% main dynamic programming algorithm for computing initial -% conditions for EM iterations -% - -function [Q,opt_part]=dyn_pr_split_w1(data,ygreki,K_gr,aux_mx,PAR,PAR_sig_min) - -% initialize -Q=zeros(1,K_gr); -N=length(data); -p_opt_idx=zeros(1,N); -p_aux=zeros(1,N); -opt_pals=zeros(K_gr,N); -for kk=1:N; - p_opt_idx(kk)=my_qu_ix_w1(data(kk:N),ygreki(kk:N),PAR,PAR_sig_min); -end - -% aux_mx - already computed - -% iterate -for kster=1:K_gr - for kk=1:N-kster - for jj=kk+1:N-kster+1 - p_aux(jj)= aux_mx(kk,jj)+p_opt_idx(jj); - end - [mm,ix]=min(p_aux(kk+1:N-kster+1)); - p_opt_idx(kk)=mm; - opt_pals(kster,kk)=kk+ix(1); - end - Q(kster)=p_opt_idx(1); -end - - -% restore optimal decisions -opt_part=zeros(1,K_gr); -opt_part(1)=opt_pals(K_gr,1); -for kster=K_gr-1:-1:1 - opt_part(K_gr-kster+1)=opt_pals(kster,opt_part(K_gr-kster)); -end diff --git a/src/MATLAB/GMM/ms_gmm/dyn_pr_split_w_aux1.m b/src/MATLAB/GMM/ms_gmm/dyn_pr_split_w_aux1.m deleted file mode 100644 index 1a8bc0b..0000000 --- a/src/MATLAB/GMM/ms_gmm/dyn_pr_split_w_aux1.m +++ /dev/null @@ -1,16 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% -% auxiliary function for dynamic programming - compute quality index matrix -% -function aux_mx=dyn_pr_split_w_aux1(data,ygreki,PAR,PAR_sig_min) - -N=length(data); - -% aux_mx -aux_mx=zeros(N,N); -for kk=1:N-1 - for jj=kk+1:N - aux_mx(kk,jj)= my_qu_ix_w1(data(kk:jj-1),ygreki(kk:jj-1),PAR,PAR_sig_min); - end -end \ No newline at end of file diff --git a/src/MATLAB/GMM/ms_gmm/emcor.m b/src/MATLAB/GMM/ms_gmm/emcor.m deleted file mode 100644 index c32eff0..0000000 --- a/src/MATLAB/GMM/ms_gmm/emcor.m +++ /dev/null @@ -1,116 +0,0 @@ -% emergency correction function -% launched in the case of (rare) overlap between splitters -function [ww_a_l_new,mu_a_l_new,sig_a_l_new,ww_a_p_new,mu_a_p_new,sig_a_p_new]=emcor(ksp,mu_mx_1,ww_mx_1,sig_mx_1,mz,y_bas,Splitt_v,P,Par_mcv) - -Res_par_2=ms_gmm_params(9); -em_tol=ms_gmm_params(16); - -KSP=length(Splitt_v); - -ww_a_l_new=0*ww_mx_1(1,:); -mu_a_l_new=0*ww_mx_1(1,:); -sig_a_l_new=0*ww_mx_1(1,:); - -ww_a_p_new=0*ww_mx_1(1,:); -mu_a_p_new=0*ww_mx_1(1,:); -sig_a_p_new=0*ww_mx_1(1,:); - - -if ksp>0 - mu_l=mu_mx_1(ksp,:); - ww_l=ww_mx_1(ksp,:); - sig_l=sig_mx_1(ksp,:); - ixp=max(find(ww_l > 0)); - - if isempty(ixp) - KSll=0; - mzll=mz(1); - mu_l=[]; - ww_l=[]; - sig_l=[]; - else - mu_l=mu_l(1:ixp); - ww_l=ww_l(1:ixp); - sig_l=sig_l(1:ixp); - KSll=length(ww_l); - [mzll,mzlp]=find_ranges(mu_l,sig_l); - end -else - KSll=0; - mzll=mz(1); - mu_l=[]; - ww_l=[]; - sig_l=[]; -end - -if ksp 0)); - mu_p=mu_p(1:ixp); - ww_p=ww_p(1:ixp); - sig_p=sig_p(1:ixp); - KSpp=length(ww_p); - [mzlp,mzpp]=find_ranges(mu_p,sig_p); -else - KSpp=0; - mzpp=mz(end); - mu_p=[]; - ww_p=[]; - sig_p=[]; -end - - -idm=find(mz>=mzll & mz<=mzpp); -mz_o=mz(idm); -y_o=y_bas(idm); -pyl=0*mz_o; -pyp=0*mz_o; - -if ksp>0 - for kks=1:KSll - pyl=pyl+ww_l(kks)*normpdf(mz_o,mu_l(kks),sig_l(kks)); - end - idl=find(mz_o>=mzll & mz_o<=P(Splitt_v(ksp),1)); - pylpds=pyl(idl); - y_o(idl)=pylpds; -end - -if ksp=P(Splitt_v(ksp+1),1) & mz_o<=mzpp); - pyppds=pyp(idp); - y_o(idp)=pyppds; -end - -KS=KSll+KSpp; - -pp_ini=[ww_l ww_p]/sum([ww_l ww_p]); -mu_ini=[mu_l mu_p]; -sig_ini=[sig_l sig_p]; - -PAR_sig_min=Res_par_2*Par_mcv*mean(mz_o); -[pp_est,mu_est,sig_est]=my_EM_iter(mz_o,y_o,pp_ini,mu_ini,sig_ini,0,PAR_sig_min,em_tol); -KS = length(pp_est); -KSpp = KS - KSll; - -[~,scale]=qua_scal(mz_o,y_o,pp_est,mu_est,sig_est); -ww_est=pp_est*scale; - -if ksp>0 - ww_a_l_new(1:KSll)=ww_est(1:KSll); - mu_a_l_new(1:KSll)=mu_est(1:KSll); - sig_a_l_new(1:KSll)=sig_est(1:KSll); -end -if kspsize(P,1) - fst_new=0; - else - fst_new=fst; - while 1 - if P(Kini+fst_new,1)-P(Kini,1) > fst*Par_mcv*P(Kini,1) - break - else - fst_new=fst_new+1; - if Kini+fst_new>size(P,1) - fst_new=0; - break - end - end - end - end -return - - - diff --git a/src/MATLAB/GMM/ms_gmm/fill_red.m b/src/MATLAB/GMM/ms_gmm/fill_red.m deleted file mode 100644 index b064a2f..0000000 --- a/src/MATLAB/GMM/ms_gmm/fill_red.m +++ /dev/null @@ -1,13 +0,0 @@ -% auxiliary function for filling splitter components in red -function ok=fill_red(ww_v,mu_v,sig_v) - -KS=length(ww_v); -[mzl,mzp]=find_ranges(mu_v,sig_v); -dlmz=(mzp-mzl)/100; -mzr=mzl:dlmz:mzp; -py=0*mzr; -for kks=1:KS - py=py+ww_v(kks)*normpdf(mzr,mu_v(kks),sig_v(kks)); -end -fill(mzr,py,'r') -ok=1; \ No newline at end of file diff --git a/src/MATLAB/GMM/ms_gmm/find_ranges.m b/src/MATLAB/GMM/ms_gmm/find_ranges.m deleted file mode 100644 index 9b39392..0000000 --- a/src/MATLAB/GMM/ms_gmm/find_ranges.m +++ /dev/null @@ -1,14 +0,0 @@ -% auxiliary function -% find reasonable ranges for a group of Gaussian components -function [mzlow,mzhigh]=find_ranges(mu_v,sig_v) -K=length(mu_v); -mzlv=zeros(K,1); -mzpv=zeros(K,1); - -for kk=1:K - mzlv(kk)=mu_v(kk)-3*sig_v(kk); - mzpv(kk)=mu_v(kk)+3*sig_v(kk); -end -mzlow=min(mzlv); -mzhigh=max(mzpv); -return \ No newline at end of file diff --git a/src/MATLAB/GMM/ms_gmm/find_segment.m b/src/MATLAB/GMM/ms_gmm/find_segment.m deleted file mode 100644 index b090399..0000000 --- a/src/MATLAB/GMM/ms_gmm/find_segment.m +++ /dev/null @@ -1,109 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% find segment between two neighboring splitters -% -function [mz_out,y_out]=find_segment(ksp,P,Splitt_v,mz,y_bas,mu_l,ww_l,sig_l,mu_p,ww_p,sig_p) - -DRAW=ms_gmm_params(1); -KSP=length(Splitt_v); - -if ksp>0 - mzl=P(Splitt_v(ksp),1); -else - mzl=mz(1); -end -if ksp=mzl & mz<=mzp); -mz_o=mz(idm); -y_o=y_bas(idm); - -if DRAW==1 - mzll=max([(mzl-round((mzp-mzl)/5)) mz(1)]); - mzpp=min([(mzp+round((mzp-mzl)/5)) mz(length(mz))]); - ixmzp=find((mz>mzll) & (mz0 - KS=length(ww_l); - for kks=1:KS - pyl=pyl+ww_l(kks)*normpdf(mz_o,mu_l(kks),sig_l(kks)); - end - if DRAW==1 - ok=fill_red(ww_l,mu_l,sig_l); - end -end - -pyp=0*mz_o; -if ksp0 - iyl=find(pyl > 0.05*max(pyl)); - if length(iyl)>1 - mz_ol=mz_o(iyl); - y_ol=y_out(iyl); - [mp,imp]=min(y_ol); - mz_l=mz_ol(imp(1)); - else - mz_l=mz_o(1); - end -else - mz_l=mzl; -end - -if ksp 0.05*max(pyp)); - if length(iyp)>1 - mz_op=mz_o(iyp); - y_op=y_out(iyp); - [mp,imp]=min(y_op); - mz_p=mz_op(imp(1)); - else - mz_p=mz_o(length(mz_o)); - end -else - mz_p=mzp; -end - -imz=find((mz_o <= mz_p) & (mz_o >= mz_l)); -y_out=y_out(imz); -mz_out=mz_o(imz); - -iy=find(y_out > 0 ); -y_out=y_out(iy); -mz_out=mz_out(iy); - -if DRAW==1 - subplot(3,1,2) - hold off - plot(mz_o,0*y_o,'r') - hold on - plot(mz_out,y_out,'k') - title(['Segment:' num2str(ksp)]) - grid on -end - diff --git a/src/MATLAB/GMM/ms_gmm/find_split_peaks.m b/src/MATLAB/GMM/ms_gmm/find_split_peaks.m deleted file mode 100644 index 552ce9b..0000000 --- a/src/MATLAB/GMM/ms_gmm/find_split_peaks.m +++ /dev/null @@ -1,59 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% find splitting peaks -% -function [Splitt_v, seg_vec_c]=find_split_peaks(P,mz,y_bas,Par_mcv) - -% read parameter for peak detection sensitivity -Par_P_Sens=ms_gmm_params(3); -% read parameter for peak quality threshold -Par_Q_Thr=ms_gmm_params(4); -% read parameter for initial jump -Par_Ini_J=ms_gmm_params(5); -% read parameter for range for peak lookup -Par_P_L_R=ms_gmm_params(6); -% read parameter for jump -Par_P_J=ms_gmm_params(7); -% -seg_vec=zeros(size(P,1),1); -for kkp=1:size(P,1)-1 - zakm=find(mz<=P(kkp+1,1) & mz>=P(kkp,1)); - warty=y_bas(zakm); - [miny,idxm]=min(warty); - prawzak=zakm(idxm(1)); - seg_vec(kkp)=prawzak; -end -seg_vec(length(seg_vec))=length(mz); -seg_vec_c=[1; seg_vec]; -M_B_H=zeros(size(P,1),1); -for kk=1:size(P,1) - M_B_H(kk)=max([y_bas(seg_vec_c(kk)) y_bas(seg_vec_c(kk+1))])+1; -end -MaxP=max(P(:,2)); -PPe=P(:,2)./M_B_H; -Kini=f_par_mcv(1,Par_Ini_J,P,Par_mcv); -Splitt_v=zeros(floor((mz(length(mz))-mz(1))/(mz(1)*Par_mcv)),1); -kspl=0; -% -while 1 - if f_par_mcv( Kini,Par_P_L_R,P,Par_mcv )==0 - break - end - Top=Kini+f_par_mcv( Kini,Par_P_L_R,P,Par_mcv ); - Zak=Kini:Top; - % verify quality condition for the best peak in the range Zak=Kini:Top - pzak2=P(Zak,2); - ppezak=PPe(Zak); - ixq=find((pzak2 > Par_P_Sens*MaxP) & (ppezak > Par_Q_Thr)); - if length(ixq)>=1 - [mpk,impk]=max(PPe(Zak(ixq))); - kkt=ixq(impk(1)); - kspl=kspl+1; - Splitt_v(kspl)=Zak(kkt); - Kini=Top+f_par_mcv( Top,Par_P_J,P,Par_mcv ); - else - Kini=Top+f_par_mcv( Top,1,P,Par_mcv ); - end -end -Splitt_v=Splitt_v(1:kspl); - diff --git a/src/MATLAB/GMM/ms_gmm/find_split_segment.m b/src/MATLAB/GMM/ms_gmm/find_split_segment.m deleted file mode 100644 index bb5b9e3..0000000 --- a/src/MATLAB/GMM/ms_gmm/find_split_segment.m +++ /dev/null @@ -1,51 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% find splitter segment around the split peak no k -% -% -% -function [mz_ss,y_ss]=find_split_segment(k_pick,mz,y_bas,seg_vec_c,P,Par_mcv) - -zakl=seg_vec_c(max([1 (k_pick-4)])); -zakp=seg_vec_c(min([size(P,1) (k_pick+5)])); - -mzp=mz(zakp); -mzl=mz(zakl); -mzPP=P(k_pick,1); - -if (mzp-mzPP)<5*mzPP*Par_mcv - zakm=find(mz>=mzp & mz<=mzp+5*mzPP*Par_mcv); - warty=y_bas(zakm); - [miny,idxm]=min(warty); - prawzak=zakm(idxm(1)); -else - prawzak=zakp; -end - -if (mzPP-mzl)<5*mzPP*Par_mcv - zakm=find(mz<=mzl & mz>=mzl-5*mzPP*Par_mcv); - warty=y_bas(zakm); - [miny,idxm]=min(warty); - lewzak=zakm(idxm(1)); -else - lewzak=zakl; -end - -mz_o=mz(lewzak:prawzak); -y_bas_o=y_bas(lewzak:prawzak); - -yp=y_bas(prawzak); -yl=y_bas(lewzak); - -dmzl=mz(lewzak+1)-mz(lewzak); -dmzp=mz(prawzak)-mz(prawzak-1); - -mzaugl=mz(lewzak)-6*Par_mcv*mz(lewzak):dmzl:mz(lewzak)-dmzl; -mzaugp=mz(prawzak)+dmzp:dmzp:mz(prawzak)+6*Par_mcv*mz(prawzak); - -yop=sqrt(2*pi)*(2*Par_mcv*mz(prawzak))*yp*normpdf(mzaugp-mzp,0,2*Par_mcv*mz(prawzak)); -yol=sqrt(2*pi)*(2*Par_mcv*mz(lewzak))*yl*normpdf(mzaugl-mzl,0,2*Par_mcv*mz(lewzak)); - - -mz_ss=[mzaugl'; mz_o; mzaugp']; -y_ss=[yol'; y_bas_o; yop']; diff --git a/src/MATLAB/GMM/ms_gmm/gmm_decomp_segment1.m b/src/MATLAB/GMM/ms_gmm/gmm_decomp_segment1.m deleted file mode 100644 index cf78360..0000000 --- a/src/MATLAB/GMM/ms_gmm/gmm_decomp_segment1.m +++ /dev/null @@ -1,171 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% gmm decomposition of a segment based on -% dynamic programming initialization -% -% -function [ww_dec,mu_dec,sig_dec]=gmm_decomp_segment1(mz,y_bas,ww_mx_1,mu_mx_1,sig_mx_1,P,Splitt_v,Par_mcv,Fr_No) - -% parameters -DRAW=ms_gmm_params(1); -QFPAR=ms_gmm_params(8); -Res_par_2=ms_gmm_params(9); -Prec_Par_2=ms_gmm_params(12); -Penet_Par_2=ms_gmm_params(13); -Buf_size_seg_Par=ms_gmm_params(15); -em_tol=ms_gmm_params(16); - - - % buffers -ww_dec=zeros(1,Buf_size_seg_Par); -mu_dec=zeros(1,Buf_size_seg_Par); -sig_dec=zeros(1,Buf_size_seg_Par); - -% assign fragment number to ksp -KSP=length(Splitt_v); -ksp=Fr_No; - -% find separated fragments -if ksp>0 - mu_l=mu_mx_1(ksp,:); - ww_l=ww_mx_1(ksp,:); - sig_l=sig_mx_1(ksp,:); - ktr=max((find(ww_l>0))); - mu_l=mu_l(1:ktr); - ww_l=ww_l(1:ktr); - sig_l=sig_l(1:ktr); - else - mu_l=[]; - ww_l=[]; - sig_l=[]; - end - - if ksp0))); - mu_p=mu_p(1:ktr); - ww_p=ww_p(1:ktr); - sig_p=sig_p(1:ktr); - else - mu_p=[]; - ww_p=[]; - sig_p=[]; - end - - [mz_out,y_out]=find_segment(ksp,P,Splitt_v,mz,y_bas,mu_l,ww_l,sig_l,mu_p,ww_p,sig_p); - - mz_out=mz_out(:); - y_out=y_out(:); - if length(mz_out) > 300 - dmz=(mz_out(length(mz_out))-mz_out(1))/200; - mz_out_bb=(mz_out(1):dmz:mz_out(length(mz_out))); - mz_out_b=mz_out_bb(1:200)+0.5*dmz; - [y_out_b,yb] = bindata(y_out,mz_out,mz_out_bb); - ixnn=find(~isnan(y_out_b)); - y_out_b=y_out_b(ixnn); - mz_out_b=mz_out_b(ixnn); - y_out_b=y_out_b(:); - mz_out_b=mz_out_b(:); - else - y_out_b=y_out; - mz_out_b=mz_out; - end - -% find appropriate gmm model for the segment - - quamin=inf; - N=length(mz_out); - Nb=length(mz_out_b); - PAR_sig_min=Res_par_2*Par_mcv*mean(mz_out); - -if length(mz_out)<3 - return -else - KSmin=min([1 (floor((mz_out(N)-mz_out(1))/PAR_sig_min)-1)]); - if KSmin<=0 - wwec=y_out/(sum(y_out)); - mu_est=sum(mz_out.*wwec); - pp_est=1; - sig_est=sqrt(sum(((mz_out-sum(mz_out.*wwec)).^2).*wwec)); - [qua,scale]=qua_scal(mz_out,y_out,pp_est,mu_est,sig_est); - else - KS=KSmin; - % penetration - how far are we searching for minimum - PAR_penet=min([Penet_Par_2 floor((mz_out(N)-mz_out(1))/PAR_sig_min) floor(length(mz_out)/4)]); - kpen=0; - - %name=['dane_nr' num2str(ksp)]; - %save(name, 'mz_out', 'y_out', 'mz_out_b', 'y_out_b', 'QFPAR', 'PAR_sig_min', 'PAR_penet'); - aux_mx=dyn_pr_split_w_aux1(mz_out_b,y_out_b,QFPAR,PAR_sig_min); - while KS < Buf_size_seg_Par - KS=KS+1; - kpen=kpen+1; - - if KS > KSmin+1 && KS >= length(mz_out)/2 - break - end - - [Q,opt_part]=dyn_pr_split_w1(mz_out_b,y_out_b,KS-1,aux_mx,QFPAR,PAR_sig_min); - part_cl=[1 opt_part Nb+1]; - - % set initial cond - pp_ini=zeros(1,KS); - mu_ini=zeros(1,KS); - sig_ini=zeros(1,KS); - for kkps=1:KS - invec=mz_out_b(part_cl(kkps):part_cl(kkps+1)-1); - yinwec=y_out_b(part_cl(kkps):part_cl(kkps+1)-1); - wwec=yinwec/(sum(yinwec)); - pp_ini(kkps)=sum(yinwec)/sum(y_out); - mu_ini(kkps)=sum(invec.*wwec); - %sig_ini(kkps)=sqrt(sum(((invec-sum(invec.*wwec')).^2).*wwec')); - sig_ini(kkps)=0.5*(max(invec)-min(invec)); - end - % - [pp_est,mu_est,sig_est,TIC,l_lik,bic]=my_EM_iter(mz_out,y_out,pp_ini,mu_ini,sig_ini,0,PAR_sig_min,em_tol); - - - % compute quality indices of gmm model of the fragment - [qua,scale]=qua_scal(mz_out,y_out,pp_est,mu_est,sig_est); - quatest=qua+Prec_Par_2*KS; - - if (quatest < quamin) - quamin=quatest; - pp_min=pp_est; - mu_min=mu_est; - sig_min=sig_est; - scale_min=scale; - end - - if (quatest > quamin) && (kpen>PAR_penet) - pp_est=pp_min; - mu_est=mu_min; - sig_est=sig_min; - scale=scale_min; - break - end - end - end - - if DRAW==1 - figure(2) - subplot(3,1,3) - hold off - ok=plot_res_new_scale(mz_out,y_out,pp_est*scale,mu_est,sig_est); - xlabel(['Segment: ' num2str(Fr_No) ' KS= ' num2str(length(pp_est))]) - drawnow - end - - ww_o=pp_est*scale; - mu_o=mu_est; - sig_o=sig_est; - - for kkpick=1:length(ww_o) - mu_dec(kkpick)=mu_o(kkpick); - ww_dec(kkpick)=ww_o(kkpick); - sig_dec(kkpick)=sig_o(kkpick); - end - -end diff --git a/src/MATLAB/GMM/ms_gmm/gmm_decomp_split_segment.m b/src/MATLAB/GMM/ms_gmm/gmm_decomp_split_segment.m deleted file mode 100644 index c0d5d41..0000000 --- a/src/MATLAB/GMM/ms_gmm/gmm_decomp_split_segment.m +++ /dev/null @@ -1,174 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% gmm decomposition of splitter segment based on -% dynamic programming initialization for splitting segments -% -function [ww_pick,mu_pick,sig_pick]=gmm_decomp_split_segment(mz,y_bas,Splitt_v,seg_vec_c,P,Par_mcv,Sp_No) -% input -% mz,y_bas - spectrum -% Splitt_v, seg_vec_c - list of splitting peaks and segment bounds computed by find_split_peaks -% P,Par_mcv - peaks, -% Sp_no - number of splitting segment - -% find appropriate gmm model for the splitting segment no Sp_No - % - % parameters - DRAW=ms_gmm_params(1); - QFPAR=ms_gmm_params(8); - Res_par_2=ms_gmm_params(9); - Prec_Par_1=ms_gmm_params(10); - Penet_Par_1=ms_gmm_params(11); - Buf_size_split_Par=ms_gmm_params(14); - em_tol=ms_gmm_params(16); - - % buffers - ww_pick=zeros(1,Buf_size_split_Par); - mu_pick=zeros(1,Buf_size_split_Par); - sig_pick=zeros(1,Buf_size_split_Par); - - % un-binned data - [mz_out,y_out]=find_split_segment(Splitt_v(Sp_No),mz,y_bas,seg_vec_c,P,Par_mcv); - mz_out=mz_out(:); - y_out=y_out(:); - - % bin if necessary - if length(mz_out) > 300 - dmz=(mz_out(length(mz_out))-mz_out(1))/200; - mz_out_bb=(mz_out(1):dmz:mz_out(length(mz_out))); - mz_out_b=mz_out_bb(1:200)+0.5*dmz; - [y_out_b,yb] = bindata(y_out,mz_out,mz_out_bb); - ixnn=find(~isnan(y_out_b)); - y_out_b=y_out_b(ixnn); - mz_out_b=mz_out_b(ixnn); - y_out_b=y_out_b(:); - mz_out_b=mz_out_b(:); - else - y_out_b=y_out; - mz_out_b=mz_out; - end - - N=length(mz_out); - Nb=length(mz_out_b); - quamin=inf; - PAR_sig_min=Res_par_2*Par_mcv*mean(mz_out); - - KSmin=min([2 (floor((mz_out(N)-mz_out(1))/PAR_sig_min)-1)]); - if KSmin<=0 - wwec=y_out/(sum(y_out)); - mu_est=sum(mz_out.*wwec); - pp_est=1; - sig_est=sqrt(sum(((mz_out-sum(mz_out.*wwec)).^2).*wwec)); - [qua,scale]=qua_scal(mz_out,y_out,pp_est,mu_est,sig_est); - else - - KS=KSmin; - PAR_penet=min([Penet_Par_1 floor((mz_out(N)-mz_out(1))/PAR_sig_min)]); - kpen=0; - - aux_mx=dyn_pr_split_w_aux(mz_out_b,y_out_b,QFPAR,PAR_sig_min); - while KS <= 2*(KSmin+PAR_penet) - KS=KS+1; - kpen=kpen+1; - - if KS > KSmin+1 && KS >= length(mz_out)/2 - break - end - - [Q,opt_part]=dyn_pr_split_w(mz_out_b,y_out_b,KS-1,aux_mx,QFPAR,PAR_sig_min); - part_cl=[1 opt_part Nb+1]; - - % set initial cond - pp_ini=zeros(1,KS); - mu_ini=zeros(1,KS); - sig_ini=zeros(1,KS); - for kkps=1:KS - invec=mz_out_b(part_cl(kkps):part_cl(kkps+1)-1); - yinwec=y_out_b(part_cl(kkps):part_cl(kkps+1)-1); - wwec=yinwec/(sum(yinwec)); - pp_ini(kkps)=sum(yinwec)/sum(y_out_b); - mu_ini(kkps)=sum(invec.*wwec); - %sig_ini(kkps)=sqrt(sum(((invec-sum(invec.*wwec')).^2).*wwec')); - sig_ini(kkps)=0.5*(max(invec)-min(invec)); - end - - - [pp_est,mu_est,sig_est,TIC,l_lik,bic]=my_EM_iter(mz_out,y_out,pp_ini,mu_ini,sig_ini,0,PAR_sig_min,em_tol); - - % compute quality indices and scale of gmm model of the fragment - [qua,scale]=qua_scal(mz_out,y_out,pp_est,mu_est,sig_est); - quatest=qua+Prec_Par_1*KS; - - if (quatest < quamin) - quamin=quatest; - pp_min=pp_est; - mu_min=mu_est; - sig_min=sig_est; - scale_min=scale; - end - - if (quatest > quamin) && (kpen>PAR_penet) - pp_est=pp_min; - mu_est=mu_min; - sig_est=sig_min; - scale=scale_min; - break - end - - end - - end %(if -- else) - - % pick and store results - dist=abs((mu_est-P(Splitt_v(Sp_No),1))./sig_est); - ixf=find(dist<=3); - if isempty(ixf) - [tmp,ixf] = min(dist); - ixnf = find(dist>tmp); - else - ixnf=find(dist>3); - end - - mu_p=mu_est(ixf); - ww_p=scale*pp_est(ixf); - sig_p=sig_est(ixf); - - mu_t=mu_est(ixnf); - ww_t=scale*pp_est(ixnf); - sig_t=sig_est(ixnf); - - inn=find(mu_tmin(mu_p)); - mu_tp=mu_t(inn); - ww_tp=ww_t(inn); - sig_tp=sig_t(inn); - - mu_pp=[mu_p mu_tp]; - ww_pp=[ww_p ww_tp]; - sig_pp=[sig_p sig_tp]; - - for kkpick=1:length(ww_pp) - mu_pick(kkpick)=mu_pp(kkpick); - ww_pick(kkpick)=ww_pp(kkpick); - sig_pick(kkpick)=sig_pp(kkpick); - end - - %%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%% plots - if DRAW==1 - figure(1) - subplot(2,1,1) - hold off - plot(mz_out,y_out,'k') - hold on - plot([P(Splitt_v(Sp_No),1) P(Splitt_v(Sp_No),1)],[0 max(y_out)],'r') - ylabel('y (no. of counts)'); - title(['Splitter segment: ' num2str(Sp_No)]); - grid on - subplot(2,1,2) - hold off - ww_est=scale*pp_est; - ok=plot_gmm(mz_out,y_out,ww_est,mu_est,sig_est); - ok=fill_red(ww_pp,mu_pp,sig_pp); - title(['Splitter: ' num2str(Sp_No)]) - drawnow - end - \ No newline at end of file diff --git a/src/MATLAB/GMM/ms_gmm/ms_gmm1.m b/src/MATLAB/GMM/ms_gmm/ms_gmm1.m deleted file mode 100644 index 96252bd..0000000 --- a/src/MATLAB/GMM/ms_gmm/ms_gmm1.m +++ /dev/null @@ -1,142 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% ms_gmm -% -% -% decomposition of the proteomic spectrum into Gaussian mixture model -% -% -function [ww_gmm,mu_gmm,sig_gmm]=ms_gmm1(mz,y_bas) - -% -% INITIALIZATION -% -mz=mz(:); -y_bas=y_bas(:); - -% find peaks -[P, PFWHH]=mspeaks(mz,y_bas,'SHOWPLOT',0); - -% read paramter for gmm decomp resolution -Res_Par=ms_gmm_params(2); - -% estimate resolution -Par_mcv=Res_Par*mean((PFWHH(:,2)-PFWHH(:,1))./P(:,1)); - -%plot(P(:,1),(PFWHH(:,2)-PFWHH(:,1)),'.') -%pause - -% find good quality splitting peaks -% -[Splitt_v, seg_vec_c]=find_split_peaks(P,mz,y_bas,Par_mcv); -KSP=length(Splitt_v); % number of plitting peaks -% figure; hold on; plot(mz,y_bas);for a=1:KSP;plot([mz(seg_vec_c(Splitt_v(a))),mz(seg_vec_c(Splitt_v(a)))],ylim,'r');end - -% PHASE 1 - SPLITTERS AND GMM DECOMPOSITIONS OF SPLITTING SEGMENTS -% -% buffers for splitter parameters -Buf_size_split_Par=ms_gmm_params(14); -mu_mx_1=zeros(KSP,Buf_size_split_Par); -ww_mx_1=zeros(KSP,Buf_size_split_Par); -sig_mx_1=zeros(KSP,Buf_size_split_Par); - -parfor kk=1:KSP -% disp(['Split Progress: ' num2str(kk) ' of ' num2str(KSP)]); - % Gaussian mixture decomposition of the splitting segment - [ww_pick,mu_pick,sig_pick]=gmm_decomp_split_segment(mz,y_bas,Splitt_v,seg_vec_c,P,Par_mcv,kk); - mu_mx_1(kk,:)=mu_pick; - ww_mx_1(kk,:)=ww_pick; - sig_mx_1(kk,:)=sig_pick; -end - -% -% -% PHASE 2 - GMM DECOMPOSITIONS OF SEGMENTS -% -% buffers for segments decompositions paramteres -Buf_size_seg_Par=ms_gmm_params(15); -KSP1=KSP+1; -mu_mx_2=zeros(KSP1,Buf_size_seg_Par); -ww_mx_2=zeros(KSP1,Buf_size_seg_Par); -sig_mx_2=zeros(KSP1,Buf_size_seg_Par); -parfor ksp=1:KSP1 -% disp(['Seg Progress: ' num2str(ksp) ' of ' num2str(KSP1)]); - [ww_out,mu_out,sig_out]=gmm_decomp_segment1(mz,y_bas,ww_mx_1,mu_mx_1,sig_mx_1,P,Splitt_v,Par_mcv,ksp-1); - mu_mx_2(ksp,:)=mu_out; - ww_mx_2(ksp,:)=ww_out; - sig_mx_2(ksp,:)=sig_out; -end - -% AGGREGATION -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%% aggregate components computed in PHASE 1 and in PHASE 2 - -NUMPICK=size(P,1); -mu_vec=zeros(7*NUMPICK,1); -ww_vec=zeros(7*NUMPICK,1); -sig_vec=zeros(7*NUMPICK,1); - -kacum=0; - -for ksp=1:KSP+1 - mu_a=mu_mx_2(ksp,:); - ww_a=ww_mx_2(ksp,:); - sig_a=sig_mx_2(ksp,:); - ixp=max(find(ww_a > 0)); - if length(ixp)==0 - % emergency correction in the case of detected ovelap between splitters - [ww_a_l_new,mu_a_l_new,sig_a_l_new,ww_a_p_new,mu_a_p_new,sig_a_p_new]=emcor(ksp-1,mu_mx_1,ww_mx_1,sig_mx_1,mz,y_bas,Splitt_v,P,Par_mcv); - if ksp>1 - ww_mx_1(ksp-1,:)=ww_a_l_new; - mu_mx_1(ksp-1,:)=mu_a_l_new; - sig_mx_1(ksp-1,:)=sig_a_l_new; - end - if ksp 0)); - mu_a=mu_a(1:ixp); - ww_a=ww_a(1:ixp); - sig_a=sig_a(1:ixp); - % - for kk=1:length(ww_a) - kacum=kacum+1; - mu_vec(kacum)=mu_a(kk); - ww_vec(kacum)=ww_a(kk); - sig_vec(kacum)=sig_a(kk); - end -end - - -mu_vec=mu_vec(1:kacum); -ww_vec=ww_vec(1:kacum); -sig_vec=sig_vec(1:kacum); - -[mu_sort,muixs]=sort(mu_vec); -mu_gmm=mu_sort; -ww_gmm=ww_vec(muixs); -sig_gmm=sig_vec(muixs); - - - diff --git a/src/MATLAB/GMM/ms_gmm/ms_gmm_params.m b/src/MATLAB/GMM/ms_gmm/ms_gmm_params.m deleted file mode 100644 index bde4832..0000000 --- a/src/MATLAB/GMM/ms_gmm/ms_gmm_params.m +++ /dev/null @@ -1,107 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% ms_gmm_params -% -% -% set all paramters for ms_gmm -% -% -function param=ms_gmm_params(K) -param_v=zeros(100,1); - -% DRAW - show plots of decompositions during computations -% used in many finctions -DRAW=0; - -% Res_Par - used in the main body of ms_gmm - multiplied by estimated -% average width of the peak in the spectrum defines resolutioon of the -% decomposition -Res_Par=0.5; - -% Par_P_Sens parameter for peak detection sensitivity -% used in find_split_peaks split peaks must be of height -% >= Par_P_Sens * maximal peak height -Par_P_Sens=0; - -% Par_Q_Thr parameter for peak quality threshold -% used in find_split_peaks -% split peaks must have quality >= Par_Q_Thr -Par_Q_Thr=0.5; - -% Par_Ini_J parameter for initial jump -% used in find_split_peaks -Par_Ini_J=5; - -% Par_P_L_R parameter for range for peak lookup -% used in find_split_peaks -Par_P_L_R=4; - -% Par_P_J parameter for jump -% used in find_split_peaks -Par_P_J=4; - -% QFPAR - parameter used in the dynamic programming quality funtion -% -QFPAR=0.5; - -% Res_Par_2 - used in the EM iterations to define lower bounds for standard -% deviations -Res_Par_2=0.1; - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%% TUNABLE -% Prec_Par_1 - precision parameter - weight used to pick best gmm decomposition -% penalty coefficient for number of components in the quality funtio -Prec_Par_1=0.0001; -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -% Penet_Par_1 - penetration parameter 1 used to continue searching for -% best number of components in gmm decomposition (bigger for splitting -% segments) -Penet_Par_1=15; - -% Prec_Par_2 - precision parameter 2 - weight used to pick best gmm decomposition -Prec_Par_2=0.001; - -% Penet_Par_1 - penetration parameter 2 used to continue searching for -% best number of components in gmm decomposition (smaller for segments) -Penet_Par_2=30; - -% Buf_size_split_Par - size of the buffer for computing GMM paramters of -% splitters -Buf_size_split_Par=10; - -% Buf_size_seg_Par - size of the buffer for computing GMM paramters of -% segments -Buf_size_seg_Par=30; - -% eps_Par - parameter for EM iterations - tolerance for mixture parameters -% change -eps_Par=0.0001; - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -param_v(1)=DRAW; -param_v(2)=Res_Par; -param_v(3)=Par_P_Sens; -param_v(4)=Par_Q_Thr; -param_v(5)=Par_Ini_J; -param_v(6)=Par_P_L_R; -param_v(7)=Par_P_J; -param_v(8)=QFPAR; -param_v(9)=Res_Par_2; -param_v(10)=Prec_Par_1; -param_v(11)=Penet_Par_1; -param_v(12)=Prec_Par_2; -param_v(13)=Penet_Par_2; -param_v(14)=Buf_size_split_Par; -param_v(15)=Buf_size_seg_Par; -param_v(16)=eps_Par; - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -param=param_v(K); -return diff --git a/src/MATLAB/GMM/ms_gmm/ms_gmm_run1.m b/src/MATLAB/GMM/ms_gmm/ms_gmm_run1.m deleted file mode 100644 index 4be4a7e..0000000 --- a/src/MATLAB/GMM/ms_gmm/ms_gmm_run1.m +++ /dev/null @@ -1,39 +0,0 @@ -function [mdl_orig,mdl_area,mdl_height,mdl_merge] = ms_gmm_run(mz,y,opts) - -mz = mz(:); y = y(:); -if nargin <3 - opts.base = 1; - opts.draw = 1; - opts.mz_thr = 0.3; - opts.if_merge = 1; - opts.if_rem = 1; -end - -%baseline correction -if opts.base - y = msbackadj(mz,y,'showplot',opts.draw); -end -y(y<0) = 0; - -%run ms_gmm -[ww_gmm,mu_gmm,sig_gmm]=ms_gmm1(mz,y); - -%plot results -if opts.draw; - plot_gmm(mz,y,ww_gmm,mu_gmm,sig_gmm); - title(['Original model: ' num2str(length(ww_gmm)) ' components.']) - disp(['Original model: ' num2str(length(ww_gmm)) ' components.']) -end - -mdl_orig.w = ww_gmm; -mdl_orig.mu = mu_gmm; -mdl_orig.sig = sig_gmm; -mdl_orig.KS = length(ww_gmm); - -%make post-processing -[mdl_area,mdl_height,mdl_merge] = post_proc1(mz,y,mdl_orig,opts); -if opts.draw - disp(['Merged model: ' num2str(length(mdl_merge.w)) ' components.']) - disp(['Area-filtered model: ' num2str(length(mdl_area.w)) ' components.']) - disp(['Height-filtered model: ' num2str(length(mdl_height.w)) ' components.']) -end diff --git a/src/MATLAB/GMM/ms_gmm/my_EM_iter.m b/src/MATLAB/GMM/ms_gmm/my_EM_iter.m deleted file mode 100644 index ca773bf..0000000 --- a/src/MATLAB/GMM/ms_gmm/my_EM_iter.m +++ /dev/null @@ -1,102 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% -% -% EM iterations -% -% EM_iter -% function - iterations of the EM algorithm -% -function [pp_est,mu_est,sig_est,TIC,l_lik,bic]=my_EM_iter(x,y,pp_ini,mu_ini,sig_ini,DRAW,SIG_MIN,eps_change) -% - -% VALUES FOR CONSTANTS -% threshold value for terminating iterations -% eps_change; - -% DRAW=1 - show graphically positions of means and standard deviations of components during -% iterartions -% DRAW=0 - omit show option; - -% SIG_MIN - minimum value of sigma -% squared - - -SIG_SQ=SIG_MIN*SIG_MIN; - -% data length -N=length(y); - -% correct sig_in if necessary -sig_ini=max(sig_ini,SIG_MIN); - -% main buffer for intermediate computational results -% pssmac=zeros(KS,N); -% initial values for weights, means and standard deviations of Gaussian -% components -ppoc=pp_ini; -mivoc=mu_ini; -sigkwvoc=sig_ini.^2; - -change=1.0; - -% MAIN LOOP -while change > eps_change; - - ixu=find(ppoc>1.0e-3); - ppoc=ppoc(ixu); - mivoc=mivoc(ixu); - sigkwvoc=sigkwvoc(ixu); - %ixu=find(sigkwvoc>0.01); - %ppoc=ppoc(ixu); - %mivoc=mivoc(ixu); - %sigkwvoc=sigkwvoc(ixu); - KS=length(ixu); - - pssmac=zeros(KS,N); - - oldppoc=ppoc; - oldsigkwvoc=sigkwvoc; - - - ppoc=max(ppoc,1.0e-6); - - for kskla=1:KS - pssmac(kskla,:)=normpdf(x,mivoc(kskla),sqrt(sigkwvoc(kskla))); - end - - denpss=ppoc*pssmac; - denpss = max(min(denpss(denpss>0)),denpss); - for kk=1:KS - macwwwpom=((ppoc(kk)*pssmac(kk,:)).*y')./denpss; - denom=sum(macwwwpom); - minum=sum(macwwwpom*x); - mivacoc=minum/denom; - mivoc(kk)=mivacoc; - pomvec=(x-mivacoc*(ones(N,1))).*(x-mivacoc*(ones(N,1))); - sigkwnum = sum(macwwwpom*pomvec); - sigkwvoc(kk) = max([sigkwnum/denom SIG_SQ]); - ppoc(kk)=sum(macwwwpom)/sum(y); - end - - - change=sum(abs(ppoc-oldppoc))+ sum(((abs(sigkwvoc-oldsigkwvoc))./sigkwvoc))/(length(ppoc)); - - if DRAW==1 - plot(mivoc,sqrt(sigkwvoc),'*') - xlabel('means') - ylabel('standard deviations') - title(['Progress of the EM algorithm: change=' num2str(change)]); - drawnow - end - -end - -% RETURN RESULTS -l_lik=sum(log(denpss).*y'); -[mu_est,isort]=sort(mivoc); -sig_est=sqrt(sigkwvoc(isort)); -pp_est=ppoc(isort); -TIC=sum(y); -bic=l_lik-((3*KS-1)/2)*log(TIC); - diff --git a/src/MATLAB/GMM/ms_gmm/my_qu_ix_w1.m b/src/MATLAB/GMM/ms_gmm/my_qu_ix_w1.m deleted file mode 100644 index 91ddeb4..0000000 --- a/src/MATLAB/GMM/ms_gmm/my_qu_ix_w1.m +++ /dev/null @@ -1,15 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% auxiliary function - computing quality index for dynamic programming -% -% -function wyn=my_qu_ix_w1(invec,yinwec,PAR,PAR_sig_min) -invec=invec(:); -yinwec=yinwec(:); -if (invec(length(invec))-invec(1))<=PAR_sig_min || sum(yinwec)<=1.0e-3 - wyn=inf; -else - wwec=yinwec/(sum(yinwec)); - wyn1=(PAR+sqrt(sum(((invec-sum(invec.*wwec)).^2).*wwec)))/(max(invec)-min(invec)); - wyn=wyn1; -end \ No newline at end of file diff --git a/src/MATLAB/GMM/ms_gmm/plot_gmm.m b/src/MATLAB/GMM/ms_gmm/plot_gmm.m deleted file mode 100644 index af6d866..0000000 --- a/src/MATLAB/GMM/ms_gmm/plot_gmm.m +++ /dev/null @@ -1,24 +0,0 @@ -% plot MS signal and its GMM model -function ok=plot_gmm(mz,y,ww_gmm,mu_gmm,sig_gmm) - -ploty=0*mz; -plot(mz,y,'k'); -hold on - -KS=length(ww_gmm); -for kks=1:KS - ixmz=find(abs((mz-mu_gmm(kks))/sig_gmm(kks))<4); - ploty(ixmz)=ploty(ixmz)+ww_gmm(kks)*normpdf(mz(ixmz),mu_gmm(kks),sig_gmm(kks)); - plot(mz(ixmz),ww_gmm(kks)*normpdf(mz(ixmz),mu_gmm(kks),sig_gmm(kks)),'g'); -end - - -% plot(mz,ploty,'r'); - -% grid on; - -xlabel('M/Z'); -ylabel('Intensity'); - -ok=1; - diff --git a/src/MATLAB/GMM/ms_gmm/plot_res_new_scale.m b/src/MATLAB/GMM/ms_gmm/plot_res_new_scale.m deleted file mode 100644 index 1435bf6..0000000 --- a/src/MATLAB/GMM/ms_gmm/plot_res_new_scale.m +++ /dev/null @@ -1,32 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5 -% -% -% -% plot MS signal versus GMM model (used for for segments) -% -% -function ok=plot_res_new_scale(data,ygreki,wwoc,mivoc,sigvoc) - -xx=data; -y_a=ygreki; - -ploty=0*xx; - -KS=length(wwoc); - -for kks=1:KS - ploty=ploty+wwoc(kks)*normpdf(xx,mivoc(kks),sigvoc(kks)); -end - -plot(xx,y_a,'k',xx,ploty,'r'); - -hold on -for kks=1:KS - plot(xx,wwoc(kks)*normpdf(xx,mivoc(kks),sigvoc(kks)),'g'); -end - -grid on; - -xlabel('m/z'); - -ok=1; \ No newline at end of file diff --git a/src/MATLAB/GMM/ms_gmm/post_proc1.m b/src/MATLAB/GMM/ms_gmm/post_proc1.m deleted file mode 100644 index b7750e3..0000000 --- a/src/MATLAB/GMM/ms_gmm/post_proc1.m +++ /dev/null @@ -1,84 +0,0 @@ -function [mdl_area,mdl_height,mdl_merge] = post_proc1(mz,y,mdl,opts) - -if opts.if_merge - %merging - mdl_merge = components_merging(mdl,opts.mz_thr,finv(1 - .05,2,2),0); - while 1 - tmp = length(mdl_merge.w); - mdl_merge = components_merging(mdl,opts.mz_thr,finv(1 - .05,2,2),0); - if length(mdl_merge.w) == tmp; break; end - end -% if opts.draw -% figure; plot_gmm(mz,y,mdl_merge.w,mdl_merge.mu,mdl_merge.sig); -% hold on; plot_gmm(mz,y,mdl.w,mdl.mu,mdl.sig); -% title([num2str(mdl_merge.KS) ' peaks left.' num2str(mdl.KS-mdl_merge.KS) ' merged.']) -% end - mdl = mdl_merge; -else - mdl_merge = mdl; -end - -if opts.if_rem - % feature reduction - area = zeros(1,mdl.KS); height = area; - for a=1:mdl.KS - y_tmp = mdl.w(a)*normpdf(mz,mdl.mu(a),mdl.sig(a)); - area(a) = trapz(mz,y_tmp); - height(a) = max(y_tmp); - end - - % figure; subplot(2,1,1); hist(area,sqrt(mdl.KS)); subplot(2,1,2); hist(log(area),sqrt(mdl.KS)); - % figure; subplot(2,1,1); hist(height,sqrt(mdl.KS)); subplot(2,1,2); hist(log(height),sqrt(mdl.KS)); - - % outlier detection - [~,~,idxl_a,idx_a] = Bruffaerts_out(area); - if sum(idx_a) > opts.thr - mdl_area = del_gmm(mdl,~idx_a); - thr_a = max(area(~idx_a)); - else - mdl_area = del_gmm(mdl,idxl_a); - thr_a = max(area(idxl_a)); - end - - [~,~,idxl_h,idx_h] = Bruffaerts_out(height); - if sum(idx_h) > opts.thr - mdl_height = del_gmm(mdl,~idx_h); - thr_h = max(height(~idx_h)); - else - mdl_height = del_gmm(mdl,idxl_h); - thr_h = max(height(idxl_h)); - end - - if opts.draw -% figure; subplot(2,1,1) -% if sum(idx_a) > opts.thr -% hist(area,sqrt(mdl.KS)); xlabel('Area under the peak'); ylabel('Counts') -% if ~isempty(thr_a); hold on; plot([thr_a,thr_a],ylim,'r'); end -% else -% hist(log(area),sqrt(mdl.KS)); xlabel('Log(Area under the peak)'); ylabel('Counts') -% if ~isempty(thr_a); hold on; plot([log(thr_a),log(thr_a)],ylim,'r'); end -% end -% title([num2str(mdl_area.KS) ' peaks left.' num2str(mdl.KS-mdl_area.KS) ' removed.']) -% subplot(2,1,2); plot_gmm(mz,y,mdl_area.w,mdl_area.mu,mdl_area.sig); - - figure; subplot(2,1,1) - if sum(idx_h) > Inf - hist(height,sqrt(mdl.KS)); xlabel('Peak height'); ylabel('Counts') - if ~isempty(thr_h); hold on; plot([thr_h,thr_h],ylim,'r'); end - else - hist(log(height),sqrt(mdl.KS)); xlabel('Log(Peak height)'); ylabel('Counts') - if ~isempty(thr_h); hold on; plot([log(thr_h),log(thr_h)],ylim,'r'); end - end - title([num2str(mdl_height.KS) ' peaks left.' num2str(mdl.KS-mdl_height.KS) ' removed.']) - subplot(2,1,2); plot_gmm(mz,y,mdl_height.w,mdl_height.mu,mdl_height.sig); - end -else - mdl_area = mdl; - mdl_height = mdl; -end - -function mdl = del_gmm(mdl,ind) -mdl.w = mdl.w(~ind); -mdl.mu = mdl.mu(~ind); -mdl.sig = mdl.sig(~ind); -mdl.KS = length(mdl.w); diff --git a/src/MATLAB/GMM/ms_gmm/post_proc_gmm.m b/src/MATLAB/GMM/ms_gmm/post_proc_gmm.m deleted file mode 100644 index 774e483..0000000 --- a/src/MATLAB/GMM/ms_gmm/post_proc_gmm.m +++ /dev/null @@ -1,326 +0,0 @@ -function [mdl_proc,ind_del] = post_proc_gmm(mz,y,mdl,name,opts) - -KS = mdl.KS; -switch name - case 'SNR' - x = mdl.mu./mdl.sig; - tail = 'left'; - case 'CV' - x = 100 * mdl.sig./mdl.mu; - name = 'CV perc'; - tail = 'right'; - case 'sig' - x = mdl.sig; - tail = 'right'; - case 'var' - x = mdl.sig.^2; - tail = 'right'; - case 'area' - x = zeros(KS,1); - for a=1:KS - y_tmp = mdl.w(a)*normpdf(mz,mdl.mu(a),mdl.sig(a)); - x(a) = trapz(mz,y_tmp); - end - tail = 'left'; - case 'height' - x = zeros(KS,1); - for a=1:KS - y_tmp = mdl.w(a)*normpdf(mz,mdl.mu(a),mdl.sig(a)); - x(a) = max(y_tmp); - end - tail = 'left'; -end - -if opts.draw; disp(['Model filtering using components ' name ]); end - -% if opts.draw -% figure; subplot(2,1,1); hist(x,sqrt(KS)); -% xlabel(name); ylabel('Counts') -% subplot(2,1,2); hist(log(x),sqrt(KS)); -% xlabel(['Log(' name ')']); ylabel('Counts') -% end - -if opts.log - x = log(x); - disp('Logarithmic transformation of signal.'); -end - -% outlier detection -if strcmp(opts.out,'Bruffaerts') - [~,~,idxl,idxr] = Bruffaerts_out(x); -elseif strcmp(opts.out,'Hubert') - [~,~,idxl,idxr] = Hubert_out(x,1.5,0); -elseif strcmp(opts.out,'Tukey') - [~,~,idxl,idxr] = Tukey_out(x,1.5); -else - idxl = false(KS,1); idxr = idxl; -end - -if strcmp(tail,'left') - x_out = x(~idxl); - if opts.draw; disp([num2str(sum(idxl)) ' left tail outliers removed.']); end -elseif strcmp(tail,'right') - x_out = x(~idxr); - if opts.draw; disp([num2str(sum(idxr)) ' right tail outliers removed.']); end -end - -% adaptive filtering -max_KS = 7; rep_no = 50; -thr = inf(1,max_KS*rep_no); bic = thr; -alpha = cell(1,max_KS); %K_noise = thr; -[KS,~,stats_vec] = par_vec(1:max_KS,1:rep_no); - -parfor a=1:stats_vec.iter - [thr(a),bic(a),stats_tmp] = gauss_rem(x_out, KS(a),0,0); -% K_noise(a) = stats_tmp.K_noise; - alpha{a} = stats_tmp.alpha; -end - -thr = reshape(thr,stats_vec.len_row,stats_vec.len_col); -bic = reshape(bic,stats_vec.len_row,stats_vec.len_col); -% K_noise = reshape(K_noise,stats_vec.len_row,stats_vec.len_col); -alpha = reshape(alpha,stats_vec.len_row,stats_vec.len_col); - -%find optimal no. of components -[~,cmp_nb] = min(nanmedian(bic,2)); -[~,ind] = sort(bic(cmp_nb,:)); - -%correct result by removing small alpha components models -[ind,cmp_nb] = rem_small_alpha(alpha,cmp_nb,ind,bic); -% cmp_nb_noise = K_noise(cmp_nb,ind); - -if opts.draw; disp([num2str(cmp_nb) ' components ' name ' model.']); end - -%estimating threshold including outlier detection and GMM filtering -thr_filt = thr(cmp_nb,ind); -if strcmp(tail,'left') - thr_filt = max(thr_filt,-Inf); - if thr_filt > quantile(x,.5); thr_filt = -Inf; end - if sum(idxl) >0; if max(x(idxl)) <= quantile(x,.5) - thr_filt = max(thr_filt,max(x(idxl))); end; end - ind_del = x <= thr_filt; -elseif strcmp(tail,'right') - thr_filt = min(thr_filt,Inf); - if thr_filt < quantile(x,.5); thr_filt = Inf; end - if sum(idxr) >0; if min(x(idxr)) > quantile(x,.5) - thr_filt = min(thr_filt,min(x(idxr))); end; end - ind_del = x >= thr_filt; -end -if opts.draw; disp([num2str(sum(ind_del)) ' components filtered.']); end - -%find filtered model -mdl_proc = del_gmm(mdl,ind_del); - -if opts.draw - figure; subplot(2,1,1) - hist(x,round(sqrt(length(x)))); - if opts.log; - xlabel(['Log(' name ')']); - title(['Log ' name ' filter. ' num2str(mdl_proc.KS) ' peaks left.' num2str(mdl.KS-mdl_proc.KS) ' removed.']) - else - xlabel(name); - title([name ' filter. ' num2str(mdl_proc.KS) ' peaks left.' num2str(mdl.KS-mdl_proc.KS) ' removed.']) - end - ylabel('Counts') - if ~isempty(thr); hold on; plot([thr_filt,thr_filt],ylim,'r'); end - - subplot(2,1,2); plot_gmm(mz,y,mdl_proc.w,mdl_proc.mu,mdl_proc.sig); -end - -function mdl = del_gmm(mdl,ind) -mdl.w = mdl.w(~ind); -mdl.mu = mdl.mu(~ind); -mdl.sig = mdl.sig(~ind); -mdl.KS = length(mdl.w); - -function [var1_out,var2_out,stats] = par_vec(var1,var2) -%[var1_out,var2_out] = par_vec(var1,var2) -%Function for vectorizing 2 variables to work in parfor loop - -stats = struct; -len_col = length(var2); -len_row = length(var1); -par_iter = len_row*len_col; -var1_out = nan(1,par_iter); -var2_out = var1_out; -for a=1:len_col - var2_out((a-1)*len_row + 1:a*len_row) = var2(a)*ones(1,len_row); - var1_out((a-1)*len_row + 1:a*len_row) = var1; -end - -stats.len_col = len_col; -stats.len_row = len_row; -stats.iter = par_iter; - -function [ind,cmp_nb] = rem_small_alpha(alpha,cmp_nb,ind,bic) -%removing small and thin components -alpha_tmp = alpha{cmp_nb,ind(1)}; -a=2; -while sum(alpha_tmp < 1e-3/cmp_nb) - a = a+1; - if a>10 - if cmp_nb > 1 - cmp_nb = cmp_nb - 1; - [~,ind] = sort(bic(cmp_nb,:)); - alpha_tmp = alpha{cmp_nb,ind(1)}; - a=2; - else - [~,ind] = sort(bic(cmp_nb,:)); - ind = ind(5); - break; - end - else - alpha_tmp = alpha{cmp_nb,ind(a)}; - end -end -ind = ind(a); - -function [thr,bic,stats] = gauss_rem(xvec, K,K_noise,draw) -%GAUSS_REM Estimating noise components using Gaussian mixture model -% Input: -% xvec - vector of data (1xn) -% K - number of Gaussian components -% K_noise - number of noise components from left -% draw - if draw results -% Output: -% thr - noise threshold -% bic - Bayesian Information Criterion for estimated model -% stats - additional statistics -% -% Author: Michal Marczyk -% Michal.Marczyk@polsl.pl - -if nargin < 3 - error('Insufficient number of arguments.') -end -if nargin < 4 - draw = 0; -end - -xvec = sort(xvec(:)); -it = 1; %no. of EM iterations -N = length(xvec); %nb of measurements -epsilon = 1e-5; %EM stop criterion threshold -bic = Inf; - -while bic == Inf || bic == 0 - - %initial conditions - alpha = rand(1,K); alpha = alpha/sum(alpha); - mi = min(xvec) + range(xvec)*rand(1,K); - mi = sort(mi); - temp = diff([min(xvec) mi max(xvec)]); - sigma = zeros(1,K); - for a=1:K - sigma(a) = max([temp(a) temp(a+1)]); - end - f = zeros(N,K); p = f; L_old = 1; - - %histogram of input data - [n,x] = hist(xvec,round(sqrt(N))); - for k=1:K; f(:,k) = alpha(k) * normpdf(xvec,mi(k),sigma(k)); end - f(isnan(f)) = 1e-45; f(f==0) = 1e-45; - - if draw - disp('Starting values') - disp(num2str([alpha; mi; sigma])) - end - - %main loop - while it < 1000 - px = sum(f,2); - L_new = sum(log(px),1); - - %stop criterion - if abs(L_new - L_old) < epsilon || isnan(L_new) || isinf(L_new) - break - end - for k=1:K - p(:,k) = f(:,k)./px; - sum_pk = sum(p(:,k),1); - alpha(k) = sum_pk/N; - mi(k) = sum(xvec.*p(:,k),1)/sum_pk; - sigma(k) = sqrt(sum(((xvec-mi(k)).^2) .* p(:,k),1)/sum_pk); - if sigma(k) <= 0; sigma = 1e-5; disp('Sigma too low. Too many components.');end - f(:,k) = alpha(k) * normpdf(xvec,mi(k),sigma(k)); - end - f(isnan(f)) = 1e-45; f(f==0) = 1e-45; - L_old = L_new; - it = it + 1; - end - - %calculating BIC - bic = -2*L_old + (3*K - 1)*log(N); - if bic == Inf || bic == 0 - disp('EM crash. Repeat calculations.') - it = 1; - else %finding threshold - [mi,ind] = sort(mi); alpha = alpha(ind); sigma = sigma(ind); - f_temp = zeros(1e5,k); x_temp = linspace(min(xvec),max(xvec),1e5)'; - step_x_temp = [min(diff(x_temp)); diff(x_temp)]; - for k=1:K; f_temp(:,k) = alpha(k)*normpdf(x_temp,mi(k),sigma(k)).*step_x_temp; end - temp2 = nan(1,K-1); - for a=1:K-1 - [~,ind]= max(f_temp(:,a:a+1),[],2); - temp = x_temp(diff(ind)==1); - if length(temp) > 1; temp = temp(temp>mi(a) & temp= K; - thr = max(xvec) + 1e-10; -elseif K_noise >= 0 && K_noise < K - if K == 1 - K_noise = 0; - elseif K == 2 - K_noise = 1; - elseif K_noise == 0 - temp3 = [alpha;mi;sigma]'; - idx = kmeans(temp3,2,'emptyaction','singleton','replicates',20); - K_noise = sum(idx == idx(1)); - end -elseif K_noise < 0; - thr = min(xvec) - 1e-10; -end - -if ~exist('thr','var') - if isempty(temp2) - thr = NaN; - else - thr = temp2(K_noise); - end -end - -stats.thr = thr; -stats.alpha = alpha; -stats.mu = mi; -stats.K_noise = K_noise; -stats.K = K; -stats.sigma = sigma; -stats.logL = L_old; - -%drawing results -if draw - disp('After EM:') - disp(num2str([alpha; mi; sigma])) - disp(['Iterations: ' num2str(it) ' Stop crit: ' num2str(abs(L_new - L_old))]) - - figure; hold on; box on - bar(x,n,[min(xvec) max(xvec)],'hist'); - plot(x_temp',mean(diff(x))*N*sum(f_temp./repmat(step_x_temp,1,K),2),'b.'); - colors(1:K_noise) = 'r'; colors(K_noise+1:K) = 'g'; - for a=1:K - plot(x_temp',mean(diff(x))*N*f_temp(:,a)./step_x_temp,colors(a)); - end - plot(temp2,zeros(1,K-1),'r.') - title('After EM') - lines = findobj(gca,'Type','Line'); - set(lines,'LineWidth',2) - set(get(gca,'Ylabel'),'FontSize',14) -end - -function y = normpdf(x,mu,sigma) -y = exp(-0.5 * ((x - mu)./sigma).^2) ./ (sqrt(2*pi) .* sigma); \ No newline at end of file diff --git a/src/MATLAB/GMM/ms_gmm/qua_scal.m b/src/MATLAB/GMM/ms_gmm/qua_scal.m deleted file mode 100644 index c143d44..0000000 --- a/src/MATLAB/GMM/ms_gmm/qua_scal.m +++ /dev/null @@ -1,15 +0,0 @@ -% compute quality indices and scale of gmm model of the segment -function [qua,scale]=qua_scal(data,ygreki,ppoc,mivoc,sigvoc) - -xx=data; -y_a=ygreki; -ploty=0*xx; - -KS=length(ppoc); - -for kks=1:KS - ploty=ploty+ppoc(kks)*normpdf(xx,mivoc(kks),sigvoc(kks)); -end - -scale=sum(y_a)/sum(ploty); -qua=sum(abs(y_a/scale-ploty))/sum(y_a/scale); \ No newline at end of file diff --git a/src/MATLAB/ROI_approximation/approximate_by_dice.m b/src/MATLAB/ROI_approximation/approximate_by_dice.m deleted file mode 100644 index a1117fd..0000000 --- a/src/MATLAB/ROI_approximation/approximate_by_dice.m +++ /dev/null @@ -1,61 +0,0 @@ -function [approximation, index, fig, all_approximations, best] = approximate_by_dice(labeling, is_ground_truth, make_plot) -%APPROXIMATE_BY_DICE Approximates ground truth by clusters maximizing Dice -% [approximation, dice, fig] = approximate_by_dice(labeling, is_ground_truth, make_plot) -% [approximation, dice, fig, all_approximations, best] = approximate_by_dice(labeling, is_ground_truth, make_plot) - - labeling = reshape(labeling, [], 1); - is_ground_truth = reshape(is_ground_truth, [], 1); - - if nargin < 3 - - make_plot = false; - - end - - labels = sort_labels_descending_by_index(labeling, is_ground_truth, @ppv); - - approximation = false(length(labeling), length(labels) + 1); - coverage = zeros(1, length(labels) + 1); - - for i=1:length(labels) - - approximation(:, i + 1) = approximation(:, i) | (labeling == labels(i)); - coverage(i + 1) = dice(approximation(:, i + 1), is_ground_truth); - - end - - best = coverage == max(coverage); - - if make_plot - - fig = invisible_figure(); - ax = axes(fig); - plot(ax, ... - (0:length(labels)) / length(labels), coverage, 'b.', ... - find(best) / length(labels), coverage(best), 'r.'); - - end - - all_approximations = approximation; - best = find(best, 1); - index = coverage(best); - approximation = approximation(:, best); - -end - -function labels = sort_labels_descending_by_index(labeling, is_ground_truth, index) - - labels = unique(labeling); - score = NaN(size(labels)); - - parfor i=1:length(labels) - - cluster_selector = (labeling == labels(i)); - score(i) = feval(index, cluster_selector, is_ground_truth); - - end - - [~, order] = sort(score, 'descend'); - labels = labels(order); - -end \ No newline at end of file diff --git a/src/MATLAB/ROI_approximation/approximate_region_binding.m b/src/MATLAB/ROI_approximation/approximate_region_binding.m deleted file mode 100644 index ddfcd04..0000000 --- a/src/MATLAB/ROI_approximation/approximate_region_binding.m +++ /dev/null @@ -1,31 +0,0 @@ -function similarity_classes = approximate_region_binding(name, data, partition, approximated_region, varargin) -%APPROXIMATE_REGION_BINDING Finds region approximation -% similarity_classes = APPROXIMATE_REGION(name, divik_source, ... -% divik_result, approximated_region_names) - approximating -% function with consistent interface -% Supported approximators: -% - exROI (exroi function) -% - greedy (approximate_by_dice function) -% ___ = APPROXIMATE_REGION( ___, Name, Value ) - passes optional -% parameters to the underlying extractor. - - assert(ischar(name)); - params = {data, partition, approximated_region, varargin{:}}; %#ok - empty optionals case - - if strcmp(name, 'exROI') - - similarity_classes = wrapped_exroi_binding(params{:}); - - elseif strcmp(name, 'greedy') - - similarity_classes = wrapped_dice_approximation_binding(params{:}); - - else - - msgID = 'classification:training_set_selection:approximate_region_binding'; - msg = ['Unknown approximator: ' name]; - throw(MException(msgID, msg)); - - end - -end \ No newline at end of file diff --git a/src/MATLAB/ROI_approximation/exroi.m b/src/MATLAB/ROI_approximation/exroi.m deleted file mode 100644 index a992339..0000000 --- a/src/MATLAB/ROI_approximation/exroi.m +++ /dev/null @@ -1,45 +0,0 @@ -function [similarity_classes, thresholds, distances] = exroi(representative, centroids, partition, varargin) -%EXROI estimates similarity classes w.r.t. a representative -% similarity_classes = EXROI(representative, centroids, partition) -% [similarity_classes, thresholds, distances] = EXROI(representative, centroids, partition) -% ___ = EXROI(___, Name, Value) - allows to specify optional parameters: -% - Metric - metric w.r.t. which similarity is considered. Supported all -% of the metrics from pdist. -% - GMM_ParameterName - parameters passed to GMM decomposition prefixed -% with 'GMM_'. For more details see fetch_thresholds. -% NOTICE: There is an assumption made, that centroids can be indexed by -% partition vector content - - options = apply_user_configs(@get_defaults, varargin); - distances = pdist2(representative, centroids, options.Metric)'; - thresholds = fetch_thresholds(distances, ... - 'MaxComponents', options.GMM_MaxComponents, ... - 'CachePath', options.GMM_CachePath, ... - 'Cache', options.GMM_Cache, ... - 'FigurePath', options.GMM_FigurePath, ... - 'Threshold', options.GMM_Threshold); - thresholds = thresholds{1}; - similarity_classes = zeros(size(partition)); - t_partition = transpose(partition); - for ii = 1:length(thresholds) - - worse_class_centroids = find(distances >= thresholds(ii)); - worse_class_observations = rows_in(t_partition, worse_class_centroids); - similarity_classes(worse_class_observations) = ii; - - end - similarity_classes = similarity_classes' + 1; - -end - -function options = get_defaults() - - options = struct(); - options.Metric = 'correlation'; - options.GMM_MaxComponents = 10; - options.GMM_CachePath = '.'; - options.GMM_Cache = false; - options.GMM_FigurePath = ''; - options.GMM_Threshold = 0; - -end \ No newline at end of file diff --git a/src/MATLAB/ROI_approximation/summarize_similarity_classes_binding.m b/src/MATLAB/ROI_approximation/summarize_similarity_classes_binding.m deleted file mode 100644 index f200bf1..0000000 --- a/src/MATLAB/ROI_approximation/summarize_similarity_classes_binding.m +++ /dev/null @@ -1,114 +0,0 @@ -function summarize_similarity_classes_binding(method, data, xy, partition, approximated_roi, compared_roi, ... - destination, varargin) - - xy = translate_min_to_zero(xy); - approximated_roi_indication = rows_in(xy, approximated_roi); - compared_roi_indication = rows_in(xy, compared_roi); - partition = reshape(partition, size(data,1), 1); - similarity_classes = approximate_region_binding(method, data, partition, approximated_roi_indication, varargin{:}); -% similarity_classes = 1 - (similarity_classes == min(similarity_classes)); - - build_filename = @(name) [destination filesep method filesep name]; - - dump_report(similarity_classes, compared_roi_indication, partition, build_filename('report.csv')); - dump_visualizations(similarity_classes, xy, compared_roi_indication, build_filename); - -end - -function confusions = get_confusion_matrices(similarity_classes, region) - - unique_classes = length(unique(similarity_classes)); - confusions = NaN(unique_classes, 4); - - for ii = 1:unique_classes - 1 - - prediction_positive = similarity_classes <= ii; - tp = sum(prediction_positive & region); - tn = sum(~prediction_positive & ~region); - fp = sum(prediction_positive & ~region); - fn = sum(~prediction_positive & region); - confusions(ii, :) = [tp, tn, fp, fn]; - - end - -end - -function coverages = get_pixel_coverages(similarity_classes) - - unique_classes = length(unique(similarity_classes)); - coverages = NaN(unique_classes, 2); - - for ii = 1:unique_classes - 1 - - prediction_positive = similarity_classes <= ii; - number_predicted = sum(prediction_positive); - coverages(ii, :) = [number_predicted, number_predicted / length(similarity_classes)]; - - end - -end - -function coverages = get_cluster_coverages(similarity_classes, partition) - - unique_similarity_classes = length(unique(similarity_classes)); - unique_classes = unique(partition); - coverages = NaN(unique_similarity_classes, 2); - - for ii = 1:unique_similarity_classes - 1 - - prediction_positive = rows_in(unique_classes, partition(similarity_classes <= ii)); - number_predicted = sum(prediction_positive); - coverages(ii, :) = [number_predicted, number_predicted / length(unique_classes)]; - - end - -end - -function report = get_report(similarity_classes, region, merged_partition) - - confusions = get_confusion_matrices(similarity_classes, region); - pixel_coverages = get_pixel_coverages(similarity_classes); - cluster_coverages = get_cluster_coverages(similarity_classes, merged_partition); - report = array2table([confusions, pixel_coverages, cluster_coverages], ... - 'VariableNames', ... - {'TP', 'TN', 'FP', 'FN', 'PixelsNumber', 'PixelsPercent', 'ClustersNumber', 'ClustersPercent'}); - -end - -function dump_report(similarity_classes, region, merged_partition, filename) - - report = get_report(similarity_classes, region, merged_partition); - mkfiledir(filename); - writetable(report, filename); - -end - -function visualizations = visualize_similarity_classes(similarity_classes, xy) - - unique_classes = length(unique(similarity_classes)); - visualizations = cell(unique_classes, 1); - - for ii = 1:unique_classes - 1 - -% visualizations{ii} = list2img(xy, to_visualizable(similarity_classes <= ii)); - visualizations{ii} = list2img(xy, 255 * uint8(similarity_classes <= ii), uint8(0)); - - end - -end - -function dump_visualizations(similarity_classes, xy, ~, build_filename) - - visualized = visualize_similarity_classes(similarity_classes, xy); - - for ii = 1:length(visualized) - - imwrite(visualized{ii}, build_filename(['similarity_level_' num2str(ii) '.png'])); -% blended = mark_roi(visualized{ii}, xy, region, 'blend'); -% imwrite(blended, build_filename(['similarity_level_' num2str(ii) '_blend.png'])); -% falsecolor = mark_roi(visualized{ii}, xy, region, 'falsecolor'); -% imwrite(falsecolor, build_filename(['similarity_level_' num2str(ii) '_falsecolor.png'])); - - end - -end diff --git a/src/MATLAB/ROI_approximation/wrapped_dice_approximation_binding.m b/src/MATLAB/ROI_approximation/wrapped_dice_approximation_binding.m deleted file mode 100644 index b5d7ea2..0000000 --- a/src/MATLAB/ROI_approximation/wrapped_dice_approximation_binding.m +++ /dev/null @@ -1,5 +0,0 @@ -function similarity_classes = wrapped_dice_approximation_binding(~, partition, approximated_region, varargin) - - similarity_classes = 2 - approximate_by_dice(partition, approximated_region, false); - -end \ No newline at end of file diff --git a/src/MATLAB/ROI_approximation/wrapped_exroi_binding.m b/src/MATLAB/ROI_approximation/wrapped_exroi_binding.m deleted file mode 100644 index 9becc5a..0000000 --- a/src/MATLAB/ROI_approximation/wrapped_exroi_binding.m +++ /dev/null @@ -1,27 +0,0 @@ -function similarity_classes = wrapped_exroi_binding(data, partition, approximated_region, varargin) - - representative = mean(data(approximated_region, :), 1); - [centroids, partition] = reorganize(data, partition); - similarity_classes = exroi(representative, centroids, partition, varargin{:}); - -end - -function [centroids, partition] = reorganize(data, filthy_partition) - - merged = filthy_partition; - partition = NaN(size(merged)); - class_names = unique(merged); - centroids = NaN(length(class_names), size(data, 2)); - - for ii = 1:length(class_names) - - selection = merged == class_names(ii); - assert(sum(selection) > 0); - partition(selection) = ii; - centroids(ii, :) = mean(data(selection, :), 1); - - end - - assert(all(all(~isnan(centroids)))); - -end \ No newline at end of file diff --git a/src/MATLAB/adaptive_PAFFT.m b/src/MATLAB/adaptive_PAFFT.m deleted file mode 100644 index 729559a..0000000 --- a/src/MATLAB/adaptive_PAFFT.m +++ /dev/null @@ -1,180 +0,0 @@ -function alignedSpectrum = adaptive_PAFFT(mz,spectra, reference, segSize_perc, shift_perc, show_bar) - -% FUNCTION: alignedSpectrum = adaptive_PAFFT(spectra, target, segmentSize, shift) -% -% Spectrum alignment of spectral data to a reference using fast fourier -% transform correlation theorem by segmentation alignment -% -% INPUT: -% mz - Common M/Z of spectra -% spectra - Spectra to be shift corrected (D x N - where -% D is the number of samples, and N is number of -% data points. -% reference - Reference spectra, (spectrum and target must be -% of the same length). -% segSize_perc - Minimum segement size allowed (in M/Z %).[0.7] -% shift_perc - Optional: limits maximum shift allowed for each -% segment (in M/Z %).[0.1] -% -% OUTPUT: -% alignedSpectrum - resulting aligned spectra -% -% Author of original algorithm: Jason W. H. Wong -% Date: 12 April 2005 -% License Agreement: The authors accept no responsibility for the use of -% this code for any purpose. Please acknowledge the author if your work -% uses this code. -% -% Author of modified version: Michal Marczyk (waitbar, % shift and segSize added) - - -%MAIN---------------------------------------------------------------------- -if length(reference)~=length(spectra) - error('Reference and spectra of unequal lengths.'); -elseif length(reference)== 1 - error('Reference spectrum cannot be of length 1.'); -end -if length(mz) == size(spectra,1) - spectra = spectra'; -end -if length(mz) == size(reference,1) - reference = reference'; -end -if nargin < 3 - error('Not enough input values.'); -end -if nargin<4 - segSize_perc = .7; - shift_perc = .1; - show_bar = 0; -end -if nargin<5 - shift_perc = .1; - show_bar = 0; -end -if nargin<6 - show_bar = 0; -end - -alignedSpectrum(1:size(spectra,1),1:size(spectra,2)) = 0; -all_pos = []; -if show_bar - w = waitbar(0,'PAFFT alignment','CloseRequestFcn','','WindowStyle','Modal');end -for i=1:size(spectra,1) - if show_bar - waitbar(i/size(spectra,1),w);end - startpos = 1; - aligned =[]; - while startpos <= length(spectra) - scale_seg = (segSize_perc * 0.01)/(mz(startpos+1)-mz(startpos)); - segSize = round(scale_seg*mz(startpos)); - endpos=startpos+(segSize*2); - if endpos >=length(spectra) - samseg = spectra(i,startpos:length(spectra)); - refseg = reference(1,startpos:length(spectra)); - else - samseg = spectra(i,startpos+segSize:endpos-1); - refseg = reference(1,startpos+segSize:endpos-1); - minpos = findMin(samseg,refseg); - endpos = startpos+minpos+segSize; - samseg = spectra(i,startpos:endpos); - refseg = reference(1,startpos:endpos); - end - all_pos = [all_pos,endpos]; - scale_shift = (shift_perc * 0.01)/(mz(startpos+1)-mz(startpos)); - shift = round(scale_shift*mz(startpos+round(length(samseg)/2))); - lag = FFTcorr(samseg,refseg,shift); - aligned = [aligned,move(samseg,lag)]; - startpos = endpos+1; - end - alignedSpectrum(i,:) = aligned; -end -alignedSpectrum = alignedSpectrum'; -if show_bar - delete(w); - all_pos(all_pos>length(mz)) = []; - figure; plot(mz,reference,mz(all_pos),reference(all_pos),'k.') - xlabel('M/Z'); ylabel('Intensity'); legend({'Signal','Segment cut'}) -end - - -% FFT cross-correlation---------------------------------------------------- -function lag = FFTcorr(spectrum, target, shift) -%padding -M=size(target,2); -diff = 1000000; -for i=1:20 - curdiff=((2^i)-M); - if (curdiff > 0 && curdiff maxi) - maxi = vals(1,i); - maxpos = i; - end - if (vals(1,length(vals)-i+1) > maxi) - maxi = vals(1,length(vals)-i+1); - maxpos = length(vals)-i+1; - end -end - -if maxi < 0.1 - lag =0; - return; -end -if maxpos > length(vals)/2 - lag = maxpos-length(vals)-1; -else - lag = maxpos-1; -end -%------------------------------------------------------------------- - -%shift segments---------------------------------------------------------- -function movedSeg = move(seg, lag) -% sideways movement -if lag == 0 || lag >= length(seg) - movedSeg = seg; - return -end - -if lag > 0 - ins = ones(1,lag)*seg(1); - movedSeg = [ins seg(1:(length(seg) - lag))]; -elseif lag < 0 - lag = abs(lag); - ins = ones(1,lag)*seg(length(seg)); - movedSeg = [seg((lag+1):length(seg)) ins]; -end - -%find position to divide segment-------------------------------------- -function minpos = findMin(samseg,refseg) - -[Cs Is]=sort(samseg,2); -[Cr Ir]=sort(refseg,2); -for i=1:round(length(Cs)/20) - for j=1:round(length(Cs)/20) - if Ir(j)==Is(i); - minpos = Is(i); - return; - end - end -end -minpos = Is(1,1); \ No newline at end of file diff --git a/src/MATLAB/algorithms.prj b/src/MATLAB/algorithms.prj deleted file mode 100644 index a3ace05..0000000 --- a/src/MATLAB/algorithms.prj +++ /dev/null @@ -1,189 +0,0 @@ - - - MsiAlgorithms - - - 1.0 - Grzegorz Mrukwa, Michal Marczyk - {Grzegorz.Mrukwa; Michal.Marczyk}@polsl.pl - Silesian University of Technology - Contains algorithms developed for MALDI MSI data processing. - This library provides implementation of methods published in following papers: -Marczyk M, Polanska J, Polanski A: Comparison of Algorithms for Profile-Based Alignment of Low Resolution MALDI-ToF Spectra. In Advances in Intelligent Systems and Computing, Vol. 242 of Man-Machine Interactions 3, Gruca A, Czachorski T, Kozielski S, editors. Springer Berlin Heidelberg 2014, p. 193-201 (ISBN: 978-3-319-02308-3), ICMMI 2013, 22-25.10.2013 Brenna, Poland -P. Widlak, G. Mrukwa, M. Kalinowska, M. Pietrowska, M. Chekan, J. Wierzgon, M. Gawin, G. Drazek and J. Polanska, "Detection of molecular signatures of oral squamous cell carcinoma and normal epithelium - application of a novel methodology for unsupervised segmentation of imaging mass spectrometry data," Proteomics, vol. 16, no. 11-12, pp. 1613-21, 2016. -M. Pietrowska, H. C. Diehl, G. Mrukwa, M. Kalinowska-Herok, M. Gawin, M. Chekan, J. Elm, G. Drazek, A. Krawczyk, D. Lange, H. E. Meyer, J. Polanska, C. Henkel, P. Widlak, "Molecular profiles of thyroid cancer subtypes: Classification based on features of tissue revealed by mass spectrometry imaging," Biochimica et Biophysica Acta (BBA)-Proteins and Proteomics, 2016 -G. Mrukwa, G. Drazek, M. Pietrowska, P. Widlak and J. Polanska, "A Novel Divisive iK-Means Algorithm with Region-Driven Feature Selection as a Tool for Automated Detection of Tumour Heterogeneity in MALDI IMS Experiments," in International Conference on Bioinformatics and Biomedical Engineering, 2016. -A. Polanski, M. Marczyk, M. Pietrowska, P. Widlak and J. Polanska, "Signal partitioning algorithm for highly efficient Gaussian mixture modeling in mass spectrometry," PloS one, vol. 10, no. 7, p. e0134256, 2015. - - - \Silesian University of Technology\MsiAlgorithms\ - option.installpath.programfiles - - - - ${PROJECT_ROOT}\algorithms-compile\for_testing - ${PROJECT_ROOT}\algorithms-compile\for_redistribution_files_only - ${PROJECT_ROOT}\algorithms-compile\for_redistribution - ${PROJECT_ROOT}\algorithms-compile - false - - true - subtarget.net.component - - MatlabAlgorithms - - false - true - false - MyAppInstaller_web - algorithms_matlab_runtime - MyAppInstaller_app - true - false - - false - option.net.framework.default - false - - false - false - - - MatlabAlgorithms - Preprocessing - - C:\WINDOWS\System32 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${PROJECT_ROOT}\apply_gmm.m - ${PROJECT_ROOT}\dependencies\fetch_thresholds.m - ${PROJECT_ROOT}\DiviK\libs\divik_library\divik.m - ${PROJECT_ROOT}\estimate_gmm.m - ${PROJECT_ROOT}\load_divik_result.m - ${PROJECT_ROOT}\merge_gmm_model_components.m - ${PROJECT_ROOT}\pafft.m - ${PROJECT_ROOT}\reduce_gmm_by_component_area.m - ${PROJECT_ROOT}\reduce_gmm_by_component_height.m - ${PROJECT_ROOT}\remove_baseline.m - ${PROJECT_ROOT}\ROI_approximation\summarize_similarity_classes_binding.m - ${PROJECT_ROOT}\ticnorm.m - - - - - ${PROJECT_ROOT}\pafft.m - ${PROJECT_ROOT}\remove_baseline.m - ${PROJECT_ROOT}\ticnorm.m - - - ${PROJECT_ROOT}\apply_gmm.m - ${PROJECT_ROOT}\estimate_gmm.m - ${PROJECT_ROOT}\merge_gmm_model_components.m - ${PROJECT_ROOT}\reduce_gmm_by_component_area.m - ${PROJECT_ROOT}\reduce_gmm_by_component_height.m - - - ${PROJECT_ROOT}\DiviK\libs\divik_library\divik.m - ${PROJECT_ROOT}\load_divik_result.m - - - ${PROJECT_ROOT}\dependencies\fetch_thresholds.m - - - ${PROJECT_ROOT}\ROI_approximation\summarize_similarity_classes_binding.m - - - - - ${PROJECT_ROOT}\dependencies - ${PROJECT_ROOT}\DiviK\libs - ${PROJECT_ROOT}\GMM - ${PROJECT_ROOT}\load_divik_result.m - ${PROJECT_ROOT}\ROI_approximation - - - ${PROJECT_ROOT}\adaptive_PAFFT.m - - - - ${PROJECT_ROOT}\algorithms-compile\for_testing\MatlabAlgorithmsNative.dll - ${PROJECT_ROOT}\algorithms-compile\for_testing\MatlabAlgorithms_overview.html - ${PROJECT_ROOT}\algorithms-compile\for_testing\MatlabAlgorithms.dll - ${PROJECT_ROOT}\algorithms-compile\for_testing\readme.txt - - - - C:\Program Files\MATLAB\R2016b - - - - - - - - true - - - - - true - - - - - true - - - - - false - false - true - false - false - false - false - false - 6.2 - false - true - win64 - true - - - \ No newline at end of file diff --git a/src/MATLAB/apply_gmm.m b/src/MATLAB/apply_gmm.m deleted file mode 100644 index b478410..0000000 --- a/src/MATLAB/apply_gmm.m +++ /dev/null @@ -1,21 +0,0 @@ -function modelled = apply_gmm(mdl, data, mz) -%APPLY_GMM applies estimated model on data -% modelled = APPLY_GMM(mdl, data) returns data modelled using estimated -% GMM parameters. - - w=mdl.w; - mu=mdl.mu; - sig=mdl.sig; - KS=mdl.KS; - - modelled=zeros(size(data,1), KS); - for kks=1:KS - y_gmm = w(kks)*normpdf(mz, mu(kks), sig(kks)); - parfor i=1:size(data,1) - y=data(i,:); - wid=y.*y_gmm; - modelled(i, kks)=sum(wid); - end - end - -end diff --git a/src/MATLAB/dependencies/DataHash.m b/src/MATLAB/dependencies/DataHash.m deleted file mode 100644 index ce7b709..0000000 --- a/src/MATLAB/dependencies/DataHash.m +++ /dev/null @@ -1,484 +0,0 @@ -function Hash = DataHash(Data, Opt) -% DATAHASH - Checksum for Matlab array of any type -% This function creates a hash value for an input of any type. The type and -% dimensions of the input are considered as default, such that UINT8([0,0]) and -% UINT16(0) have different hash values. Nested STRUCTs and CELLs are parsed -% recursively. -% -% Hash = DataHash(Data, Opt) -% INPUT: -% Data: Array of these built-in types: -% (U)INT8/16/32/64, SINGLE, DOUBLE, (real or complex) -% CHAR, LOGICAL, CELL (nested), STRUCT (scalar or array, nested), -% function_handle. -% Opt: Struct to specify the hashing algorithm and the output format. -% Opt and all its fields are optional. -% Opt.Method: String, known methods for Java 1.6 (Matlab 2009a): -% 'SHA-1', 'SHA-256', 'SHA-384', 'SHA-512', 'MD2', 'MD5'. -% Known methods for Java 1.3 (Matlab 6.5): -% 'MD5', 'SHA-1'. -% Default: 'MD5'. -% Opt.Format: String specifying the output format: -% 'hex', 'HEX': Lower/uppercase hexadecimal string. -% 'double', 'uint8': Numerical vector. -% 'base64': Base64 encoded string, only printable ASCII -% characters, shorter than 'hex', no padding. -% Default: 'hex'. -% Opt.Input: Type of the input as string, not case-sensitive: -% 'array': The contents, type and size of the input [Data] are -% considered for the creation of the hash. Nested CELLs -% and STRUCT arrays are parsed recursively. Empty arrays of -% different type reply different hashs. -% 'file': [Data] is treated as file name and the hash is calculated -% for the files contents. -% 'bin': [Data] is a numerical, LOGICAL or CHAR array. Only the -% binary contents of the array is considered, such that -% e.g. empty arrays of different type reply the same hash. -% Default: 'array'. -% -% OUTPUT: -% Hash: String, DOUBLE or UINT8 vector. The length depends on the hashing -% method. -% -% EXAMPLES: -% % Default: MD5, hex: -% DataHash([]) % 7de5637fd217d0e44e0082f4d79b3e73 -% % MD5, Base64: -% Opt.Format = 'base64'; -% Opt.Method = 'MD5'; -% DataHash(int32(1:10), Opt) % bKdecqzUpOrL4oxzk+cfyg -% % SHA-1, Base64: -% S.a = uint8([]); -% S.b = {{1:10}, struct('q', uint64(415))}; -% Opt.Method = 'SHA-1'; -% DataHash(S, Opt) % ZMe4eUAp0G9TDrvSW0/Qc0gQ9/A -% % SHA-1 of binary values: -% Opt.Method = 'SHA-1'; -% Opt.Input = 'bin'; -% DataHash(1:8, Opt) % 826cf9d3a5d74bbe415e97d4cecf03f445f69225 -% -% NOTES: -% Function handles and user-defined objects cannot be converted uniquely: -% - The subfunction ConvertFuncHandle uses the built-in function FUNCTIONS, -% but the replied struct can depend on the Matlab version. -% - It is tried to convert objects to UINT8 streams in the subfunction -% ConvertObject. A conversion by STRUCT() might be more appropriate. -% Adjust these subfunctions on demand. -% -% MATLAB CHARs have 16 bits! In consequence the string 'hello' is treated as -% UINT16('hello') for the binary input method. Use this to get the hash of an -% ASCII-string (Result as defined in RFC 1321!): -% Opt.Method = 'MD5'; Opt.Input = 'bin'; -% DataHash(uint8('abc'), Opt); % '900150983cd24fb0d6963f7d28e17f72' -% -% DataHash uses James Tursa's smart and fast TYPECASTX, if it is installed: -% http://www.mathworks.com/matlabcentral/fileexchange/17476 -% As fallback the built-in TYPECAST is used automatically, but for large -% inputs this can be more than 100 times slower. -% -% Tested: Matlab 7.7, 7.8, 7.13, WinXP/32, Win7/64 -% Author: Jan Simon, Heidelberg, (C) 2011-2015 matlab.THISYEAR(a)nMINUSsimon.de -% -% See also: TYPECAST, CAST. -% FEX: -% Michael Kleder, "Compute Hash", no structs and cells: -% http://www.mathworks.com/matlabcentral/fileexchange/8944 -% Tim, "Serialize/Deserialize", converts structs and cells to a byte stream: -% http://www.mathworks.com/matlabcentral/fileexchange/29457 -% Jan Simon, "CalcMD5", MD5 only, much faster C-mex: -% http://www.mathworks.com/matlabcentral/fileexchange/25921 - -% $JRev: R-v V:022 Sum:68JCxGh2/Q0N Date:30-Mar-2015 01:35:37 $ -% $License: BSD (use/copy/change/redistribute on own risk, mention the author) $ -% $File: Tools\GLFile\DataHash.m $ -% History: -% 001: 01-May-2011 21:52, First version. -% 007: 10-Jun-2011 10:38, [Opt.Input], binary data, complex values considered. -% 011: 26-May-2012 15:57, Fixed: Failed for binary input and empty data. -% 014: 04-Nov-2012 11:37, Consider Mex-, MDL- and P-files also. -% Thanks to David (author 243360), who found this bug. -% Jan Achterhold (author 267816) suggested to consider Java objects. -% 016: 01-Feb-2015 20:53, Java heap space exhausted for large files. -% Now files are process in chunks to save memory. -% 017: 15-Feb-2015 19:40, Collsions: Same hash for different data. -% Examples: zeros(1,1) and zeros(1,1,0) -% complex(0) and zeros(1,1,0,0) -% Now the number of dimensions is included, to avoid this. -% 022: 30-Mar-2015 00:04, Bugfix: Failed for strings and [] without TYPECASTX. -% Ross found these 2 bugs, which occur when TYPECASTX is not installed. -% If you need the base64 format padded with '=' characters, adjust -% fBase64_enc as you like. - -% OPEN BUGS: -% Nath wrote: -% function handle refering to struct containing the function will create -% infinite loop. Is there any workaround ? -% Example: -% d= dynamicprops(); -% addprop(d,'f'); -% d.f= @(varargin) struct2cell(d); -% DataHash(d.f) % infinite loop - -% Main function: =============================================================== -% typecastx creates a shared data copy instead of the deep copy as Matlab's -% TYPECAST - for a [1000x1000] DOUBLE array this is 100 times faster! -persistent usetypecastx -if isempty(usetypecastx) - % Java is needed: - if ~usejava('jvm') - Error_L('NoJava', 'Java is required.'); - end - - usetypecastx = ~isempty(which('typecastx')); % Run the slow WHICH once only -end - -% Default options: ------------------------------------------------------------- -Method = 'MD5'; -OutFormat = 'hex'; -isFile = false; -isBin = false; - -% Check number and type of inputs: --------------------------------------------- -nArg = nargin; -if nArg == 2 - if isa(Opt, 'struct') == 0 % Bad type of 2nd input: - Error_L('BadInput2', '2nd input [Opt] must be a struct.'); - end - - % Specify hash algorithm: - if isfield(Opt, 'Method') - Method = upper(Opt.Method); - end - - % Specify output format: - if isfield(Opt, 'Format') - OutFormat = Opt.Format; - end - - % Check if the Input type is specified - default: 'array': - if isfield(Opt, 'Input') - if strcmpi(Opt.Input, 'File') - isFile = true; - if ischar(Data) == 0 - Error_L('CannotOpen', '1st input FileName must be a string'); - end - - elseif strncmpi(Opt.Input, 'bin', 3) % Accept 'binary' also - isBin = true; - if (isnumeric(Data) || ischar(Data) || islogical(Data)) == 0 || ... - issparse(Data) - Error_L('BadDataType', ... - '1st input must be numeric, CHAR or LOGICAL for binary input.'); - end - end - end - -elseif nArg ~= 1 % Bad number of arguments: - Error_L('BadNInput', '1 or 2 inputs required.'); -end - -% Create the engine: ----------------------------------------------------------- -try - Engine = java.security.MessageDigest.getInstance(Method); -catch - Error_L('BadInput2', 'Invalid algorithm: [%s].', Method); -end - -% Create the hash value: ------------------------------------------------------- -if isFile - % Check existence of file: - Found = FileExist(Data); - if ~Found - Error_L('FileNotFound', 'File not found: %s.', Data); - end - - % Open the file: - FID = fopen(Data, 'r'); - if FID < 0 - Error_L('CannotOpen', 'Cannot open file: %s.', Data); - end - - % Read file in chunks to save memory and Java heap space: - Chunk = 1e6; - [Data, Count] = fread(FID, Chunk, '*uint8'); - Engine.update(Data); - while Count == Chunk - [Data, Count] = fread(FID, Chunk, '*uint8'); - Engine.update(Data); - end - fclose(FID); - - % Calculate the hash: - if usetypecastx - Hash = typecastx(Engine.digest, 'uint8'); - else - Hash = typecast(Engine.digest, 'uint8'); - end - -elseif isBin % Contents of an elementary array, type tested already: - if isempty(Data) % Nothing to do, Engine.update fails for empty input! - if usetypecastx % Bugfix: Consider missing typecastx - Hash = typecastx(Engine.digest, 'uint8'); - else - Hash = typecast(Engine.digest, 'uint8'); - end - - elseif usetypecastx % Faster typecastx: - if isreal(Data) - Engine.update(typecastx(Data(:), 'uint8')); - else - Engine.update(typecastx(real(Data(:)), 'uint8')); - Engine.update(typecastx(imag(Data(:)), 'uint8')); - end - Hash = typecastx(Engine.digest, 'uint8'); - - else % Matlab's TYPECAST is less elegant: - if isnumeric(Data) - if isreal(Data) - Engine.update(typecast(Data(:), 'uint8')); - else - Engine.update(typecast(real(Data(:)), 'uint8')); - Engine.update(typecast(imag(Data(:)), 'uint8')); - end - elseif islogical(Data) % TYPECAST cannot handle LOGICAL - Engine.update(typecast(uint8(Data(:)), 'uint8')); - elseif ischar(Data) % TYPECAST cannot handle CHAR - Engine.update(typecast(uint16(Data(:)), 'uint8')); - % Bugfix: Line removed - end - Hash = typecast(Engine.digest, 'uint8'); - end - -elseif usetypecastx % Faster typecastx: - Engine = CoreHash_(Data, Engine); - Hash = typecastx(Engine.digest, 'uint8'); - -else % Slower built-in TYPECAST: - Engine = CoreHash(Data, Engine); - Hash = typecast(Engine.digest, 'uint8'); -end - -% Convert hash specific output format: ----------------------------------------- -switch OutFormat - case 'hex' - Hash = sprintf('%.2x', double(Hash)); - case 'HEX' - Hash = sprintf('%.2X', double(Hash)); - case 'double' - Hash = double(reshape(Hash, 1, [])); - case 'uint8' - Hash = reshape(Hash, 1, []); - case 'base64' - Hash = fBase64_enc(double(Hash)); - otherwise - Error_L('BadOutFormat', ... - '[Opt.Format] must be: HEX, hex, uint8, double, base64.'); -end - -% return; - -% ****************************************************************************** -function Engine = CoreHash_(Data, Engine) -% This method uses the faster typecastx version. - -% Consider the type and dimensions of the array to distinguish arrays with the -% same data, but different shape: [0 x 0] and [0 x 1], [1,2] and [1;2], -% DOUBLE(0) and SINGLE([0,0]): -% < v016: [class, size, data]. BUG! 0 and zeros(1,1,0) had the same hash! -% >= v016: [class, ndim, size, data] -Engine.update([uint8(class(Data)), ... - typecastx([ndims(Data), size(Data)], 'uint8')]); - -% Special treatment for sparse arrays - store the indices at first and the -% values afterwards: -if issparse(Data) - % Replace Data by vector of non-zero elements: - [i1, i2, Data] = find(Data); - Engine.update(typecastx(i1, 'uint8')); - Engine.update(typecastx(i2, 'uint8')); -end - -if isstruct(Data) % Hash for all array elements and fields: - F = sort(fieldnames(Data)); % Ignore order of fields - Engine = CoreHash_(F, Engine); % Consider the fieldnames - - for iS = 1:numel(Data) % Loop over elements of struct array - for iField = 1:length(F) % Loop over fields - Engine = CoreHash_(Data(iS).(F{iField}), Engine); - end - end - -elseif iscell(Data) % Get hash for all cell elements: - for iS = 1:numel(Data) - Engine = CoreHash_(Data{iS}, Engine); - end - -elseif isnumeric(Data) || islogical(Data) || ischar(Data) - if isempty(Data) == 0 - if isreal(Data) % TRUE for LOGICAL and CHAR also: - Engine.update(typecastx(Data(:), 'uint8')); - else % Complex input: - Engine.update(typecastx(real(Data(:)), 'uint8')); - Engine.update(typecastx(imag(Data(:)), 'uint8')); - end - end - -elseif isa(Data, 'function_handle') - Engine = CoreHash_(ConvertFuncHandle(Data), Engine); - -elseif (isobject(Data) || isjava(Data)) && ismethod(Data, 'hashCode') - Engine = CoreHash_(Data.hashCode, Engine); - -else % Most likely this is a user-defined object: - try - Engine = CoreHash_(ConvertObject(Data), Engine); - catch - warning(['JSimon:', mfilename, ':BadDataType'], ... - ['Type of variable not considered: ', class(Data)]); - end -end - -% return; - -% ****************************************************************************** -function Engine = CoreHash(Data, Engine) -% This methods uses the slower TYPECAST of Matlab -% See CoreHash_ for comments. - -Engine.update([uint8(class(Data)), ... - typecast([ndims(Data), size(Data)], 'uint8')]); - -if isstruct(Data) % Hash for all array elements and fields: - F = sort(fieldnames(Data)); % Ignore order of fields - Engine = CoreHash(F, Engine); % Catch the fieldnames - for iS = 1:numel(Data) % Loop over elements of struct array - for iField = 1:length(F) % Loop over fields - Engine = CoreHash(Data(iS).(F{iField}), Engine); - end - end -elseif iscell(Data) % Get hash for all cell elements: - for iS = 1:numel(Data) - Engine = CoreHash(Data{iS}, Engine); - end -elseif isempty(Data) -elseif isnumeric(Data) - if isreal(Data) - % G. Mrukwa: Prevents the problem with too big size. - casting_tmp = typecast(Data(:), 'uint8'); - max_val = (real(java.lang.Integer.MAX_VALUE)+1)/2; - if length(casting_tmp)>=max_val-1 - cut = 1; - while cut < length(casting_tmp) - if cut+max_val-2 part of the toolbox functions -% is replaced such that the hash for @mean does not depend on the Matlab -% version. -% Drawbacks: Anonymous functions, nested functions... -% funcStruct = functions(FuncH); -% funcfile = strrep(funcStruct.file, matlabroot, ''); -% FuncKey = uint8([funcStruct.function, ' ', funcfile]); - -% Finally I'm afraid there is no unique method to get a hash for a function -% handle. Please adjust this conversion to your needs. - -% return; - -% ****************************************************************************** -function DataBin = ConvertObject(DataObj) -% Convert a user-defined object to a binary stream. There cannot be a unique -% solution, so this part is left for the user... - -% Perhaps a direct conversion is implemented: -DataBin = uint8(DataObj); - -% Or perhaps this is better: -% DataBin = struct(DataObj); - -% return; - -% ****************************************************************************** -function Out = fBase64_enc(In) -% Encode numeric vector of UINT8 values to base64 string. -% The intention of this is to create a shorter hash than the HEX format. -% Therefore a padding with '=' characters is omitted on purpose. - -Pool = [65:90, 97:122, 48:57, 43, 47]; % [0:9, a:z, A:Z, +, /] -v8 = [128; 64; 32; 16; 8; 4; 2; 1]; -v6 = [32, 16, 8, 4, 2, 1]; - -In = reshape(In, 1, []); -X = rem(floor(In(ones(8, 1), :) ./ v8(:, ones(length(In), 1))), 2); -Y = reshape([X(:); zeros(6 - rem(numel(X), 6), 1)], 6, []); -Out = char(Pool(1 + v6 * Y)); - -% return; - -% ****************************************************************************** -function Ex = FileExist(FileName) -% A more reliable version of EXIST(FileName, 'file'): -dirFile = dir(FileName); -if length(dirFile) == 1 - Ex = ~(dirFile.isdir); -else - Ex = false; -end - -% return; - -% ****************************************************************************** -function Error_L(ID, varargin) - -error(['JSimon:', mfilename, ':', ID], ['*** %s: ', varargin{1}], ... - mfilename, varargin{2:nargin - 1}); - -% return; diff --git a/src/MATLAB/dependencies/any_dist.m b/src/MATLAB/dependencies/any_dist.m deleted file mode 100644 index 277b7a0..0000000 --- a/src/MATLAB/dependencies/any_dist.m +++ /dev/null @@ -1,42 +0,0 @@ -function dist = any_dist( metric_name, v1, v2 ) -%ANY_DIST Calculates distance -% dist = ANY_DIST( metric_name, v1, v2 ) -% Calculates distance between two specified vectors using specified -% metric. - - if strcmp(metric_name,'pearson') - dist = 1-corr(v1,v2,'type','Pearson'); - elseif strcmp(metric_name,'pearson_squared') - dist = 1-power(corr(v1,v2,'type','Pearson'),2); - elseif strcmp(metric_name,'spearman') - dist = 1-corr(v1,v2,'type','Spearman'); - elseif strcmp(metric_name,'canberra') - dist_f = @(v1,v2) sum(abs(v1-v2)./(abs(v1)+abs(v2)),1); - dist = array_result(dist_f,v1,v2); - elseif strcmp(metric_name,'euclidean') - dist = pdist2(v1',v2',metric_name); - return; - dist_f = @(v1,v2) sqrt(sum(power(v1-v2,2),1)); - dist = array_result(dist_f,v1,v2); - else - dist = squareform(pdist([v1 v2]',metric_name)); - [~,nv1]=size(v1); - dist = dist(1:nv1,(nv1+1):end); - end - -end - -function dist = array_result(fun,v1,v2) -%ARRAY_RESULT Applies specified distance metric to deal with arrays. - - [~,nv1] = size(v1); - [~,nv2] = size(v2); - dist = NaN(nv1,nv2); - for i = 1:nv1 - v = v1(:,i); - parfor j=1:nv2 - dist(i,j) = fun(v,v2(:,j)); %#ok - end - end - -end \ No newline at end of file diff --git a/src/MATLAB/dependencies/apply_user_configs.m b/src/MATLAB/dependencies/apply_user_configs.m deleted file mode 100644 index 133f580..0000000 --- a/src/MATLAB/dependencies/apply_user_configs.m +++ /dev/null @@ -1,20 +0,0 @@ -function settings = apply_user_configs(settings_gen,configs) -%APPLY_USER_CONFIGS Applies all of the features specified by the user. -% settings = APPLY_USER_CONFIGS(settings_gen,configs) - - nvararg = length(configs); - settings = settings_gen(); - - for i = 1:nvararg - if mod(i,2)==1 - property_name = configs{i}; - else - if isfield(settings,property_name) - settings.(property_name) = configs{i}; - else - error(['Unknown option ' property_name '.']); - end - end - end - -end \ No newline at end of file diff --git a/src/MATLAB/dependencies/cacher.m b/src/MATLAB/dependencies/cacher.m deleted file mode 100644 index 1a0db0c..0000000 --- a/src/MATLAB/dependencies/cacher.m +++ /dev/null @@ -1,160 +0,0 @@ -function varargout = cacher( fun, fun_in, varargin ) -%CACHER For specified function handle and parameters caches result -% Simple result caching function. -% output = CACHER( fun, params ) - returns the same result as function -% fun for the arguments specified in params cell, but caches the result -% to avoid unnecessary calculations overheating -% output = CACHER( ___, 'Name', 'Value') allows to specify additional -% options: -% - 'CachePath' - specifies folder, where cache has to be established -% (default current working directory) -% - 'DropOnHashError' - specifies whether error in hashing stops the -% execution and the exception is thrown or if the execution continues -% with a warning (default false - warning is thrown). -% - 'OutputsNumber' - in the case of function with unknown number of -% output arguments, this option is mandatory. -% - 'Enabled' - allows to disable caching without making significant -% changes to code. (default true) -% -% Please note, that whole function output will be cached (what matters -% especially in case of big data, where enormous storages can be occupied -% on hard disk by this cacher). To avoid that, one can wrap one function -% into another, returning much smaller data (e.g., using lambda functions -% or any other way). -% -% Author: Grzegorz Mrukwa - - %%APPLY SETTINGS - settings = apply_user_configs(@get_defaults,varargin); - - if nargout(fun) == -1 && settings.OutputsNumber == -1 - error(['Number of outputs must be specified in the case of ', ... - 'function with unknown number of output arguments.']); - elseif nargout(fun) == -1 - nout = settings.OutputsNumber; - else - nout = nargout(fun); - end - - if ~settings.Enabled - [varargout{1:nout}] = fun(fun_in{:}); - return; - end - - try - fun_hash = DataHash(fun); - fun_in_hash = DataHash(fun_in); - catch hash_error - if settings.DropOnHashError - rethrow(hash_error); - else - warning([sprintf('Hash calculation failed. Skipping partial result check and save. Error report:\n') getReport(hash_error)]); - try - %%CALCULATE NONCACHED RESULT - [varargout{1:nout}] = fun(fun_in{:}); - return; - catch e - rethrow(e); - end - end - end - - %%CHECK CACHE - [was_cached, varargout] = check_result(fun_hash,fun_in_hash,settings); - if was_cached - return; - end - - %%CALCULATE NONCACHED RESULT - dname = diary_fname(fun_hash,fun_in_hash,settings); - logged = capture_diary(fun,dname,'OutputsNumber',nout); - [varargout{1:nout}] = logged(fun_in{:}); - %%SAVE NONCACHED RESULT - save_result(fun_hash,fun_in_hash,varargout,settings); -end - -function settings = get_defaults() - settings.CachePath = '.'; - settings.DropOnHashError = false; - settings.OutputsNumber = -1; - settings.Enabled = true; -% settings.OneFile = true; -% settings.OneFile = false; -end - -% function dictionary_cache = get_cache(settings) -% persistent cache; -% if isempty(cache) -% cache = containers.Map(); -% if exist([settings.CachePath '\cache.mat'],'file')==2 -% load([settings.CachePath '\cache.mat']); -% end -% end -% dictionary_cache = cache; -% end - -function [was_cached, result] = check_result(fun_hash,fun_in_hash,settings) - slash = get_dir_char(); -% if ~settings.OneFile - path = [settings.CachePath slash 'cache' slash fun_hash slash fun_in_hash '.mat']; - if exist(path,'file')~=2 - was_cached = false; - result = {}; - return; - end - load(path); - path = [settings.CachePath slash 'cache' slash fun_hash slash fun_in_hash '.txt']; - if exist(path,'file')==2 - type(path); - else - warning('Report missing.'); - end - was_cached = true; -% else -% dictionary = get_cache(settings); -% if ~isKey(dictionary,fun_hash) || ~isKey(dictionary(fun_hash),fun_in_hash) -% was_cached = false; -% result = {}; -% return; -% end -% results = dictionary(fun_hash); -% result = results(fun_in_hash); -% was_cached = true; -% end -end - -function save_result(fun_hash,fun_in_hash,result,settings) %#ok - slash = get_dir_char(); -% if ~settings.OneFile - path = [settings.CachePath slash 'cache' slash fun_hash]; - if exist(path,'dir')~=7 - mkdir(path); - end - save([path slash fun_in_hash '.mat'],'result'); -% else -% cache = get_cache(settings); -% if ~isKey(cache,fun_hash) -% cache(fun_hash) = containers.Map(); -% end -% results = cache(fun_hash); -% results(fun_in_hash) = result; %#ok -% save([settings.CachePath slash 'cache.mat'],'cache'); -% end -end - -function fname = diary_fname(fun_hash,fun_in_hash,settings) - slash = get_dir_char(); - fname = [settings.CachePath slash 'cache' slash fun_hash slash ... - fun_in_hash '.txt']; -end - -function dir_char = get_dir_char() - - os = getenv('OS'); - if(strcmp(os,'Windows_NT')) - dir_char = '\'; - else - dir_char = '/'; - end - -end diff --git a/src/MATLAB/dependencies/capture_diary.m b/src/MATLAB/dependencies/capture_diary.m deleted file mode 100644 index a4d16bd..0000000 --- a/src/MATLAB/dependencies/capture_diary.m +++ /dev/null @@ -1,35 +0,0 @@ -function logged = capture_diary(f,fname,varargin) -%CAPTURE_DIARY Makes function capturing diary to file. -% f = CAPTURE_DIARY(f,fname) returns another function which saves -% its results to a file during running. -% f = CAPTURE_DIARY( ___, 'Name', 'Value') allows to specify -% additional Name-Value pairs of arguments: -% - 'OutputsNumber' - in the case of function with unknown number of -% output arguments, this option is mandatory. - - settings = apply_user_configs(@defaults,varargin); - - if nargout(f) == -1 && settings.OutputsNumber == -1 - error(['Number of outputs must be specified in the case of ', ... - 'function with unknown number of output arguments.']); - elseif nargout(f) == -1 - nout = settings.OutputsNumber; - else - nout = nargout(f); - end - - function varargout = f_logged(varargin) - mkfiledir(fname); - diary(fname); - [varargout{1:nout}] = f(varargin{:}); - diary('off'); - end - - logged = @f_logged; - -end - -function settings = defaults() - settings = struct(); - settings.OutputsNumber = -1; -end \ No newline at end of file diff --git a/src/MATLAB/dependencies/cell_settings.m b/src/MATLAB/dependencies/cell_settings.m deleted file mode 100644 index ac1f15e..0000000 --- a/src/MATLAB/dependencies/cell_settings.m +++ /dev/null @@ -1,8 +0,0 @@ -function cell_settings = copy_settings(settings) - fields = fieldnames(settings); - cell_settings = cell(1,2*numel(fields)); - for i=1:numel(fields) - cell_settings{2*i-1} = fields{i}; - cell_settings{2*i} = settings.(fields{i}); - end -end \ No newline at end of file diff --git a/src/MATLAB/dependencies/dice.m b/src/MATLAB/dependencies/dice.m deleted file mode 100644 index d64d8b6..0000000 --- a/src/MATLAB/dependencies/dice.m +++ /dev/null @@ -1,8 +0,0 @@ -function index = dice(region, ground_truth) -%DICE Dice's index of region similarity -% index = DICE(region, ground_truth) - region & ground_truth are bool -% masks representing estimate and ground truth regions. - - index = 2 * sum(region & ground_truth) / (sum(region) + sum(ground_truth)); - -end \ No newline at end of file diff --git a/src/MATLAB/dependencies/fetch_centroids_from_partition.m b/src/MATLAB/dependencies/fetch_centroids_from_partition.m deleted file mode 100644 index 73e8664..0000000 --- a/src/MATLAB/dependencies/fetch_centroids_from_partition.m +++ /dev/null @@ -1,17 +0,0 @@ -function [centroids, identification] = fetch_centroids_from_partition( data, partition ) -%FETCH_CENTROIDS_FROM_PARTITION Calculates cluster centroids -% [centroids, identification] = FETCH_CENTROIDS_FROM_PARTITION( data, partition ) - - %n = max(partition); - %m = min(partition); - centroids = []; - identification = []; - - for i=unique(partition) - %if sum(partition==i)>0 - centroids = [centroids mean(data(:,partition==i),2)]; - identification = [identification i]; - %end - end - -end diff --git a/src/MATLAB/dependencies/fetch_thresholds.m b/src/MATLAB/dependencies/fetch_thresholds.m deleted file mode 100644 index f6258a3..0000000 --- a/src/MATLAB/dependencies/fetch_thresholds.m +++ /dev/null @@ -1,150 +0,0 @@ -function [ thresholds ] = fetch_thresholds( params, varargin ) -%FETCH_THRESHOLDS Decomposes parameters into Gaussian peaks and finds -%crossings -% thresholds = FETCH_THRESHOLDS(params) - returns cell containing all -% thresholds for every column of parameters. If no crossings are present, -% no threshold will be returned for specific parameter. Parameter params -% should be column vector with values of parameter investigates or matrix -% consisting of such column vectors. For matrix m x n a cell 1 x n will -% be returned where every field contains all available thresholds as -% column vector. -% Additional parameters: -% - 'MaxComponents' - (default 10) -% - 'CachePath' - (default '.') -% - 'Cache' - default false - no cache is created. -% - 'FigurePath' - decomposition plot is saved (default empty - no figure -% is plotted) -% - 'Threshold' - returns only thresholds associated with components of -% higher amplitude (default 0) - - settings = apply_user_configs(varargin); - - nvarargs = length(varargin); - - %parameter in column with values in the rows - [n,m] = size(params); - - max_components_number = settings.MaxComponents; - - thresholds = cell(1,m); - - %for every parameter considered - for i=1:m - - a = cell(1,max_components_number); - mu = cell(1,max_components_number); - sig = cell(1,max_components_number); - TIC = cell(1,max_components_number); - BIC = Inf*ones(1,max_components_number); - param = params(:,i); - parfor compnum=1:max_components_number -% for compnum=1:max_components_number - try - [a{compnum},mu{compnum},sig{compnum},TIC{compnum},l_lik]=cacher(@gaussian_mixture_simple,{param,ones(n,1),compnum},'CachePath',settings.CachePath,'Enabled',settings.Cache); - BIC(compnum) = -2*l_lik+(3*compnum-1)*log(n); - catch - warning(['Failed to decompose ' num2str(i) '. parameter into ' num2str(compnum) ' components.']); - end - end - - best_compnum = find(BIC == min(BIC),1); - best_mu = mu{best_compnum}; - best_sig = sig{best_compnum}; - best_a = a{best_compnum}; - %FIX - TIC = TIC{best_compnum}; - - f = gmm_uborder_fun(best_mu, best_sig, best_a); - - if ~isempty(settings.FigurePath) - - summing = gmm_sum_fun(best_mu,best_sig,best_a); - fig = figure; - fig.CurrentAxes = axes('Position' , [0.1 0.1 0.8 0.8]); - ax = fig.CurrentAxes; - set(fig,'Visible','off'); - hold on - hist(ax,param,floor(sqrt(length(param)))); - [nn,xx] = hist(ax,param,floor(sqrt(length(param)))); - TIC = (xx(2)-xx(1))*n; - xvec=linspace(min(param),max(param),1000); - for jj=1:best_compnum, - h(1)=plot(ax,xvec,TIC*best_a(jj)*normpdf(xvec,best_mu(jj),best_sig(jj)),'g'); - set(h(1),'Linewidth',2) - end - yvec = summing(xvec)*TIC; - h(2)=plot(ax,xvec,yvec,'r'); - set(h(2),'Linewidth',3) - xlabel(ax,['Parameter ' num2str(i)]) - legend(ax,h,{'GMM components';'Final model'}) - title(ax,['Gaussian Mixture Model, KS = ',num2str(best_compnum)]); - hold off - if m>1 - mkfiledir([settings.FigurePath '_param_' num2str(i)]); - saveas(fig,[settings.FigurePath '_param_' num2str(i)]); - else - mkfiledir(settings.FigurePath); - saveas(fig,settings.FigurePath); - end - close gcf - - - end - - %if there are few components - if best_compnum>1 - %they are picked pairwise - for j=1:(best_compnum-1) - for k=(j+1):best_compnum - - weight_condition = TIC*best_a(j)*normpdf(best_mu(j),best_mu(j),best_sig(j)) > settings.Threshold && ... - TIC*best_a(k)*normpdf(best_mu(k),best_mu(k),best_sig(k)) > settings.Threshold; - - %their upper bound - g = gmm_uborder_fun(best_mu([j,k]), best_sig([j,k]), best_a([j,k])); - %the disjunction condition & weight condition is checked - if g(best_mu(j))==best_a(j)*normpdf(best_mu(j),best_mu(j),best_sig(j)) && ... - g(best_mu(k))==best_a(k)*normpdf(best_mu(k),best_mu(k),best_sig(k)) && ... - weight_condition - %their crossing is found - crossing = fminbnd(g,min(best_mu([j,k])),max((best_mu([j,k])))); - %and it is checked to be on the upper border - if f(crossing)==g(crossing) - %if it truly is, then it is saved - thresholds{i} = [thresholds{i}; crossing]; - end - end - end - end - thresholds{i} = sort(thresholds{i},'ascend'); - end - - end -end - -function settings = set_defaults() - settings.MaxComponents = 10; - settings.Cache = false; - settings.CachePath = '.'; - settings.FigurePath = ''; - settings.Threshold = 0; -end - - -function settings = apply_user_configs(configs) -%APPLY_USER_CONFIGS Applies all of the features specified by the user. -% settings = APPLY_USER_CONFIGS(configs) - - settings = set_defaults(); - - nvararg = length(configs); - - for i = 1:nvararg - if mod(i,2)==1 - property_name = configs{i}; - else - settings.(property_name) = configs{i}; - end - end - -end diff --git a/src/MATLAB/dependencies/filthy_merge.m b/src/MATLAB/dependencies/filthy_merge.m deleted file mode 100644 index 5263553..0000000 --- a/src/MATLAB/dependencies/filthy_merge.m +++ /dev/null @@ -1,24 +0,0 @@ -function [ merged ] = filthy_merge( primary, secondary, replaced ) -%FILTHY_MERGE Merges the vectors containing data partition and -%subpartition of one part leaving empty clusters. -% merged = FILTHY_MERGE(primary,secondary,repl) - returns the vector -% containing cluster specified by repl from vector primary split -% according to vector secondary. - - if isempty(secondary) - merged = primary; - return; - end - - merged = primary; - - %min_primary = min(primary); - max_primary = max(primary); - min_secondary = min(secondary); - %max_secondary = max(secondary); - - secondary = secondary + max_primary - min_secondary + 1; - - merged(primary==replaced) = secondary;% + replaced - 1; - -end \ No newline at end of file diff --git a/src/MATLAB/dependencies/gmm_segmentation.m b/src/MATLAB/dependencies/gmm_segmentation.m deleted file mode 100644 index 9eec940..0000000 --- a/src/MATLAB/dependencies/gmm_segmentation.m +++ /dev/null @@ -1,67 +0,0 @@ -function [partition, thresholds] = gmm_segmentation( data, varargin ) -%GMM_SEGMENTATION Performs segmentation basing on GMM decomposition -%thresholds -% partition = GMM_SEGMENTATION( data ) - features should be in rows, -% observations in columns. For mxn data matrix returns mxn partition -% matrix with partition of objects with respect to every feature (if any -% thresholds may be obtained). -% partition = GMM_SEGMENTATION( data, out_dir ) - saves all decomposition -% plots into specified directory -% [partition, thresholds] = GMM_SEGMENTATION( ___ ) - returns also -% obtained thresholds - - [m,~] = size(data); - if length(varargin)==1 - out_dir = varargin{1}; - if exist(out_dir,'dir')~=7 - mkdir(out_dir); - end -% [m,n] = size(data); -% parfor i=1:m -% hist(data(i,:),sqrt(n)); -% saveas(gcf,[out_dir '\hist_' num2str(i)],'png'); -% saveas(gcf,[out_dir '\hist_' num2str(i)],'fig'); -% close; -% hist(tukey_outlier_detection(data(i,:)),sqrt(n)); -% saveas(gcf,[out_dir '\hist_tukey_' num2str(i)],'png'); -% saveas(gcf,[out_dir '\hist_tukey_' num2str(i)],'fig'); -% close; -% end - - %%outlier detection disabled - %thresholds = fetch_thresholds(data',0,[out_dir '\score']); - %%outlier detection enabled - %thresholds = arrayfun(@(i) fetch_thresholds(tukey_outlier_detection(data(i,:))',0,[out_dir '\score_param_' num2str(i)]),1:m,'UniformOutput',false); - thresholds = cell(m,1); - for i = 1:m - outlier_removed_data = cacher(@huberta_outlier_detection,{data(i,:)},'CachePath',[out_dir '\score_param_' num2str(i)]); - thresholds{i} = cell2mat(fetch_thresholds(outlier_removed_data',0,[out_dir '\score_param_' num2str(i)])); - i - end - else - %%outlier detection disabled - %thresholds = fetch_thresholds(data'); - %%outlier detection enabled - %thresholds = arrayfun(@(i) fetch_thresholds(tukey_outlier_detection(data(i,:))'),1:m,'UniformOutput',false); - thresholds = cell(m,1); - for i = 1:m - thresholds{i} = cell2mat(fetch_thresholds(huberta_outlier_detection(data(i,:))')); - i - end - end - - partition = ones(size(data)); - - parfor i = 1:length(thresholds) - t = thresholds{i}; - if ~isempty(t) - row = partition(i,:)*length(t); - for j=1:length(t) - row(data(i,:)>=t(j)) = length(t) - (j-1); - end - partition(i,:) = row; - end - end - -end - diff --git a/src/MATLAB/dependencies/gmm_sum_fun.m b/src/MATLAB/dependencies/gmm_sum_fun.m deleted file mode 100644 index 6cf71e2..0000000 --- a/src/MATLAB/dependencies/gmm_sum_fun.m +++ /dev/null @@ -1,22 +0,0 @@ -function [ f ] = gmm_sum_fun( mu, sig, amp ) -%GMM_UBORDER_FUN Finds the function which represents GMM. -% f = GMM_SUM_FUN(mu,sig,amp) - returns a handle to function (f) -% which calculates value of sum operating on values of -% distributions specified by its mean (mu), standard deviation (sig) and -% amplitude (amp). Parameters: mu, sig, amp can be at max two dimensional. - - if sum(size(mu)==size(sig)) 3 || nargin == 0 - error('Wrong number of arguments provided'); - end - - xy = translate_min_to_zero(xy) + 1; - xy = invert_y(xy); - - img = []; - - for i = 1:size(vals, 2) - tmp_img = default(i) * uint8(ones(max(xy)))'; - idx = sub2ind(size(tmp_img), xy(:,2), xy(:,1)); - tmp_img(idx) = vals(:,i); - img = cat(3, img, tmp_img); - end - -end - -function xy = invert_y(xy) - - biggest = max(xy(:,2)); - xy(:,2) = 1 + biggest - xy(:,2); - -end \ No newline at end of file diff --git a/src/MATLAB/dependencies/mkfiledir.m b/src/MATLAB/dependencies/mkfiledir.m deleted file mode 100644 index fff88cd..0000000 --- a/src/MATLAB/dependencies/mkfiledir.m +++ /dev/null @@ -1,13 +0,0 @@ -function res = mkfiledir(fname) -%MKFILEDIR Makes directory for a filename specified, if non existing. -% res = MKFILEDIR(fname) - - last = find(fname=='\' | fname=='/',1,'last'); - dir_fname = fname(1:last-1); - if exist(dir_fname,'dir')~=7 - res = mkdir(dir_fname); - else - res = 1; - end - -end diff --git a/src/MATLAB/dependencies/partition_plotter.m b/src/MATLAB/dependencies/partition_plotter.m deleted file mode 100644 index 92dd977..0000000 --- a/src/MATLAB/dependencies/partition_plotter.m +++ /dev/null @@ -1,48 +0,0 @@ -function partition_plotter( xy, partition, index, out_path, filename, title_on, varargin ) -%PARTITION_PLOTTER Plots the partition using xy positions -% PARTITION_PLOTTER( xy, partition, index, out_path, filename, title_on ) -% PARTITION_PLOTTER( xy, partition, index, out_path, filename, title_on, coloring_enabled ) -% PARTITION_PLOTTER( xy, partition, index, out_path, filename, title_on, colors ) - - if exist(out_path,'dir')~=7 - mkdir(out_path); - end - - %plotting initialization - [~,objects_number] = size(xy); - left = min(xy(1,:)); - right = max(xy(1,:)); - top = max(xy(2,:)); - down = min(xy(2,:)); - moved_xy = [ xy(1,:)-left+1; xy(2,:)-down+1]; - width = max(moved_xy(1,:))+1; - height = max(moved_xy(2,:))+1; - - %mono plotting - A = ones(height,width); - part_num = max(partition); - for j=1:objects_number - A(moved_xy(2,j),moved_xy(1,j)) = 1-partition(j)/part_num; - end - if title_on - filename = [filename '__k_' num2str(part_num) '__dunn_' num2str(index) '_']; - end - imwrite(resize(A,5),[out_path '\' filename '_mono.png']); - - %color plotting - if ~isempty(varargin) && islogical(varargin{1}) && varargin{1} - %% COLORING DISABLED - C = color_mono_drawing([out_path '\' filename '_mono.png']); - imwrite(C,[out_path '\' filename '_color.png']) - %% COLORING DISABLED END - elseif ~isempty(varargin) && isfloat(varargin{1}) - C = cat(3, A, A, A); - colors = varargin{1}; - for j=1:objects_number - C(moved_xy(2,j),moved_xy(1,j),:) = colors(partition(j),:); - end - imwrite(C,[out_path '\' filename '_color.png']) - end - -end - diff --git a/src/MATLAB/dependencies/ppv.m b/src/MATLAB/dependencies/ppv.m deleted file mode 100644 index fc7cd48..0000000 --- a/src/MATLAB/dependencies/ppv.m +++ /dev/null @@ -1,7 +0,0 @@ -function index = ppv(region, ground_truth) -%PPV Positive predictive value -% index = PPV(region, ground_truth) - same API as dice - - index = sum(region & ground_truth) / sum(region); - -end \ No newline at end of file diff --git a/src/MATLAB/dependencies/resize.m b/src/MATLAB/dependencies/resize.m deleted file mode 100644 index 335dd37..0000000 --- a/src/MATLAB/dependencies/resize.m +++ /dev/null @@ -1,40 +0,0 @@ -function A = resize( a, multiplier ) -%RESIZE Scales matrix copying objects. -% A = RESIZE(a,multiplier) returns matrix A which consists of square -% block matrices of size multiplier and values the same for matrix on -% i,j-th position in A as for value on i,j-th position in a. - - if length(size(a))==3 - [h,w,z] = size(a); - A = NaN*zeros(h*multiplier,w*multiplier,z); - - if isa(a,'uint8') - for i=1:h - for j=1:w - for k=1:z - A(multiplier*(i-1)+1:multiplier*i,multiplier*(j-1)+1:multiplier*j,k) = uint8(ones(multiplier))*a(i,j,k); - end - end - end - else - for i=1:h - for j=1:w - for k=1:z - A(multiplier*(i-1)+1:multiplier*i,multiplier*(j-1)+1:multiplier*j,k) = ones(multiplier)*a(i,j,k); - end - end - end - end - else - A = NaN*zeros(size(a)*multiplier); - - [h,w] = size(a); - for i=1:h - for j=1:w - A(multiplier*(i-1)+1:multiplier*i,multiplier*(j-1)+1:multiplier*j) = ones(multiplier)*a(i,j); - end - end - end - -end - diff --git a/src/MATLAB/dependencies/rows_in.m b/src/MATLAB/dependencies/rows_in.m deleted file mode 100644 index 8720c7f..0000000 --- a/src/MATLAB/dependencies/rows_in.m +++ /dev/null @@ -1,7 +0,0 @@ -function found = rows_in(to_find, reference) -%ROWS_IN checks which rows are present in control matrix -% found = ROWS_IN(to_find, reference) - - found = ismember(to_find, reference, 'rows'); - -end \ No newline at end of file diff --git a/src/MATLAB/dependencies/translate_min_to_zero.m b/src/MATLAB/dependencies/translate_min_to_zero.m deleted file mode 100644 index 70ac567..0000000 --- a/src/MATLAB/dependencies/translate_min_to_zero.m +++ /dev/null @@ -1,8 +0,0 @@ -function normalized = translate_min_to_zero(data) -%TRANSLATE_MIN_TO_ZERO subtracts min of columns -% normalized = TRANSLATE_MIN_TO_ZERO(data)s - - mins = min(data,[],1); - normalized = data - repmat(mins, size(data, 1), 1); - -end \ No newline at end of file diff --git a/src/MATLAB/estimate_gmm.m b/src/MATLAB/estimate_gmm.m deleted file mode 100644 index 5488a53..0000000 --- a/src/MATLAB/estimate_gmm.m +++ /dev/null @@ -1,19 +0,0 @@ -function mdl = estimate_gmm(mz, data) -%ESTIMATE_GMM Estimates GMM model for given data -% mdl = ESTIMATE_GMM(mz, data, mz_thr) - estimates basic GMM model for dataset - - % mean spectrum calculation - meanspec=mean(data,1); - - % GMM - opts.base = 0; %if baseline correction - opts.draw = 0; %if draw results - opts.mz_thr = NaN; %M/Z threshold for merging - opts.if_merge = 0; %if merge components - opts.if_rem = 0; %if remove additional components - - mdl = ms_gmm_run1(mz,meanspec,opts); - - mdl.meanspec = meanspec; - -end diff --git a/src/MATLAB/load_divik_result.m b/src/MATLAB/load_divik_result.m deleted file mode 100644 index 3f9d1a1..0000000 --- a/src/MATLAB/load_divik_result.m +++ /dev/null @@ -1,8 +0,0 @@ -function divik_result = load_divik_result(path) -%LOAD_DIVIK_RESULT Loads result tree of DiviK algorithm -% divik_result = LOAD_DIVIK_RESULT(path) - - m = matfile(path); - divik_result = m.res_struct; - -end \ No newline at end of file diff --git a/src/MATLAB/macierz_splot.m b/src/MATLAB/macierz_splot.m deleted file mode 100644 index b4f3201..0000000 --- a/src/MATLAB/macierz_splot.m +++ /dev/null @@ -1,19 +0,0 @@ -w=mdl.w; -mu=mdl.mu; -sig=mdl.sig; -KS=mdl.KS; - -s=zeros(KS,size(wid_norm,2)); -parfor i=1:size(wid_norm,2); - y=wid_norm(:,i); - for kks=1:KS - y_gmm=w(kks)*normpdf(mz,mu(kks),sig(kks)); - wid=y.*y_gmm; - s(kks,i)=sum(wid); - end -end - - -data=s; -data=data'; -save('peptydy_data.mat','data','-v7.3'); diff --git a/src/MATLAB/merge_gmm_model_components.m b/src/MATLAB/merge_gmm_model_components.m deleted file mode 100644 index ae4d1eb..0000000 --- a/src/MATLAB/merge_gmm_model_components.m +++ /dev/null @@ -1,10 +0,0 @@ -function merged = merge_gmm_model_components(mdl, original_mz, mean_spectrum, mz_thr) -%MERGE_GMM_MODEL_COMPONENTS -% reduced = MERGE_GMM_MODEL_COMPONENTS(mdl, original_mz, mean_spectrum, mz_thr) - - opts.if_merge = 1; - opts.if_rem = 0; - opts.mz_thr = mz_thr; - [~,~,merged] = post_proc1(original_mz, mean_spectrum, mdl, opts) - -end diff --git a/src/MATLAB/pafft.m b/src/MATLAB/pafft.m deleted file mode 100644 index 6d39381..0000000 --- a/src/MATLAB/pafft.m +++ /dev/null @@ -1,10 +0,0 @@ -function aligned = pafft(mz, data) -%PAFFT Wraps adaptive_PAFFT in easy manner. -% aligned = PAFFT(mz, data) returns a matrix with aligned spectra. M/z -% are in columns, observations are in rows, the same as in matrix input -% data. - - reference = nanmean(data,2); - aligned = adaptive_PAFFT(mz,data', reference, 0.7, 0.1)'; - -end \ No newline at end of file diff --git a/src/MATLAB/preprocessing_wstepny.m b/src/MATLAB/preprocessing_wstepny.m deleted file mode 100644 index 1892f80..0000000 --- a/src/MATLAB/preprocessing_wstepny.m +++ /dev/null @@ -1,64 +0,0 @@ -% jesli potrzebne to ustalenie wspólnego zakresu danych ( do -% peptydów-lipidów tego nie potrzebowałam ale do innych chyba robione to było -% funkcją matlabową msresample ) - -% wczytanie danych mz-wartosci mz, mydata- macierz z widmami -jpegFiles = dir('*.txt'); -numfiles = length(jpegFiles); -dat = importdata(jpegFiles(1).name); -mz=dat(:,1,:); -clearvars dat -i=1; -k=1; -mydata=zeros(size(mz,1),numfiles); -for k = 1:numfiles - my(:,:,:) = importdata(jpegFiles(k).name); - mydata(:,i)=my(:,2,:); - i=1+i; - k - my=[]; -end - -% usuniecie linii bazowej -for i=1:numfiles -y_base(i,:) =msbackadj(mz,mydata(:,i)); -end -y_base(y_base<0)=0; - -% detekcja wartosci odstających ale nie wiem jak dokładnie to było -% wczesniej - - -%uliniawianie -reference=nanmean(y_base,1); -alignedSpectrum= adaptive_PAFFT(mz,y_base, reference, 0.7, 0.1); - - -%normalizacja do średniej z TIC -widma=alignedSpectrum; -%1 i 2 -for i=1:size(widma,2); -Tic1(i)=sum(widma(:,i)); -Tic2(i)=Tic1(i)/size(widma,1); -end -%3 -Tic3=nanmean(Tic2); -%4 -Tic4=Tic2/Tic3; -%5 -wid_norm=[]; -for i=1:size(widma,2); -wid_norm(:,i)=widma(:,i)*1/Tic4(i); -end - -% wyliczenie widma średniego do GMM -meanspec=mean(wid_norm,2); - -% GMM -opts.base = 0; %if baseline correction -opts.draw = 1; %if draw results -opts.mz_thr = 0.3; %M/Z threshold for merging -opts.if_merge = 0; %if merge components -opts.if_rem = 0; %if remove additional components - -mdl = ms_gmm_run(mz,meanspec,opts); diff --git a/src/MATLAB/reduce_gmm_by_component_area.m b/src/MATLAB/reduce_gmm_by_component_area.m deleted file mode 100644 index 7dd331f..0000000 --- a/src/MATLAB/reduce_gmm_by_component_area.m +++ /dev/null @@ -1,9 +0,0 @@ -function reduced = reduce_gmm_by_component_area(mdl, original_mz, mean_spectrum) -%REDUCE_GMM_BY_COMPONENT_AREA -% reduced = REDUCE_GMM_BY_COMPONENT_AREA(mdl, original_mz, mean_spectrum) - - opts.if_rem = 1; - opts.if_merge = 0; - [reduced,~,~] = post_proc1(original_mz, mean_spectrum, mdl, opts) - -end diff --git a/src/MATLAB/reduce_gmm_by_component_height.m b/src/MATLAB/reduce_gmm_by_component_height.m deleted file mode 100644 index a0acb81..0000000 --- a/src/MATLAB/reduce_gmm_by_component_height.m +++ /dev/null @@ -1,9 +0,0 @@ -function reduced = reduce_gmm_by_component_height(mdl, original_mz, mean_spectrum) -%REDUCE_GMM_BY_COMPONENT_HEIGHT -% reduced = REDUCE_GMM_BY_COMPONENT_HEIGHT(mdl, original_mz, mean_spectrum) - - opts.if_rem = 1; - opts.if_merge = 0; - [~,reduced,~] = post_proc1(original_mz, mean_spectrum, mdl, opts) - -end diff --git a/src/MATLAB/remove_baseline.m b/src/MATLAB/remove_baseline.m deleted file mode 100644 index d28990c..0000000 --- a/src/MATLAB/remove_baseline.m +++ /dev/null @@ -1,14 +0,0 @@ -function grounded = remove_baseline(mz, data) -%REMOVE_BASELINE removes baseline from the data -% grounded = REMOVE_BASELINE(mz, data) returns data set where baseline is -% removed, with subsequent m/z in columns and observations in rows. -% Observations order from data matrix is preserved. Matrix data should -% have m/z in columns and observations in rows as well. - - grounded = NaN(size(data)); - for spec_no = 1:size(data, 1) - grounded(spec_no, :) = msbackadj(mz, data(spec_no, :)); - end - grounded(grounded < 0) = 0; - -end \ No newline at end of file diff --git a/src/MATLAB/ticnorm.m b/src/MATLAB/ticnorm.m deleted file mode 100644 index 2eca0fd..0000000 --- a/src/MATLAB/ticnorm.m +++ /dev/null @@ -1,26 +0,0 @@ -function normalized = ticnorm(data) -%TICNORM performs TIC normalization on dataset -% normalized = TICNORM(data) returns TIC normalized dataset. Columns -% correspond to m/z, rows to observations, same as in matrix data. - - spect = data'; - - tic1 = NaN(1,size(spect,2)); - tic2 = NaN(1,size(spect,2)); - %1 i 2 - for i=1:size(spect,2) - tic1(i) = sum(spect(:,i)); - tic2(i) = tic1(i)/size(spect,1); - end - %3 - tic3 = nanmean(tic2); - %4 - tic4 = tic2/tic3; - %5 - normalized = NaN(size(spect)); - for i=1:size(spect,2) - normalized(:,i) = spect(:,i)*1/tic4(i); - end - normalized = normalized'; - -end \ No newline at end of file From a37d62cf9d11b5e136010214f858d90aad905ffd Mon Sep 17 00:00:00 2001 From: Grzegorz Mrukwa Date: Thu, 4 Jan 2018 01:16:34 +0100 Subject: [PATCH 2/4] Remove unnecessary property sheets Missed them last time. --- src/PropertySheets/ManagedCppProject.props | 24 --------------- .../ManagedCppTestProject.props | 24 --------------- src/PropertySheets/NativeProject.props | 24 --------------- src/PropertySheets/NativeTestProject.props | 25 ---------------- src/PropertySheets/OpenCvDependencies.props | 30 ------------------- 5 files changed, 127 deletions(-) delete mode 100644 src/PropertySheets/ManagedCppProject.props delete mode 100644 src/PropertySheets/ManagedCppTestProject.props delete mode 100644 src/PropertySheets/NativeProject.props delete mode 100644 src/PropertySheets/NativeTestProject.props delete mode 100644 src/PropertySheets/OpenCvDependencies.props diff --git a/src/PropertySheets/ManagedCppProject.props b/src/PropertySheets/ManagedCppProject.props deleted file mode 100644 index 3a036ca..0000000 --- a/src/PropertySheets/ManagedCppProject.props +++ /dev/null @@ -1,24 +0,0 @@ - - - - - 8.1 - Unicode - v140 - - - - Level4 - - - - - true - /std:c++latest %(AdditionalOptions) - $(SolutionDir);%(AdditionalIncludeDirectories) - - - $(TargetDir);%(AdditionalLibraryDirectories) - - - \ No newline at end of file diff --git a/src/PropertySheets/ManagedCppTestProject.props b/src/PropertySheets/ManagedCppTestProject.props deleted file mode 100644 index 9ce6bf1..0000000 --- a/src/PropertySheets/ManagedCppTestProject.props +++ /dev/null @@ -1,24 +0,0 @@ - - - - - 8.1 - Unicode - v140 - - - - Level4 - - - - - true - /std:c++latest %(AdditionalOptions) - $(SolutionDir);%(AdditionalIncludeDirectories) - - - $(TargetDir);%(AdditionalLibraryDirectories) - - - \ No newline at end of file diff --git a/src/PropertySheets/NativeProject.props b/src/PropertySheets/NativeProject.props deleted file mode 100644 index 3a036ca..0000000 --- a/src/PropertySheets/NativeProject.props +++ /dev/null @@ -1,24 +0,0 @@ - - - - - 8.1 - Unicode - v140 - - - - Level4 - - - - - true - /std:c++latest %(AdditionalOptions) - $(SolutionDir);%(AdditionalIncludeDirectories) - - - $(TargetDir);%(AdditionalLibraryDirectories) - - - \ No newline at end of file diff --git a/src/PropertySheets/NativeTestProject.props b/src/PropertySheets/NativeTestProject.props deleted file mode 100644 index 3c8bda0..0000000 --- a/src/PropertySheets/NativeTestProject.props +++ /dev/null @@ -1,25 +0,0 @@ - - - - - 8.1 - Unicode - v140 - - - - Level4 - - - - - true - /std:c++latest %(AdditionalOptions) - $(SolutionDir);%(AdditionalIncludeDirectories) - GTEST_LANG_CXX11=1;_UNICODE;UNICODE;%(PreprocessorDefinitions) - - - $(TargetDir);%(AdditionalLibraryDirectories) - - - \ No newline at end of file diff --git a/src/PropertySheets/OpenCvDependencies.props b/src/PropertySheets/OpenCvDependencies.props deleted file mode 100644 index c38101f..0000000 --- a/src/PropertySheets/OpenCvDependencies.props +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - $(SolutionDir)packages\opencv3.3.0.0.1\build\native\lib\$(Platform)\v120\$(Configuration);%(AdditionalLibraryDirectories) - ippicvmt.lib;opencv_bgsegm300d.lib;opencv_bioinspired300d.lib;opencv_calib3d300d.lib;opencv_ccalib300d.lib;opencv_core300d.lib;opencv_datasets300d.lib;opencv_face300d.lib;opencv_features2d300d.lib;opencv_flann300d.lib;opencv_hal300d.lib;opencv_highgui300d.lib;opencv_imgcodecs300d.lib;opencv_imgproc300d.lib;opencv_latentsvm300d.lib;opencv_line_descriptor300d.lib;opencv_ml300d.lib;opencv_objdetect300d.lib;opencv_optflow300d.lib;opencv_photo300d.lib;opencv_reg300d.lib;opencv_rgbd300d.lib;opencv_saliency300d.lib;opencv_shape300d.lib;opencv_stitching300d.lib;opencv_superres300d.lib;opencv_surface_matching300d.lib;opencv_text300d.lib;opencv_tracking300d.lib;opencv_ts300d.lib;opencv_video300d.lib;opencv_videoio300d.lib;opencv_videostab300d.lib;opencv_xfeatures2d300d.lib;opencv_ximgproc300d.lib;opencv_xobjdetect300d.lib;opencv_xphoto300d.lib;%(AdditionalDependencies) - - - xcopy /Y "$(SolutionDir)packages\opencv3.redist.3.0.0.1\build\native\bin\$(Platform)\v120\$(Configuration)\*.dll" "$(OutputPath)" - - - Copy OpenCV DLL-s - - - - - $(SolutionDir)packages\opencv3.3.0.0.1\build\native\lib\$(Platform)\v120\$(Configuration);%(AdditionalLibraryDirectories) - ippicvmt.lib;opencv_bgsegm300.lib;opencv_bioinspired300.lib;opencv_calib3d300.lib;opencv_ccalib300.lib;opencv_core300.lib;opencv_datasets300.lib;opencv_face300.lib;opencv_features2d300.lib;opencv_flann300.lib;opencv_hal300.lib;opencv_highgui300.lib;opencv_imgcodecs300.lib;opencv_imgproc300.lib;opencv_latentsvm300.lib;opencv_line_descriptor300.lib;opencv_ml300.lib;opencv_objdetect300.lib;opencv_optflow300.lib;opencv_photo300.lib;opencv_reg300.lib;opencv_rgbd300.lib;opencv_saliency300.lib;opencv_shape300.lib;opencv_stitching300.lib;opencv_superres300.lib;opencv_surface_matching300.lib;opencv_text300.lib;opencv_tracking300.lib;opencv_ts300.lib;opencv_video300.lib;opencv_videoio300.lib;opencv_videostab300.lib;opencv_xfeatures2d300.lib;opencv_ximgproc300.lib;opencv_xobjdetect300.lib;opencv_xphoto300.lib;%(AdditionalDependencies) - - - xcopy /Y "$(SolutionDir)packages\opencv3.redist.3.0.0.1\build\native\bin\$(Platform)\v120\$(Configuration)\*.dll" "$(OutputPath)" - - - Copy OpenCV DLL-s - - - \ No newline at end of file From d95c44ed6d0f2ae1eb7342cd0fe9f784a6533016 Mon Sep 17 00:00:00 2001 From: Grzegorz Mrukwa Date: Thu, 4 Jan 2018 01:19:31 +0100 Subject: [PATCH 3/4] Shorten README --- README.md | 60 ++++++++++--------------------------------------------- 1 file changed, 11 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 39243b5..2472a10 100644 --- a/README.md +++ b/README.md @@ -4,53 +4,25 @@ Spectre is a versatile tool used for analysis of MALDI-MSI data sets. -For the sake of simplicity, the toolset provided is available to be used -through web application interface, which is currently a work-in-progress. - In order to build and run the application, please refer to the [installation](#install) section. ## About -The project is currently in its early stage. However, it comprises the -implementation of our own spectra modelling based on Gaussian Mixture Models, -and Divisive IK-means algorithm for unsupervised segmentation, which can be -used for efficient dataset compression, as well as for knowledge discovery. -Aformentioned algorithms have already been published and refering links -have been enclosed under [references](#references) section. - -Also, several classification and clusterization methods will be provided soon, -along with supporting statistics. +This is desktop client for Divisive IK-means algorithm for unsupervised +segmentation, which can be used for efficient dataset compression, as +well as for knowledge discovery. It has already been published and +referring links have been enclosed under [references](#references) section. ## Install -### DiviK local client - Please refer to [this manual](docs/Spectre.DivikWpfClient.pdf), as MATLAB Common Runtime has to be installed. -### Spectre service & web application - -The Web service is all the time available [here](http://vaei-bit01.aei.polsl.pl/). -If you still would like to host it yourself, please contact us by -[e-mail](mailto:Grzegorz.Mrukwa@polsl.pl). We will provide you with exhaustive -explanation. - ## Exemplary usage -### DiviK local client - Please refer to [docs](docs/Spectre.DivikWpfClient.pdf). -### Spectre service & web application - -The application is available online [here](http://vaei-bit01.aei.polsl.pl/). - -Right now, our web application allows only for an interactive visualization -of some of the data we were using in the research, along with Divisive -Intelligent K-means algorithm results. More features will get documented -when they appear. - ## How to contribute? Please contact us by [e-mail](mailto:Grzegorz.Mrukwa@polsl.pl). We will answer @@ -70,31 +42,21 @@ This software is part of contribution made by [Data Mining Group of Silesian University of Technology](http://www.zaed.polsl.pl/), rest of which is published [here](https://github.com/ZAEDPolSl). -+ [Marczyk M, Polanska J, Polanski A: Comparison of Algorithms for Profile-Based -Alignment of Low Resolution MALDI-ToF Spectra. In Advances in Intelligent -Systems and Computing, Vol. 242 of Man-Machine Interactions 3, Gruca A, -Czachorski T, Kozielski S, editors. Springer Berlin Heidelberg 2014, p. 193-201 -(ISBN: 978-3-319-02308-3), ICMMI 2013, 22-25.10.2013 Brenna, Poland][1] + [P. Widlak, G. Mrukwa, M. Kalinowska, M. Pietrowska, M. Chekan, J. Wierzgon, M. Gawin, G. Drazek and J. Polanska, "Detection of molecular signatures of oral squamous cell carcinoma and normal epithelium - application of a novel methodology for unsupervised segmentation of imaging mass spectrometry data," -Proteomics, vol. 16, no. 11-12, pp. 1613-21, 2016][2] +Proteomics, vol. 16, no. 11-12, pp. 1613-21, 2016][1] + [M. Pietrowska, H. C. Diehl, G. Mrukwa, M. Kalinowska-Herok, M. Gawin, M. Chekan, J. Elm, G. Drazek, A. Krawczyk, D. Lange, H. E. Meyer, J. Polanska, C. Henkel, P. Widlak, "Molecular profiles of thyroid cancer subtypes: Classification based on features of tissue revealed by mass spectrometry -imaging," Biochimica et Biophysica Acta (BBA)-Proteins and Proteomics, 2016][3] +imaging," Biochimica et Biophysica Acta (BBA)-Proteins and Proteomics, 2016][2] + [G. Mrukwa, G. Drazek, M. Pietrowska, P. Widlak and J. Polanska, "A Novel Divisive iK-Means Algorithm with Region-Driven Feature Selection as a Tool for Automated Detection of Tumour Heterogeneity in MALDI IMS Experiments," in -International Conference on Bioinformatics and Biomedical Engineering, 2016][4] -+ [A. Polanski, M. Marczyk, M. Pietrowska, P. Widlak and J. Polanska, "Signal -partitioning algorithm for highly efficient Gaussian mixture modeling in mass -spectrometry," PloS one, vol. 10, no. 7, p. e0134256, 2015][5] - -[1]: http://link.springer.com/chapter/10.1007/978-3-319-02309-0_20 -[2]: http://onlinelibrary.wiley.com/doi/10.1002/pmic.201500458/pdf -[3]: http://www.sciencedirect.com/science/article/pii/S1570963916302175 -[4]: http://link.springer.com/chapter/10.1007/978-3-319-31744-1_11 -[5]: http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0134256 +International Conference on Bioinformatics and Biomedical Engineering, 2016][3] + +[1]: http://onlinelibrary.wiley.com/doi/10.1002/pmic.201500458/pdf +[2]: http://www.sciencedirect.com/science/article/pii/S1570963916302175 +[3]: http://link.springer.com/chapter/10.1007/978-3-319-31744-1_11 From ab2725e195aba7f7571a79207e7167766e19fcd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20=27Sand3r=27=20Gallus?= Date: Thu, 4 Jan 2018 01:43:05 +0100 Subject: [PATCH 4/4] Correct readme sentences --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2472a10..6cc5aff 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,10 @@ In order to build and run the application, please refer to the ## About -This is desktop client for Divisive IK-means algorithm for unsupervised +This is a desktop client for the Divisive IK-means algorithm for unsupervised segmentation, which can be used for efficient dataset compression, as -well as for knowledge discovery. It has already been published and -referring links have been enclosed under [references](#references) section. +well as knowledge discovery. It has already been published and +referring links have been enclosed under the [references](#references) section. ## Install