diff --git a/matlab/ExampleData/spatial clustering of groups/Example coordinates for the spatial clustering.txt b/matlab/ExampleData/spatial clustering of groups/Example coordinates for the spatial clustering.txt deleted file mode 100644 index 154e33b..0000000 --- a/matlab/ExampleData/spatial clustering of groups/Example coordinates for the spatial clustering.txt +++ /dev/null @@ -1,3 +0,0 @@ - 3.1013610e+006 6.7104850e+006 - 3.1016310e+006 6.7101990e+006 - 3.1015790e+006 6.7101790e+006 diff --git a/matlab/ExampleData/spatial clustering of groups/Example data in BAPS format for group-wise clustering of diploid individuals.txt b/matlab/ExampleData/spatial clustering of groups/Example data in BAPS format for group-wise clustering of diploid individuals.txt deleted file mode 100644 index 57cff17..0000000 --- a/matlab/ExampleData/spatial clustering of groups/Example data in BAPS format for group-wise clustering of diploid individuals.txt +++ /dev/null @@ -1,10 +0,0 @@ --9 102 56 80 100 90 118 90 88 104 1 --9 102 54 82 102 92 116 90 86 104 1 -88 104 56 84 102 -9 120 90 88 100 1 -86 102 56 80 102 -9 116 90 86 100 1 -88 102 54 80 102 90 116 92 -9 100 2 -88 102 56 80 100 90 118 90 -9 104 2 -80 102 54 82 102 92 116 90 86 104 2 -88 104 56 84 102 92 120 90 88 100 2 -86 102 56 80 -9 90 116 90 86 100 3 -88 102 54 80 -9 90 116 92 86 100 3 diff --git a/matlab/ExampleData/spatial clustering of groups/Example data in BAPS format for group-wise clustering of haploid individuals.txt b/matlab/ExampleData/spatial clustering of groups/Example data in BAPS format for group-wise clustering of haploid individuals.txt deleted file mode 100644 index afd5492..0000000 --- a/matlab/ExampleData/spatial clustering of groups/Example data in BAPS format for group-wise clustering of haploid individuals.txt +++ /dev/null @@ -1,5 +0,0 @@ -88 102 56 80 100 90 118 -9 88 104 1 -80 102 54 82 102 92 116 90 86 104 1 -88 104 56 84 102 -9 120 90 88 100 2 -86 102 56 80 102 90 116 90 86 100 2 -88 102 -9 80 102 90 116 92 86 100 3 diff --git a/matlab/ExampleData/spatial clustering of groups/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt b/matlab/ExampleData/spatial clustering of groups/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt deleted file mode 100644 index 3450819..0000000 --- a/matlab/ExampleData/spatial clustering of groups/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt +++ /dev/null @@ -1,20 +0,0 @@ -88 102 56 80 100 90 118 90 88 104 1 -80 102 54 82 102 92 116 90 86 104 1 -88 102 56 80 100 90 118 90 88 104 1 -80 102 54 82 102 92 116 90 86 104 1 -88 -9 56 84 102 92 120 90 88 100 1 -86 -9 56 80 102 90 116 90 86 102 1 -88 -9 56 84 102 92 120 90 88 100 1 -86 -9 56 80 102 90 116 90 86 100 1 -88 102 54 82 102 90 116 92 86 102 2 -88 102 56 80 100 90 118 90 88 104 2 -88 102 54 80 102 90 116 92 86 102 2 -88 102 56 80 100 90 118 90 88 104 2 -80 102 54 82 102 92 116 -9 86 104 2 -88 104 56 84 102 92 120 -9 88 100 2 -80 102 54 82 102 92 116 -9 86 104 2 -88 104 56 84 102 92 120 -9 88 100 2 -86 102 56 80 102 90 116 90 86 100 3 -88 104 54 80 102 90 116 92 86 100 3 -86 102 56 80 104 90 116 90 86 100 3 -88 102 54 80 102 90 116 92 86 100 3 diff --git a/matlab/ExampleData/spatial clustering of groups/Example data in FASTA format for group-wise clustering of haploid individuals.fasta b/matlab/ExampleData/spatial clustering of groups/Example data in FASTA format for group-wise clustering of haploid individuals.fasta deleted file mode 100644 index 9501482..0000000 --- a/matlab/ExampleData/spatial clustering of groups/Example data in FASTA format for group-wise clustering of haploid individuals.fasta +++ /dev/null @@ -1,20 +0,0 @@ ->1 -AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT -TAGTAACATGGTCAGCTGCTCATCCGTATT - ->2 -ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG -AATGCACCACAAACATACCCCACAGGCTCG - ->3 -GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT -GTCTGAGTATGGATGGTGGAGGCGGAGCCG - ->4 -ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG -CAAGGATAAAGTTCACCCATCACGTTGACC - ->5 -ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG -GTAACATAGCTACCAGGTCCCATTCCAGGA - diff --git a/matlab/ExampleData/spatial clustering of groups/group names.txt b/matlab/ExampleData/spatial clustering of groups/group names.txt deleted file mode 100644 index 0895c27..0000000 --- a/matlab/ExampleData/spatial clustering of groups/group names.txt +++ /dev/null @@ -1,3 +0,0 @@ -group_A -groupB -C \ No newline at end of file diff --git a/matlab/ExampleData/spatial clustering of groups/group partition for FASTA-format.txt b/matlab/ExampleData/spatial clustering of groups/group partition for FASTA-format.txt deleted file mode 100644 index ddbf58e..0000000 --- a/matlab/ExampleData/spatial clustering of groups/group partition for FASTA-format.txt +++ /dev/null @@ -1,5 +0,0 @@ -1 -1 -2 -2 -3 \ No newline at end of file diff --git a/matlab/ExampleData/spatial clustering of individuals/Example coordinates for the spatial clustering (not unique).txt b/matlab/ExampleData/spatial clustering of individuals/Example coordinates for the spatial clustering (not unique).txt deleted file mode 100644 index 8dbd3cd..0000000 --- a/matlab/ExampleData/spatial clustering of individuals/Example coordinates for the spatial clustering (not unique).txt +++ /dev/null @@ -1,5 +0,0 @@ - 3.1013610e+006 6.7104850e+006 - 3.1013610e+006 6.7104850e+006 - 3.1015790e+006 6.7101790e+006 - 3.1015910e+006 6.7100650e+006 - 3.1017660e+006 6.7104190e+006 \ No newline at end of file diff --git a/matlab/ExampleData/spatial clustering of individuals/Example coordinates for the spatial clustering.txt b/matlab/ExampleData/spatial clustering of individuals/Example coordinates for the spatial clustering.txt deleted file mode 100644 index 11a0739..0000000 --- a/matlab/ExampleData/spatial clustering of individuals/Example coordinates for the spatial clustering.txt +++ /dev/null @@ -1,5 +0,0 @@ - 3.1013610e+006 6.7104850e+006 - 3.1011410e+006 6.7108850e+006 - 3.1015790e+006 6.7101790e+006 - 3.1015910e+006 6.7100650e+006 - 3.1017660e+006 6.7104190e+006 \ No newline at end of file diff --git a/matlab/ExampleData/spatial clustering of individuals/Example data in BAPS format for clustering of diploid individuals.txt b/matlab/ExampleData/spatial clustering of individuals/Example data in BAPS format for clustering of diploid individuals.txt deleted file mode 100644 index c4b064e..0000000 --- a/matlab/ExampleData/spatial clustering of individuals/Example data in BAPS format for clustering of diploid individuals.txt +++ /dev/null @@ -1,10 +0,0 @@ --9 102 56 80 100 90 118 90 88 104 1 --9 102 54 82 102 92 116 90 86 104 1 -88 104 56 84 102 -9 120 90 88 100 2 -86 102 56 80 102 -9 116 90 86 100 2 -88 102 54 80 102 90 116 92 -9 100 3 -88 102 56 80 100 90 118 90 -9 104 3 -80 102 54 82 102 92 116 90 86 104 4 -88 104 56 84 102 92 120 90 88 100 4 -86 102 56 80 -9 90 116 90 86 100 5 -88 102 54 80 -9 90 116 92 86 100 5 diff --git a/matlab/ExampleData/spatial clustering of individuals/Example data in BAPS format for clustering of haploid individuals.txt b/matlab/ExampleData/spatial clustering of individuals/Example data in BAPS format for clustering of haploid individuals.txt deleted file mode 100644 index e0ed559..0000000 --- a/matlab/ExampleData/spatial clustering of individuals/Example data in BAPS format for clustering of haploid individuals.txt +++ /dev/null @@ -1,5 +0,0 @@ -88 102 56 80 100 90 118 -9 88 104 1 -80 102 54 82 102 92 116 90 86 104 2 -88 104 56 84 102 -9 120 90 88 100 3 -86 102 56 80 102 90 116 90 86 100 4 -88 102 -9 80 102 90 116 92 86 100 5 diff --git a/matlab/ExampleData/spatial clustering of individuals/Example data in BAPS format for clustering of tetraploid individuals.txt b/matlab/ExampleData/spatial clustering of individuals/Example data in BAPS format for clustering of tetraploid individuals.txt deleted file mode 100644 index 00f03f1..0000000 --- a/matlab/ExampleData/spatial clustering of individuals/Example data in BAPS format for clustering of tetraploid individuals.txt +++ /dev/null @@ -1,20 +0,0 @@ -88 102 56 80 100 90 118 90 88 104 1 -80 102 54 82 102 92 116 90 86 104 1 -88 102 56 80 100 90 118 90 88 104 1 -80 102 54 82 102 92 116 90 86 104 1 -88 -9 56 84 102 92 120 90 88 100 2 -86 -9 56 80 102 90 116 90 86 102 2 -88 -9 56 84 102 92 120 90 88 100 2 -86 -9 56 80 102 90 116 90 86 100 2 -88 102 54 82 102 90 116 92 86 102 3 -88 102 56 80 100 90 118 90 88 104 3 -88 102 54 80 102 90 116 92 86 102 3 -88 102 56 80 100 90 118 90 88 104 3 -80 102 54 82 102 92 116 -9 86 104 4 -88 104 56 84 102 92 120 -9 88 100 4 -80 102 54 82 102 92 116 -9 86 104 4 -88 104 56 84 102 92 120 -9 88 100 4 -86 102 56 80 102 90 116 90 86 100 5 -88 104 54 80 102 90 116 92 86 100 5 -86 102 56 80 104 90 116 90 86 100 5 -88 102 54 80 102 90 116 92 86 100 5 diff --git a/matlab/ExampleData/spatial clustering of individuals/Example data in FASTA format for clustering of haploid individuals.fasta b/matlab/ExampleData/spatial clustering of individuals/Example data in FASTA format for clustering of haploid individuals.fasta deleted file mode 100644 index 9501482..0000000 --- a/matlab/ExampleData/spatial clustering of individuals/Example data in FASTA format for clustering of haploid individuals.fasta +++ /dev/null @@ -1,20 +0,0 @@ ->1 -AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT -TAGTAACATGGTCAGCTGCTCATCCGTATT - ->2 -ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG -AATGCACCACAAACATACCCCACAGGCTCG - ->3 -GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT -GTCTGAGTATGGATGGTGGAGGCGGAGCCG - ->4 -ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG -CAAGGATAAAGTTCACCCATCACGTTGACC - ->5 -ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG -GTAACATAGCTACCAGGTCCCATTCCAGGA - diff --git a/matlab/ExampleData/spatial clustering of individuals/individual indices in population.txt b/matlab/ExampleData/spatial clustering of individuals/individual indices in population.txt deleted file mode 100644 index ddbf58e..0000000 --- a/matlab/ExampleData/spatial clustering of individuals/individual indices in population.txt +++ /dev/null @@ -1,5 +0,0 @@ -1 -1 -2 -2 -3 \ No newline at end of file diff --git a/matlab/ExampleData/spatial clustering of individuals/population names.txt b/matlab/ExampleData/spatial clustering of individuals/population names.txt deleted file mode 100644 index c9b6134..0000000 --- a/matlab/ExampleData/spatial clustering of individuals/population names.txt +++ /dev/null @@ -1,3 +0,0 @@ -pop_A -popB -C diff --git a/matlab/add_BAPS_to_path.m b/matlab/add_BAPS_to_path.m deleted file mode 100644 index 908eee9..0000000 --- a/matlab/add_BAPS_to_path.m +++ /dev/null @@ -1,41 +0,0 @@ -baps_path = cd; -addpath(genpath(baps_path)); - -% % addpath(genpath('/home/ai2/murphyk/matlab/BNT')) -% % fails to add directories which only contain directories but no regular files -% % e.g., BNT/inference -% % This bug has been fixed in matlab 6.5 -% -% global BNT_HOME -% % BNT_HOME = 'C:\kpmurphy\matlab\BNT'; -% % BNT_HOME = '/home/ai2/murphyk/matlab/BNT'; -% BNT_HOME = 'D:\Matlab\BNT'; -% -% -% files = {'CPDs', 'general', 'misc', 'graph', 'graph/C', 'Graphics', 'stats1', 'stats2', ... -% 'netlab2', 'HMM', 'Kalman', 'Entropic', 'Entropic/Brand', ... -% 'inference', 'inference/static', 'inference/dynamic', 'inference/online', ... -% 'learning', 'potentials', 'potentials/Tables', ... -% 'examples/dynamic', 'examples/dynamic/HHMM', 'examples/dynamic/HHMM/Square', ... -% 'examples/dynamic/HHMM/Map', ... -% 'examples/dynamic/HHMM/Motif', 'examples/dynamic/SLAM', 'examples/limids', ... -% 'examples/static', 'examples/static/Misc', 'examples/static/Models', ... -% 'examples/static/Belprop', ... -% 'examples/static/Zoubin', 'examples/static/HME', 'examples/static/SCG', ... -% 'examples/static/dtree', ... -% 'examples/static/StructLearn', 'examples/static/fgraph'}; -% -% -% %eval(sprintf('addpath %s', BNT_HOME)); -% eval(sprintf('addpath ''%s'' ', BNT_HOME)); -% % to cope with filenames with spaces, we must use quotes -% % Can use fullfile to get either / for unix or \ for windows -% % use 'isunix' to determine the operating system -% -% for i=1:length(files) -% f = files{i}; -% eval(sprintf('addpath ''%s''/%s', BNT_HOME, f)); -% end -% -% -% diff --git a/matlab/admixture/admix1.m b/matlab/admixture/admix1.m deleted file mode 100644 index beda82a..0000000 --- a/matlab/admixture/admix1.m +++ /dev/null @@ -1,846 +0,0 @@ -function admix1(tietue) -% Jos tietue == -1, ladataan mixture result file. -% Muussa tapauksessa saadaan tarvittavat muuttujat -% tietueen kentist? - -% set for debugging, must be disabled before publishing. -% rand('state',0) - -global COUNTS; global PARTITION; global SUMCOUNTS; -clearGlobalVars; - -if (~isstruct(tietue)) - [filename, pathname] = uigetfile('*.mat', 'Load mixture result file'); - if (filename==0 & pathname==0), return; - else - disp('---------------------------------------------------'); - disp(['Reading mixture result from: ',[pathname filename],'...']); - end - pause(0.0001); - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - - struct_array = load([pathname filename]); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'PARTITION') | ~isfield(c,'rowsFromInd') - disp('Incorrect file format'); - return - end - elseif isfield(struct_array,'PARTITION') %Mideva versio - c = struct_array; - if ~isfield(c,'rowsFromInd') - disp('Incorrect file format'); - return - end - else - disp('Incorrect file format'); - return; - end - - if isfield(c, 'gene_lengths') && ... - (strcmp(c.mixtureType,'linear_mix') | ... - strcmp(c.mixtureType,'codon_mix')) % if the mixture is from a linkage model - % Redirect the call to the linkage admixture function. - c.data = noIndex(c.data,c.noalle); % call function noindex to remove the index column - linkage_admix(c); - return - end - - PARTITION = c.PARTITION; COUNTS = c.COUNTS; SUMCOUNTS = c.SUMCOUNTS; - alleleCodes = c.alleleCodes; adjprior = c.adjprior; popnames = c.popnames; - rowsFromInd = c.rowsFromInd; data = c.data; npops = c.npops; noalle = c.noalle; -else - PARTITION = tietue.PARTITION; - COUNTS = tietue.COUNTS; - SUMCOUNTS = tietue.SUMCOUNTS; - alleleCodes = tietue.alleleCodes; - adjprior = tietue.adjprior; - popnames = tietue.popnames; - rowsFromInd = tietue.rowsFromInd; - data = double(tietue.data); - npops = tietue.npops; - noalle = tietue.noalle; -end - -answers = inputdlg({['Input the minimum size of a population that will'... - ' be taken into account when admixture is estimated.']},... - 'Input minimum population size',[1],... - {'5'}); -if isempty(answers) return; end -alaRaja = str2num(answers{1,1}); -[npops] = poistaLiianPienet(npops, rowsFromInd, alaRaja); - -nloci = size(COUNTS,2); -ninds = size(data,1)/rowsFromInd; - -answers = inputdlg({['Input number of iterations']},'Input',[1],{'50'}); -if isempty(answers) return; end -iterationCount = str2num(answers{1,1}); - -answers = inputdlg({['Input number of reference individuals from each population']},'Input',[1],{'50'}); -if isempty(answers) nrefIndsInPop = 50; -else nrefIndsInPop = str2num(answers{1,1}); -end - -answers = inputdlg({['Input number of iterations for reference individuals']},'Input',[1],{'10'}); -if isempty(answers) return; end -iterationCountRef = str2num(answers{1,1}); - - -% First calculate log-likelihood ratio for all individuals: -likelihood = zeros(ninds,1); -allfreqs = computeAllFreqs2(noalle); -for ind = 1:ninds - omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd); - osuusTaulu = zeros(1,npops); - if PARTITION(ind)==0 - % Yksil?on outlier - elseif PARTITION(ind)~=0 - if PARTITION(ind)>0 - osuusTaulu(PARTITION(ind)) = 1; - else - % Yksilöt, joita ei ole sijoitettu mihinkään koriin. - arvot = zeros(1,npops); - for q=1:npops - osuusTaulu = zeros(1,npops); - osuusTaulu(q) = 1; - arvot(q) = computeIndLogml(omaFreqs, osuusTaulu); - end - [iso_arvo, isoimman_indeksi] = max(arvot); - osuusTaulu = zeros(1,npops); - osuusTaulu(isoimman_indeksi) = 1; - PARTITION(ind)=isoimman_indeksi; - end - logml = computeIndLogml(omaFreqs, osuusTaulu); - logmlAlku = logml; - for osuus = [0.5 0.25 0.05 0.01] - [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml); - end - logmlLoppu = logml; - likelihood(ind) = logmlLoppu-logmlAlku; - end -end - -% Analyze further only individuals who have log-likelihood ratio larger than 3: -to_investigate = (find(likelihood>3))'; -disp('Possibly admixed individuals: '); -for i = 1:length(to_investigate) - disp(num2str(to_investigate(i))); -end -disp(' '); -disp('Populations for possibly admixed individuals: '); -admix_populaatiot = unique(PARTITION(to_investigate)); -for i = 1:length(admix_populaatiot) - disp(num2str(admix_populaatiot(i))); -end - -% THUS, there are two types of individuals, who will not be analyzed with -% simulated allele frequencies: those who belonged to a mini-population -% which was removed, and those who have log-likelihood ratio less than 3. -% The value in the PARTITION for the first kind of individuals is 0. The -% second kind of individuals can be identified, because they do not -% belong to "to_investigate" array. When the results are presented, the -% first kind of individuals are omitted completely, while the second kind -% of individuals are completely put to the population, where they ended up -% in the mixture analysis. These second type of individuals will have a -% unit p-value. - - -% Simulate allele frequencies a given number of times and save the average -% result to "proportionsIt" array. - -proportionsIt = zeros(ninds,npops); -for iterationNum = 1:iterationCount - disp(['Iter: ' num2str(iterationNum)]); - allfreqs = simulateAllFreqs(noalle); % Allele frequencies on this iteration. - - for ind=to_investigate - %disp(num2str(ind)); - omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd); - osuusTaulu = zeros(1,npops); - if PARTITION(ind)==0 - % Yksil?on outlier - elseif PARTITION(ind)~=0 - if PARTITION(ind)>0 - osuusTaulu(PARTITION(ind)) = 1; - else - % Yksilöt, joita ei ole sijoitettu mihinkään koriin. - arvot = zeros(1,npops); - for q=1:npops - osuusTaulu = zeros(1,npops); - osuusTaulu(q) = 1; - arvot(q) = computeIndLogml(omaFreqs, osuusTaulu); - end - [iso_arvo, isoimman_indeksi] = max(arvot); - osuusTaulu = zeros(1,npops); - osuusTaulu(isoimman_indeksi) = 1; - PARTITION(ind)=isoimman_indeksi; - end - logml = computeIndLogml(omaFreqs, osuusTaulu); - - for osuus = [0.5 0.25 0.05 0.01] - [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml); - end - end - proportionsIt(ind,:) = proportionsIt(ind,:).*(iterationNum-1) + osuusTaulu; - proportionsIt(ind,:) = proportionsIt(ind,:)./iterationNum; - end -end - -%disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']); -%disp('each population.'); - -%allfreqs = simulateAllFreqs(noalle); % Simuloidaan alleelifrekvenssisetti -allfreqs = computeAllFreqs2(noalle); % Koitetaan tällaista. - - -% Initialize the data structures, which are required in taking the missing -% data into account: -n_missing_levels = zeros(npops,1); % number of different levels of "missingness" in each pop (max 3). -missing_levels = zeros(npops,3); % the mean values for different levels. -missing_level_partition = zeros(ninds,1); % level of each individual (one of the levels of its population). -for i=1:npops - inds = find(PARTITION==i); - % Proportions of non-missing data for the individuals: - non_missing_data = zeros(length(inds),1); - for j = 1:length(inds) - ind = inds(j); - non_missing_data(j) = length(find(data((ind-1)*rowsFromInd+1:ind*rowsFromInd,:)>0)) ./ (rowsFromInd*nloci); - end - if all(non_missing_data>0.9) - n_missing_levels(i) = 1; - missing_levels(i,1) = mean(non_missing_data); - missing_level_partition(inds) = 1; - else - [ordered, ordering] = sort(non_missing_data); - %part = learn_simple_partition(ordered, 0.05); - part = learn_partition_modified(ordered); - aux = sortrows([part ordering],2); - part = aux(:,1); - missing_level_partition(inds) = part; - n_levels = length(unique(part)); - n_missing_levels(i) = n_levels; - for j=1:n_levels - missing_levels(i,j) = mean(non_missing_data(find(part==j))); - end - end -end - -% Create and analyse reference individuals for populations -% with potentially admixed individuals: -refTaulu = zeros(npops,100,3); -for pop = admix_populaatiot' - - for level = 1:n_missing_levels(pop) - - potential_inds_in_this_pop_and_level = ... - find(PARTITION==pop & missing_level_partition==level &... - likelihood>3); % Potential admix individuals here. - - if ~isempty(potential_inds_in_this_pop_and_level) - - %refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs); - refData = simulateIndividuals(nrefIndsInPop, rowsFromInd, allfreqs, ... - pop, missing_levels(pop,level)); - - disp(['Analysing the reference individuals from pop ' num2str(pop) ' (level ' num2str(level) ').']); - refProportions = zeros(nrefIndsInPop,npops); - for iter = 1:iterationCountRef - %disp(['Iter: ' num2str(iter)]); - allfreqs = simulateAllFreqs(noalle); - - for ind = 1:nrefIndsInPop - omaFreqs = computePersonalAllFreqs(ind, refData, allfreqs, rowsFromInd); - osuusTaulu = zeros(1,npops); - osuusTaulu(pop)=1; - logml = computeIndLogml(omaFreqs, osuusTaulu); - for osuus = [0.5 0.25 0.05 0.01] - [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml); - end - refProportions(ind,:) = refProportions(ind,:).*(iter-1) + osuusTaulu; - refProportions(ind,:) = refProportions(ind,:)./iter; - end - end - for ind = 1:nrefIndsInPop - omanOsuus = refProportions(ind,pop); - if round(omanOsuus*100)==0 - omanOsuus = 0.01; - end - if abs(omanOsuus)<1e-5 - omanOsuus = 0.01; - end - refTaulu(pop, round(omanOsuus*100),level) = refTaulu(pop, round(omanOsuus*100),level)+1; - end - end - end -end - -% Rounding of the results: -proportionsIt = proportionsIt.*100; proportionsIt = round(proportionsIt); -proportionsIt = proportionsIt./100; -for ind = 1:ninds - if ~any(to_investigate==ind) - if PARTITION(ind)>0 - proportionsIt(ind,PARTITION(ind))=1; - end - else - % In case of a rounding error, the sum is made equal to unity by - % fixing the largest value. - if (PARTITION(ind)>0) & (sum(proportionsIt(ind,:)) ~= 1) - [isoin,indeksi] = max(proportionsIt(ind,:)); - erotus = sum(proportionsIt(ind,:))-1; - proportionsIt(ind,indeksi) = isoin-erotus; - end - end -end - -% Calculate p-value for each individual: -uskottavuus = zeros(ninds,1); -for ind = 1:ninds - pop = PARTITION(ind); - if pop==0 % Individual is outlier - uskottavuus(ind)=1; - elseif isempty(find(to_investigate==ind)) - % Individual had log-likelihood ratio<3 - uskottavuus(ind)=1; - else - omanOsuus = proportionsIt(ind,pop); - if abs(omanOsuus)<1e-5 - omanOsuus = 0.01; - end - if round(omanOsuus*100)==0 - omanOsuus = 0.01; - end - level = missing_level_partition(ind); - refPienempia = sum(refTaulu(pop, 1:round(100*omanOsuus), level)); - uskottavuus(ind) = refPienempia / nrefIndsInPop; - end -end - -tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount); - -viewPartition(proportionsIt, popnames); - -talle = questdlg(['Do you want to save the admixture results?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save results as'); - - - if (filename == 0) & (pathname == 0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename '.txt']) - delete('baps4_output.baps') - end - end - - - if (~isstruct(tietue)) - c.proportionsIt = proportionsIt; - c.pvalue = uskottavuus; % Added by Jing - c.mixtureType = 'admix'; % Jing - c.admixnpops = npops; -% save([pathname filename], 'c'); - save([pathname filename], 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 - else - tietue.proportionsIt = proportionsIt; - tietue.pvalue = uskottavuus; % Added by Jing - tietue.mixtureType = 'admix'; - tietue.admixnpops = npops; -% save([pathname filename], 'tietue'); - save([pathname filename], 'tietue', '-v7.3'); % added by Lu Cheng, 08.06.2012 - end -end - - -%---------------------------------------------------------------------------- - - -function [npops] = poistaLiianPienet(npops, rowsFromInd, alaraja) -% Muokkaa tulokset muotoon, jossa outlier yksilöt on -% poistettu. Tarkalleen ottaen poistaa ne populaatiot, -% joissa on vähemmän kuin 'alaraja':n verran yksilöit? - -global PARTITION; -global COUNTS; -global SUMCOUNTS; - -popSize=zeros(1,npops); -for i=1:npops - popSize(i)=length(find(PARTITION==i)); -end -miniPops = find(popSize0))); -for n=1:length(korit) - kori = korit(n); - yksilot = find(PARTITION==kori); - PARTITION(yksilot) = n; -end -COUNTS(:,:,miniPops) = []; -SUMCOUNTS(miniPops,:) = []; - -npops = npops-length(miniPops); - -%------------------------------------------------------------------------ - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - -%-------------------------------------------------------- - - -function allFreqs = computeAllFreqs2(noalle) -% Lisää a priori jokaista alleelia -% joka populaation joka lokukseen j 1/noalle(j) verran. - -global COUNTS; -global SUMCOUNTS; - -max_noalle = size(COUNTS,1); -nloci = size(COUNTS,2); -npops = size(COUNTS,3); - -sumCounts = SUMCOUNTS+ones(size(SUMCOUNTS)); -sumCounts = reshape(sumCounts', [1, nloci, npops]); -sumCounts = repmat(sumCounts, [max_noalle, 1 1]); - -prioriAlleelit = zeros(max_noalle,nloci); -for j=1:nloci - prioriAlleelit(1:noalle(j),j) = 1/noalle(j); -end -prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]); -counts = COUNTS + prioriAlleelit; -allFreqs = counts./sumCounts; - - -function allfreqs = simulateAllFreqs(noalle) -% Lisää jokaista alleelia joka populaation joka lokukseen j 1/noalle(j) -% verran. Näin saatuja counts:eja vastaavista Dirichlet-jakaumista -% simuloidaan arvot populaatioiden alleelifrekvensseille. - -global COUNTS; - -max_noalle = size(COUNTS,1); -nloci = size(COUNTS,2); -npops = size(COUNTS,3); - -prioriAlleelit = zeros(max_noalle,nloci); -for j=1:nloci - prioriAlleelit(1:noalle(j),j) = 1/noalle(j); -end -prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]); -counts = COUNTS + prioriAlleelit; -allfreqs = zeros(size(counts)); - -for i=1:npops - for j=1:nloci - simuloidut = randdir(counts(1:noalle(j),j,i) , noalle(j)); - allfreqs(1:noalle(j),j,i) = simuloidut; - end -end - -%-------------------------------------------------------------------------- - - -function refData = simulateIndividuals(n,rowsFromInd,allfreqs,pop, missing_level) -% simulate n individuals from population pop, such that approximately -% proportion "missing_level" of the alleles are present. - -nloci = size(allfreqs,2); - -refData = zeros(n*rowsFromInd,nloci); -counter = 1; % which row will be generated next. - -for ind = 1:n - for loc = 1:nloci - for k=0:rowsFromInd-1 - if randarvo); -all = min(isommat); - - -%-------------------------------------------------------------------------- - - -function omaFreqs = computePersonalAllFreqs(ind, data, allFreqs, rowsFromInd) -% Laskee npops*(rowsFromInd*nloci) taulukon, jonka kutakin saraketta -% vastaa yksilön ind alleeli. Eri rivit ovat alleelin alkuperäfrekvenssit -% eri populaatioissa. Jos yksilölt?puuttuu jokin alleeli, niin vastaavaan -% kohtaa tulee sarake ykkösi? - -global COUNTS; -nloci = size(COUNTS,2); -npops = size(COUNTS,3); - -rows = data(computeRows(rowsFromInd, ind, 1),:); - -omaFreqs = zeros(npops, (rowsFromInd*nloci)); -pointer = 1; -for loc=1:size(rows,2) - for all=1:size(rows,1) - if rows(all,loc)>=0 - try, - omaFreqs(:,pointer) = ... - reshape(allFreqs(rows(all,loc),loc,:), [npops,1]); - catch - a=0; - end - else - omaFreqs(:,pointer) = ones(npops,1); - end - pointer = pointer+1; - end -end - - -%--------------------------------------------------------------------------- - - -function loggis = computeIndLogml(omaFreqs, osuusTaulu) -% Palauttaa yksilön logml:n, kun oletetaan yksilön alkuperät -% määritellyiksi kuten osuusTaulu:ssa. - -apu = repmat(osuusTaulu', [1 size(omaFreqs,2)]); -apu = apu .* omaFreqs; -apu = sum(apu); - -apu = log(apu); - -loggis = sum(apu); - - -%-------------------------------------------------------------------------- - - -function osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi) -% Päivittää osuusTaulun muutoksen jälkeen. - -global COUNTS; -npops = size(COUNTS,3); - -i1 = rem(indeksi,npops); -if i1==0, i1 = npops; end; -i2 = ceil(indeksi / npops); - -osuusTaulu(i1) = osuusTaulu(i1)-osuus; -osuusTaulu(i2) = osuusTaulu(i2)+osuus; - - -%------------------------------------------------------------------------- - - -function [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml) - -ready = 0; -while ready ~= 1 - muutokset = laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml); - [maxMuutos, indeksi] = max(muutokset(1:end)); - if maxMuutos>0 - osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi); - logml = logml + maxMuutos; - else - ready = 1; - end -end - - - -%--------------------------------------------------------------------------- - - -function muutokset = laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml) -% Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on -% muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -% todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole -% mitään siirrettävää, on vastaavassa kohdassa rivi nollia. - -global COUNTS; -npops = size(COUNTS,3); - -notEmpty = find(osuusTaulu>0.005); -muutokset = zeros(npops); -empties = ~notEmpty; - -for i1=notEmpty - - osuusTaulu(i1) = osuusTaulu(i1)-osuus; - - for i2 = [1:i1-1 i1+1:npops] - osuusTaulu(i2) = osuusTaulu(i2)+osuus; - loggis = computeIndLogml(omaFreqs, osuusTaulu); - muutokset(i1,i2) = loggis-logml; - osuusTaulu(i2) = osuusTaulu(i2)-osuus; - end - - osuusTaulu(i1) = osuusTaulu(i1)+osuus; -end - - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - - -%-------------------------------------------------------------------------- - - -function tulostaAdmixtureTiedot(proportions, uskottavuus, alaRaja, niter) -h0 = findobj('Tag','filename1_text'); -inputf = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outf = get(h0,'String'); clear h0; - -if length(outf)>0 - fid = fopen(outf,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -ninds = length(uskottavuus); -npops = size(proportions,2); -disp(' '); -dispLine; -disp('RESULTS OF ADMIXTURE ANALYSIS BASED'); -disp('ON MIXTURE CLUSTERING OF INDIVIDUALS'); -disp(['Data file: ' inputf]); -disp(['Number of individuals: ' num2str(ninds)]); -disp(['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); -disp(' '); -if fid ~= -1 - fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['RESULTS OF ADMIXTURE ANALYSIS BASED']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['ON MIXTURE CLUSTERING OF INDIVIDUALS']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Data file: ' inputf]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Number of individuals: ' num2str(ninds)]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); fprintf(fid, '\n'); - fprintf(fid, '\n'); -end - -ekaRivi = blanks(6); -for pop = 1:npops - ekaRivi = [ekaRivi blanks(3-floor(log10(pop))) num2str(pop) blanks(2)]; -end -ekaRivi = [ekaRivi blanks(1) 'p']; % Added on 29.08.06 -disp(ekaRivi); -for ind = 1:ninds - rivi = [num2str(ind) ':' blanks(4-floor(log10(ind)))]; - if any(proportions(ind,:)>0) - for pop = 1:npops-1 - rivi = [rivi proportion2str(proportions(ind,pop)) blanks(2)]; - end - rivi = [rivi proportion2str(proportions(ind,npops)) ': ']; - rivi = [rivi ownNum2Str(uskottavuus(ind))]; - end - disp(rivi); - if fid ~= -1 - fprintf(fid,'%s \n',[rivi]); fprintf(fid,'\n'); - end -end -if fid ~= -1 - fclose(fid); -else - diary off -end - -%------------------------------------------------------ - -function str = proportion2str(prob) -%prob belongs to [0.00, 0.01, ... ,1]. -%str is a 4-mark presentation of proportion. - -if abs(prob)<1e-3 - str = '0.00'; -elseif abs(prob-1) < 1e-3; - str = '1.00'; -else - prob = round(100*prob); - if prob<10 - str = ['0.0' num2str(prob)]; - else - str = ['0.' num2str(prob)]; - end; -end; - -%------------------------------------------------------- - -function g=randga(a,b) -flag = 0; -if a>1 -c1 = a-1; c2 = (a-(1/(6*a)))/c1; c3 = 2/c1; c4 = c3+2; c5 = 1/sqrt(a); -U1=-1; -while flag == 0, -if a<=2.5, -U1=rand;U2=rand; -else -while ~(U1>0 & U1<1), -U1=rand;U2=rand; -U1 = U2 + c5*(1-1.86*U1); -end %while -end %if -W = c2*U2/U1; -if c3*U1+W+(1/W)<=c4, -flag = 1; -g = c1*W/b; -elseif c3*log(U1)-log(W)+W<1, -flag = 1; -g = c1*W/b; -else -U1=-1; -end %if -end %while flag -elseif a==1 -g=sum(-(1/b)*log(rand(a,1))); -else -while flag == 0, -U = rand(2,1); -if U(1)>exp(1)/(a+exp(1)), -g = -log(((a+exp(1))*(1-U(1)))/(a*exp(1))); -if U(2)<=g^(a-1), -flag = 1; -end %if -else -g = ((a+exp(1))*U(1)/((exp(1))^(1/a))); -if U(2)<=exp(-g), -flag = 1; -end %if -end %if -end %while flag -g=g/b; -end %if; - - -%------------------------------------------------- - -function svar=randdir(counts,nc) -% Käyttöesim randdir([10;30;60],3) - -svar=zeros(nc,1); -for i=1:nc - svar(i,1)=randga(counts(i,1),1); -end -svar=svar/sum(svar); - -%------------------------------------------------------------------------------------- - - -function rows = computeRows(rowsFromInd, inds, ninds) -% Individuals inds have been given. The function returns a vector, -% containing the indices of the rows, which contain data from the -% individuals. - -rows = inds(:, ones(1,rowsFromInd)); -rows = rows*rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]); -rows = rows - miinus; -rows = reshape(rows', [1,rowsFromInd*ninds]); - -%-------------------------------------------------------------------------- -%----- - -function str = ownNum2Str(number) - -absolute = abs(number); - -if absolute < 1000 - str = num2str(number); -elseif absolute < 10000000 - first_three = rem(number,1000); - next_four = (number - first_three) /1000; - first_three = abs(first_three); - if first_three<10 - first_three = ['00' num2str(first_three)]; - elseif first_three<100 - first_three = ['0' num2str(first_three)]; - else - first_three = num2str(first_three); - end; - str = [num2str(next_four) first_three]; -elseif absolute < 100000000 - first_four = rem(number,10000); - next_four = (number - first_four) /10000; - first_four = abs(first_four); - if first_four<10 - first_four = ['000' num2str(first_four)]; - elseif first_four<100 - first_four = ['00' num2str(first_four)]; - elseif first_four<1000 - first_four = ['0' num2str(first_four)]; - else - first_four = num2str(first_four); - end; - str = [num2str(next_four) first_four]; -else - str = num2str(number); -end; - -%------------------------------------------------ - - -function part = learn_partition_modified(ordered) -% This function is called only if some individual has less than 90 per cent -% non-missing data. The function uses fuzzy clustering for the "non-missingness" -% values, finding maximum three clusters. If two of the found clusters are such -% that all the values are >0.9, then those two are further combined. - -part = learn_simple_partition(ordered,0.05); -nclust = length(unique(part)); -if nclust==3 - mini_1 = min(ordered(find(part==1))); - mini_2 = min(ordered(find(part==2))); - mini_3 = min(ordered(find(part==3))); - - if mini_1>0.9 & mini_2>0.9 - part(find(part==2)) = 1; - part(find(part==3)) = 2; - - elseif mini_1>0.9 & mini_3>0.9 - part(find(part==3)) = 1; - - elseif mini_2>0.9 & mini_3>0.9 - % This is the one happening in practice, since the values are - % ordered, leading to mini_1 <= mini_2 <= mini_3 - part(find(part==3)) = 2; - end -end \ No newline at end of file diff --git a/matlab/admixture/admix2.m b/matlab/admixture/admix2.m deleted file mode 100644 index f682572..0000000 --- a/matlab/admixture/admix2.m +++ /dev/null @@ -1,622 +0,0 @@ -function admix2 - -global PARTITION; global COUNTS; -global SUMCOUNTS; -clearGlobalVars; - -input_type = questdlg('Specify the format of your data: ',... - 'Specify Data Format', ... - 'BAPS-format', 'GenePop-format', 'BAPS-format'); - -switch input_type - -case 'BAPS-format' - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load data in BAPS-format'); - if filename==0 - return; - end - - data = load([pathname filename]); - ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS - if (ninds==0) - disp('Incorrect Data-file.'); - return; - end - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load Partition'); - if filename==0 - return; - end - PARTITION = load([pathname filename]); - if ~(size(PARTITION,2)==1) | ~(size(PARTITION,1)==ninds) - disp('Incorrect Partition-file.'); - return; - end - - input_pops = questdlg(['When using data which are in BAPS-format, '... - 'you can specify the sampling populations of the individuals by '... - 'giving two additional files: one containing the names of the '... - 'populations, the other containing the indices of the first '... - 'individuals of the populations. Do you wish to specify the '... - 'sampling populations?'], ... - 'Specify sampling populations?',... - 'Yes', 'No', 'No'); - if isequal(input_pops,'Yes') - waitALittle; - [namefile, namepath] = uigetfile('*.txt', 'Load population names'); - if namefile==0 - kysyToinen = 0; - else - kysyToinen = 1; - end - if kysyToinen==1 - waitALittle; - [indicesfile, indicespath] = uigetfile('*.txt', 'Load population indices'); - if indicesfile==0 - popnames = []; - else - popnames = initPopNames([namepath namefile],[indicespath indicesfile]); - end - else - popnames = []; - end - else - popnames = []; - end - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - data = data(:, 1:end-1); - npops = length(unique(PARTITION(find(PARTITION>=0)))); - - -case 'GenePop-format' - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load data in GenePop-format'); - if filename==0 - return; - end - - kunnossa = testaaGenePopData([pathname filename]); - if kunnossa==0 - return - end - - [data,popnames]=lueGenePopData([pathname filename]); - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - data = data(:, 1:end-1); - - npops = size(popnames,1); - ninds = size(data,1)/rowsFromInd; - PARTITION = zeros(ninds,1); - ind = 1; - for pop = 1:npops-1 - while (ind < popnames{pop+1,2}) - PARTITION(ind) = pop; - ind = ind+1; - end - end - while (ind <= ninds) - PARTITION(ind) = npops; - ind = ind+1; - end - - all_in_text = questdlg(['Do you wish to use also the last population in the ',... - 'data to define one more population for admixture analysis: '],... - 'Define a population based on the last population in the data?', ... - 'Yes', 'No', 'Yes'); - if isequal(all_in_text, 'No') - PARTITION(find(PARTITION==npops)) = -1; - npops = npops-1; - end - otherwise return -end - -initialPartition = PARTITION(:,ones(1,rowsFromInd))'; -initialPartition = initialPartition(:); -[sumcounts, counts, logml] = ... - initialCounts(initialPartition, data, npops, rowsFromInd, noalle); -COUNTS = counts; SUMCOUNTS = sumcounts; - -clear('initialPartition', 'counts', 'sumcounts', ... - 'filename', 'ind', 'input_type', ... - 'logml', 'ninds', 'pathname', 'pop', 'priorTerm'); -clear('indicesfile','indicespath','input_pops','kysyToinen',... - 'namefile','namepath'); -c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; -c.alleleCodes = alleleCodes; c.adjprior = adjprior; c.popnames = popnames; -c.rowsFromInd = rowsFromInd; c.data = data; c.npops = npops; c.noalle = noalle; -admix1(c); - -%------------------------------------------------------------------------ - - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - -%-------------------------------------------------------- - -function ninds = testaaOnkoKunnollinenBapsData(data) -%Tarkastaa onko viimeisessä sarakkeessa kaikki -%luvut 1,2,...,n johonkin n:ään asti. -%Tarkastaa lisäksi, että on vähintään 2 saraketta. -if size(data,1)<2 - ninds = 0; return; -end -lastCol = data(:,end); -ninds = max(lastCol); -if ~isequal((1:ninds)',unique(lastCol)) - ninds = 0; return; -end - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile, indexFile) -%Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto -% eivät olleet yhtä pitkiä. - -popnames = []; -indices = load(indexFile); - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) & ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -if length(names) ~= length(indices) - disp('The number of population names must be equal to the number of '); - disp('entries in the file specifying indices of the first individuals of '); - disp('each population.'); - return; -end - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = indices(i); -end - -%--------------------------------------------------------------------------------------- - - -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, miltä yksilöltä -% kyseinen rivi on peräisin. Funktio tutkii ensin, että montako -% riviä maksimissaan on peräisin yhdeltä yksilöltä, jolloin saadaan -% tietää onko kyseessä haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhjiä rivejä niille yksilöille, joilta on peräisin vähemmän -% rivejä kuin maksimimäärä. -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytössä oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, että yhden lokuksen j -% koodit saavat arvoja välillä 1,...,noalle(j). -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivejä - miss = maxRowsFromInd-rowsFromInd(ind); % Tältä yksilöltä puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - -%-------------------------------------------------------------------- - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimmäinen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) | isequal(line2,-1) | isequal(line3,-1)) - disp('Incorrect file format 1168'); fclose(fid); - return -end -if (testaaPop(line1)==1 | testaaPop(line2)==1) - disp('Incorrect file format 1172'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi tällöin lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format 1180'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin neljä täytyy sisältää pilkku. - disp('Incorrect file format 1185'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, että pysähtyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1195'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 & ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format 1206'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan jälkeen - if isequal(line4,-1) - disp('Incorrect file format 1212'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin täytyy sisältää pilkku. - disp('Incorrect file format 1217'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, että pysähtyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1228'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - -%------------------------------------------------------ - - -function [data, popnames] = lueGenePopData(tiedostonNimi) - -fid = fopen(tiedostonNimi); -line = fgetl(fid); %ensimmäinen rivi -line = fgetl(fid); %toinen rivi -count = rivinSisaltamienMjonojenLkm(line); - -line = fgetl(fid); -lokusRiveja = 1; -while (testaaPop(line)==0) - lokusRiveja = lokusRiveja+1; - line = fgetl(fid); -end - -if lokusRiveja>1 - nloci = lokusRiveja; -else - nloci = count; -end - -popnames = cell(10,2); -data = zeros(100, nloci+1); -nimienLkm=0; -ninds=0; -poimiNimi=1; -digitFormat = -1; -while line ~= -1 - line = fgetl(fid); - - if poimiNimi==1 - %Edellinen rivi oli 'pop' - nimienLkm = nimienLkm+1; - ninds = ninds+1; - if nimienLkm>size(popnames,1); - popnames = [popnames; cell(10,2)]; - end - nimi = lueNimi(line); - if digitFormat == -1 - digitFormat = selvitaDigitFormat(line); - divider = 10^digitFormat; - end - popnames{nimienLkm, 1} = {nimi}; %Näin se on greedyMix:issäkin?!? - popnames{nimienLkm, 2} = ninds; - poimiNimi=0; - - data = addAlleles(data, ninds, line, divider); - - elseif testaaPop(line) - poimiNimi = 1; - - elseif line ~= -1 - ninds = ninds+1; - data = addAlleles(data, ninds, line, divider); - end -end - -data = data(1:ninds*2,:); -popnames = popnames(1:nimienLkm,:); -fclose(fid); - - -%------------------------------------------------------- - -function nimi = lueNimi(line) -%Palauttaa line:n alusta sen osan, joka on ennen pilkkua. -n = 1; -merkki = line(n); -nimi = ''; -while ~isequal(merkki,',') - nimi = [nimi merkki]; - n = n+1; - merkki = line(n); -end - -%------------------------------------------------------- - -function df = selvitaDigitFormat(line) -% line on ensimmäinen pop-sanan jälkeinen rivi -% Genepop-formaatissa olevasta datasta. funktio selvittää -% rivin muodon perusteella, ovatko datan alleelit annettu -% 2 vai 3 numeron avulla. - -n = 1; -merkki = line(n); -while ~isequal(merkki,',') - n = n+1; - merkki = line(n); -end - -while ~any(merkki == '0123456789'); - n = n+1; - merkki = line(n); -end -numeroja = 0; -while any(merkki == '0123456789'); - numeroja = numeroja+1; - n = n+1; - merkki = line(n); -end - -df = numeroja/2; - - -%------------------------------------------------------ - - -function count = rivinSisaltamienMjonojenLkm(line) -% Palauttaa line:n sisältämien mjonojen lukumäärän. -% Mjonojen välissä täytyy olla välilyönti. -count = 0; -pit = length(line); -tila = 0; %0, jos odotetaan välilyöntejä, 1 jos odotetaan muita merkkejä -for i=1:pit - merkki = line(i); - if (isspace(merkki) & tila==0) - %Ei tehdä mitään. - elseif (isspace(merkki) & tila==1) - tila = 0; - elseif (~isspace(merkki) & tila==0) - tila = 1; - count = count+1; - elseif (~isspace(merkki) & tila==1) - %Ei tehdä mitään - end -end - -%------------------------------------------------------- - -function pal = testaaPop(rivi) -% pal=1, mikäli rivi alkaa jollain seuraavista -% kirjainyhdistelmistä: Pop, pop, POP. Kaikissa muissa -% tapauksissa pal=0. - -if length(rivi)<3 - pal = 0; - return -end -if (all(rivi(1:3)=='Pop') | ... - all(rivi(1:3)=='pop') | ... - all(rivi(1:3)=='POP')) - pal = 1; - return -else - pal = 0; - return -end - -%----------------------------------------------------------------------------------- - - -function [sumcounts, counts, logml] = ... - initialCounts(partition, data, npops, rowsFromInd, noalle) - -nloci=size(data,2); -ninds = size(data,1)/rowsFromInd; - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -initializeGammaln(ninds, rowsFromInd, max(noalle)); - -logml = computeLogml(counts, sumcounts, noalle, data, rowsFromInd); - - -%----------------------------------------------------------------------- - - -function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd) -nloci = size(counts,2); -npops = size(counts,3); -adjnoalle = zeros(max(noalle),nloci); -for j=1:nloci - adjnoalle(1:noalle(j),j)=noalle(j); - if (noalle(j)0)); -end diff --git a/matlab/admixture/learn_simple_partition.m b/matlab/admixture/learn_simple_partition.m deleted file mode 100644 index e9b1bed..0000000 --- a/matlab/admixture/learn_simple_partition.m +++ /dev/null @@ -1,70 +0,0 @@ -function part = learn_simple_partition(ordered_points, fii) -% Goes through all the ways to divide the points into two or three groups. -% Chooses the partition which obtains highest logml. - -npoints = length(ordered_points); - -% One cluster: -val = calculatePopLogml(ordered_points,fii); -bestValue = val; -best_type = 'single'; - -% Two clusters: -for i=1:npoints-1 - % The right endpoint of the first cluster. - val_1 = calculatePopLogml(ordered_points(1:i),fii); - val_2 = calculatePopLogml(ordered_points(i+1:end),fii); - total = val_1 + val_2; - if total>bestValue - bestValue = total; - best_type = 'double'; - best_i = i; - end -end - -% Three clusters: -for i=1:npoints-2 - for j=i+1:npoints-1 - val_1 = calculatePopLogml(ordered_points(1:i),fii); - val_2 = calculatePopLogml(ordered_points(i+1:j),fii); - val_3 = calculatePopLogml(ordered_points(j+1:end),fii); - total = val_1 + val_2 + val_3; - if total>bestValue - bestValue = total; - best_type = 'triple'; - best_i = i; - best_j = j; - end - end -end - -part = zeros(npoints,1); - -switch best_type - case 'single' - part = ones(npoints,1); - case 'double' - part(1:best_i) = 1; - part(best_i+1:end) = 2; - case 'triple' - part(1:best_i) = 1; - part(best_i+1:best_j) = 2; - part(best_j+1:end) = 3; -end - - -%------------------------------------------ - - -function val = calculatePopLogml(points,fii) -% Calculates fuzzy (log) marginal likelihood for a population of real -% values using estimate "fii" for the dispersion value, and Jeffreys prior -% for the mean parameter. - -n = length(points); -fuzzy_ones = sum(points); -fuzzy_zeros = n-fuzzy_ones; - -val = gammaln(1) - gammaln(1 + n/fii) ... - + gammaln(0.5 + fuzzy_ones/fii) + gammaln(0.5 + fuzzy_zeros/fii) ... - - gammaln(0.5) - gammaln(0.5); \ No newline at end of file diff --git a/matlab/admixture/linkage_admix.m b/matlab/admixture/linkage_admix.m deleted file mode 100644 index 8fb4eed..0000000 --- a/matlab/admixture/linkage_admix.m +++ /dev/null @@ -1,1034 +0,0 @@ -function linkage_admix(tietue) - -global COUNTS; global PARTITION; global SUMCOUNTS; -clearGlobalVars; - -PARTITION = tietue.PARTITION; -COUNTS = tietue.COUNTS; -SUMCOUNTS = tietue.SUMCOUNTS; -rowsFromInd = tietue.rowsFromInd; -data = double(tietue.data); -npops = tietue.npops; -noalle = tietue.noalle; -switch tietue.mixtureType - case 'linear_mix' - linkage_model = 'linear'; - case 'codon_mix' - linkage_model = 'codon'; -end -if isfield(tietue,'gene_lengths') - gene_lengths = tietue.gene_lengths; -else - [filename, pathname] = uigetfile('*.txt', 'Load file with lengths of the genes (same order as in data).'); - gene_lengths = load([pathname filename]); -end - -if length(unique(PARTITION(find(PARTITION>0))))==1 - disp('Only one population in the input file'); - disp('No admixture detected'); - return -end - -answers = inputdlg({['Input the minimum size of a population that will'... - ' be taken into account when admixture is estimated.']},... - 'Input minimum population size',1,{'5'}); -if isempty(answers), return; end -alaRaja = str2double(answers{1,1}); -[npops] = poistaLiianPienet(npops, rowsFromInd, alaRaja); - -nloci = size(COUNTS,2); -ninds = size(data,1)/rowsFromInd; - -answers = inputdlg({'Input number of iterations'},'Input',1,{'50'}); -if isempty(answers), return; end -iterationCount = str2double(answers{1,1}); - -answers = inputdlg({'Input number of reference individuals from each population'},'Input',1,{'50'}); -if isempty(answers), nrefIndsInPop = 50; -else nrefIndsInPop = str2double(answers{1,1}); -end - -answers = inputdlg({'Input number of iterations for reference individuals'},'Input',1,{'10'}); -if isempty(answers), return; end -iterationCountRef = str2double(answers{1,1}); - -[cliq_data, sep_data, cliq_counts, component_mat] = createCliqData(data, gene_lengths, noalle, ... - linkage_model, rowsFromInd); - -% Repeat: simulate clique frequencies and estimate proportions. Save the -% average proportions in "proportionsIt". - -proportionsIt = zeros(ninds,npops); -for iterationNum = 1:iterationCount - disp(['Iter: ' num2str(iterationNum)]); - %allfreqs = simulateAllFreqs(noalle); - [cliq_freqs, sep_freqs] = simulateCliqFreqs(cliq_counts, noalle, component_mat, gene_lengths, linkage_model); - for ind=1:ninds - - %omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd); - [ownCliqFreqs, ownSepFreqs] = computePersonalCliqueFreqs(ind, ... - cliq_data, cliq_freqs, sep_data, sep_freqs, rowsFromInd, gene_lengths, linkage_model); - osuusTaulu = zeros(1,npops); - if PARTITION(ind)==0 - % Outlier individual - elseif PARTITION(ind)~=0 - if PARTITION(ind)>0 - osuusTaulu(PARTITION(ind)) = 1; - else - % Individuals who are not assigned to any cluster. - arvot = zeros(1,npops); - for q=1:npops - osuusTaulu = zeros(1,npops); - osuusTaulu(q) = 1; - arvot(q) = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, osuusTaulu); - end - [iso_arvo, isoimman_indeksi] = max(arvot); - osuusTaulu = zeros(1,npops); - osuusTaulu(isoimman_indeksi) = 1; - PARTITION(ind)=isoimman_indeksi; - end - logml = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, osuusTaulu); - - for osuus = [0.5 0.25 0.05 0.01] - [osuusTaulu, logml] = searchBest(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml); - end - end - proportionsIt(ind,:) = proportionsIt(ind,:).*(iterationNum-1) + osuusTaulu; - proportionsIt(ind,:) = proportionsIt(ind,:)./iterationNum; - end -end - -disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']); -disp('each population.'); - -%allfreqs = simulateAllFreqs(noalle); % Simuloidaan alleelifrekvenssisetti -%allfreqs = computeAllFreqs2(noalle); % Koitetaan tällaista. -%refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs); - -exp_cliq_freqs = ... - computeExpectedFreqs(cliq_counts, noalle, component_mat, gene_lengths, linkage_model); -[ref_cliq_data, ref_sep_data] = ... - simulateLinkageIndividuals(nrefIndsInPop, rowsFromInd, exp_cliq_freqs, ... - gene_lengths, noalle, component_mat, linkage_model); -nrefInds = npops*nrefIndsInPop; - -disp(['Analysing the reference individuals in ' num2str(iterationCountRef) ' iterations.']); -refProportions = zeros(nrefInds,npops); -for iter = 1:iterationCountRef - disp(['Iter: ' num2str(iter)]); - %allfreqs = simulateAllFreqs(noalle); - [cliq_freqs, sep_freqs] = simulateCliqFreqs(cliq_counts, noalle, component_mat, gene_lengths, linkage_model); - for ind = 1:nrefInds - %omaFreqs = computePersonalAllFreqs(ind, refData, allfreqs, rowsFromInd); - [ownCliqFreqs, ownSepFreqs] = computePersonalCliqueFreqs(ind, ... - ref_cliq_data, cliq_freqs, ref_sep_data, sep_freqs, rowsFromInd, gene_lengths, linkage_model); - osuusTaulu = zeros(1,npops); - pop = ceil(ind/nrefIndsInPop); - osuusTaulu(pop)=1; - %logml = computeIndLogml(omaFreqs, osuusTaulu); - logml = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, osuusTaulu); - for osuus = [0.5 0.25 0.05 0.01] - %[osuusTaulu, logml] = searchBest(osuus, osuusTaulu, omaFreqs, logml); - [osuusTaulu, logml] = searchBest(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml); - end - refProportions(ind,:) = refProportions(ind,:).*(iter-1) + osuusTaulu; - refProportions(ind,:) = refProportions(ind,:)./iter; - end -end -refTaulu = zeros(npops,100); -for ind = 1:nrefInds - pop = ceil(ind/nrefIndsInPop); - omanOsuus = refProportions(ind,pop); - if round(omanOsuus*100)==0 - omanOsuus = 0.01; - end - if abs(omanOsuus)<1e-5 - omanOsuus = 0.01; - end - refTaulu(pop, round(omanOsuus*100)) = refTaulu(pop, round(omanOsuus*100))+1; -end - -% Rounding: -proportionsIt = proportionsIt.*100; proportionsIt = round(proportionsIt); -proportionsIt = proportionsIt./100; -for ind = 1:ninds - % if sum not equal to one, fix the largest part. - if (PARTITION(ind)>0) && (sum(proportionsIt(ind,:)) ~= 1) - [isoin,indeksi] = max(proportionsIt(ind,:)); - erotus = sum(proportionsIt(ind,:))-1; - proportionsIt(ind,indeksi) = isoin-erotus; - end -end - -% "p-value" for admixture -uskottavuus = zeros(ninds,1); -for ind = 1:ninds - pop = PARTITION(ind); - if pop==0 % an outlier - uskottavuus(ind)=1; - else - omanOsuus = proportionsIt(ind,pop); - if abs(omanOsuus)<1e-5 - omanOsuus = 0.01; - end - if round(omanOsuus*100)==0 - omanOsuus = 0.01; - end - refPienempia = sum(refTaulu(pop, 1:round(100*omanOsuus))); - uskottavuus(ind) = refPienempia / nrefIndsInPop; - end -end - -tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount); - -%viewPartition(proportionsIt, popnames); - -[filename, pathname] = uiputfile('*.mat','Save admixture results as'); -if (filename == 0) & (pathname == 0) - % Cancel was pressed - return -end - -% copy 'baps4_output.baps' into the text file with the same name. -if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename '.txt']) - delete('baps4_output.baps') -end - -tietue.proportionsIt = proportionsIt; -tietue.pvalue = uskottavuus; % Added by Jing -tietue.admixnpops = npops; -tietue.mixtureType = 'admix'; % added by jing on 09.09.2008 -% save([pathname filename], 'tietue'); -save([pathname filename], 'tietue','-v7.3'); % added by Lu Cheng, 08.06.2012 - - -%---------------------------------------------------------------------------- - - -function [npops] = poistaLiianPienet(npops, rowsFromInd, alaraja) -% Muokkaa tulokset muotoon, jossa outlier yksilät on -% poistettu. Tarkalleen ottaen poistaa ne populaatiot, -% joissa on vähemmän kuin 'alaraja':n verran yksiläit? - -global PARTITION; -global COUNTS; -global SUMCOUNTS; - -popSize=zeros(1,npops); -for i=1:npops - popSize(i)=length(find(PARTITION==i)); -end -miniPops = find(popSize0))); -for n=1:length(korit) - kori = korit(n); - yksilot = find(PARTITION==kori); - PARTITION(yksilot) = n; -end -COUNTS(:,:,miniPops) = []; -SUMCOUNTS(miniPops,:) = []; - -npops = npops-length(miniPops); - -%------------------------------------------------------------------------ - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - -%-------------------------------------------------------- - - -function allFreqs = computeAllFreqs2(noalle) -% Lisää a priori jokaista alleelia -% joka populaation joka lokukseen j 1/noalle(j) verran. - -global COUNTS; -global SUMCOUNTS; - -max_noalle = size(COUNTS,1); -nloci = size(COUNTS,2); -npops = size(COUNTS,3); - -sumCounts = SUMCOUNTS+ones(size(SUMCOUNTS)); -sumCounts = reshape(sumCounts', [1, nloci, npops]); -sumCounts = repmat(sumCounts, [max_noalle, 1 1]); - -prioriAlleelit = zeros(max_noalle,nloci); -for j=1:nloci - prioriAlleelit(1:noalle(j),j) = 1/noalle(j); -end -prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]); -counts = COUNTS + prioriAlleelit; -allFreqs = counts./sumCounts; - -%-------------------------------------------------------------------------- - - -function refData = simulateIndividuals(n,rowsFromInd,allfreqs) -% simuloidaan n yksilää jokaisesta populaatiosta. ( - -npops = size(allfreqs,3); -nloci = size(allfreqs,2); -ninds = n*npops; - -refData = zeros(ninds*rowsFromInd,nloci); -counter = 1; % Pitää kirjaa mille riville seuraavaksi simuloidaan. - -for ind = 1:ninds - pop = ceil(ind/n); - for loc = 1:nloci - for k=0:rowsFromInd-1 - refData(counter+k,loc) = simuloiAlleeli(allfreqs,pop,loc); - end - end - counter = counter+rowsFromInd; -end - -function all = simuloiAlleeli(allfreqs,pop,loc) -% Simuloi populaation pop lokukseen loc alleelin. -freqs = allfreqs(:,loc,pop); -cumsumma = cumsum(freqs); -arvo = rand; -isommat = find(cumsumma>arvo); -all = min(isommat); - -%--------------------------------------------------------------------------- - - -function loggis = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, proportions) -% Calculates the likelihood when the origins are defined by "proportions". - -aux = proportions * ownCliqFreqs; -aux = log(aux); -loggis = sum(aux); - -clear aux; - -aux2 = proportions * ownSepFreqs; -aux2 = log(aux2); -loggis = loggis - sum(aux2); - - -%-------------------------------------------------------------------------- - - -function osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi) -% Päivittää osuusTaulun muutoksen jälkeen. - -global COUNTS; -npops = size(COUNTS,3); - -i1 = rem(indeksi,npops); -if i1==0, i1 = npops; end; -i2 = ceil(indeksi / npops); - -osuusTaulu(i1) = osuusTaulu(i1)-osuus; -osuusTaulu(i2) = osuusTaulu(i2)+osuus; - - -%------------------------------------------------------------------------- - - -function [osuusTaulu, logml] = searchBest(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml) - -ready = 0; -while ready ~= 1 - muutokset = calcChanges(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml); - [maxMuutos, indeksi] = max(muutokset(1:end)); - if maxMuutos>0 - osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi); - logml = logml + maxMuutos; - else - ready = 1; - end -end - - - -%--------------------------------------------------------------------------- - - -function muutokset = calcChanges(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml) -% Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on -% muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -% todennäkäisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole -% mitään siirrettävää, on vastaavassa kohdassa rivi nollia. - -global COUNTS; -npops = size(COUNTS,3); - -notEmpty = find(osuusTaulu>0.005); -muutokset = zeros(npops); -empties = ~notEmpty; - -for i1=notEmpty - - osuusTaulu(i1) = osuusTaulu(i1)-osuus; - - for i2 = [1:i1-1 i1+1:npops] - osuusTaulu(i2) = osuusTaulu(i2)+osuus; - loggis = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, osuusTaulu); - muutokset(i1,i2) = loggis-logml; - osuusTaulu(i2) = osuusTaulu(i2)-osuus; - end - - osuusTaulu(i1) = osuusTaulu(i1)+osuus; -end - - -%--------------------------------------------------------------- - - -function dispLine -disp('---------------------------------------------------'); - - -%-------------------------------------------------------------------------- - - -function tulostaAdmixtureTiedot(proportions, uskottavuus, alaRaja, niter) -h0 = findobj('Tag','filename1_text'); -inputf = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outf = get(h0,'String'); clear h0; - -if length(outf)>0 - fid = fopen(outf,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -ninds = length(uskottavuus); -npops = size(proportions,2); -disp(' '); -dispLine; -disp('RESULTS OF ADMIXTURE ANALYSIS BASED'); -disp('ON MIXTURE CLUSTERING OF INDIVIDUALS'); -disp(['Data file: ' inputf]); -disp(['Number of individuals: ' num2str(ninds)]); -disp(['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); -disp(' '); -if fid ~= -1 - fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['RESULTS OF ADMIXTURE ANALYSIS BASED']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['ON MIXTURE CLUSTERING OF INDIVIDUALS']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Data file: ' inputf]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Number of individuals: ' num2str(ninds)]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); fprintf(fid, '\n'); - fprintf(fid, '\n'); -end - -ekaRivi = blanks(6); -for pop = 1:npops - ekaRivi = [ekaRivi blanks(3-floor(log10(pop))) num2str(pop) blanks(2)]; -end -ekaRivi = [ekaRivi blanks(1) 'p']; % Added on 29.08.06 -disp(ekaRivi); -for ind = 1:ninds - rivi = [num2str(ind) ':' blanks(4-floor(log10(ind)))]; - if any(proportions(ind,:)>0) - for pop = 1:npops-1 - rivi = [rivi proportion2str(proportions(ind,pop)) blanks(2)]; - end - rivi = [rivi proportion2str(proportions(ind,npops)) ': ']; - rivi = [rivi ownNum2Str(uskottavuus(ind))]; - end - disp(rivi); - if fid ~= -1 - fprintf(fid,'%s \n',[rivi]); fprintf(fid,'\n'); - end -end -if fid ~= -1 - fclose(fid); -else - diary off -end - -%------------------------------------------------------ - -function str = proportion2str(prob) -%prob belongs to [0.00, 0.01, ... ,1]. -%str is a 4-mark presentation of proportion. - -if abs(prob)<1e-3 - str = '0.00'; -elseif abs(prob-1) < 1e-3; - str = '1.00'; -else - prob = round(100*prob); - if prob<10 - str = ['0.0' num2str(prob)]; - else - str = ['0.' num2str(prob)]; - end; -end; - -%------------------------------------------------------- - -function g=randga(a,b) -flag = 0; -if a>1 -c1 = a-1; c2 = (a-(1/(6*a)))/c1; c3 = 2/c1; c4 = c3+2; c5 = 1/sqrt(a); -U1=-1; -while flag == 0, -if a<=2.5, -U1=rand;U2=rand; -else -while ~(U1>0 & U1<1), -U1=rand;U2=rand; -U1 = U2 + c5*(1-1.86*U1); -end %while -end %if -W = c2*U2/U1; -if c3*U1+W+(1/W)<=c4, -flag = 1; -g = c1*W/b; -elseif c3*log(U1)-log(W)+W<1, -flag = 1; -g = c1*W/b; -else -U1=-1; -end %if -end %while flag -elseif a==1 -g=sum(-(1/b)*log(rand(a,1))); -else -while flag == 0, -U = rand(2,1); -if U(1)>exp(1)/(a+exp(1)), -g = -log(((a+exp(1))*(1-U(1)))/(a*exp(1))); -if U(2)<=g^(a-1), -flag = 1; -end %if -else -g = ((a+exp(1))*U(1)/((exp(1))^(1/a))); -if U(2)<=exp(-g), -flag = 1; -end %if -end %if -end %while flag -g=g/b; -end %if; - - -%------------------------------------------------- - -function svar=randdir(counts,nc) -% Käyttäesim randdir([10;30;60],3) - -svar=zeros(nc,1); -for i=1:nc - svar(i,1)=randga(counts(i,1),1); -end -svar=svar/sum(svar); - -%-------------------------------------------------- - -function waitALittle -A = rand(500); -gammaln(A); - - -%-------------------------------------------------- - -function [cliq_data, sep_data, cliq_counts, component_mat] = ... - createCliqData(data, gene_lengths, noalle, linkage_model, ... - rowsFromInd) -% cliq_data: cell array, each cell corresponds to one gene. Element (i,j) -% in cell k is the code of the allele combination in the j:th clique in -% gene k, for individual i. - -% sep_data: like cliq_data. i:th separator separates cliques i and i+1. - -% cliq_counts: cell array, each cell corresponds to one gene. Each cell is -% a 3-dimensional array, where the element (i,j,k) is the observed count of -% allele combination i, in clique j, in population k. - -% The coding of the allele combinations: If a clique of 3 sites has -% noalle values 3,2,4, then the allele combinations are given numbers in -% lexicographic order: 111, 112, 113, 114, 121, 122, ..., 324. - -%------------------------------------------------------------------------ - -% cliq_data on cell-array, jossa kukin solu vastaa yht?geeni? Alkio (i,j) -% solussa k merkitsee sen alleelikombinaation koodia, joka yksiläll?i -% havaitaan geenin k klikiss?numero j. - -% cliq_counts on cell-array, jossa myäs kukin solu vastaa yht?geeni? -% Kukin solu on kolmiulotteinen taulukko, jonka alkio (i,j,k) on -% populaatiossa k, ko geenin j:nness?klikiss?havaitun alleelikombinaation -% i lukumäär? - -% Alleelikombinaatioiden koodaus: Jos kolmen position klikiss?on (koko -% datassa) noalle:t 3,2,4, (eli ekassa positiossa alleelit 1-3, tokassa 1-2 -% ja kolmannessa alleelit 1-4), niin alleelikombinaatiot numeroidaan -% leksikograafisessa järjestyksess? 111, 112, 113, 114, 121, 122, ..., -% 324. - -%----------------------------------------------------------------------- - -global PARTITION; - -if sum(gene_lengths) ~= size(data,2) - disp('Error 155'); -end -if ~isa(data,'double') - data = double(data); % Required in matlab 6 -end - -ninds = size(data,1); -n_genes = length(gene_lengths); -if strcmp(linkage_model,'codon') - n_cliques = gene_lengths-2; % Number of cliques in each gene -else - n_cliques = gene_lengths-1; % Use linear model -end -max_noalle = zeros(n_genes,1); % Maximum "clique noalle" in each gene. - -component_mat = zeros(n_genes, max(gene_lengths)); -cum_length = cumsum(gene_lengths); -component_mat(1,1:gene_lengths(1))=1:gene_lengths(1); -for i = 2:n_genes - component_mat(i,1:gene_lengths(i)) = cum_length(i-1)+1:cum_length(i); -end - -for i = 1:n_genes - % What is the largest number of different values that are observed for - % some clique in this gene: - number = 0; - if n_cliques(i)<1 - % the gene is shorter than a normal clique. - if gene_lengths(i)==2 - % linkage_model must be 'codon' to end up here.. - positions = component_mat(i, [1 2]); - number = prod(noalle(positions)); - else - % gene_lengths(i) == 1 - positions = component_mat(i,1); - number = noalle(positions); - end - else - for j = 1:n_cliques(i) - if strcmp(linkage_model,'codon'), positions = component_mat(i , j:j+2); - else positions = component_mat(i, j:j+1); - end - - cand = prod(noalle(positions)); - if cand>number - number=cand; - end - end - end - max_noalle(i) = number; -end - -cliq_data = cell(n_genes,1); % An array for each gene. -% (i,j) is the combination which individual i has in clique j (in haploid case..). - -for i = 1:n_genes - if n_cliques(i)<1 - cliq_data{i} = zeros(ninds, 1); - if gene_lengths(i)==2 - % linkage_model must be 'codon' to end up here.. - positions = component_mat(i, [1 2]); - rows = data(:,positions); - observations = (rows(:,1)-1) * noalle(positions(2)) + rows(:,2); - else - positions = component_mat(i,1); - rows = data(:,positions); - observations = rows; - end - cliq_data{i}(:,1) = observations; - else - cliq_data{i} = zeros(ninds, n_cliques(i)); - for j = 1:n_cliques(i) - if strcmp(linkage_model,'codon') - positions = component_mat(i,j:j+2); - rows = data(:, positions); - observations = (rows(:,1)-1) * prod(noalle(positions(2:3))) + ... - (rows(:,2)-1) * noalle(positions(3)) + rows(:,3); - else - positions = component_mat(i,j:j+1); - rows = data(:,positions); - observations = (rows(:,1)-1) * noalle(positions(2)) + rows(:,2); - end - cliq_data{i}(:,j) = observations; - end - end -end - -cliq_counts = cell(n_genes,1); -% (i,j,k) is the count of combination i, in clique j, in population k. - -npops = length(unique(PARTITION)); -for i = 1:n_genes - cliq_counts{i} = zeros(max_noalle(i), max(1,n_cliques(i)), npops); - for j = 1:npops - partition = repmat(PARTITION', [rowsFromInd 1]); - partition = partition(:); % Partition for rows in the data (instead of individuals). - inds_now = find(partition==j); - ninds_now = length(inds_now); - data_now = cliq_data{i}(inds_now,:); - - for k = 1:max(n_cliques(i),1) - apu = zeros(ninds_now, max_noalle(i)); - apu(sub2ind([ninds_now max_noalle(i)],... - (1:ninds_now)', data_now(:,k)))=1; - cliq_counts{i}(:, k, j) = (sum(apu,1))'; - end - end -end - -sep_data = cell(n_genes,1); -n_separators = n_cliques-1; -for i = 1:n_genes - sep_data{i} = zeros(ninds, n_separators(i)); - for j = 1:n_separators(i) - if strcmp(linkage_model, 'codon') - positions = component_mat(i,j+1:j+2); - rows = data(:, positions); - observations = (rows(:,1)-1) * noalle(positions(2)) + rows(:,2); - else - positions = component_mat(i,j+1); - rows = data(:,positions); - observations = rows; - end - sep_data{i}(:,j) = observations; - end -end - - -%------------------------------------------------------ - - -function [cliq_freqs, sep_freqs] = simulateCliqFreqs(cliq_counts, noalle, component_mat, ... - gene_lengths, linkage_model) - -% cliq_freqs: cell-array. Element (i,j,k) in cell m is the frequence of -% combination i, in clique j of the m:th gene, in population k. - -% sep_freqs: like cliq_freqs, but for the separators. - -%------------------------------------------------------------------------ - -% cliq_freqs: cell-array, jossa on vastaavat dimensiot kuin cliq_counts:issa. -% solun m alkio (i,j,k) on geenin m, klikin j, kombinaation i, frekvenssi -% populaatiossa k. - -% sep_freqs: cell-array, kuten cl_freqs, mutta separaattoreille. - -%------------------------------------------------------------------------- - -global PARTITION; - -n_genes = length(cliq_counts); -if strcmp(linkage_model,'codon') - n_cliques = gene_lengths-2; % Number of cliques in each gene -else - n_cliques = gene_lengths-1; % Use linear model -end -npops = length(unique(PARTITION)); - -cliq_freqs = cell(n_genes,1); -sep_freqs = cell(n_genes,1); - -for i=1:n_genes - - cliq_freqs{i} = zeros(size(cliq_counts{i})); - - positions = component_mat(i,1:gene_lengths(i)); - - if n_cliques(i)<1 - % the gene is shorter than a normal clique. - if gene_lengths(i)==2 - % linkage_model must be 'codon' to end up here.. - cliq_noalle = noalle(positions(1)) .* noalle(positions(2)); - sep_noalle = []; - else - % gene_lengths(i) == 1 - cliq_noalle = noalle(positions(1)); - sep_noalle = []; - end - sep_freqs{i} = []; - else - if strcmp(linkage_model, 'codon') - cliq_noalle = noalle(positions(1:end-2)) .* noalle(positions(2:end-1)) .* ... - noalle(positions(3:end)); - sep_noalle = noalle(positions(2:end-2)) .* noalle(positions(3:end-1)); - else - cliq_noalle = noalle(positions(1:end-1)) .* noalle(positions(2:end)); - sep_noalle = noalle(positions(2:end-1)); - end - sep_freqs{i} = zeros(max(sep_noalle), n_cliques(i)-1, npops); - end - - % First clique: - prior = (1 / cliq_noalle(1)) * ones(cliq_noalle(1),1); - counts_now = repmat(prior, [1 1 npops]) + cliq_counts{i}(1:cliq_noalle(1),1,:); - for k = 1:npops - simul = randdir(counts_now(:,1,k), cliq_noalle(1)); - cliq_freqs{i}(1:cliq_noalle(1),1,k) = simul; - end - - for j=2:n_cliques(i) - % Obtain freqs for j-1:th separator by marginalization from j-1:th - % clique, and draw values for the frequencies of the j:th clique: - - aux = cliq_freqs{i}(1:cliq_noalle(j-1), j-1, :); % Freqs of the previous clique - aux = reshape(aux, [sep_noalle(j-1), noalle(positions(j-1)), npops]); - - % Freqs for separator by marginalization from the previous clique: - sep_freqs{i}(1:sep_noalle(j-1),j-1,:) = sum(aux,2); - - prior = (1 / cliq_noalle(j)) * ones(cliq_noalle(j),1); - counts_now = repmat(prior, [1 1 npops]) + cliq_counts{i}(1:cliq_noalle(j),j,:); - for k = 1:npops - % Simulate conditional frequencies: - for m = 1:sep_noalle(j-1) - if strcmp(linkage_model, 'codon') - values = (m-1)*noalle(positions(j+2))+1:m*noalle(positions(j+2)); - else - values = (m-1)*noalle(positions(j+1))+1:m*noalle(positions(j+1)); - end - simul = randdir(counts_now(values,1,k), length(values)); - cliq_freqs{i}(values,j,k) = simul * sep_freqs{i}(m,j-1,k); % MIETI TARKKAAN! - end - end - end -end - - -%-------------------------------------------------------------------- - - -function [ownCliqFreqs, ownSepFreqs] = computePersonalCliqueFreqs(... - ind, cl_data, cl_freqs, sep_data, sep_freqs, rowsFromInd, ... - gene_lengths, linkage_model) - -% ownCliqFreqs is (npops * (n_cliques*rowsFromInd)) table, where each column -% contains the frequencies of the corresponding clique_combination, in -% different populations. - -% ownSepFreqs is (npops * (n_seps*rowsFromInd)) table, like ownCliqFreqs. - -n_genes = length(gene_lengths); -if strcmp(linkage_model,'codon') - n_cliques = gene_lengths-2; - n_cliques = max([n_cliques ones(n_genes,1)], [], 2); % for genes shorter than clique -else - n_cliques = gene_lengths-1; % Use linear model. - n_cliques = max([n_cliques ones(n_genes,1)], [], 2); -end - -total_n_cliques = sum(n_cliques); -npops = size(cl_freqs{1},3); - -ownCliqFreqsXX = zeros(1, total_n_cliques*rowsFromInd, npops); - -pointer = 1; -for i = 1:n_genes - ind_data = cl_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd , :); - for j = 1:n_cliques(i) % MUUTA! - for k = 1:rowsFromInd - code = ind_data(k,j); - ownCliqFreqsXX(1,pointer,:) = cl_freqs{i}(code,j,:); - pointer = pointer+1; - end - end -end - -ownCliqFreqs = (squeeze(ownCliqFreqsXX))'; - -n_separators = n_cliques-1; -total_n_separators = sum(n_separators); - -ownSepFreqsXX = zeros(1, total_n_separators*rowsFromInd, npops); - -pointer = 1; -for i = 1:n_genes - ind_data = sep_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd , :); - for j = 1:n_separators(i) - for k = 1:rowsFromInd - code = ind_data(k,j); - ownSepFreqsXX(1,pointer,:) = sep_freqs{i}(code,j,:); - pointer = pointer+1; - end - end -end - -if (total_n_separators*rowsFromInd)==1 - ownSepFreqs = (squeeze(ownSepFreqsXX)); -else - ownSepFreqs = (squeeze(ownSepFreqsXX))'; -end - - -%------------------------------------------------------------------------- - - -function exp_cliq_freqs = computeExpectedFreqs(cliq_counts, ... - noalle, component_mat, gene_lengths, linkage_model) - -% Returns the expected values for the clique and separator frequencies in -% different populations. - -% exp_cliq_freqs: cell-array. Element (i,j,k) in cell m is the expected -% frequence of combination i, in clique j of the m:th gene, in -% population k. - -n_genes = length(gene_lengths); - -if strcmp(linkage_model, 'codon') - n_cliques = gene_lengths-2; -else - n_cliques = gene_lengths-1; % Linear model -end - -npops = size(cliq_counts{1},3); -exp_cliq_freqs = cell(n_genes,1); - -for i = 1:n_genes - - exp_cliq_freqs{i} = zeros(size(cliq_counts{i})); - positions = component_mat(i,1:gene_lengths(i)); - - if n_cliques(i)<1 - % the gene is shorter than a normal clique. - if gene_lengths(i)==2 - % linkage_model must be 'codon' to end up here.. - cliq_noalle = noalle(positions(1)) .* noalle(positions(2)); - else - % gene_lengths(i) == 1 - cliq_noalle = noalle(positions(1)); - end - else - if strcmp(linkage_model, 'codon') - cliq_noalle = noalle(positions(1:end-2)) .* noalle(positions(2:end-1)) .* ... - noalle(positions(3:end)); - else - cliq_noalle = noalle(positions(1:end-1)) .* noalle(positions(2:end)); - end - end - - for j = 1:max(1, n_cliques(i)) - prior = (1 / cliq_noalle(j)) * ones(cliq_noalle(j),1); - counts_now = repmat(prior, [1 1 npops]) + cliq_counts{i}(1:cliq_noalle(j),j,:); - exp_cliq_freqs{i}(1:cliq_noalle(j),j,:) = ... - counts_now ./ repmat(sum(counts_now,1), [cliq_noalle(j) 1 1]); - end -end - - -%---------------------------------------------------------- - - -function [ref_cliq_data, ref_sep_data] = ... - simulateLinkageIndividuals(n, rowsFromInd, exp_cliq_freqs, gene_lengths, ... - noalle, component_mat, linkage_model) - -% Simulates n individuals from each population using expected frequencies -% for cliques and separators. - -% ref_cliq_data: cell array, each cell corresponds to one gene. Elements -% ((i-1)*rowsFromInd+1:i*rowsFromInd, j) in cell k are the codes of the allele -% combinations in the j:th clique in gene k, for individual i. - -% ref_sep_data: like ref_cliq_data. i:th separator separates cliques i and i+1. - -n_genes = length(gene_lengths); -if strcmp(linkage_model,'codon') - n_cliques = gene_lengths-2; -else - n_cliques = gene_lengths-1; % Linear model -end -npops = size(exp_cliq_freqs{1},3); -ninds = n*npops; - -ref_cliq_data = cell(n_genes,1); -ref_sep_data = cell(n_genes,1); - -for i = 1:n_genes - ref_cliq_data{i} = zeros(ninds*rowsFromInd, max(n_cliques(i),1)); % Added: rowsFromInd - - positions = component_mat(i,1:gene_lengths(i)); - - if strcmp(linkage_model,'codon') - sep_noalle = noalle(positions(2:end-2)) .* noalle(positions(3:end-1)); - else - sep_noalle = noalle(positions(2:end-1)); - end - ref_sep_data{i} = zeros(ninds*rowsFromInd, n_cliques(i)-1); % Added: rowsFromInd - - for ind = 1:ninds - pop = ceil(ind/n); - - % First clique: - freqs = exp_cliq_freqs{i}(:,1,pop); - freqs = repmat(freqs, [1 rowsFromInd]); - codes = simulateCodes(freqs); - ref_cliq_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd, 1) = codes; - - for j = 2:n_cliques(i) - previous_cliq = ref_cliq_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd, j-1); - - % Value for j-1:th separator: - sep_codes = rem(previous_cliq, sep_noalle(j-1)); - sep_codes(find(sep_codes==0)) = sep_noalle(j-1); - ref_sep_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd, j-1) = sep_codes; - - % Value for j:th clique: - if strcmp(linkage_model,'codon') - freqs = zeros(noalle(positions(j+2)),rowsFromInd); - values = zeros(noalle(positions(j+2)),rowsFromInd); - for k = 1:rowsFromInd - values(:,k) = ((sep_codes(k)-1)*noalle(positions(j+2))+1 : sep_codes(k)*noalle(positions(j+2)))'; - freqs(:,k) = exp_cliq_freqs{i}(values(:,k), j, pop); - end - freqs = freqs ./ repmat(sum(freqs,1), [noalle(positions(j+2)) 1]); - else - freqs = zeros(noalle(positions(j+1)),rowsFromInd); - values = zeros(noalle(positions(j+1)),rowsFromInd); - for k = 1:rowsFromInd - values(:,k) = ((sep_codes(k)-1)*noalle(positions(j+1))+1 : sep_codes(k)*noalle(positions(j+1)))'; - freqs(:,k) = exp_cliq_freqs{i}(values(:,k), j, pop); - end - freqs = freqs ./ repmat(sum(freqs,1), [noalle(positions(j+1)) 1]); - end - codes = simulateCodes(freqs); - codes = values(sub2ind(size(values),codes,(1:rowsFromInd)')); - ref_cliq_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd, j) = codes; - end - end -end - -function codes = simulateCodes(freqs) -% Freqs is a table where each column is a distribution. The -% number of columns in freqs must be equal to rowsFromInd. -% A value is drawn from each distribution in different columns. The values -% are saved in codes, which is (rowsFromInd*1) table. - -[nrows, rowsFromInd] = size(freqs); -codes = nrows+1-sum(cumsum(freqs)>repmat(rand(1,rowsFromInd),[nrows 1]),1); -codes = codes'; \ No newline at end of file diff --git a/matlab/compileBaps6.m b/matlab/compileBaps6.m deleted file mode 100644 index 182d481..0000000 --- a/matlab/compileBaps6.m +++ /dev/null @@ -1,6 +0,0 @@ -if ~exist('BAPS_package','dir') - mkdir('BAPS_package'); -end - -mcc -m ./general/baps6.m -a ./admixture -a ./general -a ./graph -a ./independent -a ./linkage -a ./parallel -a ./spatial -d ./BAPS_package - diff --git a/matlab/general/baps4cbf.m b/matlab/general/baps4cbf.m deleted file mode 100644 index 3f67d5e..0000000 --- a/matlab/general/baps4cbf.m +++ /dev/null @@ -1,993 +0,0 @@ -function baps4cbf(action) - -base = findobj('Tag','base_figure'); -%setWindowOnTop(base,'false') - -switch action - - case 'mix1_button' - greedyMix(-1); - - case 'mix2_button' - greedyPopMix; - - case 'trained_button' - trainedMix; - - case 'spatial_button' - spatialMixture; - showmethebaps; - - case 'spatial2_button' - spatialPopMixture; - showmethebaps; - - case 'linkage_button' - linkageMixture_speed; - %linkageMixture_ultraspeed; - showmethebaps; - - case 'admix1_button' - admix1(-1); - showmethebaps; - case 'admix2_button' - admix2; - - case 'compare_menu' - compare; - case 'compare_admix_menu' - compare_admix; - - case 'load_mixture_menu' - loadMixture; - - case 'load_admixture_menu' - loadAdmixture; - - case 'load_spatial_menu' - loadSpatial; - - case 'output_menu' - asetaOutputFile; - - case 'remove_outputfile_menu' - poistaOutputFile; - - case 'close_menu' - closeFile; - - case 'exit_menu' - h0 = findobj('Tag','base_figure'); delete(h0); - h0 = findobj('Tag','image_figure'); delete(h0); - - case 'loadfigure_menu' - loadFigure; - - case 'plot_coordinates_menu' - plotCoordinates; - - case 'partitio_menu' - viewPartitio; - - case 'admix_menu' - viewAdmixture; - - case 'likelihood_menu' - viewLoghood; - - case 'energy_menu' - viewEnergy; - - case 'geneflow_menu' - viewGeneflow; - - case 'voronoi_menu' - voronoiTessellation; - - case 'varmuus_menu' - localUncertainty; - - case 'changecolor_menu' - changeColor; - - case 'helpdoc' - openHelpDoc; - - case 'helponline' - openHelpHtml; - - case 'about' - openAboutWindow; - - case 'calculate_kl' - calculateDis('KL'); - - case 'calculate_nei' - calculateDis('Nei'); - - case 'calculate_hamming' - calculateDis('Hamming'); - - case 'upgma_menu' - viewPhylogeny('upgma'); - - case 'nj_menu' - viewPhylogeny('nj'); - - case 'mutationplot_menu' - mutationPlot(-1); - - case 'fixk_menu' - goToFixedK; - - case 'partitioncompare_menu' - goToPartitionCompare; -end - -return - -%-------------------------------------------------------------------------- -%KUVIEN LATAAMINEN -%-------------------------------------------------------------------------- - - -function loadFigure -waitALittle; -[filename,pathname] = uigetfile('*.fig','Load Figure'); -if (sum(filename)==0) || (sum(pathname)==0) - return; -end -fig_file_name = [pathname filename]; -open(fig_file_name); - -% ---------------------------- -% Old version -% ---------------------------- -% % Loads previously saved figure. -% -% waitALittle; -% [filename,pathname] = uigetfile('*.mat','Load Figure'); -% if (sum(filename)==0) || (sum(pathname)==0) -% return; -% end -% fig_file_name = [pathname filename]; -% %Figure file format must be *.mat. Ensure it: -% isMat = isTheFileMatFile(fig_file_name); -% if isMat == 0 -% msgbox(['Only figures that have been saved in BAPS can be loaded in BAPS. ' ... -% 'Those figures have extension ".mat".'],'Error', ... -% 'error'); -% return; -% end; -% struct_array = load([pathname filename]); -% if isfield(struct_array,'tiedot') %Matlab versio -% tiedot = struct_array.tiedot; -% if ~isfield(tiedot,'info') -% disp('Incorrect file format'); -% return -% end -% elseif isfield(struct_array,'info') %Mideva versio -% tiedot = struct_array; -% else -% disp('Incorrect file format'); -% return; -% end -% -% if isfield(tiedot, 'rows') -% rows = tiedot.rows; -% partition = tiedot.info; -% popnames = tiedot.popnames; -% viewPopMixPartition(partition, rows, popnames); -% else -% popnames = tiedot.popnames; -% info = tiedot.info; -% if (size(info,2)>1) -% %info on osuudet -% osuudet = info; -% viewPartition(osuudet,popnames); -% else -% info = round(info); -% partition = info; -% viewMixPartition(partition, popnames); -% end -% end - - -function isMat = isTheFileMatFile(filename) -%Checks that the file 'filename' is of the -%*.mat format. If so, isMat = 1. Otherwise, isMat = 0. - -len = length(filename); -if len < 5 - isMat = 0; return; -end; -ending = filename(end-3:end); -if isequal(ending,'.mat') - isMat = 1; -else - isMat = 0; -end; - -%-------------------------------------------------------------------- - -function asetaOutputFile -waitALittle; -[filename, pathname] = uiputfile('*.txt', 'Specify output file'); -if filename==0 - return -end - -h0 = findobj('Tag','filename2_text'); -set(h0,'String',[pathname filename]); - - -%--------------------------------------------------- - - -function poistaOutputFile -h0 = findobj('Tag','filename2_text'); -set(h0,'String',''); - - -%----------------------------------------------------- - -function plotCoordinates - -waitALittle; -[filename, pathname] = uigetfile('*.txt', 'Load Coordinate File'); -if filename==0 - return -end -X = load([pathname filename]); -if size(X,2)~=2 - disp('Incorrect file format'); - return -end - -waitALittle; -[filename, pathname] = uigetfile('*.mat', 'Load mixture clustering of individuals'); -%load([pathname filename],'c'); -struct_array = load([pathname filename]); -if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'PARTITION') || ~isfield(c,'rowsFromInd') - disp('Incorrect file format'); - return - end -elseif isfield(struct_array,'PARTITION') %Mideva versio - c = struct_array; - if ~isfield(c,'rowsFromInd') - disp('Incorrect file format'); - return - end -else - disp('Incorrect file format'); - return; -end -PARTITION = c.PARTITION; -if length(PARTITION) ~= size(X,1) - disp('Incorrect number of coordinate pairs.'); - return -end -% h0 = image_figure; -hold on; -for i=1:length(PARTITION) - if X(i,1)>=0 - plot(X(i,1),X(i,2),'Color',[.8 .8 .8]); - text(X(i,1),X(i,2),num2str(PARTITION(i))); - end -end -hold off; - - -%-------------------------------------------------------------------------- -%-------------------------------------------------------------------------- - -function voronoiTessellation -% Tekee tulostiedostosta voronoi tessellaation. -h0 = findobj('Tag','load_menu'); -c = get(h0,'UserData'); -% waitALittle; -% [filename, pathname] = uigetfile('*.mat', 'Load mixture clustering'); -% %load([pathname filename],'c'); -% struct_array = load([pathname filename]); -% if isfield(struct_array,'c') %Matlab versio -% c = struct_array.c; -% if ~isfield(c,'PARTITION') || ~isfield(c,'rowsFromInd') -% disp('Incorrect file format'); -% return -% end -% elseif isfield(struct_array,'PARTITION') %Mideva versio -% c = struct_array; -% if ~isfield(c,'rowsFromInd') -% disp('Incorrect file format'); -% return -% end -% else -% disp('Incorrect file format'); -% return; -% end -% -% if ~isfield(c, 'pointers') -% disp('Coordinate data missing from the result file'); -% return; -% end - -pointers = c.pointers; vorPoints = c.vorPoints; vorCells = c.vorCells; -coordinates = c.coordinates; names = c.names; - -if isequal(c.mixtureType, 'pop') || isequal(c.mixtureType, 'spatialPop') - PARTITION = c.groupPartition; -else - PARTITION = c.PARTITION; -end - -talle = questdlg(['Do you want names to be visible in the colored ' ... - 'Voronoi tessellation?'], 'Names visible?', 'Yes', 'No', 'Yes'); - -if isequal(talle,'No') - names = -1; -end -vorPlot(vorPoints, vorCells, PARTITION, pointers, coordinates, names); - -%-------------------------------------------------------------------------- - -function localUncertainty -% Tekee tulostiedostosta kolmiulotteisen lokaalia epävarmuutta kuvaavan -% kuvan. - -h0 = findobj('Tag','load_menu'); -c = get(h0,'UserData'); -% waitALittle; -% [filename, pathname] = uigetfile('*.mat', 'Load mixture clustering'); -% %load([pathname filename],'c'); -% struct_array = load([pathname filename]); -% if isfield(struct_array,'c') %Matlab versio -% c = struct_array.c; -% if ~isfield(c,'PARTITION') || ~isfield(c,'rowsFromInd') -% disp('Incorrect file format'); -% return -% end -% elseif isfield(struct_array,'PARTITION') %Mideva versio -% c = struct_array; -% if ~isfield(c,'rowsFromInd') -% disp('Incorrect file format'); -% return -% end -% else -% disp('Incorrect file format'); -% return; -% end -% -% if ~isfield(c, 'pointers') -% disp('Coordinate data missing from the result file'); -% return; -% end - -pointers = c.pointers; vorPoints = c.vorPoints; vorCells = c.vorCells; -coordinates = c.coordinates; names = c.names; -varmuus = c.varmuus; - -if isequal(c.mixtureType, 'pop') || isequal(c.mixtureType, 'spatialPop') - PARTITION = c.groupPartition; -else - PARTITION = c.PARTITION; -end - -talle = questdlg('Do you want names to be visible in the plot?', ... - 'Names visible?', 'Yes', 'No', 'Yes'); - -if isequal(talle,'No') - names = -1; -end - -plotVarmuus(vorPoints, vorCells, pointers, varmuus, coordinates, ... - PARTITION, names); - - - -%-------------------------------------------------------------------------- - -function viewPartitio - -h0 = findobj('Tag','load_menu'); -c = get(h0,'UserData'); - -% waitALittle; -% [filename, pathname] = uigetfile('*.mat', 'Load mixture clustering'); -% %load([pathname filename],'c'); -% if (sum(filename)==0) || (sum(pathname)==0) -% return; -% end -% struct_array = load([pathname filename]); -% if isfield(struct_array,'c') %Matlab versio -% c = struct_array.c; -% if ~isfield(c,'PARTITION') || ~isfield(c,'rowsFromInd') -% disp('Incorrect file format'); -% return -% end -% elseif isfield(struct_array,'PARTITION') %Mideva versio -% c = struct_array; -% if ~isfield(c,'rowsFromInd') -% disp('Incorrect file format'); -% return -% end -% else -% disp('Incorrect file format'); -% return; -% end - -if isequal(c.mixtureType, 'pop') || isequal(c.mixtureType, 'spatialPop') - viewPopMixPartition(c.groupPartition, c.rows, c.popnames); -elseif isequal(c.mixtureType, 'trained') - viewMixPartition(c.PARTITION, []); -else - viewMixPartition(c.PARTITION, c.popnames); -end - -function openHelpDoc -% s = fileparts(which('BAPS4manual.doc')); -% helpwin(s); -if strcmp(computer,'PCWIN') - % s = fileparts(which('baps4.exe')); - % winopen([s '\BAPS4manual.doc']); - winopen('BAPS5manual.doc'); -end - -function openHelpHtml -% web http://www.rni.helsinki.fi/~jic/bapspage.html -% web('http://www.rni.helsinki.fi/~jic/bapspage.html','-browser') -% web http://www.rni.helsinki.fi/~jic/bapspage.html -new; -if strcmp(computer,'PCWIN') - dos('start http://www.abo.fi/fak/mnf/mate/jc/software/baps.html'); % For the compiled version -end - -function openAboutWindow -info{1}=''; -info{2}='Bayesian Analysis of Population Structure (BAPS)'; -info{3}=''; -info{4}='Version 6.0'; -info{5}=''; -info{6}='Author: Jukka Corander, Pekka Marttinen, Jukka Siren, Jing Tang and Lu Cheng'; -info{7}=''; -info{8}='Copyright 2005-2012. All Rights Reserved'; -info{9}=''; -info{10}='Please view the reference page when using as part of research'; -info{11}='at http://www.helsinki.fi/bsg/software/BAPS'; -info{12} =''; -helpdlg(info,'About'); - -%-------------------------------------------------------------------------- - -function viewAdmixture - -% waitALittle; -% [filename, pathname] = uigetfile('*.mat', 'Load admixture results.'); -% if (sum(filename)==0) || (sum(pathname)==0) -% return; -% end -% %load([pathname filename],'c'); -% struct_array = load([pathname filename]); -disp('---------------------------------------------------'); -disp('Viewing the admixture result...'); -h0 = findobj('Tag','load_menu'); -c = get(h0,'UserData'); -h0 = findobj('Tag','filename1_text'); -filename = get(h0,'String'); - -% if isfield(struct_array,'c') %Matlab versio -% c = struct_array.c; -% if ~isfield(c,'proportionsIt') -% disp('*** ERROR: Incorrect file format'); -% return -% end -% elseif isfield(struct_array,'proportionsIt') %Mideva versio -% c = struct_array; -% if ~isfield(c,'proportionsIt') -% disp('*** ERROR: Incorrect file format'); -% return -% end -% else -% disp('*** ERROR: Incorrect file format'); -% return; -% end - -% mixtureType = c.mixtureType; -proportionsIt = c.proportionsIt; -popnames = c.popnames; partition = c.PARTITION; -mixtureType = c.mixtureType; -% if strcmp(mixtureType,'linkage_mix') % For bacterial clustering data -% if isempty(popnames) || size(popnames,1)==size(partition,1) -%if strcmp(mixtureType, 'admix') - if isempty(popnames) - ninds = size(partition,1); - popnames=cell(ninds,2); - for ind=1:ninds - popnames{ind,1}=cellstr(num2str(ind)); - end - popnames(:,2)=num2cell((1:ninds)'); - end - - npops = c.npops; % all the clusters including outliers - admixnpops = c.admixnpops; - - if ~isfield(c,'pvalue') % compatiable with old data - disp('*** WARNING: pvalue is not found in the admixture result.'); - disp('*** WARNING: Old admixture file.'); - pvalue = ones(size(partition,1),1); - else - pvalue = c.pvalue; - end - view_admixture(proportionsIt,npops,admixnpops, ... - popnames,partition,pvalue,filename); -%else -% disp('*** ERROR: incorrect admixture data.'); - % put which variable as the input? - % admixnpops = c.admixnpops; -% npops = c.npops; -% talle = questdlg(['Do you want individual names to be visible in the admixture ' ... -% 'result graphics?'], 'Names visible?', 'Yes', 'No', 'Yes'); -% if isequal(talle,'No') -% viewPartition2(proportionsIt, [], npops, partition, filename); -% else -% viewPartition2(proportionsIt, popnames, npops, partition, filename); -% end -% end - -%-------------------------------------------------------------------------- -function viewLoghood - view_loglikelihood; - -function viewEnergy - view_energy; - %-------------------------------------------------------------------------- - function viewGeneflow - view_geneflow; - -%-------------------------------------------------------------------------- -function changeColor() - h0 = findobj('Tag','base_figure'); - c = uisetcolor(h0,'Change color'); - h1 = findobj('Tag','datafile_text'); - h2 = findobj('Tag','outputfile_text'); - h3 = findobj('Tag','filename1_text'); - h4 = findobj('Tag','filename2_text'); - set(h1,'BackGroundColor',c); - set(h2,'BackGroundColor',c); - set(h3,'BackGroundColor',c); - set(h4,'BackGroundColor',c); - drawnow; -%----------------------------------------------------------------------- -function showmethebaps() -h0 = findobj('Tag','base_figure'); -%setWindowOnTop(h0,'true') -goToDefault -h0 = findobj('Tag','load_menu'); -set(h0,'UserData',[]); -%setWindowOnTop(h0,'false') - - -%----------------------------------------------------------------------- -function loadMixture - -waitALittle; -[filename, pathname] = uigetfile('*.mat', 'Load mixture result'); -%load([pathname filename],'c'); -if (sum(filename)==0) || (sum(pathname)==0) - return; -end -disp('---------------------------------------------------'); -disp('In loading the mixture result...'); -struct_array = load([pathname filename]); -if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'PARTITION') || ~isfield(c,'rowsFromInd') - disp('*** ERROR: Incorrect file format'); - return - end -elseif isfield(struct_array,'PARTITION') %Mideva versio - c = struct_array; - if ~isfield(c,'rowsFromInd') - disp('*** ERROR: Incorrect file format'); - return - end -else - disp('*** ERROR: Incorrect file format'); - return; -end -% Save gathered information to 'mixture_menu's UserData: -h0 = findobj('Tag','load_menu'); -set(h0,'UserData',c); -clear c; -%Set the name of the datafile to screen. -h1 = findobj('Tag','filename1_text'); -if exist('pathname') - filename = [pathname filename]; -end -set(h1,'String',filename); -h1 = findobj('Tag','datafile_text'); -set(h1,'String','Mixture result:'); -disp('Mixture result loaded.'); -goToMixtureAnalysis - -%-------------------------------------------------------------------------- -function goToMixtureAnalysis -set(findobj('Tag','graph_menu'), 'Enable','on'); -set(findobj('Tag','partitio_menu'), 'Enable','on'); -set(findobj('Tag','likelihood_menu'), 'Enable','on'); -set(findobj('Tag','energy_menu'), 'Enable','on'); -set(findobj('Tag','distances_menu'), 'Enable','on'); -set(findobj('Tag','kl_menu'), 'Enable','on'); -set(findobj('Tag','nei_menu'), 'Enable','on'); -set(findobj('Tag','close_menu'), 'Enable','on'); -set(findobj('Tag','phylogeny_menu'), 'Enable','on'); -set(findobj('Tag','upgma_menu'), 'Enable','on'); -set(findobj('Tag','nj_menu'), 'Enable','on'); -set(findobj('Tag','geneflow_menu'), 'Enable','off'); -set(findobj('Tag','admix_menu'), 'Enable','off'); -set(findobj('Tag','mutationplot_menu'), 'Enable','on'); - - - -%-------------------------------------------------------------------------- -function goToDefault -set(findobj('Tag','graph_menu'), 'Enable','off'); -set(findobj('Tag','distances_menu'), 'Enable','off'); - - - - -%-------------------------------------------------------------------------- -function loadSpatial -% Tekee tulostiedostosta voronoi tessellaation. - -waitALittle; -[filename, pathname] = uigetfile('*.mat', 'Load spatial mixture/admixture clustering'); -%load([pathname filename],'c'); -if (sum(filename)==0) || (sum(pathname)==0) - return; -end -struct_array = load([pathname filename]); -disp('---------------------------------------------------'); -disp('In loading the spatial mixture/admixture result...'); -if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'PARTITION') || ~isfield(c,'mixtureType') - disp('*** ERROR: Incorrect file format'); - return - end - if ~strcmp(c.mixtureType,'spatial') && ~strcmp(c.mixtureType,'spatialPop') - disp('*** ERROR: Incorrect file format'); - return - end -elseif isfield(struct_array,'PARTITION') %Mideva versio - c = struct_array; - if ~isfield(c,'rowsFromInd') - disp('*** ERROR: Incorrect file format'); - return - end -else - disp('*** ERROR: Incorrect file format'); - return; -end - -if ~isfield(c, 'pointers') - disp('*** ERROR: Coordinate data missing from the result file'); - return; -end - -% Save gathered information to 'spatialmixture_menu's UserData: -h0 = findobj('Tag','load_menu'); -set(h0,'UserData',c); - -%Set the name of the datafile to screen. -h1 = findobj('Tag','filename1_text'); -if exist('pathname') - filename = [pathname filename]; -end -set(h1,'String',filename); -h1 = findobj('Tag','datafile_text'); -if isfield(c,'admixnpops') - set(h1,'String','Spatial AdMixture Result:'); - disp('Spatial admixture result loaded.'); - goToSpatialAdMixtureAnalysis -else - set(h1,'String','Spatial Mixture Result:'); - disp('Spatial mixture result loaded.'); - goToSpatialMixtureAnalysis -end -clear c; - - -%-------------------------------------------------------------------------- -function goToSpatialMixtureAnalysis -set(findobj('Tag','graph_menu'), 'Enable','on'); -set(findobj('Tag','plot_coordinates_menu'), 'Enable','on'); -set(findobj('Tag','voronoi_menu'), 'Enable','on'); -set(findobj('Tag','varmuus_menu'), 'Enable','on'); -set(findobj('Tag','distances_menu'), 'Enable','on'); -set(findobj('Tag','kl_menu'), 'Enable','on'); -set(findobj('Tag','nei_menu'), 'Enable','on'); -set(findobj('Tag','close_menu'), 'Enable','on'); -set(findobj('Tag','geneflow_menu'), 'Enable','off'); -set(findobj('Tag','admix_menu'), 'Enable','off'); -set(findobj('Tag','likelihood_menu'), 'Enable','on'); -set(findobj('Tag','partitio_menu'), 'Enable','off'); - -%-------------------------------------------------------------------------- -function goToSpatialAdMixtureAnalysis -set(findobj('Tag','graph_menu'), 'Enable','on'); -set(findobj('Tag','plot_coordinates_menu'), 'Enable','on'); -set(findobj('Tag','voronoi_menu'), 'Enable','on'); -set(findobj('Tag','varmuus_menu'), 'Enable','on'); -set(findobj('Tag','distances_menu'), 'Enable','on'); -set(findobj('Tag','kl_menu'), 'Enable','on'); -set(findobj('Tag','nei_menu'), 'Enable','on'); -set(findobj('Tag','close_menu'), 'Enable','on'); -set(findobj('Tag','geneflow_menu'), 'Enable','off'); -set(findobj('Tag','admix_menu'), 'Enable','on'); -set(findobj('Tag','likelihood_menu'), 'Enable','on'); -set(findobj('Tag','partitio_menu'), 'Enable','off'); -%-------------------------------------------------------------------------- -function closeFile -h0 = findobj('Tag','load_menu'); -set(h0,'UserData',[]); -h0 = findobj('Tag','datafile_text'); -set(h0,'String','Data File:'); -h0 = findobj('Tag','filename1_text'); -set(h0,'String',''); - -set(findobj('Tag','close_menu'), 'Enable','off'); -set(findobj('Tag','graph_menu'), 'Enable','off'); -set(findobj('Tag','plot_coordinates_menu'), 'Enable','off'); -set(findobj('Tag','partitio_menu'), 'Enable','off'); -set(findobj('Tag','likelihood_menu'), 'Enable','off'); -set(findobj('Tag','admix_menu'), 'Enable','off'); -set(findobj('Tag','geneflow_menu'), 'Enable','off'); -set(findobj('Tag','voronoi_menu'), 'Enable','off'); -set(findobj('Tag','varmuus_menu'), 'Enable','off'); - -set(findobj('Tag','distances_menu'), 'Enable','off'); -set(findobj('Tag','kl_menu'), 'Enable','off'); -set(findobj('Tag','nei_menu'), 'Enable','off'); - - -%----------------------------------------------------------------------- -function loadAdmixture -waitALittle; -[filename, pathname] = uigetfile('*.mat', 'Load admixture results.'); -if (sum(filename)==0) || (sum(pathname)==0) - return; -end -%load([pathname filename],'c'); -disp('---------------------------------------------------'); -disp('In loading the admixture result...'); -struct_array = load([pathname filename]); - -if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'proportionsIt') - disp('*** ERROR: Incorrect file format'); - return - end -elseif isfield(struct_array,'proportionsIt') %Mideva versio - c = struct_array; - if ~isfield(c,'proportionsIt') - disp('*** ERROR: Incorrect file format'); - return - end -elseif isfield(struct_array,'tietue') - c = struct_array.tietue; - if ~isfield(c,'proportionsIt') - disp('*** ERROR: Incorrect file format'); - return - end -else - disp('*** ERROR: Incorrect file format'); - return; -end - -% Save gathered information to 'mixture_menu's UserData: -h0 = findobj('Tag','load_menu'); -set(h0,'UserData',c); -clear c; -%Set the name of the datafile to screen. -h1 = findobj('Tag','filename1_text'); -if exist('pathname') - filename = [pathname filename]; -end -set(h1,'String',filename); -h1 = findobj('Tag','datafile_text'); -set(h1,'String','Admixture result:'); -disp('Admixture result loaded.'); -goToAdmixtureAnalysis - -%-------------------------------------------------------------------------- -function goToAdmixtureAnalysis -set(findobj('Tag','graph_menu'), 'Enable','on'); -set(findobj('Tag','admix_menu'), 'Enable','on'); -set(findobj('Tag','geneflow_menu'), 'Enable','on'); -% set(findobj('Tag','distances_menu'), 'Enable','on'); -% set(findobj('Tag','kl_menu'), 'Enable','on'); -% set(findobj('Tag','nei_menu'), 'Enable','on'); -set(findobj('Tag','close_menu'), 'Enable','on'); -% set(findobj('Tag','likelihood_menu'), 'Enable','on'); - - -%-------------------------------------------------------------------------- -function calculateDis(type) -if exist('baps4_output.baps','file') - delete('baps4_output.baps') -else - diary('baps4_output.baps') -end - -h0 = findobj('Tag','load_menu'); -c = get(h0,'UserData'); -npops = c.npops; -COUNTS = c.COUNTS; -adjprior = c.adjprior; -data = c.data; -partition = c.PARTITION; -clear c; - -if npops > 1 - dist_mat = zeros(npops, npops); - maxnoalle = size(COUNTS,1); - nloci = size(COUNTS,2); - d = zeros(maxnoalle, nloci, npops); - - - switch type - case 'KL' - prior = adjprior; - prior(find(prior==1))=0; - nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. - prior(1,nollia)=1; - for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); - end - - ekarivi = num2str(npops); - disp('--------------------------------------'); - disp('KL-divergence matrix in PHYLIP format:'); - disp('--------------------------------------'); - disp(ekarivi); - for pop1 = 1:npops - % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - dist_mat(pop1,pop2) = div; - end - end - - case 'Nei' - for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); - end - - ekarivi = num2str(npops); - disp('--------------------------------------'); - disp('Nei-divergence matrix in PHYLIP format:'); - disp('--------------------------------------'); - disp(ekarivi); - for pop1 = 1:npops - % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div1 = sum(sum(dist1.*dist2)); - div2 = sqrt(sum(sum(dist1.^2)))*sqrt(sum(sum(dist2.^2))); - div = -log(div1/div2); - dist_mat(pop1,pop2) = div; - end - end - case 'Hamming' - ekarivi = num2str(npops); - disp('--------------------------------------'); - disp('Hamming distance matrix in PHYLIP format:'); - disp('--------------------------------------'); - disp(ekarivi); - for pop1 = 1:npops - for pop2 = 1:pop1-1 - dist_mat(pop1,pop2) = hamming_dist(data(logical(partition==pop1),[1:end-1]),... - data(logical(partition==pop2),[1:end-1])); - end - end - end - -end - -dist_mat = dist_mat + dist_mat'; % make it symmetric -for pop1 = 1:npops - rivi = ['Cluster_' num2str(pop1) ' ']; - for pop2 = 1:npops - rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; - end - disp(rivi); -end -diary off - -% --------------------------------------------------------------------- -% Save the result. -% Jing - 26.12.2005 -talle = questdlg(['Do you want to save the distance matrix in PHYLIP format? '], ... - 'Save distance matrix?','Yes','No','Yes'); -if isequal(talle,'Yes') - %%%waitALittle; - [filename, pathname] = uiputfile('*.txt','Save results as'); - - if (sum(filename)==0) || (sum(pathname)==0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename]) - delete('baps4_output.baps') - end - end -else - delete('baps4_output.baps') -end - -% ------------------------------------------------------------------------- -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - -% ------------------------------------------------------------------------- -function dist = hamming_dist(data1,data2) -[length1,nloci] = size(data1); -length2 = size(data2,1); -dist1 = 0; -for i = 1:length1 - dist2 = 0; - for j = 1:length2 - dist2 = dist2 + sum(data1(i,:)~=data2(j,:))/nloci; - end - dist1 = dist1 + dist2/length2; -end -dist = dist1/length1; -%-------------------------------------------------------------------------- -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:in? -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmill? -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - - - diff --git a/matlab/general/baps6.m b/matlab/general/baps6.m deleted file mode 100644 index 7586873..0000000 --- a/matlab/general/baps6.m +++ /dev/null @@ -1,377 +0,0 @@ -function baps5(varargin) -% This is the machine-generated representation of a Handle Graphics object -% and its children. Note that handle values may change when these objects -% are re-created. This may cause problems with any callbacks written to -% depend on the value of the handle at the time the object was saved. -% -% To reopen this object, just type the name of the M-file at the MATLAB -% prompt. The M-file and its associated MAT-file must be on your path. - -% NEW: script version was added. 09.07.2007 - -% Script function has been added by Lu Cheng, 11.03.2010 -% The script function is designed for semi-supervised clustering in the -% trained clustering module, it adopts a codon linkage model for the DNA -% sequence data -% The calling command: baps5 -semi script_file - -global PARAMETERS; PARAMETERS = []; -global SCRIPT_MODE; SCRIPT_MODE = []; - -if nargin == 1 % script version - readScript(varargin{1}); - return -else if nargin==2 - %----------------added by Lu Cheng, 11.03.2010 - if isequal(varargin{1},'-semi'); - script_file = varargin{2}; - PARAMETERS = semiReadScript(script_file); - SCRIPT_MODE = true; - trainedMix - SCRIPT_MODE = []; - PARAMETERS = []; - else - disp(cat(2,'Unknown option: ',varargin{1})); - end - return - %--------------------------------------------- - end -end - -% load baps4 - -% Base frame -h0 = figure('Color',[0.8 0.8 0.8], ... - 'MenuBar','none', ... - 'Position',[364 175 750 500], ... - 'Resize','on', ... - 'Tag','base_figure',... - 'Name','Bayesian Analysis of Population Structure',... - 'NumberTitle','off',... - 'Color',[.7 .9 .7]); - -% File menu -h1 = uimenu('Parent',h0, ... - 'Label','File', ... - 'Tag','file_menu'); -h2 = uimenu('Parent',h1, ... - 'Label','Load Result', ... - 'Tag','load_menu'); -h3 = uimenu('Parent',h2, ... - 'Callback','baps4cbf load_mixture_menu', ... - 'Label','Mixture result', ... - 'Tag','mixture_menu'); -h3 = uimenu('Parent',h2, ... - 'Callback','baps4cbf load_spatial_menu', ... - 'Label','Spatial mixture result', ... - 'Tag','spatialmixture_menu'); -h3 = uimenu('Parent',h2, ... - 'Callback','baps4cbf load_admixture_menu', ... - 'Label','Admixture result', ... - 'Tag','admixture_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','on',... - 'Label','Compare Results', ... - 'Tag','compare_menu'); -h3 = uimenu('Parent',h2, ... - 'Callback','baps4cbf compare_menu', ... - 'Label','Mixture results', ... - 'Tag','compare_menu'); -h3 = uimenu('Parent',h2, ... - 'Callback','baps4cbf compare_admix_menu', ... - 'Label','AdMixture results', ... - 'Tag','compare_admix_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off',... - 'Callback','baps4cbf close_menu', ... - 'Label','Close Results', ... - 'Tag','close_menu'); -h2 = uimenu('Parent',h1, ... - 'Label','Output File', ... - 'Tag','output_menu'); -h3 = uimenu('Parent',h2, ... - 'Callback','baps4cbf output_menu', ... - 'Label','Set', ... - 'Tag','set_menu'); -h3 = uimenu('Parent',h2, ... - 'Callback','baps4cbf remove_outputfile_menu', ... - 'Label','Remove', ... - 'Tag','remove_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf exit_menu', ... - 'Label','Exit', ... - 'Tag','exit_menu'); - -% Distances menu -h1 = uimenu('Parent',h0, ... - 'Enable','off',... - 'Label','Distances', ... - 'Tag','distances_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf calculate_kl', ... - 'Label','KL distance', ... - 'Tag','kl_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf calculate_nei', ... - 'Label','Nei distance', ... - 'Tag','nei_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf calculate_hamming', ... - 'Label','Hamming distance', ... - 'Tag','hamming_menu'); - -% Graph menu -h1 = uimenu('Parent',h0, ... - 'Enable','off', ... - 'Label','Graph', ... - 'Tag','graph_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf plot_coordinates_menu', ... - 'Label','Plot Coordinates', ... - 'Tag','plot_coordinates_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf partitio_menu', ... - 'Label','View Partition', ... - 'Tag','partitio_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf admix_menu', ... - 'Label','View Admixture Results', ... - 'Tag','admix_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf likelihood_menu', ... - 'Label','Changes of Log Likelihood', ... - 'Tag','likelihood_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf energy_menu', ... - 'Label','Energy landscape', ... - 'Tag','energy_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf geneflow_menu', ... - 'Label','Plot Gene Flow', ... - 'Tag','geneflow_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf voronoi_menu', ... - 'Label','Voronoi Tessellation', ... - 'Tag','voronoi_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf varmuus_menu', ... - 'Label','Local Uncertainty', ... - 'Tag','varmuus_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf phylogeny_menu', ... - 'Label','Phylogeny', ... - 'Tag','phylogeny_menu'); -h3 = uimenu('Parent',h2, ... - 'Enable','off', ... - 'Callback','baps4cbf upgma_menu',... - 'Label', 'UPGMA', ... - 'Tag','upgma_menu'); -h3 = uimenu('Parent',h2, ... - 'Enable','off', ... - 'Callback','baps4cbf nj_menu',... - 'Label', 'Neighbor-Joining', ... - 'Tag','nj_menu'); -h2 = uimenu('Parent',h1, ... - 'Enable','off', ... - 'Callback','baps4cbf mutationplot_menu', ... - 'Label','Mutation Plot', ... - 'Tag','mutationplot_menu'); - -% Tools menu -h1 = uimenu('Parent',h0, ... - 'Label','Tools', ... - 'Tag','tools_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf fixk_menu', ... - 'Label','Enable Fixed-K Clustering', ... - 'Tag','fixk_menu', ... - 'Userdata', 0); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf partitioncompare_menu', ... - 'Label','Partition Compare', ... - 'Tag','partitioncompare_menu', ... - 'Userdata', []); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf loadfigure_menu', ... - 'Label','Load Figure', ... - 'Tag','loadfigure_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf changecolor_menu',... - 'Label', 'Change BAPS Color', ... - 'Tag','changecolor_menu'); - - -% Help menu -h1 = uimenu('Parent',h0, ... - 'Label','Help', ... - 'Tag','help_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf helpdoc', ... - 'Enable','on', ... - 'Label','Help contents (doc)', ... - 'Tag','helpman_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf helponline', ... - 'Enable','on', ... - 'Label','BAPS on the web', ... - 'Tag','online_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','baps4cbf about', ... - 'Enable','on', ... - 'Label','About', ... - 'Tag','about_menu'); - - -% uicontrol -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[0.82 0.82 0.77], ... - 'ListboxTop',0, ... - 'Position',[38.25 154.268575851393 381 148.721320743034], ... - 'Style','frame', ... - 'Tag','mix_frame'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[0.82 0.82 0.77], ... - 'ListboxTop',0, ... - 'Position',[38.25 56 381 79.721320743034], ... - 'Style','frame', ... - 'Tag','admix_frame'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[0.82 0.82 0.77], ... - 'ListboxTop',0, ... - 'Position',[156.75 279.545624148607 158 11.9465944272446], ... - 'String','Population mixture analysis', ... - 'Style','text', ... - 'Tag','mix_text'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[0.82 0.82 0.77], ... - 'ListboxTop',0, ... - 'Position',[156.75 111.545624148607 138 11.9465944272446], ... - 'String','Population admixture analysis', ... - 'Style','text', ... - 'Tag','admix_text'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','baps4cbf mix1_button', ... - 'ListboxTop',0, ... - 'Position',[51.75 245.412497213622 163.5 22.7554179566563], ... - 'String','Clustering of individuals', ... - 'Tag','mix1_button'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','baps4cbf mix2_button', ... - 'ListboxTop',0, ... - 'Position',[51.75 212.75 163.5 23.25], ... - 'String','Clustering of groups of individuals', ... - 'Tag','mix2_button'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','baps4cbf trained_button', ... - 'ListboxTop',0, ... - 'Position',[51.75 178.284055727554 163.5 22.7554179566563], ... - 'String','Trained clustering', ... - 'Tag','trained_button'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','baps4cbf admix1_button', ... - 'ListboxTop',0, ... - 'Position',[51.7 75.412497213622 163.5 22.7554179566563], ... - 'String','Admixture based on mixture clustering', ... - 'Tag','admix1_button'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','baps4cbf admix2_button', ... - 'ListboxTop',0, ... - 'Position',[245.75 75.412497213622 163.5 22.7554179566563], ... - 'String','Admixture based on pre-defined populations', ... - 'Tag','admix2_button'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[.7 .9 .7], ... - 'FontSize',10, ... - 'HorizontalAlignment','left', ... - 'ListboxTop',0, ... - 'Position',[36 28.5 100.75 14.25], ... - 'String','Data File:', ... - 'Style','text', ... - 'Tag','datafile_text'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[.7 .9 .7], ... - 'FontSize',10, ... - 'HorizontalAlignment','left', ... - 'ListboxTop',0, ... - 'Position',[36 9.75 51 14.25], ... - 'String','Output File:', ... - 'Style','text', ... - 'Tag','outputfile_text'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[.7 .9 .7], ... - 'FontSize',10, ... - 'HorizontalAlignment','left', ... - 'ListboxTop',0, ... - 'Position',[140.75 28.5 314.5 14.25], ... - 'Style','text', ... - 'Tag','filename1_text'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[.7 .9 .7], ... - 'FontSize',10, ... - 'HorizontalAlignment','left', ... - 'ListboxTop',0, ... - 'Position',[93.75 9.75 314.5 14.25], ... - 'Style','text', ... - 'Tag','filename2_text'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','baps4cbf spatial_button', ... - 'ListboxTop',0, ... - 'Position',[245.75 245.412497213622 163.5 22.7554179566563], ... - 'String','Spatial clustering of individuals', ... - 'Tag','spatial_button'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','baps4cbf spatial2_button', ... - 'ListboxTop',0, ... - 'Position',[245.75 212.75 163.5 23.25], ... - 'String','Spatial clustering of groups', ... - 'Tag','spatial2_button'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','baps4cbf linkage_button', ... - 'ListboxTop',0, ... - 'Position',[245.75 178.284055727554 163.5 23.25], ... - 'String','Clustering with linked loci', ... - 'Tag','linkage_button'); - -% Starting text in the log window: -disp('**********************************************************'); -disp('********************** BAPS 6.0 *************************'); -disp('**********************************************************'); -disp(' '); -disp('Closing this window will end the execution of the program.'); -disp('However, it is recommended that you use ''Exit'' on the ''File'''); -disp('menu of the main window or use the close-button in the upper'); -disp('right-hand corner of the main window to exit the program.'); -disp(' '); - -if strcmp(computer,'PCWIN') || strcmp(computer,'PCWIN64') - if nargout > 0, fig = h0; end - drawnow - % seticon(h0, ['baps5.ico']); -end -% uimenufcn(gcf,'WindowCommandWindow') - diff --git a/matlab/general/distance.m b/matlab/general/distance.m deleted file mode 100644 index 7d42e27..0000000 --- a/matlab/general/distance.m +++ /dev/null @@ -1,46 +0,0 @@ -function d = distance(a,b) -% DISTANCE - computes Euclidean distance matrix -% -% E = distance(A,B) -% -% A - (DxM) matrix -% B - (DxN) matrix -% -% Returns: -% E - (MxN) Euclidean distances between vectors in A and B -% -% -% Description : -% This fully vectorized (VERY FAST!) m-file computes the -% Euclidean distance between two vectors by: -% -% ||A-B|| = sqrt ( ||A||^2 + ||B||^2 - 2*A.B ) -% -% Example : -% A = rand(400,100); B = rand(400,200); -% d = distance(A,B); - -% Author : Roland Bunschoten -% University of Amsterdam -% Intelligent Autonomous Systems (IAS) group -% Kruislaan 403 1098 SJ Amsterdam -% tel.(+31)20-5257524 -% bunschot@wins.uva.nl -% Last Rev : Oct 29 16:35:48 MET DST 1999 -% Tested : PC Matlab v5.2 and Solaris Matlab v5.3 -% Thanx : Nikos Vlassis - -% Copyright notice: You are free to modify, extend and distribute -% this code granted that the author of the original code is -% mentioned as the original author of the code. - -if (nargin ~= 2) - error('Not enough input arguments'); -end - -if (size(a,1) ~= size(b,1)) - error('A and B should be of same dimensionality'); -end - -aa=sum(a.*a,1); bb=sum(b.*b,1); ab=a'*b; -d = sqrt(abs(repmat(aa',[1 size(bb,2)]) + repmat(bb,[size(aa,2) 1]) - 2*ab)); diff --git a/matlab/general/fixKWarning.m b/matlab/general/fixKWarning.m deleted file mode 100644 index d83fed9..0000000 --- a/matlab/general/fixKWarning.m +++ /dev/null @@ -1,14 +0,0 @@ -%------------------------------------------------------------------------- -function fixedK = fixKWarning() -% The function is to ask further confirmation of running fixed K clustering -button = questdlg('You have selected a clustering analysis with the Fixed-K Mode. Are you sure to continue?',... - 'Fixed-K Mode'); -switch button - case 'Yes' - fixedK = 1; - case 'No' - fixedK = 0; - goToFixedK - case 'Cancel' - fixedK = 0.5; -end \ No newline at end of file diff --git a/matlab/general/goToFixedK.m b/matlab/general/goToFixedK.m deleted file mode 100644 index 8510d05..0000000 --- a/matlab/general/goToFixedK.m +++ /dev/null @@ -1,28 +0,0 @@ -%------------------------------------------------------------------------- - -function goToFixedK -% The function to do fix K algorithm - -h0 = findobj('Tag','fixk_menu'); -h1 = findobj('Tag','mix_text'); -old = get(h0, 'UserData'); -if old == 0 - set(h0,'UserData',1, 'label', 'Disable Fixed-K Clustering'); - set(h1,'String', 'Population mixture analysis (Fixed-K Mode)'); - % disable all the non-relevant buttons - set(findobj('Tag','partitioncompare_menu'),'Enable','off'); -% set(findobj('Tag','file_menu'),'Enable','off'); -% set(findobj('Tag','admix_text'),'Enable','off'); -% set(findobj('Tag','admix1_button'),'Enable','off'); -% set(findobj('Tag','admix2_button'),'Enable','off'); - disp('Fixed-K Mode is enabled.'); -else - set(h0,'UserData',0, 'label', 'Enable Fixed-K Clustering'); - set(h1,'String', 'Population mixture analysis'); - set(findobj('Tag','partitioncompare_menu'),'Enable','on'); -% set(findobj('Tag','file_menu'),'Enable','on'); -% set(findobj('Tag','admix_text'),'Enable','on'); -% set(findobj('Tag','admix1_button'),'Enable','on'); -% set(findobj('Tag','admix2_button'),'Enable','on'); - disp('Fixed-K Mode is disabled.'); -end diff --git a/matlab/general/goToPartitionCompare.m b/matlab/general/goToPartitionCompare.m deleted file mode 100644 index 6523a88..0000000 --- a/matlab/general/goToPartitionCompare.m +++ /dev/null @@ -1,138 +0,0 @@ -function goToPartitionCompare -% GOTOPARTITIONCOMPARE goes to the partition comaparing mode - - -% Load the partition result -[filename, pathname] = uigetfile('*.*', 'Load the priori specified partitions'); -if (sum(filename)==0) || (sum(pathname)==0) - return; -end -disp('---------------------------------------------------'); -disp('In loading the partition result...'); -try - c = load([pathname filename]); -catch - fprintf(1,'***ERROR: incorrect partition result.\n'); - return -end -if sum(c(1,:))~=1 - fprintf(1,'***ERROR: invalid prior density.\n'); - return -else - prior = c(1,:); - c = c([2:end],:); -end -[ninds, npartitions] = size(c); -fprintf(1,'# of sampling units: %d\n', ninds); -fprintf(1,'# of partitions in comparision: %d\n', npartitions); - - - -h1 = findobj('Tag','partitioncompare_menu'); - -% Choose data type and model type -items(1).name = 'Model:'; -items(1).default = 1; -items(1).indent = 1; -items(1).values = {'Independent';'Spatial';'Linkage'}; -items(1).linked = [2 3 4]; - -items(2).name = 'Data type'; -items(2).indent = 1; -items(2).values = {1}; -items(3).name = 'individual level'; -items(3).default = 1; -items(3).exclusive = 4; -items(3).indent = 2; -items(4).name = 'group level'; -items(4).default = 0; -items(4).exclusive = 3; -items(4).indent = 2; - -title = 'Specify data and model types'; -out = CSEFlagDialog1(items, title); -if isempty(out) - disp(['cancelled.']); - return -end - -userdata.partitions = c; -userdata.logmls = []; -set(h1,'UserData',userdata); - -try - if out(1).answer == 1 - if out(3).answer == 1 % independent model with individual level - disp(['Model: Independent clustering - Individual level']); - greedyMix(-1); - else % independent model with group level - disp(['Model: Independent clustering - Group level']); - greedyPopMix(); - end - - elseif out(1).answer == 2 - if out(3).answer == 1 % spatial model with individual level - disp(['Model: Spatial clustering - Individual level']); - spatialMixture; - else - disp(['Model: Spatial clustering - Group level']); - spatialPopMixture; - end - - elseif out(1).answer == 3 - disp(['Model: Linkage clustering']); - linkageMixture_speed; - - end - - userdata = get(h1,'UserData'); - logmls = userdata.logmls; - if isempty(logmls) - disp(['*** ERROR: program stopped.']); - set(findobj('Tag','filename1_text'),'String',[]); - else - diary('baps5_partitioncompare.out'); - disp('---------------------------------------------------'); - disp(['Partition Prior LogLikelihood Posterior']); - posterior = zeros(1, npartitions); - sum_posterior = exp(logmls)*prior'; - if sum_posterior == 0 % meaning that one partition dominates - dominate_partition = find(logmls==max(logmls)); - posterior(dominate_partition) = 1; - else - for i = 1:npartitions - posterior(i) = exp(userdata.logmls(i))*prior(i)/sum_posterior; - end - end - - for i = 1:npartitions - disp([' ' ownNum2Str(i) ' ' ownNum2Str(prior(i)) ' ' ... - ownNum2Str(logmls(i)) ' ' ownNum2Str(posterior(i))]); - end - diary off - - save_preproc = questdlg('Do you wish to save the partition compare result?',... - 'Save result?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - waitALittle; - [filename, pathname] = uiputfile('*.txt','Save result as'); - if (sum(filename)==0) || (sum(pathname)==0) - % Cancel was pressed - return; - else - % copy 'baps4_output.baps' into the text file with the same name. - if exist('baps5_partitioncompare.out','file') - copyfile('baps5_partitioncompare.out',[pathname filename]) - delete('baps5_partitioncompare.out') - disp('result saved.'); - else - disp('*** ERROR: result cannot be saved.'); - end - end; - end - end -catch - disp('*** ERROR: incorrect format. Check model and data types'); -end -set(h1,'UserData',[]); diff --git a/matlab/general/iconn.m b/matlab/general/iconn.m deleted file mode 100644 index d458c64..0000000 --- a/matlab/general/iconn.m +++ /dev/null @@ -1,18 +0,0 @@ -function iconn -% Change the icon of all open figures to match the figure number -% -% iconn -% -% See also: seticon -% -% In order to have this feature automatically on for all new figures -% execute the following command: -% set(0,'defaultfigurecreatefcn','iconn') -% You may want to insert it into your startup.m file - -% Copyright 2000-2002, Research and Development - -h = get(0,'children'); -for i=1:min(length(h), 9) - seticon(i, which(sprintf('icon%d.ico', i))) -end diff --git a/matlab/general/imageCbf.m b/matlab/general/imageCbf.m deleted file mode 100644 index d574601..0000000 --- a/matlab/general/imageCbf.m +++ /dev/null @@ -1,67 +0,0 @@ -function imageCbf(action) -%Tahan funktioon ohjataan image_figuren callback:it. - -switch action - -case 'save_image' - saveImage; - -case 'export_bmp' - export('bmp'); - -case 'export_jpg' - export('jpg'); - -end - -function saveImage -%Saves information needed to reconstruct the image later. - -[filename,pathname] = uiputfile('*.mat','Save Figure'); -if (filename == 0) & (pathname == 0) - %Cancel was clicked. - return; -end; -image_file_name = [pathname filename]; -tiedot = get(gcbf,'UserData'); -% tiedot on tietue, joka sisältää info:n ja popnames:in - -% save(image_file_name,'tiedot'); -save(image_file_name,'tiedot','-v7.3'); % added by Lu Cheng, 08.06.2012 - - -%------------------------------------------------------------------- - - -function export(format) -%Saves a figure in a format which has been given -%as a parameter. Exported images cannot be opened using BAPS. - -[filename,pathname] = uiputfile(['*.' format], ['Export to ' format]); -if (filename == 0) & (pathname == 0) - %Cancel was pressed: - return; -end; -filename = checkTheFormat(format,filename); -resultfilename = [pathname filename]; -print(resultfilename); - -%--------------------------------------------------------- - -function newfilename = checkTheFormat(format,oldfilename) -%Checks if the 'oldfilename' has ending *.'format'. If not, ending -%will be added to newfilename. - -if length(oldfilename) < 4 - newfilename = [oldfilename '.' format]; -elseif isequal(oldfilename(end-3: end), ['.' format]) - newfilename = oldfilename; -elseif any(oldfilename == '.') - n = 1; - while ~isequal(oldfilename(n),'.') - n = n+1; - end; - newfilename = [oldfilename(1:n) format]; -else - newfilename = [oldfilename '.' format]; -end; \ No newline at end of file diff --git a/matlab/general/image_figure.m b/matlab/general/image_figure.m deleted file mode 100644 index 9e42c34..0000000 --- a/matlab/general/image_figure.m +++ /dev/null @@ -1,38 +0,0 @@ -function fig = image_figure() -% This is the machine-generated representation of a Handle Graphics object -% and its children. Note that handle values may change when these objects -% are re-created. This may cause problems with any callbacks written to -% depend on the value of the handle at the time the object was saved. -% -% To reopen this object, just type the name of the M-file at the MATLAB -% prompt. The M-file and its associated MAT-file must be on your path. - -load image_figure - -h0 = figure('Color',[0.8 0.8 0.8], ... - 'Colormap',mat0, ... - 'MenuBar','none', ... - 'NumberTitle','off', ... - 'PointerShapeCData',mat1, ... - 'Position',[73 27 896 672], ... - 'Resize','on', ... - 'Tag','image_figure'); -h1 = uimenu('Parent',h0, ... - 'Label','File', ... - 'Tag','image_fig_file_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','imageCbf save_image', ... - 'Label','Save Figure', ... - 'Tag','save_image_menu'); -h2 = uimenu('Parent',h1, ... - 'Label','Export', ... - 'Tag','export_image_menu'); -h3 = uimenu('Parent',h2, ... - 'Callback','imageCbf export_jpg', ... - 'Label','*.jpg', ... - 'Tag','jpg_menu'); -h3 = uimenu('Parent',h2, ... - 'Callback','imageCbf export_bmp', ... - 'Label','*.bmp', ... - 'Tag','bmp_menu'); -if nargout > 0, fig = h0; end diff --git a/matlab/general/kstest2.m b/matlab/general/kstest2.m deleted file mode 100644 index a20ea83..0000000 --- a/matlab/general/kstest2.m +++ /dev/null @@ -1,178 +0,0 @@ -function [H, pValue, KSstatistic] = kstest2(x1, x2, alpha, tail) -%KSTEST2 Two-sample Kolmogorov-Smirnov goodness-of-fit hypothesis test. -% H = KSTEST2(X1,X2,ALPHA,TAIL) performs a Kolmogorov-Smirnov (K-S) test -% to determine if independent random samples, X1 and X2, are drawn from -% the same underlying continuous population. ALPHA and TAIL are optional -% scalar inputs: ALPHA is the desired significance level (default = 0.05); -% TAIL indicates the type of test (default = 0). H indicates the result of -% the hypothesis test: -% H = 0 => Do not reject the null hypothesis at significance level ALPHA. -% H = 1 => Reject the null hypothesis at significance level ALPHA. -% -% Let S1(x) and S2(x) be the empirical distribution functions from the -% sample vectors X1 and X2, respectively, and F1(x) and F2(x) be the -% corresponding true (but unknown) population CDFs. The two-sample K-S -% test tests the null hypothesis that F1(x) = F2(x) for all x, against the -% alternative specified by TAIL: -% 'unequal' -- "F1(x) not equal to F2(x)" (two-sided test) -% 'larger' -- "F1(x) > F2(x)" (one-sided test) -% 'smaller' -- "F1(x) < F2(x)" (one-sided test) -% -% For TAIL = 'unequal', 'larger', and 'smaller', the test statistics are -% max|S1(x) - S2(x)|, max[S1(x) - S2(x)], and max[S2(x) - S1(x)], -% respectively. -% -% The decision to reject the null hypothesis occurs when the significance -% level, ALPHA, equals or exceeds the P-value. -% -% X1 and X2 are vectors of lengths N1 and N2, respectively, and represent -% random samples from some underlying distribution(s). Missing -% observations, indicated by NaNs (Not-a-Number), are ignored. -% -% [H,P] = KSTEST2(...) also returns the asymptotic P-value P. -% -% [H,P,KSSTAT] = KSTEST2(...) also returns the K-S test statistic KSSTAT -% defined above for the test type indicated by TAIL. -% -% The asymptotic P-value becomes very accurate for large sample sizes, and -% is believed to be reasonably accurate for sample sizes N1 and N2 such -% that (N1*N2)/(N1 + N2) >= 4. -% -% See also KSTEST, LILLIETEST, CDFPLOT. -% - -% Copyright 1993-2007 The MathWorks, Inc. -% $Revision: 1.5.2.5 $ $ Date: 1998/01/30 13:45:34 $ - -% References: -% Massey, F.J., (1951) "The Kolmogorov-Smirnov Test for Goodness of Fit", -% Journal of the American Statistical Association, 46(253):68-78. -% Miller, L.H., (1956) "Table of Percentage Points of Kolmogorov Statistics", -% Journal of the American Statistical Association, 51(273):111-121. -% Stephens, M.A., (1970) "Use of the Kolmogorov-Smirnov, Cramer-Von Mises and -% Related Statistics Without Extensive Tables", Journal of the Royal -% Statistical Society. Series B, 32(1):115-122. -% Conover, W.J., (1980) Practical Nonparametric Statistics, Wiley. -% Press, W.H., et. al., (1992) Numerical Recipes in C, Cambridge Univ. Press. - -if nargin < 2 - error('stats:kstest2:TooFewInputs','At least 2 inputs are required.'); -end - -% -% Ensure each sample is a VECTOR. -% - -if ~isvector(x1) || ~isvector(x2) - error('stats:kstest2:VectorRequired','The samples X1 and X2 must be vectors.'); -end - -% -% Remove missing observations indicated by NaN's, and -% ensure that valid observations remain. -% - -x1 = x1(~isnan(x1)); -x2 = x2(~isnan(x2)); -x1 = x1(:); -x2 = x2(:); - -if isempty(x1) - error('stats:kstest2:NotEnoughData', 'Sample vector X1 contains no data.'); -end - -if isempty(x2) - error('stats:kstest2:NotEnoughData', 'Sample vector X2 contains no data.'); -end - -% -% Ensure the significance level, ALPHA, is a scalar -% between 0 and 1 and set default if necessary. -% - -if (nargin >= 3) && ~isempty(alpha) - if ~isscalar(alpha) || (alpha <= 0 || alpha >= 1) - error('stats:kstest2:BadAlpha',... - 'Significance level ALPHA must be a scalar between 0 and 1.'); - end -else - alpha = 0.05; -end - -% -% Ensure the type-of-test indicator, TAIL, is a scalar integer from -% the allowable set, and set default if necessary. -% - -if (nargin >= 4) && ~isempty(tail) - if ischar(tail) - tail = strmatch(lower(tail), {'smaller','unequal','larger'}) - 2; - if isempty(tail) - error('stats:kstest2:BadTail',... - 'Type-of-test indicator TAIL must be ''unequal'', ''smaller'', or ''larger''.'); - end - elseif ~isscalar(tail) || ~((tail==-1) || (tail==0) || (tail==1)) - error('stats:kstest2:BadTail',... - 'Type-of-test indicator TAIL must be ''unequal'', ''smaller'', or ''larger''.'); - end -else - tail = 0; -end - -% -% Calculate F1(x) and F2(x), the empirical (i.e., sample) CDFs. -% - -binEdges = [-inf ; sort([x1;x2]) ; inf]; - -binCounts1 = histc (x1 , binEdges, 1); -binCounts2 = histc (x2 , binEdges, 1); - -sumCounts1 = cumsum(binCounts1)./sum(binCounts1); -sumCounts2 = cumsum(binCounts2)./sum(binCounts2); - -sampleCDF1 = sumCounts1(1:end-1); -sampleCDF2 = sumCounts2(1:end-1); - -% -% Compute the test statistic of interest. -% - -switch tail - case 0 % 2-sided test: T = max|F1(x) - F2(x)|. - deltaCDF = abs(sampleCDF1 - sampleCDF2); - - case -1 % 1-sided test: T = max[F2(x) - F1(x)]. - deltaCDF = sampleCDF2 - sampleCDF1; - - case 1 % 1-sided test: T = max[F1(x) - F2(x)]. - deltaCDF = sampleCDF1 - sampleCDF2; -end - -KSstatistic = max(deltaCDF); - -% -% Compute the asymptotic P-value approximation and accept or -% reject the null hypothesis on the basis of the P-value. -% - -n1 = length(x1); -n2 = length(x2); -n = n1 * n2 /(n1 + n2); -lambda = max((sqrt(n) + 0.12 + 0.11/sqrt(n)) * KSstatistic , 0); - -if tail ~= 0 % 1-sided test. - - pValue = exp(-2 * lambda * lambda); - -else % 2-sided test (default). -% -% Use the asymptotic Q-function to approximate the 2-sided P-value. -% - j = (1:101)'; - pValue = 2 * sum((-1).^(j-1).*exp(-2*lambda*lambda*j.^2)); - pValue = min(max(pValue, 0), 1); - -end - -H = (alpha >= pValue); diff --git a/matlab/general/logml2String.m b/matlab/general/logml2String.m deleted file mode 100644 index edb9710..0000000 --- a/matlab/general/logml2String.m +++ /dev/null @@ -1,67 +0,0 @@ -function mjono = logml2String(logml) -% Palauttaa logml:n string-esityksen. - -mjono = ' '; - -if isequal(logml,-Inf) - mjono(7) = '-'; - return -end - -if abs(logml)<10000 - %Ei tarvita e-muotoa - mjono(7) = palautaYks(abs(logml),-1); - mjono(6) = '.'; - mjono(5) = palautaYks(abs(logml),0); - mjono(4) = palautaYks(abs(logml),1); - mjono(3) = palautaYks(abs(logml),2); - mjono(2) = palautaYks(abs(logml),3); - pointer = 2; - while mjono(pointer)=='0' & pointer<7 - mjono(pointer) = ' '; - pointer=pointer+1; - end - if logml<0 - mjono(pointer-1) = '-'; - end -else - suurinYks = 4; - while abs(logml)/(10^(suurinYks+1)) >= 1 - suurinYks = suurinYks+1; - end - if suurinYks<10 - mjono(7) = num2str(suurinYks); - mjono(6) = 'e'; - mjono(5) = palautaYks(abs(logml),suurinYks-1); - mjono(4) = '.'; - mjono(3) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(2) = '-'; - end - elseif suurinYks>=10 - mjono(6:7) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(logml),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(1) = '-'; - end - end -end - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:inä -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmillä -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); \ No newline at end of file diff --git a/matlab/general/noIndex.m b/matlab/general/noIndex.m deleted file mode 100644 index 6241f4e..0000000 --- a/matlab/general/noIndex.m +++ /dev/null @@ -1,11 +0,0 @@ -function puredata = noIndex(data, noalle) -% NOINDEX Check that the data contains no index column. -% Input: two variables from a mixture/admixture result structure. -% Output: -% puredata: a data contains no index column. - -if size(data,2) == length(noalle) + 1 - puredata = data(:,[1:end-1]); % remove the index column -else - puredata = data; -end diff --git a/matlab/general/ownNum2Str.m b/matlab/general/ownNum2Str.m deleted file mode 100644 index 391b072..0000000 --- a/matlab/general/ownNum2Str.m +++ /dev/null @@ -1,35 +0,0 @@ -function str = ownNum2Str(number) - -absolute = abs(number); - -if absolute < 1000 - str = num2str(number); -elseif absolute < 10000000 - first_three = rem(number,1000); - next_four = (number - first_three) /1000; - first_three = abs(first_three); - if first_three<10 - first_three = ['00' num2str(first_three)]; - elseif first_three<100 - first_three = ['0' num2str(first_three)]; - else - first_three = num2str(first_three); - end; - str = [num2str(next_four) first_three]; -elseif absolute < 100000000 - first_four = rem(number,10000); - next_four = (number - first_four) /10000; - first_four = abs(first_four); - if first_four<10 - first_four = ['000' num2str(first_four)]; - elseif first_four<100 - first_four = ['00' num2str(first_four)]; - elseif first_four<1000 - first_four = ['0' num2str(first_four)]; - else - first_four = num2str(first_four); - end; - str = [num2str(next_four) first_four]; -else - str = num2str(number); -end; \ No newline at end of file diff --git a/matlab/general/setWindowOnTop.m b/matlab/general/setWindowOnTop.m deleted file mode 100644 index aafafeb..0000000 --- a/matlab/general/setWindowOnTop.m +++ /dev/null @@ -1,124 +0,0 @@ -function setWindowOnTop(h,state) -% SETWINDOWONTOP sets a figures Always On Top state on or off -% -% Copyright (C) 2006 Matt Whitaker -% -% This program is free software; you can redistribute it and/or modify it -% under -% the terms of the GNU General Public License as published by the Free -% Software Foundation; either version 2 of the License, or (at your -% option) any later version. -% -% This program is distributed in the hope that it will be useful, but -% WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -% General Public License for more details. -% -% SETWINDOWONTOP(H,STATE): H is figure handle or a vector of figure handles -% STATE is a string or cell array of strings- -% 'true' - set figure to be always on top -% 'false' - set figure to normal -% if STATE is a string the state is applied to -% all H. If state is a cell array the length STATE -% must equal that of H and each state is applied -% individually. -% Examples: -% h= figure; -% s = 'true'; -% setWindowOnTop(h,s) %sets h to be on top -% -% h(1) = figure; -% h(2) = figure; -% s = 'true'; -% setWindowOnTop(h,s) %sets both figures to be on top -% -% h(1) = figure; -% h(2) = figure; -% s = {'true','false'}; -% setWindowOnTop(h,s) %sets h(1) on top, h(2) normal -% Notes: -% 1. Figures must have 'Visible' set to 'on' and not be docked for -% setWindowOnTop to work. -% 2. Routine does not work for releases prior to R14SP2 -% 3. The Java calls are undocumented by Mathworks -% -% Revisions: 09/28/06- Corrected call to warning and uopdated for R2006b - -drawnow; %need to make sure that the figures have been rendered or Java error can occur - -% setWindowOnTop disabled to ensure that compiled version runs properly -% Modified: 11/06/07 Jukka Siren -return - -%check input argument number -error(nargchk(2, 2, nargin, 'struct')); - -%is JVM available -if ~usejava('jvm') - error('setWindowOnTop requires Java to run.'); -end - -[j,s] = parseInput; -setOnTop; %set the on top state - - function [j,s] = parseInput - % is h all figure handles - if ~all(ishandle(h)) || ~isequal(length(h),length(findobj(h,'flat','Type','figure'))) - error('All input handles must be valid figure handles'); - end %if - - %handle state argument - if ischar(state) - %make it a cell - s = cellstr(repmat(state,[length(h),1])); - - elseif iscellstr(state) - if length(state) ~= length(h) - error('Cell array of strings: state must be same length as figure handle input'); - end %if - s = state; - else - error('state must be a character array or a cell array of strings'); - end %if - - %check that the states are all valid - if ~all(ismember(s,{'true','false'})) - error('Invalid states entered') - end %if - - if length(h) == 1 - j{1} = get(h,'javaframe'); - else - j = get(h,'javaframe'); - end %if - - end %parseInput - - function setOnTop - %get version so we know which method to call - v = ver('matlab'); - %anticipating here that Mathworks will continue to change these - %undocumented calls - switch v(1).Release - case {'(R14SP2)','(R14SP3)'} - on_top = 1; - case {'(R2006a)','(R2006b)'} - on_top = 2; - otherwise %warn but try method 2 - % warning('setWindowOnTop:UntestedVersion',['setWindowOnTop has not been tested with release: ',v.Release]); - on_top = 2; - end %switch - for i = 1:length(j) - switch on_top - case 1 %R14SP2-3 - w = j{i}.fClientProxy.getFrameProxy.getClientFrame; - case 2 %R2006a+ - w= j{i}.fFigureClient.getWindow; - otherwise %should not happen - error('Invalid on top method'); - end %switch - awtinvoke(w,'setAlwaysOnTop',s{i}); - end %for j - end %setOnTop - -end %setWindowOnTop \ No newline at end of file diff --git a/matlab/general/seticon.m b/matlab/general/seticon.m deleted file mode 100644 index 1f02fe6..0000000 --- a/matlab/general/seticon.m +++ /dev/null @@ -1,59 +0,0 @@ -function seticon(window, useicon) -% Set icon for window. -% -% seticon(window, useicon) -% -% Parameters: -% window: Figure number or name of window -% useicon: Icon number to use or file name of icon -% =1 : Application -% =2 : Hand (x) -% =3 : Question (?) -% =4 : Exclamation /!\ -% =5 : Asterisk (i) -% =6 : Winlogo -% -% Examples: -% seticon(2,3) -% seticon(1, 'iconfile.ico') -% seticon('Microsoft Internet', 6) - -% Copyright 2000-2002, Research and Development - -if nargin~=2 - error('Two arguments required') -end - -if ~isstr(window) - window = wgetname(window); -end - -if ~any(window) - warning('Window specification insufficient') - return -end - -if isstr(useicon) - switch lower(useicon) - case 'application' - icon(1, window); - case 'hand' - icon(2, window); - case 'question' - icon(3, window); - case 'exclamation' - icon(4, window); - case 'asterisk' - icon(5, window); - case 'winlogo' - icon(6, window); - otherwise - icon(101, window, useicon); - end -else - if useicon>=1 & useicon<=6 - icon(useicon, window); - else - error('Icon number out of range') - end -end diff --git a/matlab/general/waitALittle.m b/matlab/general/waitALittle.m deleted file mode 100644 index 2d2571b..0000000 --- a/matlab/general/waitALittle.m +++ /dev/null @@ -1,3 +0,0 @@ -function waitALittle -A = rand(500); -gammaln(A); \ No newline at end of file diff --git a/matlab/general/wgetname.m b/matlab/general/wgetname.m deleted file mode 100644 index 47dac3d..0000000 --- a/matlab/general/wgetname.m +++ /dev/null @@ -1,21 +0,0 @@ -function s=wgetname(h) -% Returns the window name of a window -% -% s=wgetname(h) -% -% Parameters: -% h : window number -% -% See also: seticon - -% Copyright 2000-2002, Research and Development - -if strcmp(get(h,'numbertitle'), 'on') - if get(h,'name') - s = sprintf('Figure %d: %s', h, get(h,'name')); - else - s = sprintf('Figure %d', h); - end -else - s = get(h,'name'); -end diff --git a/matlab/general/winontop.mexw32 b/matlab/general/winontop.mexw32 deleted file mode 100644 index e6afebd..0000000 Binary files a/matlab/general/winontop.mexw32 and /dev/null differ diff --git a/matlab/general/zscore.m b/matlab/general/zscore.m deleted file mode 100644 index e0f18c2..0000000 --- a/matlab/general/zscore.m +++ /dev/null @@ -1,51 +0,0 @@ -function [z,mu,sigma] = zscore(x,flag,dim) -%ZSCORE Standardized z score. -% Z = ZSCORE(X) returns a centered, scaled version of X, the same size as X. -% For vector input X, Z is the vector of z-scores (X-MEAN(X)) ./ STD(X). For -% matrix X, z-scores are computed using the mean and standard deviation -% along each column of X. For higher-dimensional arrays, z-scores are -% computed using the mean and standard deviation along the first -% non-singleton dimension. -% -% The columns of Z have sample mean zero and sample standard deviation one -% (unless a column of X is constant, in which case that column of Z is -% constant at 0). -% -% [Z,MU,SIGMA] = ZSCORE(X) also returns MEAN(X) in MU and STD(X) in SIGMA. -% -% [...] = ZSCORE(X,1) normalizes X using STD(X,1), i.e., by computing the -% standard deviation(s) using N rather than N-1, where N is the length of -% the dimension along which ZSCORE works. ZSCORE(X,0) is the same as -% ZSCORE(X). -% -% [...] = ZSCORE(X,FLAG,DIM) standardizes X by working along the dimension -% DIM of X. Pass in FLAG==0 to use the default normalization by N-1, or 1 -% to use N. -% -% See also MEAN, STD. - -% Copyright 1993-2006 The MathWorks, Inc. -% $Revision: 1.7.2.5 $ $Date: 2007/12/10 23:06:31 $ - -% [] is a special case for std and mean, just handle it out here. -if isequal(x,[]), z = []; return; end - -if nargin < 2 - flag = 0; -end -if nargin < 3 - % Figure out which dimension to work along. - dim = find(size(x) ~= 1, 1); - if isempty(dim), dim = 1; end -end - -% Compute X's mean and sd, and standardize it -mu = mean(x,dim); -sigma = std(x,flag,dim); -sigma0 = sigma; -sigma0(sigma0==0) = 1; - -z = (x-mu)./sigma0; -% z = bsxfun(@minus,x, mu); -% z = bsxfun(@rdivide, z, sigma0); - diff --git a/matlab/graph/@phyTree/Display.m b/matlab/graph/@phyTree/Display.m deleted file mode 100644 index f2b3936..0000000 --- a/matlab/graph/@phyTree/Display.m +++ /dev/null @@ -1,26 +0,0 @@ -function display(tr) -%DISPLAY command window display of phylogenetic tree objects. - -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.6.4 $ $Author: batserve $ $Date: 2005/06/09 21:55:53 $ - -n = numel(tr); -if n > 1 - disp(tr) -elseif n==0 - disp(' Empty array of phylogenetic tree objects') -else - n = length(tr.dist); - switch n - case 0 - disp(' Empty phylogenetic tree object') - case 1 - disp(' Phylogenetic tree object with 1 leaf (0 branches)') - case 3 - disp(' Phylogenetic tree object with 2 leaves (1 branch)') - otherwise - disp([' Phylogenetic tree object with ' num2str((n+1)/2) ... - ' leaves (' num2str((n-1)/2) ' branches)']) - end -end - diff --git a/matlab/graph/@phyTree/GET.m b/matlab/graph/@phyTree/GET.m deleted file mode 100644 index 89bd433..0000000 --- a/matlab/graph/@phyTree/GET.m +++ /dev/null @@ -1,89 +0,0 @@ -function varargout = get(tr,varargin) -%GET Get information about a phylogenetic tree object. -% [VALUE1,VALUE2, ...] = GET(TREE,'NAME1','NAME2', ...) returns the -% contents of the specified fields for the PHYTREE object TREE. -% -% The valid choices for 'NAME' are: -% 'POINTERS' : branch to leaf/branch connectivity list -% 'DISTANCES' : edge length for every leaf/branch -% 'NUMLEAVES' : number of leaves -% 'NUMBRANCHES' : number of branches -% 'NUMNODES' : number of nodes (numleaves + numbranches) -% 'LEAFNAMES' : names of the leaves -% 'BRANCHNAMES' : names of the branches -% 'NODENAMES' : names of all the nodes -% -% GET(TREE) displays all property names and their current values for -% the PHYTREE object TREE. -% -% V = GET(TREE) returns a structure where each field name is the name of -% a property of TREE and each field contains the value of that property. -% -% Examples: -% tr = phytreeread('pf00002.tree') -% protein_names = get(tr,'LeafNames') -% -% See also PHYTREE, PHYTREEREAD, PHYTREE/SELECT, PHYTREE/GETBYNAME. - -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.6.7 $ $Author: batserve $ $Date: 2005/06/09 21:55:54 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:get:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -% get input without arguments displays a summary -if nargin == 1 - s.NumLeaves = numLeaves; - s.NumBranches = numBranches; - s.NumNodes = numLabels; - s.Pointers = tr.tree; - s.Distances = tr.dist; - s.LeafNames = tr.names(1:numLeaves); - s.BranchNames = tr.names(numLeaves+1:numLabels); - s.NodeNames = tr.names; - if nargout == 0 - disp(s) - else - varargout{1} = s; - end - return; -end - -okargs = {'pointers','distances','numleaves','numbranches',... - 'numnodes','leafnames','branchnames','nodenames'}; -for ind = 2 : nargin - pname = varargin{ind-1}; - k = find(strncmpi(pname,okargs,numel(pname))); - if isempty(k) - error('Bioinfo:phytree:get:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('Bioinfo:phytree:get:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - switch(k) - case 1 % pointers - varargout{ind-1} = tr.tree; %#ok - case 2 % distances - varargout{ind-1} = tr.dist; - case 3 % numleaves - varargout{ind-1} = numLeaves; - case 4 % numbranches - varargout{ind-1} = numBranches; - case 5 % numNodes - varargout{ind-1} = numLabels; - case 6 % leafnames - varargout{ind-1} = tr.names(1:numLeaves); - case 7 % branchnames - varargout{ind-1} = tr.names(numLeaves+1:numLabels); - case 8 % nodenames - varargout{ind-1} = tr.names; - end - end -end diff --git a/matlab/graph/@phyTree/Plot.m b/matlab/graph/@phyTree/Plot.m deleted file mode 100644 index 18a553d..0000000 --- a/matlab/graph/@phyTree/Plot.m +++ /dev/null @@ -1,629 +0,0 @@ -function handles = Plot(tr,varargin) -%PLOT renders a phylogenetic tree. -% -% PLOT(TREE) renders a phylogenetic tree object into a MATLAB figure as a -% phylogram. The significant distances between branches and nodes are in -% horizontal direction, vertical coordinates are accommodated only for -% display purposes. Handles to graph elements are stored in the -% 'UserData' figure field, such that graphic properties can be easily -% modified. -% -% PLOT(TREE,ACTIVEBRANCHES) hides the non'active branches and all their -% descendants. ACTIVEBRANCHES is a logical array of size -% [numBranches x 1] indicating the active branches. -% -% PLOT(...,'TYPE',type) selects the method to render the phylogenetic -% tree. Options are: 'square' (default), 'angular', and 'radial'. -% -% PLOT(...,'ORIENTATION',orient) will orient the phylogenetic tree within -% the figure window. Options are: 'top', 'bottom', 'left' (default), and, -% 'right'. Orientation parameter is valid only for phylograms or -% cladograms. -% -% PLOT(...,'BRANCHLABELS',value) hides/unhides branch labels. Options are -% true or false. Branch labels are placed next to the branch node. -% Defaults to false (true) when TYPE is (is not) 'radial'. -% -% PLOT(...,'LEAFLABELS',value) hides/unhides leaf labels. Options are -% true or false. Leaf labels are placed next to the leaf nodes. Defaults -% to false (true) when TYPE is (is not) 'radial'. -% -% PLOT(...,'TERMINALLABELS',value) hides/unhides terminal labels. Options -% are true (default) or false. Terminal labels are placed over the axis -% tick labels, ignored when 'radial' type is used. -% -% H = PLOT(...) returns a structure with handles to the graph elements. -% -% Example: -% -% tr = phytreeread('pf00002.tree') -% plot(tr,'type','radial') -% -% % Graph element properties can be modified as follows: -% -% h=get(gcf,'UserData') -% set(h.branchNodeLabels,'FontSize',6,'Color',[.5 .5 .5]) -% -% See also PHYTREE, PHYTREE/VIEW, PHYTREEREAD, PHYTREETOOL, SEQLINKAGE. - -% Copyright 2003-2006 The MathWorks, Inc. -% $Revision: 1.1.6.10 $ $Author: batserve $ $Date: 2006/06/16 20:06:45 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:plot:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -% set defaults -dispBranchLabels = NaN; -dispLeafLabels = NaN; -dispTerminalLabels = true; -renderType = 'square'; -orientation = 'left'; -rotation = 0; - -tr = struct(tr); -tr.numBranches = size(tr.tree,1); - -if nargin>1 && islogical(varargin{1}) - activeBranches = varargin{1}; - argStart = 2; -else - activeBranches = true(tr.numBranches,1); - argStart = 1; -end - -if nargin - argStart > 0 - if rem(nargin - argStart,2) == 1 - error('Bioinfo:phytree:plot:IncorrectNumberOfArguments',... - 'Incorrect number of arguments to %s.',mfilename); - end - okargs = {'type','orientation','rotation',... - 'branchlabels','leaflabels','terminallabels'}; - for j = argStart:2:nargin-argStart - pname = varargin{j}; - pval = varargin{j+1}; - k = find(strncmpi(pname,okargs,numel(pname))); - if isempty(k) - error('Bioinfo:phytree:plot:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('Bioinfo:phytree:plot:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - switch(k) - case 1 % type - oktypes={'square','angular','radial'}; - l = strmatch(lower(pval),oktypes); %#ok - if isempty(l) - error('Bioinfo:phytree:plot:UnknownTypeName',... - 'Unknown option for %s.',upper(okargs{k})); - else - if l==4 - l=1; - end - renderType = oktypes{l}; - end - case 2 % orientation - oktypes={'left','right','top','bottom'}; - l = strmatch(lower(pval),oktypes); %#ok - if isempty(l) - error('Bioinfo:phytree:plot:UnknownOrientation',... - 'Unknown option for %s.',upper(okargs{k})); - else - orientation = oktypes{l}; - end - case 3 % rotation - if isreal(pval(1)) - rotation = double(pval(1)); - else - error('Bioinfo:phytree:plot:NotValidType',... - 'ROTATION must be numeric and real'); - end - case 4 % branch labels - dispBranchLabels = opttf(pval); - case 5 % leaf labels - dispLeafLabels = opttf(pval); - case 6 % terminal labels - dispTerminalLabels = opttf(pval); - end - end - end -end - -% set dependent defaults -if isnan(dispBranchLabels) - if isequal(renderType,'radial') - dispBranchLabels = true; - else - dispBranchLabels = false; - end -end -if isnan(dispLeafLabels) - if isequal(renderType,'radial') - dispLeafLabels = true; - else - dispLeafLabels = false; - end -end - -tr = doBasicCalculations(tr,activeBranches,renderType); - -nodeIndex = 1:tr.numLabels; -leafIndex = 1:tr.numLeaves; -branchIndex = tr.numLeaves+1:tr.numLabels; - - -% check empty names -for ind = nodeIndex - if isempty(tr.names{ind}) - if ind > tr.numLeaves - tr.names{ind} = ['Branch ' num2str(ind-tr.numLeaves)]; - else - tr.names{ind} = ['Leaf ' num2str(ind)]; - end - end -end - -% rendering graphic objects -fig = gcf; -% fig = figure('Renderer','ZBuffer'); -h.fig = fig; -h.axes = axes; hold on; -sepUnit = max(tr.x)*[-1/20 21/20]; - -% setting the axes -switch renderType - case {'square','angular'} - switch orientation - case 'left' - set(h.axes,'YTick',1:numel(tr.terminalNodes),'Ydir','reverse',... - 'YtickLabel','','YAxisLocation','Right') - if dispTerminalLabels - set(h.axes,'Position',[.05 .10 .7 .85]) - else - set(h.axes,'Position',[.05 .10 .9 .85]) - end - xlim(sepUnit); - ylim([0 numel(tr.terminalNodes)+1]); - case 'right' - set(h.axes,'YTick',1:numel(tr.terminalNodes),'Xdir','reverse','Ydir','reverse',... - 'YtickLabel','','YAxisLocation','Left') - if dispTerminalLabels - set(h.axes,'Position',[.25 .10 .7 .85]) - else - set(h.axes,'Position',[.05 .10 .9 .85]) - end - xlim(sepUnit); - ylim([0 numel(tr.terminalNodes)+1]); - case 'top' - set(h.axes,'XTick',1:numel(tr.terminalNodes),... - 'XtickLabel','','XAxisLocation','Top') - if dispTerminalLabels - set(h.axes,'Position',[.10 .05 .85 .7]) - else - set(h.axes,'Position',[.10 .05 .85 .9]) - end - ylim(sepUnit); - xlim([0 numel(tr.terminalNodes)+1]); - case 'bottom' - set(h.axes,'XTick',1:numel(tr.terminalNodes),'Ydir','reverse',... - 'XtickLabel','','XAxisLocation','Bottom') - if dispTerminalLabels - set(h.axes,'Position',[.10 .25 .85 .7]) - else - set(h.axes,'Position',[.10 .05 .85 .9]) - end - ylim(sepUnit); - xlim([0 numel(tr.terminalNodes)+1]); - end - case 'radial' - set(h.axes,'XTick',[],'YTick',[]) - set(h.axes,'Position',[.05 .05 .9 .9]) - dispTerminalLabels = false; - axis equal -end - -% drawing lines -switch renderType - case 'square' - X = tr.x([nodeIndex;repmat([tr.par(1:tr.numLabels-1) tr.numLabels],2,1)]); - Y = tr.y([repmat(nodeIndex,2,1);[tr.par(1:tr.numLabels-1) tr.numLabels]]); - switch orientation - case {'left','right'} - h.BranchLines = plot(X,Y,'-k'); - delete(h.BranchLines(~tr.activeNodes)) - h.BranchLines = h.BranchLines(tr.activeNodes); - case {'top','bottom'} - h.BranchLines = plot(Y,X,'-k'); - delete(h.BranchLines(~tr.activeNodes)) - h.BranchLines = h.BranchLines(tr.activeNodes); - end - case 'angular' - X = tr.x([nodeIndex;[tr.par(1:tr.numLabels-1) tr.numLabels]]); - Y = tr.y([nodeIndex;[tr.par(1:tr.numLabels-1) tr.numLabels]]); - switch orientation - case {'left','right'} - h.BranchLines = plot(X,Y,'-k'); - delete(h.BranchLines(~tr.activeNodes)) - h.BranchLines = h.BranchLines(tr.activeNodes); - case {'top','bottom'} - h.BranchLines = plot(Y,X,'-k'); - delete(h.BranchLines(~tr.activeNodes)) - h.BranchLines = h.BranchLines(tr.activeNodes); - end - case 'radial' - R = tr.x; - A = tr.y / numel(tr.terminalNodes)*2*pi+rotation*pi/180; - tr.x = R .* sin(A); - tr.y = R .* cos(A); - X = tr.x([nodeIndex;[tr.par(1:tr.numLabels-1) tr.numLabels]]); - Y = tr.y([nodeIndex;[tr.par(1:tr.numLabels-1) tr.numLabels]]); - h.BranchLines = plot(X,Y,'-k'); - delete(h.BranchLines(~tr.activeNodes)) - h.BranchLines = h.BranchLines(tr.activeNodes); -end - -% drawing nodes -switch renderType - case {'square','angular'} - switch orientation - case {'left','right'} - h.BranchDots = plot(tr.x(branchIndex(tr.activeNodes(branchIndex))),... - tr.y(branchIndex(tr.activeNodes(branchIndex))),'o',... - 'MarkerSize',5,'MarkerEdgeColor','k',... - 'MarkerFaceColor','b'); - h.LeafDots = plot(tr.x(leafIndex(tr.activeNodes(leafIndex))),... - tr.y(leafIndex(tr.activeNodes(leafIndex))),'square',... - 'MarkerSize',4,'MarkerEdgeColor','k',... - 'MarkerFaceColor','w'); - case {'top','bottom'} - h.BranchDots = plot(tr.y(branchIndex(tr.activeNodes(branchIndex))),... - tr.x(branchIndex(tr.activeNodes(branchIndex))),'o',... - 'MarkerSize',5,'MarkerEdgeColor','k',... - 'MarkerFaceColor','b'); - h.LeafDots = plot(tr.y(leafIndex(tr.activeNodes(leafIndex))),... - tr.x(leafIndex(tr.activeNodes(leafIndex))),'square',... - 'MarkerSize',4,'MarkerEdgeColor','k',... - 'MarkerFaceColor','w'); - end - case 'radial' - h.BranchDots = plot(tr.x(branchIndex(tr.activeNodes(branchIndex))),... - tr.y(branchIndex(tr.activeNodes(branchIndex))),'o',... - 'MarkerSize',5,'MarkerEdgeColor','k',... - 'MarkerFaceColor','b'); - h.LeafDots = plot(tr.x(leafIndex(tr.activeNodes(leafIndex))),... - tr.y(leafIndex(tr.activeNodes(leafIndex))),'square',... - 'MarkerSize',4,'MarkerEdgeColor','k',... - 'MarkerFaceColor','w'); -end - -% resize figure if needed -switch renderType - case {'square','angular'} - switch orientation - case {'left','right'} - correctFigureSize(fig, 15 * numel(tr.terminalNodes),0); - fontRatio = max(get(fig,'Position').*[0 0 0 1])/numel(tr.terminalNodes); - case {'top','bottom'} - correctFigureSize(fig, 0, 15 * numel(tr.terminalNodes)); - fontRatio = max(get(fig,'Position').*[0 0 1 0])/numel(tr.terminalNodes); - end - case 'radial' - temp = 10/pi*numel(tr.terminalNodes); - correctFigureSize(fig,temp,temp); - fontRatio = max(get(fig,'Position').*[0 0 1 0])/numel(tr.terminalNodes); -end - -set(h.axes,'Fontsize',min(9,ceil(fontRatio/1.5))); - -% set branch node labels -X = tr.x(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels))); -Y = tr.y(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels))); -switch renderType - case {'square','angular'} - switch orientation - case {'left'} - h.branchNodeLabels = text(X+sepUnit(1)/2,Y,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels)))); - set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on') - set(h.branchNodeLabels,'vertical','bottom') - set(h.branchNodeLabels,'horizontal','right') - set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio/2))); - case {'right'} - h.branchNodeLabels = text(X+sepUnit(1)/2,Y,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels)))); - set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on') - set(h.branchNodeLabels,'vertical','bottom') - set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio/2))); - case {'top'} - h.branchNodeLabels = text(Y,X-sepUnit(1)/2,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels)))); - set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on') - set(h.branchNodeLabels,'vertical','bottom','Rotation',30) - set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio/2))); - case {'bottom'} - h.branchNodeLabels = text(Y,X+sepUnit(1)/2,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels)))); - set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on') - set(h.branchNodeLabels,'vertical','bottom','Rotation',30) - set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio/2))); - end - case 'radial' - h.branchNodeLabels = text(X,Y,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels)))); - set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on') - set(h.branchNodeLabels,'vertical','bottom') - set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio*1.2))); - for ind = 1:numel(h.branchNodeLabels) - if X(ind)<0 - set(h.branchNodeLabels(ind),'horizontal','right') - set(h.branchNodeLabels(ind),'Position',get(h.branchNodeLabels(ind),'Position')+[sepUnit(1)/2 0 0]) - else - set(h.branchNodeLabels(ind),'horizontal','left') - set(h.branchNodeLabels(ind),'Position',get(h.branchNodeLabels(ind),'Position')-[sepUnit(1)/2 0 0]) - end - end -end - -% set leaf nodes labels -X = tr.x(leafIndex(tr.activeNodes(1:tr.numLeaves))); -Y = tr.y(leafIndex(tr.activeNodes(1:tr.numLeaves))); -switch renderType - case {'square','angular'} - switch orientation - case {'left'} - h.leafNodeLabels = text(X-sepUnit(1)/2,Y,tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves)))); - set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on') - set(h.leafNodeLabels,'horizontal','left') - set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio/2))); - case {'right'} - h.leafNodeLabels = text(X-sepUnit(1)/2,Y,tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves)))); - set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on') - set(h.leafNodeLabels,'horizontal','right') - set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio/2))); - case {'top'} - h.leafNodeLabels = text(Y,X-sepUnit(1)/2,tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves)))); - set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on') - set(h.leafNodeLabels,'horizontal','left','Rotation',60) - set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio/2))); - case {'bottom'} - h.leafNodeLabels = text(Y,X-sepUnit(1),tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves)))); - set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on') - set(h.leafNodeLabels,'horizontal','right','Rotation',60) - set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio/2))); - end - case 'radial' - h.leafNodeLabels = text(X,Y,tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves)))); - set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on') - set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio*1.2))); - % textHeight = mean(cell2mat(get(h.leafNodeLabels,'Extent')))*[0 0 0 1]'; - for ind = 1:numel(h.leafNodeLabels) - if X(ind)<0 - set(h.leafNodeLabels(ind),'horizontal','right') - set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')+[sepUnit(1) 0 0]) - else - set(h.leafNodeLabels(ind),'horizontal','left') - set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')-[sepUnit(1) 0 0]) - end - % a=atan(Y(ind)/X(ind))*180/pi; - % if a > 0 a = max(0,a-60)/2; else - % a = min(0,a+60)/2; end - % set(h.leafNodeLabels(ind),'Rotation',a) - end - [sortedY,hsY]=sort(Y); - idx=hsY(X(hsY)>0 & sortedY>0); - if numel(idx) - extentY = get(h.leafNodeLabels(idx(1)),'Extent')*[0;0;0;1]; - positionY = get(h.leafNodeLabels(idx(1)),'Position')*[0;1;0]; - for i = 2:numel(idx) - position = get(h.leafNodeLabels(idx(i)),'Position'); - positionY = max(positionY+extentY,position(2)); - position(2) = positionY; - set(h.leafNodeLabels(idx(i)),'Position',position) - end - end - idx=hsY(X(hsY)<0 & sortedY>0); - if numel(idx) - extentY = get(h.leafNodeLabels(idx(1)),'Extent')*[0;0;0;1]; - positionY = get(h.leafNodeLabels(idx(1)),'Position')*[0;1;0]; - for i = 2:numel(idx) - position = get(h.leafNodeLabels(idx(i)),'Position'); - positionY = max(positionY+extentY,position(2)); - position(2) = positionY; - set(h.leafNodeLabels(idx(i)),'Position',position) - end - end - idx=flipud(hsY(X(hsY)>0 & sortedY<0)); - if numel(idx) - extentY = get(h.leafNodeLabels(idx(1)),'Extent')*[0;0;0;1]; - positionY = get(h.leafNodeLabels(idx(1)),'Position')*[0;1;0]; - for i = 2:numel(idx) - position = get(h.leafNodeLabels(idx(i)),'Position'); - positionY = min(positionY-extentY,position(2)); - position(2) = positionY; - set(h.leafNodeLabels(idx(i)),'Position',position) - end - end - idx=flipud(hsY(X(hsY)<0 & sortedY<0)); - if numel(idx) - extentY = get(h.leafNodeLabels(idx(1)),'Extent')*[0;0;0;1]; - positionY = get(h.leafNodeLabels(idx(1)),'Position')*[0;1;0]; - for i = 2:numel(idx) - position = get(h.leafNodeLabels(idx(i)),'Position'); - positionY = min(positionY-extentY,position(2)); - position(2) = positionY; - set(h.leafNodeLabels(idx(i)),'Position',position) - end - end - -end - -% correct axis limits given the extent of labels -if dispBranchLabels - E = cell2mat(get(h.branchNodeLabels,'Extent')); - if strcmp(get(gca,'XDir'),'reverse') - E(:,1) = E(:,1) - E(:,3); - end - if strcmp(get(gca,'YDir'),'reverse') - E(:,2) = E(:,2) - E(:,4); - end - E=[E;[xlim*[1;0] ylim*[1;0] diff(xlim) diff(ylim)]]; - mins = min(E(:,[1 2])); - maxs = max([sum(E(:,[1 3]),2) sum(E(:,[2 4]),2)]); - axis([mins(1) maxs(1) mins(2) maxs(2)]) -end - -if dispLeafLabels - E = cell2mat(get(h.leafNodeLabels,'Extent')); - if strcmp(get(gca,'XDir'),'reverse') - E(:,1) = E(:,1) - E(:,3); - end - if strcmp(get(gca,'YDir'),'reverse') - E(:,2) = E(:,2) - E(:,4); - end - E=[E;[xlim*[1;0] ylim*[1;0] diff(xlim) diff(ylim)]]; - mins = min(E(:,[1 2])); - maxs = max([sum(E(:,[1 3]),2) sum(E(:,[2 4]),2)]); - axis([mins(1) maxs(1) mins(2) maxs(2)]) -end - -% set terminal nodes labels -switch renderType - case {'square','angular'} - X = tr.x(tr.terminalNodes) * 0; - Y = tr.y(tr.terminalNodes); - switch orientation - case {'left'} - X = X + max(xlim) - sepUnit(1)/2; - h.terminalNodeLabels = text(X,Y,tr.names(tr.terminalNodes)); - case {'right'} - X = X + max(xlim) - sepUnit(1)/2; - h.terminalNodeLabels = text(X,Y,tr.names(tr.terminalNodes)); - set(h.terminalNodeLabels,'Horizontal','right') - case {'top'} - X = X + max(ylim) - sepUnit(1)/2; - h.terminalNodeLabels = text(Y,X,tr.names(tr.terminalNodes)); - set(h.terminalNodeLabels,'Rotation',90) - case {'bottom'} - X = X + max(ylim) - sepUnit(1)/2; - h.terminalNodeLabels = text(Y,X,tr.names(tr.terminalNodes)); - set(h.terminalNodeLabels,'Rotation',270) - end - case 'radial' - h.terminalNodeLabels = text(0,0,' '); -end - -if dispTerminalLabels - set(h.terminalNodeLabels,'Fontsize',min(9,ceil(fontRatio/1.5))); -end - -if ~dispBranchLabels - set(h.branchNodeLabels,'visible','off'); -end -if ~dispLeafLabels - set(h.leafNodeLabels,'visible','off'); -end -if ~dispTerminalLabels - set(h.terminalNodeLabels,'visible','off'); -end - -box on -hold off - -% store handles -set(fig,'UserData',h) -if nargout - handles = h; -end - -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function tr = doBasicCalculations(tr,activeBranches,renderType) - -% helper function to compute and find some features of the tree -tr.numLeaves = tr.numBranches + 1; -tr.numLabels = tr.numBranches + tr.numLeaves; - -% remove uderscores from names -for ind = 1:tr.numLabels - tr.names{ind}(tr.names{ind}=='_')=' '; -end - -% obtain parents for every node -tr.par(tr.tree(:)) = tr.numLeaves + [1:tr.numBranches 1:tr.numBranches]; - -% find active nodes -tr.activeNodes = true(tr.numLabels,1); -for ind =tr.numBranches:-1:1 - tr.activeNodes(tr.tree(ind,:)) = tr.activeNodes(tr.numLeaves+ind) & activeBranches(ind); -end - -% propagate last leaf -tr.lastleaf = 1:tr.numLabels; -for ind = tr.numBranches:-1:1 - if ~tr.activeNodes(tr.tree(ind,1)) - tr.lastleaf(tr.tree(ind,:))=tr.lastleaf(ind+tr.numLeaves); - end -end - -tr.activeBranches = tr.activeNodes(tr.numLeaves+1:tr.numLabels)&activeBranches; -tr.activeLeaves = tr.activeNodes(1:tr.numLeaves); - -% find x coordinates of branches -tr.x = tr.dist; -for ind = tr.numBranches:-1:1 - tr.x(tr.tree(ind,:)) = tr.x(tr.tree(ind,:)) + tr.x(ind+tr.numLeaves); -end - -% find y coordinates of branches -tr.terminalNodes = tr.lastleaf([true,diff(tr.lastleaf(1:tr.numLeaves))~=0]); -tr.y=zeros(tr.numLabels,1); -tr.y(tr.terminalNodes)=1:length(tr.terminalNodes); -switch renderType - case 'square' - for ind = 1:tr.numBranches - if tr.activeBranches(ind) - tr.y(ind+tr.numLeaves) = mean(tr.y(tr.tree(ind,:))); - end - end - case {'angular','radial'} - for ind = 1:tr.numBranches - if tr.activeBranches(ind) - if tr.x(tr.tree(ind,1))/tr.x(tr.tree(ind,2))>3 - tr.y(ind+tr.numLeaves) = tr.y(tr.tree(ind,1)); - elseif tr.x(tr.tree(ind,2))/tr.x(tr.tree(ind,1))>3 - tr.y(ind+tr.numLeaves) = tr.y(tr.tree(ind,2)); - else - tr.y(ind+tr.numLeaves) = mean(tr.y(tr.tree(ind,:))); - end - end - end -end - -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function correctFigureSize(fig,recommendedHeight,recommendedWidth) -% helper function to increase initial figure size depending on the screen & -% tree sizes -screenSize = diff(reshape(get(0,'ScreenSize'),2,2),[],2)-[0;100]; -% 100 gives extra space for the figure header and win toolbar -position = get(fig,'Position'); -if recommendedHeight > position(4) - if recommendedHeight < sum(position([2 4])) - position(2) = sum(position([2 4])) - recommendedHeight; - position(4) = recommendedHeight; - elseif recommendedHeight < screenSize(2) - position(2) = 30; - position(4) = recommendedHeight; - else - position(2) = 30; - position(4) = screenSize(2); - end -end -if recommendedWidth > position(3) - if recommendedWidth < sum(position([1 3])) - position(1) = sum(position([1 3])) - recommendedWidth; - position(3) = recommendedWidth; - elseif recommendedWidth < screenSize(1) - position(1) = 0; - position(3) = recommendedHeight; - else - position(1) = 0; - position(3) = screenSize(1); - end -end -set(fig,'Position',position) diff --git a/matlab/graph/@phyTree/Prune.m b/matlab/graph/@phyTree/Prune.m deleted file mode 100644 index fdd26a1..0000000 --- a/matlab/graph/@phyTree/Prune.m +++ /dev/null @@ -1,197 +0,0 @@ -function tr = prune(tr,sel,varargin) -%PRUNE Reduces a phylogenetic tree by removing branch and leaf nodes. -% T2 = PRUNE(T1,NODES) prunes the nodes listed in the NODES vector from -% the tree T1. Any branch (or leaf) node listed in NODES and all their -% descendants will disappear. The respective 'parent' nodes will be -% connected to the respective 'brother' nodes as required. NODES in the -% tree are indexed as [1:NUMLEAVES] for the leaves and as -% [NUMLEAVES+1 : NUMLEAVES+NUMBRANCHES] for the branches. NODES can also -% be a logical array of following sizes: [NUMLEAVES+NUMBRANCHES x 1], -% [NUMLEAVES x 1] or [NUMBRANCHES x 1]. -% -% T2 = PRUNE(T1,NODES,'MODE','EXCLUSIVE') changes the pruning mode to -% 'EXCLUSIVE', i.e. only the descendants of NODES will be pruned. Then -% NODES will become leaves as long as they do not have a predecessor in -% the list NODES. In this case pruning is the process of reducing a tree -% by turning some branch nodes into leaf nodes, and removing the leaf -% nodes under the original branch. Default is 'INCLUSIVE' and it behaves -% as explained above, i.e. the listed NODES are also pruned. -% -% Examples: -% -% % Load a phylogenetic tree created from a protein family: -% tr = phytreeread('pf00002.tree'); -% view(tr) -% -% % To remove all the 'mouse' proteins use: -% ind = getbyname(tr,'mouse'); -% tr = prune(tr,ind); -% view(tr) -% -% % To remove potential outliers in the tree use: -% [sel,sel_leaves] = select(tr,'criteria','distance','threshold',.3,... -% 'reference','leaves','exclude','leaves','propagate','toleaves'); -% tr = prune(tr,~sel_leaves) -% view(tr) -% -% See also PHYTREE, PHYTREE/SELECT, PHYTREE/GET, PHYTREETOOL. - -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.4.8.2.1 $ $Date: 2004/11/30 03:45:24 $ - -% set default -exclusiveMode = false; - -if numel(tr)~=1 - error('Bioinfo:phytree:prune:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -btr = tr; -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -% validate sel -if islogical(sel) - if numel(sel)==numLabels - sel = sel(:)==true; - elseif numel(sel)==numLeaves - sel = [sel(:);false(numBranches,1)]; - elseif numel(sel)==numBranches - sel = [false(numLeaves,1);sel(:)]; - else - error('Bioinfo:IncorrectLogical',... - 'Logical vector must have the same number of elements as nodes in the Phylogenetic Tree'); - end -elseif isnumeric(sel) && isreal(sel) && all(sel>=1) && all(sel<=numLabels) - tem(numLabels)=false; - tem(floor(sel))=true; - sel=tem(:); -else - error('Bioinfo:IncorrectTypeofArguments','Invalid value for NODES'); -end - -nvarargin = numel(varargin); -if nvarargin - if rem(nvarargin,2) - error('Bioinfo:IncorrectNumberOfArguments',... - 'Incorrect number of arguments to %s.',mfilename); - end - okargs = {'mode',''}; - for j=1:2:nvarargin - pname = varargin{j}; - pval = varargin{j+1}; - k = find(strncmpi(pname,okargs,numel(pname))); - if isempty(k) - error('Bioinfo:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('Bioinfo:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - switch(k) - case 1 % classifiers - modeOptions = {'exclusive','inclusive'}; - modeSelected = strmatch(lower(pval),modeOptions); %#ok - if isempty(modeSelected) - error('Bioinfo:NotValidMode','Not a valid mode.') - end - exclusiveMode = modeSelected==1; - end - end - end -end - -% shortcut for an empty sel -if ~sum(sel) - return; -end - -% when inclusiveMode if the two chidren of a branch are selected then the -% parent node should also be selected -if ~exclusiveMode - for ind = 1:numBranches - if all(sel(tr.tree(ind,:))) - sel(ind+numLeaves) = true; - end - end -end - -% find descendants not selected under selected nodes -for ind = numBranches:-1:1 - if sel(ind+numLeaves) - sel(tr.tree(ind,:))=true; - end -end - - if sel(numLabels) - warning('Bioinfo:PrunedRoot',... - 'Can not prune the root node in a Phylogenetic Tree.') - tr=btr; return - end - -% obtain parents for every node -parents(tr.tree(:)) = repmat(numLeaves+1:numLabels,2,1)'; - -if ~exclusiveMode % (the selected nodes are deleted with their descendants) - % find the top selected nodes in order to edit branches - htop = find(~[sel(parents);0]&sel); - % for every top node do the junction - for ind = 1:length(htop) - g=htop(ind); - mypar = parents(g); - if mypar < numLabels % my parent is NOT the root - % then connect brother to granparent - mygrpar = parents(mypar); % grandparent - myuncle = setxor(tr.tree(mygrpar-numLeaves,:),mypar); % uncle - mybro = setxor(tr.tree(mypar-numLeaves,:),g); % brother - tr.tree(mygrpar-numLeaves,:) = [myuncle mybro]; - tr.dist(mybro) = tr.dist(mybro) + tr.dist(mypar); - parents(mybro) = mygrpar; - end - sel(mypar) = true; %also delete my par - - end - if sum(~sel) == 1 - warning('Bioinfo:NotAMinimumTree',... - 'The selected nodes lead to only one leaf, Phylogenetic Tree not pruned.') - tr=btr; return - end - % find indexes to change tree - permuta = 1:numLabels; - permuta(sel) = []; - ipermuta(permuta) = 1:length(permuta); - permutaBranches = permuta(permuta>numLeaves)-numLeaves; - % update all tree structure fields - tr.names = tr.names(permuta); - tr.dist = tr.dist(permuta); - tr.dist(end) = 0; - tr.tree = tr.tree(permutaBranches,:); - tr.tree = ipermuta(tr.tree); - -else % exclusiveMode (the selected nodes are not deleted, only their descendants) - - % unselect leaves which are already in the top - sel(1:numLeaves)=sel(parents(1:numLeaves)); - % find the top selected nodes in order to edit branches - top = [~sel(parents);1] & sel; - % find the new leaves (no deleted leaves + branches that become leaves) - newLeaves = [~sel(1:numLeaves);top(numLeaves+1:end)]; - % find which branches will stay - stayingBranches = ~sel(numLeaves + 1 : numLabels); - % setting new indexes to change the tree architecture - permuta = [find(newLeaves);numLeaves+find(stayingBranches)]; - ipermuta(permuta) = 1:length(permuta); - % update all tree structure fields - tr.names = tr.names(permuta); - tr.dist = tr.dist(permuta); - tr.dist(end) = 0; - tr.tree = tr.tree(stayingBranches,:); - tr.tree = ipermuta(tr.tree); - % calling phytree with this format to force edge-crossing check - tr = phytree(tr.tree,tr.dist,tr.names); - -end % if ~exclusiveMode - - diff --git a/matlab/graph/@phyTree/SET.m b/matlab/graph/@phyTree/SET.m deleted file mode 100644 index e2ab488..0000000 --- a/matlab/graph/@phyTree/SET.m +++ /dev/null @@ -1,21 +0,0 @@ -function tr = set(tr,varargin) %#ok -%SET Set object properties of a phylogenetic tree object. -% -% Properties in a phylogenetic tree object can not be manually set. -% A PHYTREE object must be created by its constructor method PHYTREE -% or by using one of the functions: PHYTREEREAD, SEQLINKAGE, SEQNEIGHJOIN. -% -% See also: PHYTREE, PHYTREEREAD, SEQLINKAGE, SEQNEIGHJOIN. - -% Copyright 2003-2006 The MathWorks, Inc. -% $Revision: 1.1.8.2.2.1 $ $Author: batserve $ $Date: 2006/07/27 21:37:51 $ - -% error('Bioinfo:phytree:set:NotAllowedMethod',... -% ['Properties in a phylogenetic tree object can not be manually set.\n'... -% 'A PHYTREE object must be created by its constructor method PHYTREE\n'... -% 'or by using one of the functions: PHYTREEREAD, SEQLINKAGE, SEQNEIGHJOIN.']) - -% numBranches = size(tr.tree,1); -% numLeaves = numBranches + 1; -% -% tr.names(1:numLeaves)= varargin{2}'; \ No newline at end of file diff --git a/matlab/graph/@phyTree/Select.m b/matlab/graph/@phyTree/Select.m deleted file mode 100644 index aaf28fb..0000000 --- a/matlab/graph/@phyTree/Select.m +++ /dev/null @@ -1,313 +0,0 @@ -function [sel,sell,selb] = select(tr,varargin) -%SELECT Selects tree branches and leaves. -% -% S = SELECT(T,N) returns a logical vector S of size [NUMNODES x 1] -% indicating N closest nodes to the root node of the phylogenetic tree -% object T (NUMNODES = NUMLEAVES + NUMBRANCHES). The first criteria used -% is branch levels, then patristic distance, also known as tree distance. -% By default SELECT, uses INF as the value of N, therefore SELECT(T) will -% return a vector of 'trues'. -% -% S = SELECT(...,'REFERENCE',R) changes the reference point(s) to measure -% the closeness. R can be 'root' (default) or 'leaves'. When using -% 'leaves', a node to be tested may have different distances to its -% descendant leaves, which are the references (e.g. a non-ultrametric -% tree), if this the case the minimum distance to any descendant leaf -% will be considered. R may also be an index which points to any node of -% the tree. -% -% S = SELECT(...,'CRITERIA',C) changes the criteria used to measure -% closeness. If C='levels' (default) then the first criteria is branch -% levels and then patristic distance. If C='distance' then the first -% criteria is patristic distance and then branch levels. -% -% S = SELECT(...,'THRESHOLD',V) selects all the nodes which closeness is -% less or equal than the threshold value V. Observe that either -% 'criteria' or either 'reference' can be used. If N is not specified -% N = INF, otherwise the output can be further size limited by N. -% -% S = SELECT(...,'EXCLUDE',E) sets a post-filter which excludes all the -% branch nodes from S when E=='branches' or all the leaf nodes when -% E=='leaves'. The default is 'none'. -% -% S = SELECT(...,'PROPAGATE',P) activates a post-functionality which -% propagates the selected nodes to the leaves when P=='toleaves' or -% towards the root finding a common ancestor when P=='toroot'. The -% default is 'none', P may also be 'both'. 'PROPAGATE' switch acts after -% 'EXCLUDE' switch. -% -% [S,SELLEAVES,SELBRANCHES] = SELECT(...) returns two additional logical -% vectors, one for the selected leaves and one for the selected branches. -% -% Examples: -% -% % Load a phylogenetic tree created from a protein family: -% tr = phytreeread('pf00002.tree'); -% -% % To find close products for a given protein (e.g. vips_human): -% ind = getbyname(tr,'vips_human'); -% [sel,sel_leaves] = select(tr,'criteria','distance',... -% 'threshold',0.6,'reference',ind); -% view(tr,sel_leaves) -% -% % To find potential outliers in the tree use: -% [sel,sel_leaves] = select(tr,'criteria','distance','threshold',.3,... -% 'reference','leaves','exclude','leaves','propagate','toleaves'); -% view(tr,~sel_leaves) -% -% -% See also PHYTREE, PHYTREE/GET, PHYTREE/PDIST, PHYTREE/PRUNE, PHYTREETOOL. - -% -% Copyright 2003-2006 The MathWorks, Inc. -% $Revision: 1.1.6.7.2.1 $ $Author: batserve $ $Date: 2006/07/27 21:37:50 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:select:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -% set defaults -V=inf; -CriteriaIsDistance = false; -ReferenceIs = 'root'; -ExcludeSwitch = false; -PostPropagate = false; - -% check is first argument is N, otherwise set N to default -if (nargin>1 && isnumeric(varargin{1}) && isreal(varargin{1})) - N = floor(varargin{1}); - first_arg = 3; -else - N = inf; - first_arg = 2; -end - -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -% identify input arguments -if nargin - first_arg + 1 > 0 - if rem(nargin - first_arg,2) == 0 - error('Bioinfo:phytree:select:IncorrectNumberOfArguments',... - 'Incorrect number of arguments to %s.',mfilename); - end - okargs = {'reference','criteria','threshold','exclude','propagate'}; - for j=first_arg - 1 : 2 : nargin - first_arg + 1 - pname = varargin{j}; - pval = varargin{j+1}; - k = find(strncmpi(pname,okargs,numel(pname))); - if isempty(k) - error('Bioinfo:phytree:select:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('Bioinfo:phytree:select:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - switch(k) - case 1 % reference - if islogical(pval) - if numel(pval)==numLabels - pval = pval(:)==true; - elseif numel(pval)==numLeaves - pval = [pval(:);false(numBranches,1)]; - elseif numel(pval)==numBranches - pval = [false(numLeaves,1);pval(:)]; - else - error('Bioinfo:phytree:select:InvalidSizeLogicalReferenceNode',... - 'When reference node is a logical vector it must contain NUMNODES, NUMLEAVES or NUMBRANCHES elements.') - end - pval = find(pval); - ReferenceIs = 'node'; - if numel(pval) ~= 1 - error('Bioinfo:phytree:select:InvalidValuesLogicalReferenceNode',... - 'When reference node is a logical vector one element must be true and all others false.') - else - ReferenceNode = pval; - end - elseif isnumeric(pval) - ReferenceIs = 'node'; - if numel(pval) ~= 1 - error('Bioinfo:phytree:select:InvalidSizeReferenceNode',... - 'Reference node must be scalar.') - elseif all(pval~=1:numLabels) - error('Bioinfo:phytree:select:InvalidValueReferenceNode',... - 'Incorrect reference node.') - else - ReferenceNode = pval; - end - else - h = strmatch(lower(pval),{'root','leaves'}); %#ok - if numel(h) - switch(h) - case 1 - ReferenceIs = 'root'; - case 2 - ReferenceIs = 'leaves'; - end - else error('Bioinfo:phytree:select:InvalidStringReferenceNode',... - 'Invalid string for the reference node.'); - end - end - case 2 % criteria - h = strmatch(lower(pval),{'distance','levels'}); %#ok - if numel(h) - CriteriaIsDistance = (h == 1); - else error('Bioinfo:phytree:select:InvalidCriteria',... - 'Invalid string for criteria.'); - end - case 3 % threshold - V = pval; - if (~isnumeric(V) || ~isreal(V) || numel(V)>1) - error('Bioinfo:phytree:select:InvalidThreshold',... - 'Invalid value for V.'); - end - case 4 % exclude - h = strmatch(lower(pval),{'branches','leaves','none'}); %#ok - if numel(h) - switch(h) - case 1 - ExcludeSwitch = true; - ExcludeType = 'branches'; - case 2 - ExcludeSwitch = true; - ExcludeType = 'leaves'; - case 3 - ExcludeSwitch = false; - end - else error('Bioinfo:phytree:select:InvalidExcludeOption',... - 'Invalid string for exclude switch.'); - end - case 5 % propagate - h = strmatch(lower(pval),{'toleaves','toroot','both','none'}); %#ok - if numel(h) - switch(h) - case 1 - PostPropagate = true; - PostPropagateType = 'toleaves'; - case 2 - PostPropagate = true; - PostPropagateType = 'toroot'; - case 3 - PostPropagate = true; - PostPropagateType = 'both'; - case 4 - PostPropagate = false; - end - else error('Bioinfo:phytree:select:InvalidPostPropagate',... - 'Invalid string for post-propagate switch.'); - end - end % switch(k) - end % if ... - end % for j=... -end % nargin - -% calculate the distance (and levels) of every node to the reference -levels2Ref = zeros(numLabels,1); -switch ReferenceIs - case 'root' - % calculate the distance to the root for every node - dist2Ref = tr.dist; - for ind = numBranches:-1:1 - dist2Ref(tr.tree(ind,:)) = ... - dist2Ref(tr.tree(ind,:)) + dist2Ref(ind+numLeaves); - levels2Ref(tr.tree(ind,:)) = levels2Ref(ind+numLeaves) + 1; - end - case 'leaves' - % calculate the distance to the closest leaf for every node - dist2Ref = zeros(numLabels,1); - for ind = 1:numBranches - dist2Ref(ind+numLeaves) = ... - min(dist2Ref(tr.tree(ind,:))+tr.dist(tr.tree(ind,:))); - levels2Ref(ind+numLeaves) = min(levels2Ref(tr.tree(ind,:))) + 1; - end - case 'node' - refVector = zeros(numLabels,1); - refVector(ReferenceNode)=1; - dist2Ref = pdist(tr,'SquareForm',true,'nodes','all')... - * refVector; - tr.dist = ones(numLabels,1); % to count now levels ! - levels2Ref = pdist(tr,'SquareForm',true,'nodes','all') ... - * refVector; -end - -% applies the threshold value -if CriteriaIsDistance - sel = dist2Ref < V; -else % ~CriteriaIsDistance - sel = levels2Ref < V; -end % if CriteriaIsDistance - -% needs to remove additional nodes because of N -if sum(sel)>N - if CriteriaIsDistance - [dum,h]=sortrows([dist2Ref levels2Ref]); %#ok - else % ~CriteriaIsDistance - [dum,h]=sortrows([levels2Ref dist2Ref]); %#ok - end % if CriteriaIsDistance - g=h(sel(h)); - sel(g(N+1:end))=false; -end - -% exclude option -if ExcludeSwitch - switch ExcludeType - case 'branches' - sel((1+numLeaves):numLabels) = false; - case 'leaves' - sel(1:numLeaves) = false; - end -end - -% post-propagate option -if PostPropagate - - % expands all the current nodes towards the leaves - if any(strcmp({'toleaves','both'},PostPropagateType)) - for ind = numBranches:-1:1 - if sel(ind+numLeaves) - sel(tr.tree(ind,:))=true; - end - end - end - - % propagates towards the root finding the common ancestors - if any(strcmp({'toroot','both'},PostPropagateType)) - - % find closest common branch for every pair of nodes - % diagonal is invalid ! but not needed - - % initializing full matrix - commf = zeros(numLabels,'int16'); - children = false(1,numLabels); - for ind = numBranches:-1:1 - children(:) = false; - children(ind+numLeaves) = true; - for ind2 = ind:-1:1 - if children(ind2+numLeaves) - children(tr.tree(ind2,:))=true; - end - end - commf(children,children)=int16(ind); - end - commf = commf(sel,sel); - commf = commf - diag(diag(commf)); - commf = unique(commf(commf(:)>0)); - sel(commf+numLeaves) = true; - - % now propagates towards the common ancestor - for ind = 1:double(max(commf)) - if any(sel(tr.tree(ind,:))) - sel(ind+numLeaves) = true; - end - end - end -end - -if nargout > 1 - sell = sel(1:numLeaves); -end -if nargout > 2 - selb = sel(numLeaves+1:numLabels); -end diff --git a/matlab/graph/@phyTree/Subtree.m b/matlab/graph/@phyTree/Subtree.m deleted file mode 100644 index f119bde..0000000 --- a/matlab/graph/@phyTree/Subtree.m +++ /dev/null @@ -1,91 +0,0 @@ -function subtr = subtree(tr,nodes) -%SUBTREE Extracts a subtree. -% -% T2 = SUBTREE(T1,NODES) Extracts a new subtree T2 in which the new root -% is the first common ancestor of the NODES vector from T1. NODES in the -% tree are indexed as [1:NUMLEAVES] for the leaves and as [NUMLEAVES+1 : -% NUMLEAVES+NUMBRANCHES] for the branches. NODES can also be a logical -% array of following sizes: [NUMLEAVES+NUMBRANCHES x 1], [NUMLEAVES x 1] -% or [NUMBRANCHES x 1]. -% -% Example: -% -% % Load a phylogenetic tree created from a protein family: -% tr = phytreeread('pf00002.tree') -% -% % Get the subtree that contains the VIPS and CGRR human proteins: -% sel = getbyname(tr,{'vips_human','cgrr_human'}); -% sel = any(sel,2); -% tr = subtree(tr,sel) -% view(tr); -% -% See also PHYTREE, PHYTREE/PRUNE, PHYTREE/SELECT, PHYTREE/GET, -% PHYTREE/GETBYNAME. - -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.8.1 $ $Author: batserve $ $Date: 2005/06/09 21:57:04 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:subtree:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -% validate nodes -if islogical(nodes) - if numel(nodes)==numLabels - nodes = nodes(:)==true; - elseif numel(nodes)==numLeaves - nodes = [nodes(:);false(numBranches,1)]; - elseif numel(nodes)==numBranches - nodes = [false(numLeaves,1);nodes(:)]; - else - error('Bioinfo:phytree:subtree:IncorrectSizeInputVector',... - 'Logical vector must have the same number of elements as nodes in the Phylogenetic Tree.'); - end -elseif isnumeric(nodes) && isreal(nodes) && all(nodes(:)>=1) && all(nodes(:)<=numLabels) - tem = false(numLabels,1); - tem(floor(nodes(:))) = true; - nodes=tem(:); -else - error('Bioinfo:phytree:subtree:InvalidInputNodes',... - 'Invalid value for NODES.'); -end - -% at this point NODES should only be a logical vector - -if (~any(nodes(numLeaves+1:numLabels)) && (sum(nodes(1:numLeaves))<2)) - error('Bioinfo:phytree:subtree:InvalidSubtree',... - 'Subtree must contain at least two leaves.'); -end - - -% look for the first common ancestor that contains all selected nodes, -% accumulating the selected nodes towards the root, the common ancestor -% will be the first sum equal to the number of selected nodes -branchWidth = double(nodes); -for ind = 1:numBranches - branchWidth(numLeaves+ind) = branchWidth(numLeaves+ind) + ... - sum(branchWidth(tr.tree(ind,:))); -end -commonAncestor = find(branchWidth==sum(nodes),1); - -% now propagate the ancestor new) root towards the leaves to find all the -% nodes that should stay for the subtree -sel = false(1,numLabels); -sel(commonAncestor) = true; -for ind = commonAncestor:-1:numLeaves+1 - sel(tr.tree(ind-numLeaves,:)) = sel(ind); -end - -% extract the subtree -permuta = find(sel); -subtr=phytree; -ipermuta(permuta) = 1:length(permuta); -subtrNumLeaves = (ipermuta(end) + 1)/2; -subtr.tree = ipermuta(tr.tree(permuta(subtrNumLeaves+1:end)-numLeaves,:)); -subtr.dist = tr.dist(permuta); -subtr.names = tr.names(permuta); -subtr.dist(end) = 0; diff --git a/matlab/graph/@phyTree/View.m b/matlab/graph/@phyTree/View.m deleted file mode 100644 index 2fef37e..0000000 --- a/matlab/graph/@phyTree/View.m +++ /dev/null @@ -1,1618 +0,0 @@ -function View(tr,sel,propsForFigure) -%VIEW views a phylogenetic tree in phytreetool. -% -% VIEW(TREE) shows a phylogenetic tree object. The significant distances -% between branches and nodes are in horizontal direction, vertical -% coordinates are accommodated only for display purposes. Tree -% Edit/Analysis tools are accessible through the mouse left/right buttons -% and also using the 'Tree' menu. -% -% VIEW(TREE,SEL) starts the viewer with an initial selection of nodes -% specified by SEL. SEL can be a logical array of any of the following -% sizes: [NUMLEAVES+NUMBRANCHES x 1], [NUMLEAVES x 1], or [NUMBRANCHES x -% 1]. SEL may also be a list of indices. -% -% Examples: -% -% tr = phytreeread('pf00002.tree') -% view(tr) -% -% See also PHYTREE, PHYTREE/PLOT, PHYTREEREAD, PHYTREETOOL, SEQLINKAGE, -% SEQNEIGHJOIN. - -% Copyright 2003-2006 The MathWorks, Inc. -% $Revision: 1.1.6.19.2.1 $ $Author: batserve $ $Date: 2006/07/24 13:54:28 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:view:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -tr = doBasicCalculations(tr); - -nodeIndex = 1:tr.numLabels; -leafIndex = 1:tr.numLeaves; -branchIndex = tr.numLeaves+1:tr.numLabels; - -% check empty names -for ind = nodeIndex - if isempty(tr.names{ind}) - if ind > tr.numLeaves - tr.names{ind} = ['Branch ' num2str(ind-tr.numLeaves)]; - else - tr.names{ind} = ['Leaf ' num2str(ind)]; - end - end -end - -% initial drawing -if nargin<3 - propsForFigure.Name = ['Phylogenetic Tree Tool ' getphytreetoolnumber]; -end -propsForFigure.PruneWarning = getacceptedwarningfromothertools; -fig = figure('Renderer','ZBuffer','Name',propsForFigure.Name,... - 'NumberTitle','off','IntegerHandle','off','tag','PhyTreeTool'); -setappdata(fig,'propsForFigure',propsForFigure) -setappdata(fig,'backupTree',tr) -tr.ha = axes; hold on; -set(tr.ha,'Position',[.05 .05 .7 .9],'YTick',leafIndex,'FontSize',9,'Ydir','reverse',... - 'YAxisLocation','Right','YTickLabel',char(tr.names{leafIndex})) -tr.hlines = plot( ... - tr.x([nodeIndex;repmat([tr.par(1:tr.numLabels-1) tr.numLabels],2,1)]),... - tr.y([repmat(nodeIndex,2,1);[tr.par(1:tr.numLabels-1) tr.numLabels]]),... - '-k'); -tr.hpathline = plot(1,1,'--r','LineWidth',2,'Visible','off'); -tr.hdragbox = plot(1,1,':k','LineWidth',1,'Visible','off'); -tr.hdots(1,1) = plot(tr.x(branchIndex),tr.y(branchIndex),'o',... - 'MarkerSize',5,'MarkerEdgeColor','k','MarkerFaceColor','b'); -tr.hdots(1,2) = plot(tr.x(leafIndex),tr.y(leafIndex),'square',... - 'MarkerSize',4,'MarkerEdgeColor','k','MarkerFaceColor','w'); -tr.hseldots(1,1) = plot(tr.x(branchIndex),tr.y(branchIndex),'o',... - 'MarkerSize',5,'MarkerEdgeColor','r','MarkerFaceColor','r'); -tr.hseldots(1,2) = plot(tr.x(leafIndex),tr.y(leafIndex),'square',... - 'MarkerSize',4,'MarkerEdgeColor','r','MarkerFaceColor','r'); -tr.hldots(1,1) = plot(tr.x(branchIndex),tr.y(branchIndex),'o',... - 'MarkerSize',5,'MarkerEdgeColor',[.5 .5 .5],... - 'MarkerFaceColor',[.6 .6 1]); -tr.hldots(1,2) = plot(tr.x(leafIndex),tr.y(leafIndex),'square',... - 'MarkerSize',4,'MarkerEdgeColor',[.5 .5 .5],... - 'MarkerFaceColor','w'); -set(tr.hldots(1),'Xdata',[],'Ydata',[]) -set(tr.hldots(2),'Xdata',[],'Ydata',[]) -tr.axhold = plot([-eps -eps],[0 0],'.','MarkerSize',eps,'Color','w'); -tr.datatip = text(0,1,1,'k','Tag','TreeTag','BackgroundColor',[1 1 .93],... - 'Color', [0 0 0],'EdgeColor', [0.8 0.8 0.8],... - 'VerticalAlignment','Top','Clipping','off',... - 'Visible','off','Fontsize',8,'Interpreter','none'); - -if nargin == 1 || isempty(sel) - tr.selected = false(tr.numLabels,1); % selected nodes -else - % validate sel - if islogical(sel) - if numel(sel)==tr.numLabels - sel = sel(:)==true; - elseif numel(sel)==tr.numLeaves - sel = [sel(:);false(tr.numBranches,1)]; - elseif numel(sel)==tr.numBranches - sel = [false(tr.numLeaves,1);sel(:)]; - else - close(fig) - error('Bioinfo:phytree:view:IncorrectLogical',... - 'Logical vector must have the same number of elements as nodes in the Phylogenetic Tree'); - end - elseif isnumeric(sel) && isreal(sel) && all(sel>=1) && all(sel<=tr.numLabels) - tem(tr.numLabels)=false; - tem(floor(sel))=true; - sel=tem(:); - else - close(fig) - error('Bioinfo:phytree:view:IncorrectTypeofArguments',... - 'Invalid value for NODES'); - end - tr.selected =sel; -end - -% save more figure data needed for the gui functionality -tr.activeNodes = true(tr.numLabels,1); % active nodes -tr.activeBranches = true(tr.numBranches,1); % active Branches -tr.sel2root = false(tr.numLabels,1); % path sel-node to root -tr.editMode = 'Select'; % initial edit mode -tr.indicativeMode = false; % data-tip flag -tr.lastThresholdValue = []; % remembers last cut - -% create uicontrols (will appear as needed, initially invisible) -tr.editBox = uicontrol(fig,'Background',[1 1 1],'style','edit',... - 'visible','off','callback',@doneRenaming); -tr.slider = uicontrol(fig,'style','slider','SliderStep',[.1 .1],... - 'visible','off','callback',@sliderCallback); -tr.slidertx = uicontrol(fig,'style','text','visible','off'); -tr.sliderok = uicontrol(fig,'style','pushbutton','visible','off',... - 'string','OK','callback',@doThresholdCut); - -% setup callback for click over nodes -set([tr.hseldots,tr.hdots,tr.hldots],'ButtonDownFcn',@toggleNode) -% setup figure callback functions -set(fig,'WindowButtonDownFcn',@mouseClickOnFigure); -set(fig,'WindowButtonUpFcn',@mouseRelease); -set(fig,'WindowButtonMotionFcn',@localWindowButtonMotion); - -% setup UIMenus, context menus and toolbar -tr.hToggleUIMenu = makePhyTreeViewerUIMenus(fig); -tr.hToggleToolbar = makePhyTreeViewerToolbar(fig); -[tr.hToggleContextMenu,tr.hAxisContextMenu,tr.hDotsContextMenu] = ... - makePhyTreeViewerContextMenus(fig); -% activate Context Menus -set(tr.ha,'UIContextMenu',tr.hAxisContextMenu); -set([tr.hdots tr.hldots tr.hseldots],'UIContextMenu',tr.hDotsContextMenu); - -set(fig,'UserData',tr) % save figure data - -correctFigureSize(fig, 15 * tr.numLeaves); % resize figure if needed -setupYLabelsListeners; % listeners for YLabels -updateTree(fig,[],[]) % updates figure after all initializations -set(gca,'xLim',[0 max(tr.x)] + max(tr.x) * [-.1 .05]); -tr.yLim = get(tr.ha,'Ylim');tr.xLim = get(tr.ha,'Xlim'); -set(fig,'UserData',tr) % save figure data -toolsmenufcn(fig,'PanY') % set zoom mode to vertical constraining -toolsmenufcn(fig,'ZoomY') % set pan mode to vertical constraining -set(fig,'HandleVisibility','callback') % after all init, make it invisible - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function ylabelsListener(hSrc,event,hf,ha) %#ok -% Auto sizes the ylabels -ratio = max(get(hf,'Position').*[0 0 0 1])/diff(get(ha,'YLim')); -set(ha,'Fontsize',min(9,ceil(ratio/1.7))); % the gold formula -% Also verify if we need to re-position the slidebar of threshold cut -tr=get(hf,'Userdata'); -if any(strcmp(tr.editMode,{'Distance to Leaves','Distance to Root'})) - wS = get(hf,'Position'); % window dimensions - aP = get(tr.ha,'Position'); % axes position - set(tr.slider, 'Position',[aP(1)*wS(3) wS(4)-20 aP(3)*wS(3) 20]) - set(tr.slidertx,'Position',[sum(aP([1 3]))*wS(3) wS(4)-20 60 20]) - set(tr.sliderok,'Position',[sum(aP([1 3]))*wS(3)+60 wS(4)-20 30 20]) -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function mouseClickOnFigure(h,varargin) -% This callback function is activated when a mouse button is pressed in any -% location of the figure and under any of my edit modes -tr = get(gcbf,'Userdata'); -switch tr.editMode - case 'Renaming'; doneRenaming(h,varargin); - case 'Distance to Leaves'; cancelThresholdCut(h,varargin); - case 'Distance to Root'; cancelThresholdCut(h,varargin); - case 'Select'; - switch get(gcbf,'SelectionType') - case {'normal','extend'} - tr = get(gcbf,'userdata'); - cp = get(tr.ha,'CurrentPoint'); - xPos = cp(1,1); yPos = cp(1,2); - set(tr.hdragbox,'Visible','on',... - 'Xdata',repmat(xPos,5,1),'Ydata',repmat(yPos,5,1)) - case 'open' - autoFit(h) - end -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function toggleNode(h,varargin) -% This callback function is activated when a mouse button is pressed over -% any of the displayed nodes under any of my edit modes -hideActiveIndicators(h,varargin) -tr = get(gcbf,'Userdata'); -switch get(gcbf,'SelectionType') - case 'normal' - switch tr.editMode - case 'Select'; selectNode(h,varargin); - case 'Inspect'; inspectNode(h,varargin); - case 'Collapse/Expand'; collapseExpand(h,varargin); - case 'Rotate Branch'; rotateBranch(h,varargin); - case 'Rename'; renameNode(h,varargin); - case 'Renaming'; doneRenaming(h,varargin); - case 'Prune'; pruneTree(h,varargin); - case 'Distance to Leaves'; cancelThresholdCut(h,varargin); - case 'Distance to Root'; cancelThresholdCut(h,varargin); - end - case 'extend' - switch tr.editMode - case 'Select'; selectNode(h,varargin); - end - case 'alt' - case 'open' - end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function changeEditMode(h,varargin) %#ok -% Callback function to change the edit mode, this function is -% called from the toolbar, the context menu or the uimenu. -tr = get(gcbf,'Userdata'); -myModes = {'Inspect','Collapse/Expand','Rotate Branch','Rename','Prune'}; - -% first, disable any present edit mode -switch tr.editMode - case {myModes{:},'Select'}; - disableMyContextMenus(h) - disableMyWindowButtonActions(h) - ind = strmatch(tr.editMode,myModes); %#ok - set(tr.hToggleToolbar(ind), 'State','off') - set(tr.hToggleUIMenu(ind), 'Checked','off') - set(tr.hToggleContextMenu(ind),'Checked','off') - %case '&Zoom In'; toolsmenufcn(gcbf,'ZoomIn'); - case '&Zoom In'; zoom(gcbf,'off') - %case 'Zoom &Out'; toolsmenufcn(gcbf,'ZoomOut'); - case 'Zoom &Out'; zoom(gcbf,'off') - %case '&Pan'; toolsmenufcn(gcbf,'Pan'); - case '&Pan'; pan(gcbf,'off'); - case {'Distance to Leaves','Distance to Root'} - enableAllUI(h) - disableMyWindowButtonActions(h) -end - -% depending on the caller instance, determine the new edit mode -switch get(h,'Type') - case 'uimenu'; newEditMode = get(h,'Label'); - case 'uitoggletool' - newEditMode = get(h,'Tag'); - switch newEditMode - case 'Exploration.ZoomIn'; newEditMode = '&Zoom In'; - case 'Exploration.ZoomOut'; newEditMode = 'Zoom &Out'; - case 'Exploration.Pan'; newEditMode = '&Pan'; - end - otherwise; newEditMode = 'Select'; -end -%disp( [tr.editMode ' --> ' newEditMode] ) -% if new mode is the same then we are toggling off -if strcmp(newEditMode,tr.editMode) - newEditMode = 'Select'; -end - -% if changing to Prune, verify the warnign has been accepted -if strcmp(newEditMode,'Prune') - propsForFigure = getappdata(gcbf,'propsForFigure'); - if isequal(propsForFigure.PruneWarning,'NotDone') - warndlg(['Pruning nodes cannot be undone. Before continuing,',... - ' you may want to export the current tree to a new tool.'],... - 'Warning','modal') - setacceptedwarningtoothertools - end -end - -switch newEditMode - case '&Zoom In'; toolsmenufcn(gcbf,'ZoomIn'); - case 'Zoom &Out'; toolsmenufcn(gcbf,'ZoomOut'); - %case '&Pan'; toolsmenufcn(gcbf,'Pan'); - case '&Pan'; pan(gcbf,'on'); - case myModes; enableMyContextMenus(h) - enableMyWindowButtonActions(h) - ind = strmatch(newEditMode,myModes); %#ok - set(tr.hToggleToolbar(ind), 'State','on') - set(tr.hToggleUIMenu(ind), 'Checked','on') - set(tr.hToggleContextMenu(ind),'Checked','on') - case 'Select'; enableMyContextMenus(h) - enableMyWindowButtonActions(h) - case {'Distance to Leaves','Distance to Root'} - disableAllUI(h) - enableMyWindowButtonActions(h) - set(gcbf,'WindowButtonMotionFcn',[]) - set(gcbf,'WindowButtonUpFcn',[]) - set([tr.hseldots,tr.hdots,tr.hldots],'ButtonDownFcn',[]) -end - -switch newEditMode - case 'Inspect'; if sum(tr.selected(:)) ~= 1 - tr.selected(:) = false; - tr.selected(end) = true; - end - otherwise -end - -tr.sel2root = path2root(tr, tr.selected); -tr.editMode = newEditMode; -set(gcbf,'userdata',tr) -updateTree(gcbf,[],[]) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function hideActiveIndicators(h,varargin) %#ok -tr = get(gcbf,'userdata'); -set([tr.hpathline,tr.datatip],'visible','off') - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function disableAllUI(h,varargin) %#ok -hw = findall(gcbf,'Type','uimenu','Parent',gcbf); -gw = findall(gcbf,'Type','UIToggleTool'); -set([hw;gw],'Enable','off') - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function enableAllUI(h,varargin) %#ok -hw = findall(gcbf,'Type','uimenu','Parent',gcbf); -gw = findall(gcbf,'Type','UIToggleTool'); -set([hw;gw],'Enable','on') - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function disableMyContextMenus(h,varargin) %#ok -tr = get(gcbf,'userdata'); -set(tr.ha,'UIContextMenu',[]); -set([tr.hdots tr.hseldots],'UIContextMenu',[]); - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function enableMyContextMenus(h,varargin) %#ok -tr = get(gcbf,'userdata'); -set(tr.ha,'UIContextMenu',tr.hAxisContextMenu); -set([tr.hdots tr.hldots tr.hseldots],'UIContextMenu',tr.hDotsContextMenu); - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function disableMyWindowButtonActions(h,varargin) %#ok -set(gcbf,'WindowButtonDownFcn',[]); -set(gcbf,'WindowButtonUpFcn',[]) -set(gcbf,'WindowButtonMotionFcn',[]); -tr = get(gcbf,'userdata'); -set([tr.hseldots,tr.hdots,tr.hldots],'ButtonDownFcn',[]) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function enableMyWindowButtonActions(h,varargin) %#ok -set(gcbf,'WindowButtonDownFcn',@mouseClickOnFigure); -set(gcbf,'WindowButtonUpFcn',@mouseRelease); -set(gcbf,'WindowButtonMotionFcn',@localWindowButtonMotion); -tr = get(gcbf,'userdata'); -set([tr.hseldots,tr.hdots,tr.hldots],'ButtonDownFcn',@toggleNode) -set(gcbf,'KeyPressFcn',[]); -set(gcbf,'Pointer','arrow'); - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function localWindowButtonMotion(h,varargin) %#ok -% Callback function activated when moving over the axes, checks location of -% the mouse and puts datatip if over an active node. - -tr = get(h,'userdata'); -% set a virtual grid to get the point -xThres=diff(get(tr.ha,'Xlim'))/100; -yThres=diff(get(tr.ha,'Ylim'))/100; -cp = get(tr.ha,'CurrentPoint'); -xPos = cp(1,1); yPos = cp(1,2); -hp = tr.x<(xPos+xThres) & tr.x>(xPos-xThres) & ... - tr.y<(yPos+yThres) & tr.y>(yPos-yThres); -hp = find (hp & tr.activeNodes); - -% shortcut out when dragging a box in select mode -if strcmp(get(tr.hdragbox,'Visible'),'on') - xdata = get(tr.hdragbox,'XData');xdata([3,4]) = xPos; - ydata = get(tr.hdragbox,'YData');ydata([2,3]) = yPos; - set(tr.hdragbox,'XData',xdata,'YData',ydata) -% shortcut out when turning off 'indicative' mode -elseif tr.indicativeMode && isempty(hp) %&& isempty(tr.highligth) - set([tr.datatip tr.hpathline],'visible','off') - set(tr.hlines,'color','black') - set(tr.hldots(1),'Xdata',[],'Ydata',[]) - set(tr.hldots(2),'Xdata',[],'Ydata',[]) -% turn on or update 'indicative' mode -elseif numel(hp) % && isempty(tr.highligth) - % find leaves (children) below this branch - children = false(1,tr.numLabels); - children(hp(1)) = true; - for ind = hp(1)-tr.numLeaves:-1:1 - if children(ind+tr.numLeaves) - children(tr.tree(ind,:))=true; - end - end - - % find and draw path to selected - if strcmp(tr.editMode,'Inspect') - [pathA,pathB] = path2sel(tr,hp(1)); - dis2sel = tr.x(find(pathA,1))+tr.x(find(pathB,1))... - -2*tr.x(find(pathA,1,'last')); - if any(pathB) - xx = [tr.x(pathA);NaN;tr.x(pathB)]; - yy = [tr.y(pathA);NaN;tr.y(pathB)]; - hh=zeros(2*numel(xx),1); hh(1:2:end)=1; hh=cumsum(hh); - set(tr.hpathline,'XData',xx(hh(2:end)),... - 'YData',yy(hh(1:end-1)),'Visible','on'); - end - end - - % place text - name = [tr.names{hp(1)} ' ']; - name(name=='_')=' '; - children(hp(1)) = false; - numChil = sum(children(1:tr.numLeaves)); - childrenNames = char(tr.names(children(1:tr.numLeaves))); - childrenNames(childrenNames=='_')=' '; - childrenNames=[repmat(' ',size(childrenNames,1),1) childrenNames]; - switch tr.editMode - case 'Inspect' - if numChil - set(tr.datatip,'string',char( [ {name; ... - ['Dist to parent: ' num2str(tr.dist(hp(1)))];... - ['Dist to root: ' num2str(tr.x(hp(1))-tr.x(end))];... - ['Path length: ' num2str(dis2sel)];... - ['Samples: ' num2str(numChil)]};... - mat2cell(childrenNames,ones(size(childrenNames,1),1),... - size(childrenNames,2))])) - extraLines = 5; - else - set(tr.datatip,'string',char( {name; ... - ['Dist to parent: ' num2str(tr.dist(hp(1)))];... - ['Dist to root: ' num2str(tr.x(hp(1))-tr.x(end))];... - ['Path length: ' num2str(dis2sel)]})) - extraLines = 4; - end - case {'Collapse/Expand','Rotate Branch','Rename','Prune','Select'} - if numChil - set(tr.datatip,'string',char([ - {[name ' (' num2str(numChil) ' samples)']};... - mat2cell(childrenNames,ones(size(childrenNames,1),1),... - size(childrenNames,2))])) - else - set(tr.datatip,'string', name) - end - extraLines = 1; - otherwise % all other modes - end - - %compute some values before adjusting data tip - fp = get(gcbf,'Position'); % fig position in points - fh = fp(4);%fw = fp(3); % fig size (height & width) in points - ap = get(tr.ha,'Position'); % axis position normalized - yl = ylim(tr.ha); yl = yl - ... - [ap(2) ap(2)+ap(4)-1]*diff(yl)/ap(4); % fig height limits in axis units - xl = xlim(tr.ha); xl = xl - ... - [ap(1) ap(1)+ap(3)-1]*diff(xl)/ap(3); % fig width limits in axis units - yPosPt = (-yPos -4*yThres + yl(2))*fh/diff(yl); % datatip position in pts - reqPt = (numChil+extraLines)*14+2; % required datatip height in pts - % adjust if other fontsize is used - - %adjust string of datatip if it will not fit (i.e. remove names) - if reqPt > fh - str = get(tr.datatip,'String'); - set(tr.datatip,'String',str(1:extraLines,:)); - reqPt = extraLines*14+2; - end - - %adjust vertical position of datatip just below cp - topEdge = yl(2)-min(fh,max(yPosPt,reqPt))*diff(yl)/fh; - switch tr.editMode - case {'Collapse/Expand','Rotate Branch','Prune'} - % datatip usually to the left of cp to see shadowing of branches - datatipExtent = get(tr.datatip,'Extent'); - datatipWidth = datatipExtent(3); - rightEdge = max(xPos-3*xThres,xl(1)+datatipWidth); - % is the datatip over cp ? - if rightEdge>xPos && topEdgemin(xdata) & ... - tr.ymin(ydata) & tr.activeNodes) ; -if (strcmp(get(gcbf,'SelectionType'),'normal') && ... - strcmp(get(tr.hdragbox,'visible'),'on')) - tr.selected(:) = false; -end -tr.selected(hp) = true; -tr.sel2root = path2root(tr,tr.selected); -set(tr.hdragbox,'Visible','off') -set(gcbf,'userdata',tr) -updateTree(gcbf,[],[]) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function selectNode(h,varargin) %#ok -% Callback function to select a Node. -% Entry points: from 1) the dots context menu or 2) toggle node -tr = get(gcbf,'userdata'); -% set a virtual grid to get the point -xThres=diff(get(tr.ha,'Xlim'))/100; -yThres=diff(get(tr.ha,'Ylim'))/100; -cp = get(tr.ha,'CurrentPoint'); -xPos = cp(1,1); yPos = cp(1,2); -hp = find(tr.x<(xPos+xThres) & tr.x>(xPos-xThres) & ... - tr.y<(yPos+yThres) & tr.y>(yPos-yThres)) ; - -if numel(hp) - set(tr.hdragbox,'visible','off') - temp = tr.selected(hp(1)); - switch get(gcbf,'SelectionType') - case 'normal'; tr.selected(:) = false; - case 'alt'; if ~temp - tr.selected(:) = false; - end - temp=false; - end - tr.selected(hp(1)) = ~temp; - tr.sel2root = path2root(tr,tr.selected); - set(gcbf,'userdata',tr) - updateTree(gcbf,[],[]) -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function inspectNode(h,varargin) %#ok -% Callback function to inspect the reference Node. -% Entry points: from 1) the dots context menu or 2) toggle node -tr = get(gcbf,'userdata'); -% set a virtual grid to get the point -xThres=diff(get(tr.ha,'Xlim'))/100; -yThres=diff(get(tr.ha,'Ylim'))/100; -cp = get(tr.ha,'CurrentPoint'); -xPos = cp(1,1); yPos = cp(1,2); -hp = find(tr.x<(xPos+xThres) & tr.x>(xPos-xThres) & ... - tr.y<(yPos+yThres) & tr.y>(yPos-yThres)) ; - -if numel(hp) - temp = tr.selected(hp(1)); - tr.selected(:) = false; - tr.selected(hp(1)) = ~temp; - tr.sel2root = path2root(tr,tr.selected); - set(gcbf,'userdata',tr) - updateTree(gcbf,[],[]) -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function collapseExpand(h,varargin) %#ok -% Callback function to Collapse/Expand a branch. -% Entry points: from 1) the dots context menu or 2) toggle node - -tr = get(gcbf,'userdata'); -if strcmp(get(h,'Type'),'uimenu') % come from a context menu - hp = find(tr.selected(tr.numLeaves+1:tr.numLabels)); -else - % set a virtual grid to get the point - xThres=diff(get(tr.ha,'Xlim'))/100; - yThres=diff(get(tr.ha,'Ylim'))/100; - cp = get(tr.ha,'CurrentPoint'); - xPos = cp(1,1); yPos = cp(1,2); - hp = find(tr.x<(xPos+xThres) & tr.x>(xPos-xThres) & ... - tr.y<(yPos+yThres) & tr.y>(yPos-yThres)) ; - hp=hp(hp>tr.numLeaves)-tr.numLeaves; - if numel(hp) - hp=hp(1); %just in case it picked two points - end -end - -if numel(hp) - for ind = 1:numel(hp) - tr.activeBranches(hp(ind))=~tr.activeBranches(hp(ind)); - activeBranches=find(tr.activeBranches)'; - % find active nodes by expanding active Branches - tr.activeNodes(:)=false; - tr.activeNodes(tr.numLabels,1)=true; - for k = activeBranches(end:-1:1) - tr.activeNodes(tr.tree(k,:))=tr.activeNodes(k+tr.numLeaves); - end - tr.selected(:) = false; - tr.selected(hp(ind)+tr.numLeaves) = true; - end - tr.sel2root = path2root(tr,tr.selected); - set(gcbf,'userdata',tr) - updateTree(gcbf,[],hp(end)+tr.numLeaves) -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function rotateBranch(h,varargin) %#ok -% Callback function to rotate a branch reordering the leaves. -% Entry points: from 1) the dots context menu or 2) toggle node -tr = get(gcbf,'userdata'); -if strcmp(get(h,'Type'),'uimenu') % come from a context menu - hp = find(tr.selected(tr.numLeaves+1:tr.numLabels)); -else - % set a virtual grid to get the point - xThres=diff(get(tr.ha,'Xlim'))/100; - yThres=diff(get(tr.ha,'Ylim'))/100; - cp = get(tr.ha,'CurrentPoint'); - xPos = cp(1,1); yPos = cp(1,2); - hp = find(tr.x<(xPos+xThres) & tr.x>(xPos-xThres) & ... - tr.y<(yPos+yThres) & tr.y>(yPos-yThres)) ; - hp=hp(hp>tr.numLeaves)-tr.numLeaves; - if numel(hp) - hp=hp(1); %just in case it picked two points - end -end -if numel(hp) - for ind = 1:numel(hp) - %find Leaves for every child - childrenA = false(1,tr.numLabels); - childrenA(tr.tree(hp(ind),1)) = true; - for k = tr.tree(hp(ind),1)-tr.numLeaves:-1:1 - if childrenA(k+tr.numLeaves) - childrenA(tr.tree(k,:))=true; - end - end - childrenB = false(1,tr.numLabels); - childrenB(tr.tree(hp(ind),2)) = true; - for k = tr.tree(hp(ind),2)-tr.numLeaves:-1:1 - if childrenB(k+tr.numLeaves) - childrenB(tr.tree(k,:))=true; - end - end - permuta = 1:tr.numLabels; - chA = find(childrenA(1:tr.numLeaves)); - chB = find(childrenB(1:tr.numLeaves)); - if chA(1)(xPos-xThres) & ... - tr.y<(yPos+yThres) & tr.y>(yPos-yThres)) ; -if numel(hp) - tr.previousMode = tr.editMode; tr.editMode = 'Renaming'; - xBoxPos = (.02+(xPos-Xlim(1))/diff(Xlim))*aPos(3)+aPos(1); - yBoxPos = (.02+(Ylim(2)-yPos)/diff(Ylim))*aPos(4)+aPos(2); - position=get(gcbf,'position'); - position=[position(3)*xBoxPos position(4)*yBoxPos 150 20]; - set(tr.editBox,'position',position); - set(tr.editBox,'Visible','on','string',tr.names{hp(1)},'Value',hp(1)) - disableAllUI(h) - disableMyContextMenus(h) - set(gcbf,'WindowButtonMotionFcn',[]); % disable windows mouse motion - set(gcbf,'userdata',tr) - end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function doneRenaming(h,varargin) %#ok -% Output helper function to abandon the "Renaming" mode -tr = get(gcbf,'userdata'); -tr.editMode = tr.previousMode; -tr.names{get(tr.editBox,'Value')} = get(tr.editBox,'String'); -set(tr.editBox,'Visible','off') -set(gcbf,'userdata',tr) -updateTree(gcbf,[],[]); -enableAllUI(h) -enableMyContextMenus(h) -enableMyWindowButtonActions(h) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function pruneTree(h,varargin) %#ok -% Callback function to prune the tree, this function is complex because not -% only the basic structure is updated but all the other handles are -% also updated to contain the new tree. - -% if changing to Prune, verify the warning has been accepted -propsForFigure = getappdata(gcbf,'propsForFigure'); -if isequal(propsForFigure.PruneWarning,'NotDone') - warndlg(['Pruning nodes cannot be undone. Before continuing,',... - ' you may want to export the current tree to a new tool.'],... - 'Warning','modal') - setacceptedwarningtoothertools - return % do not do this pruning -end - -tr = get(gcbf,'userdata'); -if strcmp(get(h,'Type'),'uimenu') % comes from a context menu - hp = find(tr.selected); -else - tr = get(gcbf,'userdata'); - Xlim=get(tr.ha,'Xlim');Ylim=get(tr.ha,'Ylim'); - % aPos=get(tr.ha,'Position'); - % set a virtual grid to get the point - xThres=diff(Xlim)/100; - yThres=diff(Ylim)/100; - cp = get(tr.ha,'CurrentPoint'); - xPos = cp(1,1); yPos = cp(1,2); - hp = find(tr.x<(xPos+xThres) & tr.x>(xPos-xThres) & ... - tr.y<(yPos+yThres) & tr.y>(yPos-yThres)); - hp=hp(1); %just in case it picked two points -end - -hp(hp==tr.numLabels)=[]; %cannot delete the root - -while numel(hp) - %find all nodes to purge (i.e. all descendants) - children = false(1,tr.numLabels); - children(hp(1)) = true; - for k = hp(1)-tr.numLeaves:-1:1 - if children(k+tr.numLeaves) - children(tr.tree(k,:))=true; - end - end - mypar = tr.par(hp(1)); % parent - if mypar < tr.numLabels % my parent is NOT the root - % connect brother to granparent - mygrpar = tr.par(mypar); % grandparent - myuncle = setxor(tr.tree(mygrpar-tr.numLeaves,:),mypar); % uncle - mybro = setxor(tr.tree(mypar-tr.numLeaves,:),hp(1)); % brother - tr.tree(mygrpar-tr.numLeaves,:) = [myuncle mybro]; - tr.dist(mybro) = tr.dist(mybro) + tr.dist(mypar); - temp = get(tr.hlines(mygrpar-tr.numLeaves),'Xdata'); - temp([1 4])=tr.x(tr.tree(mygrpar-tr.numLeaves,:)); - set(tr.hlines(mygrpar-tr.numLeaves),'Xdata',temp); - highlight = [mybro,mygrpar]; - else % if my parent is the root, now I am the new root - temp=cell2mat(get(tr.hlines,'Xdata'))-tr.dist(end); - for k = 1:tr.numBranches - set(tr.hlines(k),'Xdata',temp(k,:)); - end - highlight = setxor(tr.tree(mypar-tr.numLeaves,:),hp(1)); - end - children(mypar) = true; %also delete my par - % find indexes to change tree - permuta = 1:tr.numLabels; - permuta(children) = []; - ipermuta = zeros(1,tr.numLabels); - ipermuta(permuta) = 1:length(permuta); - permutaBranches = permuta(permuta>tr.numLeaves)-tr.numLeaves; - % update all tree structure fields - tr.names = tr.names(permuta); - tr.dist = tr.dist(permuta); - tr.tree = tr.tree(permutaBranches,:); - tr.tree = ipermuta(tr.tree); - if isempty(tr.tree) - return; - end % one leaf, no branches ! - tr = doBasicCalculations(tr); - hlines = tr.hlines; - tr.hlines = tr.hlines(permuta); - delete(setxor(hlines,tr.hlines)); - tr.activeNodes = tr.activeNodes(permuta); - tr.activeBranches = tr.activeBranches(permutaBranches); - tr.selected = tr.selected(permuta); - tr.selected(:) = false; - tr.selected(ipermuta(highlight)) = true; - tr.sel2root = path2root(tr,tr.selected); - % update the vector with nodes to prune (node index has changed) - hp=ipermuta(hp); - hp(1)=[]; - hp(hp==0)=[]; - set(gcbf,'userdata',tr) - updateTree(gcbf,[],[]) -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function colorDown(h,varargin) %#ok -% Color Down a branch. -% Entry points: from 1) the dots context menu or 2) toggle node -disp('Color Down a branch. Not implemented YET !!!') - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function findNode(h,varargin) %#ok -treefig = gcbf; -tr = get(treefig,'userdata'); -s = inputdlg('Regular Expression to match ?','Find Leaf/Branch',1); -if ~isempty(s) - hc=regexpi(regexprep(tr.names,'_',' '),s); - h = false(1,tr.numLabels); - for ind = 1:tr.numLabels - if ~isempty(hc{ind}) - h(ind)=true; - end - end - hf = find(h); - for ind = 1:length(hf) - while ~tr.activeNodes(hf(ind)) - hf(ind)=tr.par(hf(ind)); - end - end - tr.selected(:) = false; - tr.selected(hf) = true; - tr.sel2root = path2root(tr,tr.selected); % update path to root - set(treefig,'Userdata',tr); - updateTree(treefig,[],[]) - - % if selected are out of current view then fit the tree - if (any(min(ylim(tr.ha))>tr.y(tr.selected)) || ... - any(max(ylim(tr.ha))= Value; - set(tr.slidertx,'String',num2str(Value)) -end -set(gcbf,'Userdata',tr); -toshow = [tr.tocollapse(tr.par(1:tr.numLabels-1));0]&tr.tocollapse; -mask = (1:tr.numLabels)'>tr.numLeaves; -% update light lines -set(tr.hlines(~toshow),'color','k') -set(tr.hlines(toshow),'color',[.87 .87 .87]) -% update light dots -set(tr.hldots(1),'Ydata',tr.y(toshow&mask&tr.activeNodes),... - 'Xdata',tr.x(toshow&mask&tr.activeNodes)) -set(tr.hldots(2),'Ydata',tr.y(toshow&~mask&tr.activeNodes),... - 'Xdata',tr.x(toshow&~mask&tr.activeNodes)) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function doThresholdCut(h,varargin) %#ok -% this helper function inactivates nodes based on the threshold cut selected -% with the slider. Entry point: the only way to get into this function is -% by the 'OK' uicontrol next to the slider. -tr = get(gcbf,'userdata'); -tr.activeBranches = tr.activeBranches & ~tr.tocollapse(tr.numLeaves+1:tr.numLabels); -% find active nodes by expanding active Branches -activeBranches=find(tr.activeBranches)'; -tr.activeNodes(:)=false; -tr.activeNodes(tr.numLabels,1)=true; -for ind = activeBranches(end:-1:1) - tr.activeNodes(tr.tree(ind,:))=tr.activeNodes(ind+tr.numLeaves); -end -tr.lastThresholdValue = get(tr.slider,'Value'); -set([tr.slider,tr.slidertx,tr.sliderok],'Visible','off') -tr.selected(:) = false; -tr.sel2root = path2root(tr,tr.selected); -set(gcbf,'userdata',tr) -changeEditMode(h); -updateTree(gcbf,[],[]); -autoFit(h) -enableAllUI(h) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function cancelThresholdCut(h,varargin) %#ok -% this helper function cancels the threshold cut mode and returns to the -% 'select' mode. Entry point: the ways to get into this function is -% by the 'CANCEL' uicontrol next to the slider (does not exist yet) or by -% mouse click over the axes diring the slider mode. -tr = get(gcbf,'userdata'); -set([tr.slider,tr.slidertx,tr.sliderok],'Visible','off') -changeEditMode(h); -updateTree(gcbf,[],[]); -enableAllUI(h) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function expandAll(h,varargin) %#ok -% Callback function to expand all hidden nodes -tr = get(gcbf,'userdata'); -x=[0 inf]; -[dump,anchor]=min(abs((mean(ylim)-tr.y))+x(1+tr.activeNodes)'); %#ok -tr.activeBranches(:) = true; -tr.activeNodes(:) = true; -set(gcbf,'Userdata',tr); -updateTree(gcbf,[],anchor) -autoFit(h) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Callback function to save tree -function saveNewick(h,varargin) %#ok -tr = get(gcbf,'userdata'); -newtr.tree = tr.tree; -newtr.dist = tr.dist; -newtr.names = tr.names; -phytreewrite(phytree(newtr),'GUI',true); - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Callback function to restore the original tree -function restoreTree(h,varargin) %#ok -tr = getappdata(gcbf,'backupTree'); -view(phytree(tr)) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Callback function to load tree -function loadNewick(h,varargin) %#ok -if strcmp(get(h,'Type'),'uimenu') % if caller is the uimenu then needs - figtoclose = gcbf; - tr = phytreeread; % to pick a file - -else % if not, caller is the callback from get workspace var - tr=[]; - pfig = get(h,'Parent'); - if strcmp(get(h,'string'),'Import') || ... - (strcmp(get(h,'style'),'listbox') && strcmp(get(gcbf,'SelectionType'),'open')) - hp = get(pfig,'Userdata'); - figtoclose = hp(4); - ops = get(hp(1),'string'); - if ~isempty(ops) - tr = evalin('base',ops(get(hp(1),'value'),:)); - end - close(pfig); - elseif strcmp(get(h,'string'),'Cancel') - close(pfig); - end -end -if ~isempty(tr) - propsForFigure = getappdata(figtoclose,'propsForFigure'); - view(tr,[],propsForFigure); - close(figtoclose) -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Callback function to close tree -function closeNewick(h,varargin) %#ok -close(gcbf) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Callback function to copy the tree to a figure -function doPublishFigure(h,varargin,AllNodes,callerIsContextMenu) %#ok - -pfig = get(h,'Parent'); -if strcmp(get(h,'string'),'Cancel') - close(pfig); - return; -end - -% get options from the publishdlg window and close it -hp = get(pfig,'Userdata'); -vfig = hp(7); -va = get(hp(1:6),'Value'); -va = [va{:}]; -switch find(va(1:3)) - case 1; args = {'type','square'}; - case 2; args = {'type','angular'}; - case 3; args = {'type','radial'}; -end -args = {args{:},'bra',va(4),'lea',va(5),'ter',va(6)}; -close(pfig); - -% now select the branch to publish based on selected points -tr = get(vfig,'userdata'); -if ~callerIsContextMenu & ~tr.selected %#ok % if called from the uimenu and - tr.selected(end) = true; % nothing is selected pick the root -end -selected = find(tr.selected); -commonpath = true(tr.numLabels,1); -for ind = 1:numel(selected) - commonpath = commonpath & path2root(tr,selected(ind)); -end -branchtoexp = find(commonpath,1); -tr.selected(:) = false; -tr.selected(branchtoexp) = true; -set(vfig,'userdata',tr) -updateTree(vfig,[],[]); - -hp = branchtoexp; -%find all nodes to export (i.e. all descendants) -children = false(1,tr.numLabels); -children(hp) = true; -if AllNodes - for ind = hp-tr.numLeaves:-1:1 - if children(ind+tr.numLeaves) - children(tr.tree(ind,:))=true; - end - end - permuta = find(children); -else - for ind = hp-tr.numLeaves:-1:1 - if children(ind+tr.numLeaves) - children(tr.tree(ind,:))=tr.activeNodes(tr.tree(ind,:)); - end - end - braToLea = find(~tr.activeNodes(tr.tree(:,1)))+tr.numLeaves; - expBran = find(children(tr.numLeaves+1:end)) + tr.numLeaves; - permuta = [find(children(1:tr.numLeaves)) ... - intersect(expBran,braToLea)]; - [dump,hs] = sort(tr.y(permuta)); %#ok - permuta = [permuta(hs) setdiff(expBran,braToLea)]; -end - -if sum(children)>1 % enough leaves to export ? - newtr = phytree; - ipermuta(permuta) = 1:length(permuta); - numLeaves = (ipermuta(end) + 1)/2; - newtr.tree = ipermuta(tr.tree(permuta(numLeaves+1:end)-tr.numLeaves,:)); - newtr.dist = tr.dist(permuta); - newtr.names = tr.names(permuta); - plot(newtr,true(length(newtr.tree),1),args{:}) -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Callback function for autofit -function autoFit(h,varargin) %#ok -tr = get(gcbf,'userdata'); -set(tr.ha,'Ylim',[min(tr.y(tr.activeNodes))-1,max(tr.y(tr.activeNodes))+1]); - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Callback function to Reset view -function myResetView(h,varargin) %#ok -tr = get(gcbf,'userdata'); -set(tr.ha,'Ylim',tr.yLim); -set(tr.ha,'Xlim',tr.xLim); - - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Common export function -function exportSubtree(h,varargin,AllNodes,ToWS,callerIsContextMenu) %#ok - -tr = get(gcbf,'userdata'); -% if called from the uimenu and nothing is selected pick the root -if ~callerIsContextMenu & ~tr.selected %#ok - tr.selected(end) = true; -end - -selected = find(tr.selected); -commonpath = true(tr.numLabels,1); -for ind = 1:numel(selected) - commonpath = commonpath & path2root(tr,selected(ind)); -end -branchtoexp = find(commonpath,1); -tr.selected(:) = false; -tr.selected(branchtoexp) = true; - -set(gcbf,'userdata',tr) -updateTree(gcbf,[],[]); -doExport(branchtoexp,AllNodes,ToWS) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Common export function after having selected a point -function doExport(hp,AllNodes,ToWS) - -tr = get(gcbf,'userdata'); - -%find all nodes to export (i.e. all descendants) -children = false(1,tr.numLabels); -children(hp) = true; -if AllNodes - for ind = hp-tr.numLeaves:-1:1 - if children(ind+tr.numLeaves) - children(tr.tree(ind,:))=true; - end - end - permuta = find(children); -else - for ind = hp-tr.numLeaves:-1:1 - if children(ind+tr.numLeaves) - children(tr.tree(ind,:))=tr.activeNodes(tr.tree(ind,:)); - end - end - braToLea = find(~tr.activeNodes(tr.tree(:,1)))+tr.numLeaves; - expBran = find(children(tr.numLeaves+1:end)) + tr.numLeaves; - permuta = [find(children(1:tr.numLeaves)) ... - intersect(expBran,braToLea)]; - [dump,hs] = sort(tr.y(permuta)); %#ok - permuta = [permuta(hs) setdiff(expBran,braToLea)]; -end -if sum(children)>1 % enough leaves to export ? - newtr=phytree; - ipermuta(permuta) = 1:length(permuta); - numLeaves = (ipermuta(end) + 1)/2; - newtr.tree = ipermuta(tr.tree(permuta(numLeaves+1:end)-tr.numLeaves,:)); - newtr.dist = tr.dist(permuta); - newtr.names = tr.names(permuta); - if ToWS % export to workspace ? - s = inputdlg('Workspace variable name ?','Export to Workspace',1); - while ~(isempty(s) || isvarname(s{1}) || isempty(s{1})) - s = inputdlg('Not a valid variable name, type a MATLAB variable name ?','Export to Workspace',1); - end - if ~(isempty(s) || isempty(s{1})) - assignin('base',s{1},newtr) - end - else % no, then export to other viewer - view(newtr); - end -end - - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function updateTree(h,highLight,anchor) %#ok -% Redraws the tree depending on the active Branches -% rather than erase and redraw, we only change specific fields in hlines -% and hdots. - -tr = get(h,'userdata'); -activeBranches=find(tr.activeBranches)'; -oldPos = tr.y(anchor); - -% propagate last leaf -lastleaf = 1:tr.numLabels; -for ind = tr.numBranches:-1:1 - if ~tr.activeNodes(tr.tree(ind,1)) - lastleaf(tr.tree(ind,:))=lastleaf(ind+tr.numLeaves); - end -end - -% find x coordinates of branches -tr.x = tr.dist; -for ind = tr.numBranches:-1:1 - tr.x(tr.tree(ind,:)) = tr.x(tr.tree(ind,:)) + tr.x(ind+tr.numLeaves); -end - -% find y coordinates of branches -dummy = lastleaf([true,diff(lastleaf(1:tr.numLeaves))~=0]); -tr.y=zeros(tr.numLabels,1); -tr.y(dummy)=1:length(dummy); -for ind = activeBranches - tr.y(ind+tr.numLeaves) = mean(tr.y(tr.tree(ind,:))); -end - -% update right labels -todis = tr.names(dummy); -set(tr.ha,'ytick',1:length(dummy),'yticklabel',todis) - -% show only active branches -set(tr.hlines,'Visible','off') -set(tr.hlines(tr.activeNodes),'Visible','on') - -% update coordinates in lines -for ind = 1:tr.numLabels-1 - set(tr.hlines(ind),'Ydata',tr.y([ind,ind,tr.par(ind)])) - set(tr.hlines(ind),'Xdata',tr.x([ind,tr.par([ind ind])])) -end -set(tr.hlines(tr.numLabels),'Ydata',tr.y(tr.numLabels)*[1 1 1]) -set(tr.hlines(tr.numLabels),'Xdata',tr.x(tr.numLabels)*[1 1 1]) - -% update dots -mask = false(tr.numLabels,1); mask(1:tr.numLeaves) = true; -set(tr.hdots(1),'Ydata',tr.y(tr.activeNodes&~mask),'Xdata',tr.x(tr.activeNodes&~mask)) -set(tr.hdots(2),'Ydata',tr.y(tr.activeNodes&mask),'Xdata',tr.x(tr.activeNodes&mask)) - -% update red dots -set(tr.hseldots(1),'Ydata',tr.y(tr.activeNodes&~mask&tr.selected),'Xdata',tr.x(tr.activeNodes&~mask&tr.selected)) -set(tr.hseldots(2),'Ydata',tr.y(tr.activeNodes&mask&tr.selected),'Xdata',tr.x(tr.activeNodes&mask&tr.selected)) - -% set the axis holders -set(tr.axhold,'Ydata',[0.5,max(tr.y(tr.activeNodes))+0.5]) -if numel(oldPos) - set(tr.ha,'ylim',get(tr.ha,'ylim')+tr.y(anchor)-oldPos); -else - set(tr.ha,'ylim',get(tr.ha,'ylim')) % just touch 'YLim' such that the listener is triggered -end - -% turn on indicative modes -tr.indicativeMode = false; -set([tr.datatip tr.hpathline],'visible','off') -set(tr.hlines,'color','black') -set(tr.hldots(1),'Xdata',[],'Ydata',[]) -set(tr.hldots(2),'Xdata',[],'Ydata',[]) - -% save figure data -set(h,'Userdata',tr) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function path2r = path2root(tr,from) -% helper function, finds path to root -path2r = false(tr.numLabels,1); -if (numel(from)~=1 && sum(from)~=1) - return; -end -path2r(from) = true; -temp = find(path2r); -if numel(temp) - while temp~=tr.numLabels; - temp = tr.par(temp); - path2r(temp) = true; - end -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function [pathA,pathB] = path2sel(tr,from) -% helper function, finds path to selected node -path2rt = path2root(tr,from); -commonPath = tr.sel2root & path2rt; -commonPath(find(commonPath,1)) = false; -pathB = tr.sel2root & ~commonPath; -pathA = path2rt & ~commonPath; - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function tr = doBasicCalculations(tr) -% helper function to compute and find some features of the tree -tr = struct(tr); -tr.numBranches = size(tr.tree,1); -tr.numLeaves = tr.numBranches + 1; -tr.numLabels = tr.numBranches + tr.numLeaves; - -% obtain parents for every node -tr.par(tr.tree(:)) = tr.numLeaves + [1:tr.numBranches 1:tr.numBranches]; - -% calculate the distance to the closest leaf for every node -% needed for fast threshold cut -tr.dist2Leaf = zeros(tr.numLabels,1); -for ind = 1:tr.numBranches - tr.dist2Leaf(ind+tr.numLeaves) = ... - min(tr.dist2Leaf(tr.tree(ind,:))+tr.dist(tr.tree(ind,:))); -end - -% calculate drawing coordinates for the tree: x coordinated will never -% change, but y coordinates may change depending on the active branches and -% nodes. -tr.x = tr.dist; tr.y=[1:tr.numLeaves zeros(1,tr.numBranches)]'; -for ind = tr.numBranches:-1:1 - tr.x(tr.tree(ind,:)) = tr.x(tr.tree(ind,:)) + tr.x(ind+tr.numLeaves); -end -for ind =1:tr.numBranches - tr.y(ind+tr.numLeaves) = mean(tr.y(tr.tree(ind,:))); -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function correctFigureSize(fig,recommendedHeight) -% helper function to increase initial figure size depending on the screen & -% tree sizes -screenSize = diff(reshape(get(0,'ScreenSize'),2,2),[],2)-100; - % 100 gives extra space for the figure header and win toolbar -position = get(fig,'Position'); -if recommendedHeight > position(4) - if recommendedHeight < sum(position([2 4])) - position(2) = sum(position([2 4])) - recommendedHeight; - position(4) = recommendedHeight; - elseif recommendedHeight < screenSize(2) - position(2) = 30; - position(4) = recommendedHeight; - else - position(2) = 30; - position(4) = screenSize(2); - end - set(fig,'Position',position) -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function hToggleToolbar = makePhyTreeViewerToolbar(fig) -% helper function to set the toolbar -% -% hToggleToolbar contains handles to easy change the state on/off when -% changing modes - -oldSH = get(0,'ShowHiddenHandles'); -set(0,'ShowHiddenHandles','on') - -set(fig,'toolbar','figure') % needs to update because uicontrols turn it off - -% Fix toolbar options, we keep: ZoomIn,ZoomOut,Pan -hw = findall(fig,'type','uitoolbar'); -hf = get(hw,'Children'); -h1 = findall(hf,'Tag','Exploration.Pan'); -h2 = findall(hf,'Tag','Exploration.ZoomOut'); -h3 = findall(hf,'Tag','Exploration.ZoomIn'); -delete(setxor(hf,[h1,h2,h3])) -set([h1 h2 h3],'Separator','off','clickedCallback',@changeEditMode); - -% load icons -load(fullfile(matlabroot,'toolbox','bioinfo','bioinfo','@phytree','phytreeicons')) - -h4 = uitoggletool('ToolTip','Inspect Tool Mode','separator','on',... - 'Tag','Inspect', 'CData',icons(:,:,1:3)); %#ok -h5 = uitoggletool('ToolTip','Collapse/Expand Branch Mode',... - 'Tag','Collapse/Expand','CData',icons(:,:,4:6)); %#ok -h6 = uitoggletool('ToolTip','Rotate Branch Mode',... - 'Tag','Rotate Branch', 'CData',icons(:,:,7:9)); %#ok -h7 = uitoggletool('ToolTip','Rename Leaf/Branch Mode',... - 'Tag','Rename', 'CData',icons(:,:,10:12)); %#ok -h8 = uitoggletool('ToolTip','Prune (delete) Leaf/Branch Mode',... - 'Tag','Prune', 'CData',icons(:,:,13:15)); %#ok -set([h4 h5 h6 h7 h8],'clickedCallback',@changeEditMode,'state','off',... - 'Serializable','off','HandleVisibility','off'); -hToggleToolbar = [h4 h5 h6 h7 h8 h1 h2 h3]; -set(0,'ShowHiddenHandles',oldSH) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function hToggleUIMenu = makePhyTreeViewerUIMenus(fig) -% helper function to set UI menus -% -% hToggleUIMenu contains handles to easy check on/off modes in the UI menu - -oldSH = get(0,'ShowHiddenHandles'); -set(0,'ShowHiddenHandles','on') - -% delete figure menus not used -h1 = findall(fig,'Type','uimenu', 'Label','&Edit'); -h2 = findall(fig,'Type','uimenu', 'Label','&View'); -h3 = findall(fig,'Type','uimenu', 'Label','&Insert'); -h4 = findall(fig,'Type','uimenu', 'Label','&Desktop'); -delete([h1,h2,h3,h4]) - -% Repair "File" menu -hw = findall(fig,'Type','uimenu', 'Label','&File'); -hf = get(hw,'children'); -h1 = findall(hw,'Label','Expo&rt Setup...'); -h3 = findall(hw,'Label','Print Pre&view...'); -h4 = findall(hw,'Label','&Print...'); -delete(setxor(hf,[h1,h3,h4])) -uimenu(hw,'Label','New Tool...', 'Position',1,'Callback','phytreetool') -uimenu(hw,'Label','Open...', 'Position',2,'Callback',@loadNewick) -uimenu(hw,'Label','Import from Workspace...','Position',3,'Callback',@importfromwsdlg) -uimenu(hw,'Label','Open Original in New Tool','Position',4,'Callback',@restoreTree) -uimenu(hw,'Label','Save As...', 'Position',5,'Callback',@saveNewick,'Separator','on') -item0 = uimenu(hw ,'Label','Print to Figure','Position',6); - uimenu(item0,'Label','With Hidden Nodes...','Callback',{@publishdlg,1,0}); - uimenu(item0,'Label','Only Displayed...', 'Callback',{@publishdlg,0,0}); -item1 = uimenu(hw,'Label','Export to New Tool','Position',7); - uimenu(item1,'Label','With Hidden Nodes...','Callback',{@exportSubtree,1,0,0}); - uimenu(item1,'Label','Only Displayed...', 'Callback',{@exportSubtree,0,0,0}); -item2 = uimenu(hw,'Label','Export to Workspace','Position',8); - uimenu(item2,'Label','With Hidden Nodes...','Callback',{@exportSubtree,1,1,0}); - uimenu(item2,'Label','Only Displayed...', 'Callback',{@exportSubtree,0,1,0}); -uimenu(hw,'Label','Exit','Separator','on','Position',12,'Callback',@closeNewick) -set(h1,'Separator','on') - -% Repair "Tools" menu -hw = findall(fig,'Type','uimenu','Label','&Tools'); -hf = get(hw,'children'); -h1 = findall(hw,'Tag','figMenuZoomIn'); set(h1,'Callback',{@changeEditMode,gcbo}); -h2 = findall(hw,'Tag','figMenuZoomOut'); set(h2,'Callback',{@changeEditMode,gcbo}); -h3 = findall(hw,'Tag','figMenuPan'); set(h3,'Callback',{@changeEditMode,gcbo}); -h4 = findall(hw,'Tag','figMenuResetView'); set(h4,'Callback',{@myResetView,gcbo}); -h5 = findall(hw,'Tag','figMenuOptions'); -set([h1,h4],'separator','off') -delete(setxor(hf,[h1,h2,h3,h4,h5])) -delete(findall(h5,'Tag','figMenuOptionsDatatip')) -delete(findall(h5,'Tag','figMenuOptionsDataBar')) -h6 = uimenu(hw,'Label','Inspect','Position',1,'Callback',@changeEditMode); -h7 = uimenu(hw,'Label','Collapse/Expand','Position',2,... - 'Callback',@changeEditMode); -h8 = uimenu(hw,'Label','Rotate Branch','Position',3,... - 'Callback',@changeEditMode); -h9 = uimenu(hw,'Label','Rename','Position',4, 'Callback',@changeEditMode); -h10 = uimenu(hw,'Label','Prune','Position',5, 'Callback',@changeEditMode); -item3 = uimenu(hw,'Label','Threshold Collapse','Position',9,'Separator','on'); - uimenu(item3,'Label','Distance to Leaves', 'Callback',@thresholdCut); - uimenu(item3,'Label','Distance to Root', 'Callback',@thresholdCut); - uimenu(hw,'Label','Expand All','Position',10,'Callback',@expandAll); -uimenu(hw,'Label','Find Leaf/Branch...','Position',11,'Callback',@findNode); -uimenu(hw,'Label','Fit to Window','Position',12,'Separator','on',... - 'Callback',@autoFit); -set(h1,'Separator','on') - -% Repair "Help" menu -hw = findall(fig,'Type','uimenu','Label','&Help'); -delete(get(hw,'children')); -uimenu(hw,'Label','Bioinformatics Toolbox Help','Position',1,'Callback',... - 'web([docroot ''/toolbox/bioinfo/bioinfo_product_page.html''])') -uimenu(hw,'Label','Phylogenetic Tree Tool Help','Position',2,'Callback',... - ['helpview(fullfile(docroot,''toolbox'',''bioinfo'', ''bioinfo.map'')' ... - ',''phytreetool_reference'')' ] ) -uimenu(hw,'Label','Demos','Position',3,'Separator','on',... - 'Callback','demo(''toolbox'',''bioinfo'')') -tlbx = ver('bioinfo'); -mailstr = ['web(''mailto:bioinfofeedback@mathworks.com?subject=',... - 'Feedback%20for%20Phytreetool%20in%20Bioinformatics',... - '%20Toolbox%20',tlbx(1).Version,''')']; -uimenu(hw,'Label','Send Feedback','Position',4,'Separator','on',... - 'Callback',mailstr); -set(0,'ShowHiddenHandles',oldSH) -hToggleUIMenu = [h6 h7 h8 h9 h10 h1 h2 h3]; - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function [hToggleContextMenu,hAxisContextMenu,hDotsContextMenu] = ... - makePhyTreeViewerContextMenus(fig) %#ok -% helper function to set context menus -% -% hToggleContextMenu contains handles to easy check on/off modes in the -% Context Menu -% hAxisContextMenu contains handle to the Axis Context Menu to used to -% reactivate it -% hDotsContextMenu contains handle to the Dots Context Menu to used to -% reactivate it - -% set up context menu for the axes (when mouse is not over a node) -hcm1 = uicontextmenu('Callback',@hideActiveIndicators); - -h1 = uimenu(hcm1,'Label','Inspect', 'Callback',@changeEditMode); -h2 = uimenu(hcm1,'Label','Collapse/Expand','Callback',@changeEditMode); -h3 = uimenu(hcm1,'Label','Rotate Branch', 'Callback',@changeEditMode); -h4 = uimenu(hcm1,'Label','Rename', 'Callback',@changeEditMode); -h5 = uimenu(hcm1,'Label','Prune', 'Callback',@changeEditMode); -item4 = uimenu(hcm1 , 'Label', 'Threshold Collapse','Separator','on'); - uimenu(item4,'Label','Distance to Leaves', 'Callback',@thresholdCut); - uimenu(item4,'Label','Distance to Root', 'Callback',@thresholdCut); - uimenu(hcm1,'Label','Expand All','Callback',@expandAll); -uimenu(hcm1 ,'Label','Find Leaf/Branch...', 'Callback',@findNode); -uimenu(hcm1 ,'Label','Fit to Window','Separator','on','Callback',@autoFit); -uimenu(hcm1 ,'Label','Reset to Original View', 'Callback',@myResetView); - -% context menu for dots (when mouse over a dot and right mouse button pressed) -hcm2 = uicontextmenu('Callback',@enableOptionsContextMenu); - uimenu(hcm2,'Label','Collapse/Expand', 'Callback',@collapseExpand); - uimenu(hcm2,'Label','Rotate Branch', 'Callback',@rotateBranch); - uimenu(hcm2,'Label','Rename', 'Callback',@renameNode); - uimenu(hcm2,'Label','Prune', 'Callback',@pruneTree); -item0 = uimenu(hcm2,'Label','Print to Figure','Separator','on'); - uimenu(item0,'Label','With Hidden Nodes...','Callback',{@publishdlg,1,1}); - uimenu(item0,'Label','Only Displayed...', 'Callback',{@publishdlg,0,1}); -item1 = uimenu(hcm2,'Label','Export to New Tool'); - uimenu(item1,'Label','With Hidden Nodes...','Callback',{@exportSubtree,1,0,1}); - uimenu(item1,'Label','Only Displayed...', 'Callback',{@exportSubtree,0,0,1}); -item2 = uimenu(hcm2,'Label','Export to Workspace'); - uimenu(item2,'Label','With Hidden Nodes...','Callback',{@exportSubtree,1,1,1}); - uimenu(item2,'Label','Only Displayed...', 'Callback',{@exportSubtree,0,1,1}); - %uimenu(hcm2,'Label','Color Down', 'Callback',@colorDown); - -% save context menus in my data structure to later restore them if desired -hToggleContextMenu = [h1 h2 h3 h4 h5]; -hAxisContextMenu = hcm1; -hDotsContextMenu = hcm2; - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function enableOptionsContextMenu(h,varargin) -hideActiveIndicators -selectNode(h,varargin) -tr = get(gcbf,'Userdata'); -hc = get(tr.hDotsContextMenu,'Children'); -set(hc,'Enable','on') -if sum(tr.selected)~=1 - set(hc(5),'Enable','off'); -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function setupYLabelsListeners -% helper function to setsup listeners for the ylables, so we can detect if -% we would need to change the fontsize -hgp = findpackage('hg'); -axesC = findclass(hgp,'axes'); -figureC = findclass(hgp,'figure'); -% listens when the Ylim of axes has changed -YLimListener = handle.listener(gca,axesC.findprop('YLim'),... - 'PropertyPostSet',{@ylabelsListener,gcf,gca}); -% listens when Position of Figure has changed -PositionListener = handle.listener(gcf,figureC.findprop('Position'),... - 'PropertyPostSet',{@ylabelsListener,gcf,gca}); -% store the listeners -setappdata(gcf,'PhyTreeListeners',[YLimListener, PositionListener]); - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function publishdlg(h,varargin,AllNodes,callerIsContextMenu) %#ok -% dialog window to select options for publishing -vfig = gcbf; -c = get(0,'ScreenSize')*[1 0;0 1;.5 0;0 .5]; -fig = figure('WindowStyle','modal','Color',[0.831373 0.815686 0.784314],... - 'Position',[c-[150 100] 300 200],'Resize','off','NumberTitle','off',... - 'Name','Print Phylogenetic Tree to Figure','IntegerHandle','off' ); -h1=uibuttongroup;h2=uibuttongroup; -set(h1,'Position',[.08 .35 .35 .55],'Title','Rendering Type','backgroundcolor',[0.831373 0.815686 0.784314]) -set(h2,'Position',[.52 .35 .39 .55],'Title','Display Labels','backgroundcolor',[0.831373 0.815686 0.784314]) -ui1=uicontrol(h1,'style','radiobutton','Position',[5 70 90 20],'string','Square','value',1,'backgroundcolor',[0.831373 0.815686 0.784314]); -ui2=uicontrol(h1,'style','radiobutton','Position',[5 40 90 20],'string','Angular','backgroundcolor',[0.831373 0.815686 0.784314]); -ui3=uicontrol(h1,'style','radiobutton','Position',[5 10 90 20],'string','Radial','backgroundcolor',[0.831373 0.815686 0.784314]); -ui4=uicontrol(h2,'style','checkbox','Position',[5 70 109 20],'string','Branch Nodes','backgroundcolor',[0.831373 0.815686 0.784314]); -ui5=uicontrol(h2,'style','checkbox','Position',[5 40 109 20],'string','Leaf Nodes','backgroundcolor',[0.831373 0.815686 0.784314]); -ui6=uicontrol(h2,'style','checkbox','Position',[5 10 109 20],'string','Terminal Nodes','value',1,'backgroundcolor',[0.831373 0.815686 0.784314]); -uicontrol(fig,'style','pushbutton','Position',[70 20 60 30],'string','Print','Callback',{@doPublishFigure,AllNodes,callerIsContextMenu}); -uicontrol(fig,'style','pushbutton','Position',[155 20 60 30],'string','Cancel','Callback',{@doPublishFigure,AllNodes,callerIsContextMenu}); -set(fig,'Userdata',[ui1 ui2 ui3 ui4 ui5 ui6 vfig]); -set(h1,'SelectionChangeFcn',{@toggleCheckBoxs,ui3,ui6}) - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function toggleCheckBoxs(h,event,h3,h6) %#ok -if get(h3,'value') - set(h6,'enable','off') -else - set(h6,'enable','on') -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function importfromwsdlg(h,varargin) %#ok -% dialog window to select variable from workspace -mvars = evalin('base','whos'); -mvars = mvars(strmatch('phytree',{mvars(:).class})); %#ok -vfig = gcbf; -c = get(0,'ScreenSize')*[1 0;0 1;.5 0;0 .5]; -fig = figure('WindowStyle','modal','Color',[0.831373 0.815686 0.784314],... - 'Position',[c-[80 100] 160 220],'Resize','off','NumberTitle','off',... - 'Name','Get Phytree Object','IntegerHandle','off' ); -ui1=uicontrol(fig,'style','list','Position',[22 70 120 120],'string',char(mvars(:).name),'backgroundcolor','w','Callback',@loadNewick); -ui2=uicontrol(fig,'style','pushbutton','Position',[15 20 60 30],'string','Import','Callback',@loadNewick); -ui3=uicontrol(fig,'style','pushbutton','Position',[90 20 60 30],'string','Cancel','Callback',@loadNewick); -uicontrol(fig,'style','text','Position',[20 190 140 20],'string','Select phytree object:','Horizontal','left','backgroundcolor',[0.831373 0.815686 0.784314]) -set(fig,'Userdata',[ui1 ui2 ui3 vfig]); - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function n = getphytreetoolnumber() -% Computes the index number for this particular tool - -% first, finds the used numbers so far -allFigs = findall(0,'tag','PhyTreeTool'); -usedNumbers = zeros(1,numel(allFigs)+1); -baseName = 'Phylogenetic Tree Tool '; -baseLen = length(baseName); -for i = 1:numel(allFigs) - str = get(allFigs(i),'Name'); - usedNumbers(i) = str2double(str(baseLen:end)); -end - -% This is how we find the next index. The rule is that we find the lowest -% integer value (non-zero and positive) not yet prescribed to a phytree -% tool, This is the same way MATLAB figures behave. -n = num2str(min(setdiff(1:(max(usedNumbers)+1),usedNumbers))); - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function n = getacceptedwarningfromothertools(cfig) %#ok -% Finds out if the pruning warning has been accepted -% first, finds the used numbers so far -allFigs = findall(0,'tag','PhyTreeTool'); -n = 'NotDone'; -for i = 1:numel(allFigs) - propsForFigure = getappdata(allFigs(i),'propsForFigure'); - if isequal(propsForFigure.PruneWarning,'Done') - n = 'Done'; - end -end - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -function setacceptedwarningtoothertools() -% Set the pruning warning to all the other open tools as 'Done' -allFigs = findall(0,'tag','PhyTreeTool'); -for i = 1:numel(allFigs) - propsForFigure = getappdata(allFigs(i),'propsForFigure'); - propsForFigure.PruneWarning = 'Done'; - setappdata(allFigs(i),'propsForFigure',propsForFigure); -end - diff --git a/matlab/graph/@phyTree/Weights.m b/matlab/graph/@phyTree/Weights.m deleted file mode 100644 index 70c84b6..0000000 --- a/matlab/graph/@phyTree/Weights.m +++ /dev/null @@ -1,54 +0,0 @@ -function W = weights(tr) -%WEIGHTS Tree based sequence weights. -% -% W = WEIGHTS(T) Calculates branch proportional weights for every leaf in -% the tree using the Thompson-Higgins-Gibson method. The distance of every -% segment of the tree is adjusted by dividing it by the number of leaves -% it contains. The sequence weights are the result of normalizing to the -% unity the new patristic distances between every leaf and the root. -% -% Example: -% -% % Create an ultrametric tree with specified branch distances -% bd = [1 2 3]'; -% tr_1 = phytree([1 2;3 4;5 6],bd) -% view(tr_1) -% weights(tr_1) -% -% See also MULTIALIGN, PHYTREE, PROFALIGN, SEQLINKAGE. - -% References: -% J.D. Thompson, D.G. Higgins, and T.J. Gibson. Nucleic Acids Res. (1994) -% 22(22):4673-4680. -% S.Henikoff and J. G. Henikoff. JMB. (1994) 243(4):574--578. -% -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.8.2 $Author: batserve $ $Date: 2005/06/17 20:19:24 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:weights:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -% calculate the branch width -branchWidth = ones(numLabels,1); -for ind = 1:numBranches - branchWidth(numLeaves+ind) = sum(branchWidth(tr.tree(ind,:))); -end - -% adjust the distances -tr.dist = tr.dist ./ branchWidth; - -% calculate distance of every leave to root -cdist = tr.dist; -for ind = numBranches:-1:1 - cdist(tr.tree(ind,:)) = cdist(tr.tree(ind,:)) + cdist(ind+numLeaves); -end - -W = cdist(1:numLeaves); -W = W./max(W); - diff --git a/matlab/graph/@phyTree/getByName.m b/matlab/graph/@phyTree/getByName.m deleted file mode 100644 index 4b74557..0000000 --- a/matlab/graph/@phyTree/getByName.m +++ /dev/null @@ -1,103 +0,0 @@ -function sel = getbyname(tr,query,varargin) -%GETBYNAME Selects branches and leaves by name. -% -% S = GETBYNAME(T,EXPRESSION) returns a logical vector S of size -% [NUMNODES x 1] indicating the node names of the phylogenetic tree T -% that match the regular expression EXPRESSION regardless of case. -% -% Symbols than can be used in a matching regular expression are explained -% in help REGEXP. -% -% When EXPRESSION is a cell array of strings, GETBYNAME returns a matrix -% where every column corresponds to every query in EXPRESSION. -% -% S = GETBYNAME(T,STRING,'EXACT',true) looks for exact matches only -% (ignoring case). When STRING is a cell array of strings, GETBYNAME -% returns a vector with indices. -% -% Example: -% -% % Load a phylogenetic tree created from a protein family: -% tr = phytreeread('pf00002.tree'); -% -% % Select all the 'mouse' and 'human' proteins: -% sel = getbyname(tr,{'mouse','human'}); -% view(tr,any(sel,2)); -% -% See also PHYTREE, PHYTREE/PRUNE, PHYTREE/SELECT, PHYTREE/GET. - -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.6.5 $ $Author: batserve $ $Date: 2005/06/09 21:55:55 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:getbyname:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -doExactMatch = false; - -if nargin > 2 - okargs = {'exact',''}; - for j=1:2:nargin-2 - pname = varargin{j}; - k = strmatch(lower(pname), okargs); %#ok - if isempty(k) - error('Bioinfo:phytree:getbyname:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('Bioinfo:phytree:getbyname:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - switch(k) - case 1 - if nargin == 3 - doExactMatch = true; - else - doExactMatch = opttf(varargin{j+1}); - if isempty(doExactMatch) - error('Bioinfo:phytree:getbyname:InputOptionNotLogical',... - '%s must be a logical value, true or false.',... - upper(char(okargs(k)))); - end - end - end %switch - end %if - end %for -end %if - - -numLabels = numel(tr.names); -if iscell(query) - if doExactMatch - sel = zeros(numLabels,1); - else - sel = false(numLabels,numel(query)); - end - for ind = 1:numel(query) - if doExactMatch - sel(strcmpi(query{ind},tr.names)) = ind; - else - try - regexpiOutput = regexpi(tr(:).names,query{ind}); - catch - error('Bioinfo:phytree:getbyname:IncorrectRegularExpression',... - ['The query expression produced the following error in ' ... - 'REGEXPI: \n%s'],lasterr); - end - sel(:,ind) = ~cellfun('isempty',regexpiOutput); - end - end -else % must be a single string of chars - if doExactMatch - sel = strcmpi(query,tr.names); - else - try - regexpiOutput = regexpi(tr(:).names,query); - catch - error('Bioinfo:phytree:getbyname:IncorrectRegularExpression',... - ['The query expression produced the following error in ' ... - 'REGEXPI: \n%s'],lasterr); - end - sel = ~cellfun('isempty',regexpiOutput); - end -end diff --git a/matlab/graph/@phyTree/getCanonical.m b/matlab/graph/@phyTree/getCanonical.m deleted file mode 100644 index 6b0db81..0000000 --- a/matlab/graph/@phyTree/getCanonical.m +++ /dev/null @@ -1,65 +0,0 @@ -function [ptrs,dist,names] = getcanonical(tr) -%GETCANONICAL Calculates the canonical form of a phylogenetic tree. -% -% PTRS = GETCANONICAL(TREE) Returns the pointers of the canonical form of -% a phylogenetic tree. In a canonical tree the leaves are ordered -% alphabetically and the branches are ordered first by their width and -% then alphabetically by their first element. A canonical tree is -% isomorphic to all the trees with the same skeleton independently of the -% order of their leaves and branches. -% -% [PTRS,DIST,NAMES] = GETCANONICAL(TREE) Returns also the re-ordered -% distances and node names. -% -% Example: -% % create two trees with same skeleton but slightly different distances -% b = [1 2; 3 4; 5 6; 7 8;9 10]; -% tr_1 = phytree(b,[.1 .2 .3 .3 .4 ]'); -% tr_2 = phytree(b,[.2 .1 .2 .3 .4 ]'); -% plot(tr_1) -% plot(tr_2) -% -% % compare if the two trees are isomorphic -% isequal(getcanonical(tr_1),getcanonical(tr_2)) -% -% See also PHYTREE, PHYTREEREAD, PHYTREE/GETBYNAME, PHYTREE/SELECT, -% PHYTREE/SUBTREE. - -% Copyright 2003-2006 The MathWorks, Inc. -% $Revision: 1.1.8.2 $ $Author: batserve $ $Date: 2006/06/16 20:06:42 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:getcanonical:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -[dummy,h] = sort(tr.names(1:numLeaves)); %#ok -h(h)=1:numLeaves; - -% compute the branch width and the first element for each one -branchWidth = ones(numLabels,1); -firstElement = [h;inf(numBranches,1)]; -for ind = 1:numBranches - branchWidth(numLeaves+ind) = sum(branchWidth(tr.tree(ind,:))); - firstElement(numLeaves+ind) = min(firstElement(tr.tree(ind,:))); -end - -% find out how to re-order -[dummy,ord]=sortrows([branchWidth firstElement]); %#ok -iord(ord) = 1:numLabels; - -% re-order pointers -ptrs = sort(iord(tr.tree(ord(numLeaves+1:numLabels)-numLeaves,:)),2); - -if nargout > 1 - dist = tr.dist(ord); -end -if nargout > 2 - names = tr.names(ord); -end - - diff --git a/matlab/graph/@phyTree/getMatrix.m b/matlab/graph/@phyTree/getMatrix.m deleted file mode 100644 index 39a02ec..0000000 --- a/matlab/graph/@phyTree/getMatrix.m +++ /dev/null @@ -1,62 +0,0 @@ -function [cm lab dist] = getmatrix(tr,varargin) -%GETMATRIX converts a Phytree Object into a relationship matrix. -% -% [MATRIX, ID, DISTANCES] = GETMATRIX(T) converts the phylogenetic tree -% object T into a logical sparse matrix, where 1's indicate that a branch -% node (row index) is connected to its child (column index). The child -% can be either another branch node or a leaf node. ID is a list of the -% labels that correspond to the rows and columns of MATRIX, first the -% leaf nodes from 1 to NUMLEAVES, then the branch nodes from NUMLEAVES+1 -% to NUMLEAVES+NUMBRANCHES, being the root the last node. DISTANCES is -% a column vector with one entry for every nonzero entry in MATRIX -% traversed columnwise and representing the distance between the branch -% node and the child. -% -% Example: -% -% T = phytreeread('pf00002.tree') -% [MATRIX ID DIST] = getmatrix(T); -% -% See also PHYTREE, PHYTREE/GET, PHYTREE/PDIST, PHYTREE/PRUNE, PHYTREETOOL. - -% Copyright 2006 The MathWorks, Inc. -% $Revision: 1.1.6.1 $ $Date: 2006/06/16 20:06:43 $ - - - %%% check arguments -if nargin > 1 - if rem(nargin,2) == 0 - error('Bioinfo:phytree:getmatrix:IncorrectNumberOfArguments',... - 'Incorrect number of arguments to %s.',mfilename); - end - okargs = {'no_input_arguments'}; - for j=1:2:nargin-2 - pname = varargin{j}; - pval = varargin{j+1}; - if(isstruct(pval)) - error('Bioinfo:phytree:getmatrix:StructParamError',... - 'parameter cannot be a struct'); - end - k = find(strncmpi(pname, okargs,length(pname))); - if isempty(k) - error('Bioinfo:phytree:getmatrix:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('Bioinfo:phytree:getmatrix:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - switch(k) - end - end - end -end - -numLeaves = size(tr.tree,1)+1; -numNodes = numLeaves + numLeaves -1; -cm = sparse(repmat(numLeaves+1:numNodes,1,2),tr.tree(:),true,numNodes,numNodes); -if nargout>1 - lab = tr.names; -end -if nargout>2 - dist = tr.dist; -end diff --git a/matlab/graph/@phyTree/getNewickStr.m b/matlab/graph/@phyTree/getNewickStr.m deleted file mode 100644 index 797f933..0000000 --- a/matlab/graph/@phyTree/getNewickStr.m +++ /dev/null @@ -1,133 +0,0 @@ -function str = getnewickstr(tr,varargin) -%GETNEWICKSTR creates a NEWICK formatted string. -% -% STR = GETNEWICKSTR(TREE) returns the NEWICK formatted string of the -% phylogenetic tree object TREE. -% -% GETNEWICKSTR(...,'DISTANCES',false) excludes the distances from the -% output. Default is true. -% -% GETNEWICKSTR(...,'BRANCHNAMES',true) includes the branch names into the -% output. Default is false. -% -% The NEWICK tree format is found at: -% http://evolution.genetics.washington.edu/phylip/newicktree.html -% -% Example: -% -% seqs = int2nt(ceil(rand(10)*4)); % some random sequences -% dist = seqpdist(seqs,'alpha','nt'); % pairwise distances -% tree = seqlinkage(dist); % construct phylogenetic tree -% str = getnewickstr(tree) % get the NEWICK string -% -% See also PHYTREE, PHYTREEREAD, PHYTREEWRITE, PHYTREETOOL, SEQLINKAGE, -% PHYTREE/GET, PHYTREE/GETBYNAME, PHYTREE/GETCANONICAL. - -% Undocumented: -% GETSTR(...,'MULTILINE',true) introduces 'new line' characters for a -% multi-line output. This option is used by PHYTREEWRITE. Default is -% false. - -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.8.1 $ $Author: batserve $ $Date: 2005/06/09 21:55:57 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:getstr:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -% set defaults -writeDistances = true; -writeBranchNames = false; -multiLine = false; - -nvarargin = numel(varargin); -if nvarargin - if rem(nvarargin,2) - error('Bioinfo:phytree:getstr:IncorrectNumberOfArguments',... - 'Incorrect number of arguments to %s.',mfilename); - end - okargs = {'multiline','distances','branchnames'}; - for j=1:2:nvarargin - pname = varargin{j}; - pval = varargin{j+1}; - k = find(strncmpi(pname,okargs,numel(pname))); - if isempty(k) - error('Bioinfo:phytree:getstr:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('Bioinfo:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - switch(k) - case 1 % multi-lines - multiLine = opttf(pval); - if isempty(multiLine) - error('Bioinfo:phytree:getstr:multiLineOptionNotLogical',... - '%s must be a logical value, true or false.',... - upper(char(okargs(k)))); - end - case 2 % write distances - writeDistances = opttf(pval); - if isempty(writeDistances) - error('Bioinfo:phytree:getstr:writeDistancesOptionNotLogical',... - '%s must be a logical value, true or false.',... - upper(char(okargs(k)))); - end - case 3 % write branch names - writeBranchNames = opttf(pval); - if isempty(writeBranchNames) - error('Bioinfo:phytree:getstr:writeBranchNamesOptionNotLogical',... - '%s must be a logical value, true or false.',... - upper(char(okargs(k)))); - end - end - end - end -end - -for i=1:numLabels-1; - if (i<=numLeaves || writeBranchNames) - if writeDistances - namedist{i} = [tr.names{i} ':' num2str(tr.dist(i))]; %#ok - else - namedist{i} = tr.names{i}; - end - elseif writeDistances && ~writeBranchNames - namedist{i} = [':' num2str(tr.dist(i))]; %#ok - else - namedist{i} = ''; - end -end - -if writeBranchNames - namedist{numLabels} = [tr.names{numLabels} ';']; -else - namedist{numLabels} = ';'; -end - -for i=1:numBranches - if tr.tree(i,1) > numLeaves - t1 = branchstr{tr.tree(i,1)}; - else - t1 = namedist{tr.tree(i,1)}; - end - if tr.tree(i,2) > numLeaves - t2 = branchstr{tr.tree(i,2)}; - else - t2 = namedist{tr.tree(i,2)}; - end - branchstr{i+numLeaves} = ... - [ '(\n' t1 ',\n' t2 ')\n' , namedist{i+numLeaves} ]; %#ok -end - -str = sprintf(branchstr{numLabels}); - -if ~multiLine - str = strrep(str,sprintf('\n'),''); -end - diff --git a/matlab/graph/@phyTree/openVar.m b/matlab/graph/@phyTree/openVar.m deleted file mode 100644 index 47e48d6..0000000 --- a/matlab/graph/@phyTree/openVar.m +++ /dev/null @@ -1,12 +0,0 @@ -function openvar(name, tr) %#ok -%OPENVAR Opens a phylogenetic tree object for graphical editing. - -% Copyright 2003-2006 The MathWorks, Inc. -% $Revision: 1.1.6.3 $ $Author: batserve $ $Date: 2006/06/16 20:06:44 $ - -try - view(tr); -catch - % rethrows the error into a dlg window - errordlg(lasterr, 'Inspection error', 'modal'); -end diff --git a/matlab/graph/@phyTree/pDist.m b/matlab/graph/@phyTree/pDist.m deleted file mode 100644 index e543872..0000000 --- a/matlab/graph/@phyTree/pDist.m +++ /dev/null @@ -1,153 +0,0 @@ -function [dist,comm] = pdist(tr,varargin) -%PDIST computes the pairwise patristic distance. -% -% D = PDIST(TREE) returns a vector D containing the patristic -% distances between every possible pair of leaf nodes in the phylogenic -% tree object TREE. The distance is computed following the path through -% the branches of the tree. -% -% The output vector D is arranged in the order of ((2,1),(3,1),..., -% (M,1),(3,2),...(M,3),.....(M,M-1)), i.e. the lower left triangle of the -% full M-by-M distance matrix. To get the distance between the Ith and -% Jth nodes (I > J) use the formula D((J-1)*(M-J/2)+I-J). M is the -% number of leaves) -% -% D = PDIST(...,'NODES',N) indicates the nodes to be included in the -% computation. N can be 'leaves' (default) or 'all'. In the former -% case the output will be order as before, but M is the total number of -% nodes in the tree, i.e. NUMLEAVES+NUMBRANCHES. -% -% D = PDIST(...,'SQUAREFORM',true) coverts the output into a square -% format, so that D(I,J) denotes the distance between the Ith and the Jth -% node. The output matrix is symmetric and has a zero diagonal. -% -% D = PDIST(...,'CRITERIA',C) changes the criteria used to relate pairs. -% C can be 'distance' (default) or 'levels'. -% -% [D,C] = PDIST(TREE) returns in C the index of the closest common parent -% nodes for every possible pair of query nodes. -% -% Example: -% -% % get the tree distances between every leaf -% tr = phytreeread('pf00002.tree') -% dist = pdist(tr,'nodes','leaves','squareform',true) -% -% See also SEQPDIST, SEQLINKAGE, PHYTREE, PHYTREETOOL. - -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.6.6 $ $Author: batserve $ $Date: 2005/06/09 21:55:59 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:pdist:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -% set default -squaredOutput = false; -CriteriaIsLevels = false; -outNodes = 'leaves'; - -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -% process input arguments -if nargin > 1 - if rem(nargin,2) == 0 - error('Bioinfo:phytree:pdist:IncorrectNumberOfArguments',... - 'Incorrect number of arguments to %s.',mfilename); - end - okargs = {'nodes','squareform','criteria'}; - for ind = 2 : 2: nargin - pname = varargin{ind-1}; - pval = varargin{ind}; - k = find(strncmpi(pname,okargs,numel(pname))); - if isempty(k) - error('Bioinfo:phytree:pdist:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('Bioinfo:phytree:pdist:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - switch(k) - case 1 % nodes - okNodes = {'leaves','all','branches'}; - h = strmatch(lower(pval),okNodes ); %#ok - if isempty(h) - error('Bioinfo:phytree:pdist:IncorrectReferenceNode',... - 'Incorrect node selection') - else - outNodes = okNodes{h}; - end - case 2 % squareform - squaredOutput = (pval == true); - case 3 % criteria - h = strmatch(lower(pval),{'levels','distance'}); %#ok - if numel(h) - CriteriaIsLevels = (h == 1); - else error('Bioinfo:phytree:pdist:InvalidCriteria',... - 'Invalid string for criteria.'); - end - end - end - end -end - - -% create indexes to work only the lower leff triangle -m = numLabels*(numLabels-1)/2; -p = cumsum([1 (numLabels-1):-1:2]); -I = ones(m,1); I(p) = [2 3-numLabels:0]; -J = zeros(m,1); J(p) = 1; -H = I; H(p) = 2:numLabels; -I = cumsum(I); J = cumsum(J); H = cumsum(H); - -switch outNodes - case 'leaves' - outSelection = (I <= numLeaves) & (J <= numLeaves); - case 'all' - outSelection = (I>0); - case 'branches' - outSelection = (I > numLeaves) & (J > numLeaves); -end -% find closest common branch for every pair of nodes -% diagonal is invalid ! but not needed - -% initializing full matrix -commf = zeros(numLabels,'int16'); -children = false(1,numLabels); -for ind = numBranches:-1:1 - children(:) = false; - children(ind+numLeaves) = true; - for ind2 = ind:-1:1 - if children(ind2+numLeaves) - children(tr.tree(ind2,:))=true; - end - end - commf(children,children)=int16(ind); -end - -% output vector with the lower leff triangle closest common branches -comm = double(commf(H)); - -% compute the distance to root for every node -cdist = tr.dist; -if CriteriaIsLevels % set to count levels instead - cdist(:) = 1; - cdist(end) = 0; -end -for ind = numBranches:-1:1 - cdist(tr.tree(ind,:)) = cdist(tr.tree(ind,:)) + cdist(ind+numLeaves); -end - -% compute pairwise distance -dist = cdist(I)+cdist(J)-2*cdist(comm+numLeaves); - -dist = dist(outSelection); -comm = comm(outSelection); - -if squaredOutput - dist = squareform(dist); - comm = squareform(comm); -end diff --git a/matlab/graph/@phyTree/phyTree.m b/matlab/graph/@phyTree/phyTree.m deleted file mode 100644 index bf7f9bc..0000000 --- a/matlab/graph/@phyTree/phyTree.m +++ /dev/null @@ -1,235 +0,0 @@ -function tr = phyTree(varargin) -%PHYTREE Phylogenetic tree object. -% -% TREE = PHYTREE(B) creates an ultrametric phylogenetic tree object. B is -% a numeric array of size [NUMBRANCHES X 2] in which every row represents -% a branch of the tree and it contains two pointers to the branches -% or leaves nodes which are its children. Leaf nodes are numbered from 1 -% to NUMLEAVES and branch nodes are numbered from NUMLEAVES + 1 to -% NUMLEAVES + NUMBRANCHES. Note that since only binary trees are allowed, -% then NUMLEAVES = NUMBRANCHES + 1. Branches are defined in chronological -% order, i.e. B(i,:) > NUMLEAVES + i. As a consequence, the first row can -% only have pointers to leaves and the last row must represent the 'root' -% branch. Parent-child distances are set to the unit or by the ultrametric -% condition if child is a leaf. -% -% TREE = PHYTREE(B,D) creates an additive phylogenetic tree object with -% branch distances defined by D. D is a numeric array of size [NUMNODES X -% 1] with the distances of every child node (leaf or branch) to its parent -% branch. NUMNODES = NUMLEAVES + NUMBRANCHES. D(end), the distance -% associated to the root node, is meaningless. -% -% TREE = PHYTREE(B,C) creates an ultrametric phylogenetic tree object with -% branch distances defined by C. C is a numeric array of size [NUMBRANCHES -% X 1] with the coordinates of every branch node. In ultrametric tress all -% the leaves are at the same location (i.e. same distance to the root). -% -% TREE = PHYTREE(BC) creates an ultrametric phylogenetic binary tree -% object with branch pointers in BC(:,[1 2]) and branch coordinates in -% BC(:,3). Same as PHYTREE(B,C). -% -% TREE = PHYTREE(...,N) specifies the names for the leaves and/or the -% branches. N is a cell of strings. If NUMEL(N)==NUMLEAVES then the names -% are assigned chronologically to the leaves. If NUMEL(N)==NUMBRANCHES the -% names are assigned to the branch nodes. If NUMEL(N)==NUMLEAVES + -% NUMBRANCHES all the nodes are named. Unassigned names default to 'Leaf -% #' and/or 'Branch #' as required. -% -% TREE = PHYTREE creates an empty phylogenectic tree object -% -% Example: -% -% % create an ultrametric tree -% b = [1 2; 3 4; 5 6; 7 8;9 10]; -% t = phytree(b); -% view(t) -% -% % create an ultrametric tree with specified branch distances -% bd = [.1 .2 .3 .3 .4 ]'; -% b = [1 2; 3 4; 5 6; 7 8;9 10]; -% t = phytree(b,bd); -% view(t) -% -% See also PHYTREE/GET, PHYTREE/SELECT, PHYTREEREAD, PHYTREETOOL, -% PHYTREEWRITE, SEQLINKAGE, SEQNEIGHJOIN, SEQPDIST. - - -% Copyright 2003-2006 The MathWorks, Inc. -% $Revision: 1.1.6.11.2.1 $ $Author: batserve $ $Date: 2006/07/27 21:37:49 $ - -justVerifyValidity = false; - -switch nargin - case 0 - tr.tree = zeros(0,2); - tr.dist = zeros(0,1); - tr.names = {}; - case 1 - B = varargin{1}; - case 2 - B = varargin{1}; - if iscell(varargin{2}) - N = varargin{2}; - else - D = varargin{2}; - end - case 3 - B = varargin{1}; - D = varargin{2}; - N = varargin{3}; - otherwise - error('Bioinfo:phytree:IncorrectNumberOfArguments',... - 'Incorrect number of arguments to %s.',mfilename); -end - -if nargin==1 && isstruct(B) && isfield(B,'tree') && isfield(B,'names') && isfield(B,'dist') - N = B.names; - D = B.dist; - B = B.tree; - tr.tree = B; - tr.dist = D; - tr.names = N; - justVerifyValidity = true; -end - -if nargin - if isnumeric(B) - switch size(B,2) - case 2 - % ok - case 3 - D = B(:,3); - B(:,3)=[]; - otherwise - error('Bioinfo:phytree:IncorrectSize','Incorrect size for B or BC') - end - else - error('Bioinfo:phytree:IncorrectType','Incorrect type for B or BC') - end - - % test B - if sum(diff(sort(B(:)))~=1) || (min(B(:))~=1) - error('Bioinfo:phytree:IncompleteTree','Branch architecture is not complete') - end - numBranches = size(B,1); - numLeaves = numBranches + 1; - numLabels = numBranches + numLeaves; - h=all(B'>=repmat(numLeaves+1:numLabels,2,1)); - if any(h) - error('Bioinfo:phytree:NonChronologicalTree',... - ['Branch(es) not in chronological order: [' num2str(find(h)) ']']) - end - - if exist('D','var') - if ~isnumeric(D) || any(D(:)<0) || ~all(isreal(D)) || size(D,2)~=1 - error('Bioinfo:phytree:DistancesNotValid',... - 'Distances must be a column vector of real positive numbers') - end - switch size(D,1) - case numBranches - D = [zeros(numLeaves,1); D]; % add ultrametric distances of leaves - D(B) = D((numLeaves+(1:numBranches))'*[1 1])-D(B); %dist of edges - D(end) = 0; % set root at zero - case numLabels - % ok - otherwise - error('Bioinfo:phytree:DistancesNotValid',... - 'Distances must agree either with the number of branches (C) or total nodes (D)') - end - else % set defaut D - % look for parents - P = zeros(numLabels,1); - P(B) = repmat((1:numBranches)',1,2); - P(end) = numBranches; - % look at which level is every branch - L = zeros(numLabels,1); - for ind = 1:numBranches - L(ind+numLeaves) = max(L(B(ind,:))+1); - end - D = L(P+numLeaves)-L; - end - - % set default names - for ind = 1:numLeaves - names{ind}=['Leaf ' num2str(ind)]; %#ok - end - for ind = 1:numBranches - names{ind+numLeaves}=['Branch ' num2str(ind)]; - end - - if exist('N','var') - if ~iscell(N) - error('Bioinfo:phytree:NamesNotValid',... - 'Names must be supplied with a cell of strings') - end - switch numel(N) - case numLabels - h = 1:numLabels; - case numLeaves - h = 1:numLeaves; - case numBranches - h = numLeaves+1:numLabels; - otherwise - error('Bioinfo:phytree:NamesNotValid',... - 'Names must agree either with the number of branches, number of leaves, or total nodes') - end - for ind = 1:length(h); - str = N{ind}; - if ~ischar(str) - error('Bioinfo:phytree:NamesNotValid',... - 'Names must be valid strings') - end - names{h(ind)}=str; - end - % check that none of the names is empty - for ind = 1:numLabels - if isempty(names{ind}) - if ind > numLeaves - names{ind} = ['Branch ' num2str(ind-numLeaves)]; - else - names{ind} = ['Leaf ' num2str(ind)]; - end - end - end - if numel(unique(names))~=numLabels - error('Bioinfo:phytree:NamesNotUnique',... - 'Names for leaves and branches must be unique') - end - end - - % check and corrects a non-monotonic tree - monotonicWarning = false; - for ind = 1:numBranches - if any(D(B(ind,:))<0) - monotonicWarning = true; - tmp = min(D(B(ind,:))); - D(B(ind,:)) = D(B(ind,:)) - tmp; - D(numLeaves+ind) = D(numLeaves+ind) + tmp; - end - end - if monotonicWarning - warning('Bioinfo:phytree:NonMonotonicTree',... - 'Non consistent branch distances; \n Incremented branch lengths to hold a Monotonic Phylogenetic Tree') - end - - if justVerifyValidity - tr = class(tr,'phytree'); - return - end - - tr.tree = B; - tr.dist = D; - tr.names = names(:); - - % reorder such that there will be no crossings in the displayed tree - tr = prettyOrder(tr); - -end %if nargin - -% for trees of only one branch correct dimensions -% if size(tr.tree,2) <2 tr.tree = tr.tree'; end - -% Makes the tree a class -tr = class(tr,'phyTree'); - - diff --git a/matlab/graph/@phyTree/private/Opttf.m b/matlab/graph/@phyTree/private/Opttf.m deleted file mode 100644 index 07be462..0000000 --- a/matlab/graph/@phyTree/private/Opttf.m +++ /dev/null @@ -1,31 +0,0 @@ -function tf = opttf(pval) -%OPTTF determines whether input options are true or false - -% Copyright 2003-2004 The MathWorks, Inc. -% $Revision: 1.3.4.2 $ $Date: 2004/12/24 20:42:39 $ - - -if islogical(pval) - tf = all(pval); - return -end -if isnumeric(pval) - tf = all(pval~=0); - return -end -if ischar(pval) - truevals = {'true','yes','on','t'}; - k = any(strcmpi(pval,truevals)); - if k - tf = true; - return - end - falsevals = {'false','no','off','f'}; - k = any(strcmpi(pval,falsevals)); - if k - tf = false; - return - end -end -% return empty if unknown value -tf = logical([]); \ No newline at end of file diff --git a/matlab/graph/@phyTree/private/prettyOrder.m b/matlab/graph/@phyTree/private/prettyOrder.m deleted file mode 100644 index 1f06509..0000000 --- a/matlab/graph/@phyTree/private/prettyOrder.m +++ /dev/null @@ -1,39 +0,0 @@ -function tr = prettyOrder(tr) -%PRETTYORDER Reorders the leaf nodes to avoid branch crossings. -% -% T2 = PRETTYORDER(T1) Reorders the leaf nodes in the phylogenetic tree -% T1 such that the layout of the tree does not contain branch crossings. - -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.8.1 $ $Author: batserve $ $Date: 2005/06/09 21:56:11 $ - -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -L = [ones(numLeaves,1); zeros(numBranches,1)]; -for ind = 1 : numBranches - L(ind+numLeaves) = sum(L(tr.tree(ind,:))); -end -X = zeros(numLabels,1); -for ind = numBranches:-1:1 - X(tr.tree(ind,:)) = tr.dist(tr.tree(ind,:))+X(ind+numLeaves); -end -Li = zeros(1,numLabels); Ls = Li; -Ls(numLabels) = numLeaves; -for ind = numBranches:-1:1 - Ls(tr.tree(ind,:)) = Ls(ind+numLeaves); - Li(tr.tree(ind,:)) = Li(ind+numLeaves); - if diff(X(tr.tree(ind,:)))>=0 - Ls(tr.tree(ind,1)) = Li(tr.tree(ind,1)) + L(tr.tree(ind,1)); - Li(tr.tree(ind,2)) = Ls(tr.tree(ind,2)) - L(tr.tree(ind,2)); - else - Ls(tr.tree(ind,2)) = Li(tr.tree(ind,2)) + L(tr.tree(ind,2)); - Li(tr.tree(ind,1)) = Ls(tr.tree(ind,1)) - L(tr.tree(ind,1)); - end -end - -tr.names(Ls(1:numLeaves))=tr.names(1:numLeaves); -tr.dist(Ls(1:numLeaves))=tr.dist(1:numLeaves); -Ls(numLeaves+1:numLabels)=numLeaves+1:numLabels; -tr.tree = Ls(tr.tree); diff --git a/matlab/graph/@phyTree/reRoot.m b/matlab/graph/@phyTree/reRoot.m deleted file mode 100644 index e1f43f4..0000000 --- a/matlab/graph/@phyTree/reRoot.m +++ /dev/null @@ -1,206 +0,0 @@ -function tr = reRoot(tr,node,distance) -%REROOT changes the root of a phylogenetic tree. -% -% T2 = REROOT(T1) changes the root of the phylogenetic tree T1 using the -% mid-point method. The mid-point is the location where the means of -% the branch lengths of either side of the tree are equalized. The -% original root is deleted. -% -% T2 = REROOT(T1,NODE) changes the root to the branch indexed by NODE. -% The new root is placed at half the distance between NODE and its -% parent. -% -% T2 = REROOT(T1,NODE,DISTANCE) re-roots T1 by placing the new root at a -% given DISTANCE from the reference NODE towards the root of the tree. -% -% Note: The new branch in T2 representing the root is labeled as 'Root'. -% -% Example: -% -% % Create an ultrametric tree -% tr_1 = phytree([5 7;8 9;6 11; 1 2;3 4;10 12;14 16;15 17;13 18]) -% plot(tr_1,'branchlabels',true) -% -% % Place the new root at 'Branch 7' -% sel = getbyname(tr_1,'Branch 7'); -% tr_2 = reroot(tr_1,sel) -% plot(tr_2,'branchlabels',true) -% -% % The mid-point of the original tree was the root, since it was an -% % ultrametric tree -% tr_3 = reroot(tr_2) -% plot(tr_3,'branchlabels',true) -% -% See also PHYTREE, PHYTREE/GET, PHYTREE/GETBYNAME, PHYTREE/PRUNE, -% PHYTREE/SELECT, SEQNEIGHJOIN. - -% Copyright 2003-2006 The MathWorks, Inc. -% $Revision: 1.1.8.3 $ $Author: batserve $ $Date: 2006/06/16 20:06:46 $ - -if numel(tr)~=1 - error('Bioinfo:phytree:reroot:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -if nargin == 1; - [node,distance] = midpoint(tr); -else - % validate node - if islogical(node) - if any(numel(node) == [numLabels numLeaves]) - node = find(node); - elseif numel(node) == numBranches - node = find(node) + numLeaves; - else - error('Bioinfo:phytree:reroot:IncorrectSizeInputVector',... - 'Logical vector must have the same number of elements as nodes in the Phylogenetic Tree.'); - end - end - if ~isscalar(node) || node<1 || node>numLabels - error('Bioinfo:phytree:reroot:InvalidInputNode',... - 'Invalid value for NODE.'); - end - node = round(node); - % when no distance is given put the root at half the branch - if nargin<3 - distance = tr.dist(node)/2; - end -end - -% find parents for every tree node -parent = zeros(numLabels,1); -parent(tr.tree) = repmat(numLeaves+1:numLabels,1,2); - -% validate distance, if necessary shift the origin node -if ~isscalar(distance) || distance<0 - error('Bioinfo:phytree:reroot:InvalidInputDistance',... - 'Invalid value for DISTANCE.'); -end - -% validate distance, if necessary shift the origin node -while (tr.dist(node)<=distance) && (node ~= numLabels) - distance = distance - tr.dist(node); - node = parent(node); -end - -if node == numLabels - if distance>0 - warning('Bioinfo:phytree:reroot:BeyondRoot',... - 'Distance goes beyond the root, tree unchanged.') - else - tr.names{end} = 'Root'; - end - return -end - -% check if we just need to move the branches of current root -if parent(node) == numLabels - tr.dist(setxor(tr.tree(end,:),node)) = ... - sum(tr.dist(tr.tree(end,:))) - distance; - tr.dist(node) = distance; - tr.names{end} = 'Root'; - return -end - -% path to root from node and bros for every point in the path -path2r = false(numBranches,1); -pathBros = []; -me = node; -par = parent(node); -while par - pathBros = [pathBros;setxor(tr.tree(par-numLeaves,:),me)]; - path2r(par-numLeaves) = true; - me = par; - par = parent(par); -end -path2rInd=find(path2r)+numLeaves; - -% new tree pointers -tr.tree = [tr.tree(~path2r,:);... - [[pathBros(end-1:-1:1);node],[pathBros(end);path2rInd(end-1:-1:1)]]]; - -% swapping distances in the nodes that belong to the path2r -tr.dist(pathBros(end)) = tr.dist(pathBros(end)) + tr.dist(path2rInd(end-1)); -tr.dist(path2rInd(2:end-1)) = tr.dist(path2rInd(1:end-2)); -tr.dist(parent(node)) = tr.dist(node) - distance; -tr.dist(node) = distance; - -% some branches changed positions, need to tree -permuta = [(1:numLeaves)';find(~path2r)+numLeaves;... - path2rInd(end-1:-1:1);numLabels]; -ipermuta(permuta)=1:numLabels; -tr.tree = ipermuta(tr.tree); -tr.dist = tr.dist(permuta); -tr.names = tr.names(permuta); -tr.names{end} = 'Root'; - -% re-order leaves for no branch crossings -tr = prettyorder(tr); - -%-% ---------------------------------------------------------------- -% Selects the point where the mean of the branch length is equalized -function [node,distance] = midpoint(tr) - -numBranches = size(tr.tree,1); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - - -branchWidth = ones(numLabels,1); -downDist = zeros(numLabels,1); -upDist = zeros(numLabels,1); -% cumulative distance and width downwards the tree -for ind = 1:numBranches - branchWidth(numLeaves+ind) = sum(branchWidth(tr.tree(ind,:))); - downDist(numLeaves+ind) = sum(downDist(tr.tree(ind,:)) + ... - tr.dist(tr.tree(ind,:)).*branchWidth(tr.tree(ind,:))); -end -% backpropagate distances -for ind = numBranches:-1:1 - upDist(tr.tree(ind,:)) = downDist(tr.tree(ind,[2 1])) +... - upDist(ind+numLeaves) + ... - tr.dist(ind+numLeaves).*(numLeaves-branchWidth(ind+numLeaves)) + ... - tr.dist(tr.tree(ind,[2 1])).*(branchWidth(tr.tree(ind,[2 1]))); -end - -% find all possible midpoints, solve this eq for every edge -% ud/Nu + (x)*e = dd/Nd + (1-x)*e -% ud = cumulative upwards distances -% dd = cumulative downwards distances -% Nu = number of leaves in the upper braches -% Nd = number of leaves in the lower braches -% e = distance of current edge - -h = tr.dist~=0; % root can not be in an edge which length is zero -h(numLabels) = false; % the route can not be segmented -x = inf(numLabels,1); -x(h) = (upDist(h)./branchWidth(h) - ... - downDist(h)./(numLeaves-branchWidth(h)))./tr.dist(h)/2 + 1/2; - -x(h) = (upDist(h)./(numLeaves-branchWidth(h)) - ... - downDist(h)./(branchWidth(h)))./tr.dist(h)/2 + 1/2; - -% find all possible roots -h = find(x>=0 & x<=1); -if isempty(h) - [dummy,h] = min(abs(x-1/2)); %#ok -end -% pick the most balanced one -[d,g]=min(abs(branchWidth(h)*2-numLeaves)); %#ok - -node = h(g); -ratio = min(max(x(h(g)),0),1); - -% if ratio is 1 then better pick the parent -if ratio == 1 - [node,dummy] = find(tr.tree==node); %#ok - node = node + numLeaves; - ratio = 0; -end - -% change the ratio (x) to the distance to the selected node -distance = ratio .* tr.dist(node); - diff --git a/matlab/graph/allfreqsnew2.m b/matlab/graph/allfreqsnew2.m deleted file mode 100644 index 6126e8a..0000000 --- a/matlab/graph/allfreqsnew2.m +++ /dev/null @@ -1,90 +0,0 @@ -function [counts,noalle_est,prior,adjprior,rawalleles] = allfreqsnew2(rawdata, noalle_est) -% Filename: allfreqsnew2.m -% [counts,noalle,prior,adjprior,rawalleles] = allfreqsnew(rawdata) -% -% Description: -% rawdata has n rows (2 x #individuals) and n(l) first -% colums are the loci. The last column is the subpopindex -% prior is a created matrix of positive Dirichlet hyperparameters -% missing data is filtered out -% !!!NEW!!!zeros are accepted as allele codes and any negative numbers as missing data. - -% Modified by: Jing Tang -SCALE = 1; -dime=size(rawdata); -noalle=zeros(dime(2)-1,1); -rawalleles=cell(1,dime(2)-1); -for i=1:dime(2)-1 - noalle(i)=length(unique(rawdata(:,i))); -end -for i=1:dime(2)-1 - if length(find(rawdata(:,i)<=0))>0 - noalle(i)=noalle(i)-1; - end -end - -% Fomulate the raw data such that the value i in a entry denotes the ith -% alleles. -for i=1:dime(2)-1 - rawalles=unique(rawdata(:,i)); - % rawalles = [1:noalle(i)]'; - if rawalles(1)<=0 - rawalles(1)=-999; - end - rawalleles{i} = rawalles; %rawalleles!!! - if rawalles(1)<0 - for j=2:noalle(i)+1 - %rawdata(find(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*(j-1); - rawdata(logical(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*(j-1); - end - else - for j=1:noalle(i) - % rawdata(find(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*j; - rawdata(logical(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*j; - end - end -end - -% ALLOWED_MEMORY = 50; % in unit of megabyte. -% n1 = max(noalle_est); -% n2 = dime(2)-1; -% n3 = double(max(rawdata(:,dime(2)))); -% ncells = n1*n2*n3; -% memory_used = ncells/(1024*1024); % using uint8 format. -% if memory_used < ALLOWED_MEMORY -% counts=zeros(n1,n2,n3,'uint8'); -% else -% nbatches = ceil(memory_used/ALLOWED_MEMORY); -% n3_in_batch = ceil(n3/nbatches); -% counts = cell(nbatches,1); -% for i=1:nbatches-1 -% % counts = cat(3,counts,uint16(zeros(n1,n2,n3_in_batch))); -% counts{i} = zeros(n1,n2,n3_in_batch,'uint8'); -% end -% % counts = cat(3, counts, uint16(zeros(n1,n2,n3-n3_in_batch*(nbatches-1)))); -% counts{i} = zeros(n1,n2,n3-n3_in_batch*(nbatches-1),'uint8'); -% end - - - -counts = zeros(max(noalle_est),dime(2)-1,max(rawdata(:,dime(2))),'uint8'); -for i=1:dime(1) - for j=1:dime(2)-1 - if rawdata(i,j)>0 - counts(rawdata(i,j),j,rawdata(i,dime(2)))=... - counts(rawdata(i,j),j,rawdata(i,dime(2)))+1; - end - end -end - -maxnoalle = max(noalle_est); -% prior = []; -prior=zeros(maxnoalle,dime(2)-1); -for i=1:dime(2)-1 - prior(:,i) = [SCALE*ones(noalle_est(i),1)/noalle_est(i);zeros(maxnoalle-noalle_est(i),1)]; -end - -adjprior=prior; -for i=1:dime(2)-1 - adjprior(:,i)=adjprior(:,i)+[zeros(noalle_est(i),1);ones(maxnoalle-noalle_est(i),1)]; -end diff --git a/matlab/graph/chooseDistance.m b/matlab/graph/chooseDistance.m deleted file mode 100644 index 7de4105..0000000 --- a/matlab/graph/chooseDistance.m +++ /dev/null @@ -1,242 +0,0 @@ -function [D]=chooseDistance(aln) -% CHOOSEDISTANCE Display a quest dialog for choosing a genetic distance -% input: a structure containing part of variables from the mixture result. -% ouput: a symmetric distance matrix - -if (isempty(aln)) - error('Need input.'); -end - -if aln.npops == 1 - disp('*** ERROR: no population structure is found.'); - return -end - -ButtonName=questdlg('Choose a type of genetic distance?', ... - 'Select genetic distance', ... - 'KL','Nei','Hamming','KL'); - -switch ButtonName, - case 'KL' - disp('Using KL distance.'); - D = dn_kl(aln); - case 'Nei' - disp('Using Nei distance.'); - D = dn_nei(aln); - case 'Hamming' - disp('Using Hamming distance. Please wait...'); - D = dn_hamming(aln); -% case 'LogDet' -% disp('Using LogDet distance'); -% D= dn_logdet(aln); - otherwise - D = []; -end - -D = D + D'; % make it symmetric -%-------------------------------------------------------------------------- -% SUBFUNCTIONS -%-------------------------------------------------------------------------- -function [dist_mat] = dn_kl(aln) -npops = aln.npops; -COUNTS = aln.COUNTS; -adjprior = aln.adjprior; -% data = noIndex(aln.data, aln.noalle); -% partition = aln.partition; - -dist_mat = zeros(npops, npops); -maxnoalle = size(COUNTS,1); -nloci = size(COUNTS,2); -d = zeros(maxnoalle, nloci, npops); - -prior = adjprior; -prior(find(prior==1))=0; -nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yhtä alleelia. -prior(1,nollia)=1; -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end - -for pop1 = 1:npops - % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - dist_mat(pop1,pop2) = div; - end -end - -%-------------------------------------------------------------------------- -function [dist_mat] = dn_nei(aln) - -npops = aln.npops; -COUNTS = aln.COUNTS; -% adjprior = aln.adjprior; -% data = aln.data; -% partition = aln.partition; - -dist_mat = zeros(npops, npops); -maxnoalle = size(COUNTS,1); -nloci = size(COUNTS,2); -d = zeros(maxnoalle, nloci, npops); - -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end -for pop1 = 1:npops - % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div1 = sum(sum(dist1.*dist2)); - div2 = sqrt(sum(sum(dist1.^2)))*sqrt(sum(sum(dist2.^2))); - div = -log(div1/div2); - dist_mat(pop1,pop2) = div; - end -end - -%-------------------------------------------------------------------------- -function [dist_mat] = dn_hamming(aln) - -npops = aln.npops; -data = noIndex(aln.data, aln.noalle); -partition = aln.partition; -dist_mat = zeros(npops, npops); -for pop1 = 1:npops - for pop2 = 1:pop1-1 - dist_mat(pop1,pop2) = hamming_dist(data(logical(partition==pop1),[1:end-1]),... - data(logical(partition==pop2),[1:end-1])); - end -end - -function dist = hamming_dist(data1,data2) -[length1,nloci] = size(data1); -length2 = size(data2,1); -dist1 = 0; -for i = 1:length1 - dist2 = 0; - for j = 1:length2 - dist2 = dist2 + sum(data1(i,:)~=data2(j,:))/nloci; - end - dist1 = dist1 + dist2/length2; -end -dist = dist1/length1; - -%-------------------------------------------------------------------------- -function [D]=dn_logdet(aln) -%DN_LOGDET - Log-det (paralinear) distance -%The LogDet model computes the distance from the determinant of the matrix of -%co-occurrence of nucleotides in the two species, according to the formula -% -% D = - 1/4(loge(|F|) - 1/2loge(fA1 fC1 fG1 fT1 fA2 fC2 fG2 fT2)) -% -%Where F is a matrix whose (i,j) element is the fraction of sites at which base -%i occurs in one species and base j occurs in the other. fji is the fraction of -%sites at which species i has base j. The LogDet distance cannot cope with -%ambiguity codes. It must have completely defined sequences. One limitation of -%the LogDet distance is that it may be infinite sometimes, if there are too many -%changes between certain pairs of nucleotides. This can be particularly -%noticeable with distances computed from bootstrapped sequences. -% -% Syntax: [D]=dn_logdet(aln) -% -% Inputs: -% aln - Alignment structure -% -% Outputs: -% D - Distance matrix -% -% See also: - -% Molecular Biology & Evolution Toolbox, (C) 2006 -% Author: James J. Cai -% Email: jamescai@hku.hk -% Website: http://bioinformatics.org/mbetoolbox/ -% Last revision: 5/3/2006 - -if (isstruct(aln)), - S=aln.data; -else - S=aln; -end - -[n,m] = size(S); -D = zeros(n,n); - -for i=1:n-1 - for j=i+1:n - D(i,j) = d_logdet(S(i,:), S(j,:)); - D(j,i) = D(i,j); - end -end - -%-------------------------------------------------------------------------- -function d=d_logdet(seq1, seq2) -[S,gap] = countntchange(seq1, seq2); -% S=count_ntchanges(seq1, seq2); -if (det(S)<=0) - d=inf; -else - f1=sum(S); - f2=sum(S'); - d=(-1/4)*(log(det(S))-(1/2)*log( prod(f1)*prod(f2))); -end - -%-------------------------------------------------------------------------- -function [D,gap]=countntchange(s1,s2) -%COUNTNTCHANGE - Count nucleotide changes in two DNA sequences -%D is a 4x4 array, with bases in seq1 along top, seq2 along side, -%in order A,C,G,T. -% -% Syntax: [D,gap]=countntchange(s1,s2) -% -% Inputs: -% s1 - Sequence 1 vector -% s2 - Sequence 2 vector -% -% Outputs: -% D - Codon Adaptation Index value -% gap - Codon Adaptation Index value -% -% -% See also: COUNTAACHANGE COUNTCDCHANGE - -% Molecular Biology & Evolution Toolbox, (C) 2006 -% Author: James J. Cai -% Email: jamescai@hku.hk -% Website: http://bioinformatics.org/mbetoolbox/ -% Last revision: 3/28/2006 - -if (nargout>1) - [D,gap]=countchange(s1,s2,4); -else - [D]=countchange(s1,s2,4); -end - -% ------------------------------------------------------------------------- -function [D,gap] = countchange(s1,s2,nword) - -if ~(ismember(nword, [4 20 61])), - error('Wrong NWORD') -end -if (length(s1)~=length(s2)), - error('Sequences are not of same length.') -end - -D=zeros(nword); -WORD=1:nword; - -for j=1:nword - s1sites=(s1==WORD(j)); - for i=1:nword - D(i,j)=sum(s1sites & (s2==WORD(i))); - end -end - -if (nargout>1), - gap = length(s1)-sum(D(:)); % gaps -end - diff --git a/matlab/graph/double2labels.m b/matlab/graph/double2labels.m deleted file mode 100644 index eeba8c4..0000000 --- a/matlab/graph/double2labels.m +++ /dev/null @@ -1,15 +0,0 @@ -function labels = double2labels(B) -% DOUBLE2LABELS Transforms a double matrix into a cell array of strings. - -[n m] = size(B); -labels = cell(n, m); - -for i = 1:n - for j = 1:m - if B(i, j) == 0 - labels{i, j} = ''; - else - labels{i, j} = sprintf('%.2g', B(i, j)); - end - end -end diff --git a/matlab/graph/giveColors.m b/matlab/graph/giveColors.m deleted file mode 100644 index 44b165d..0000000 --- a/matlab/graph/giveColors.m +++ /dev/null @@ -1,14 +0,0 @@ -function [colors] = giveColors(n) -if n > 36 - error('Maximum number of colors 36'); -end -colors = [1 0 0; 0 1 0; 0 0 1; 1 1 0 ; 1 0 1; 0 1 1; ... - 0.4 0 0; 0 0.4 0; 0 0 0.4; 0.4 0.4 0; 0.4 0 0.4; 0 0.4 0.4; ... - 0.2 0 0; 0 0.2 0; 0 0 0.2; 0.2 0.2 0; 0.2 0 0.2; 0 0.2 0.2; ... - 0.8 0 0; 0 0.8 0; 0 0 0.8; 0.8 0.8 0; 0.8 0 0.8; 0 0.8 0.8; ... - 0.6 0 0; 0 0.6 0; 0 0 0.6; 0.6 0.6 0; 0.6 0 0.6; 0 0.6 0.6; ... - 0.6 0.2 0.4; 0.2 0.4 0.8; 0.8 0.4 0.2; 0 0.6 0.2; 0.2 0.8 0.6; 0.5 0.2 0.1; ... - 0.6 0.3 0.1]; -colors = colors(1:n,:); -% red; green; blue; yellow -% RGB format: [red green blue] diff --git a/matlab/graph/graph2dot.m b/matlab/graph/graph2dot.m deleted file mode 100644 index b7a9551..0000000 --- a/matlab/graph/graph2dot.m +++ /dev/null @@ -1,139 +0,0 @@ -function graph2dot(adj, filename, varargin) -% GRAPH2DOT Writes a dot file representing an adjacency matrix. -% -% Syntax: graph2dot(adj, filename, ...) -% -% Required input arguments: -% adj - the adjacency matrix of the graph. -% filename - name for the dot file. -% -% Optional input arguments in name, value pairs [default]: -% 'nodelabels' - nodelabels{i} is a string attached to the node i ['i'] -% 'nodeshapes' - cell array of node shapes ['ellipse'] -% 'nodestyles' - cell array of node styles ['solid'] -% 'arclabels' - arclabels{i,j} is a string attached to the i-j arc [''] -% 'arcstyles' - cell array of arc styles ['solid'] -% 'width' - maximum width in inches [10] -% 'height' - maximum height in inches [10] -% 'leftright' - true for left to right layout, false for top-down [false] -% - -% This funtion is adapted from 'graph_to_dot' in GraphViz Matlab -% interface by Kevin Murphy, available from -% http://www.cs.ubc.ca/~murphyk/Software/GraphViz/graphviz.html -% -% First version written by Kevin Murphy 2002. -% Modified by Leon Peshkin, Jan 2004. -% Bugfix by Tom Minka, Mar 2004. -% Modified by Antti Kerminen, Aug 2005. -% - added handling of arc labels -% - added possibility to use bold, dashed, and red arc styles -% - added support for Octave -% - changed filename from optional to required argument -% - removed all code related to undirected arcs -% - some changes in documentation and code layout -% Modified by Antti Kerminen, Nov 2005. -% - added new input arguments: nodeshapes, nodestyles, arcstyles, -% pos -% - minor code refactoring -% Modified by Antti Kerminen, Mar 2006 -% - removed input arguments: boldarcs, dashedarcs, redarcs, pos - -% Check if we are running Octave -isoctave = exist('OCTAVE_VERSION'); - -nnodes = size(adj, 1); - -% Set default args -nodelabels = cell(nnodes, 1); -for i = 1:nnodes - nodelabels{i} = num2str(i); -end -nodeshapes = cell(nnodes, 1); -nodeshapes(:) = {'ellipse'}; -nodestyles = cell(nnodes, 1); -nodestyles(:) = {'solid'}; -arclabels = []; -arcstyles = cell(nnodes); -arccolors = cell(nnodes); -arccolors(:) = {'black'}; -width = 4*nnodes; -height = 8*nnodes; -leftright = 0; - -% Get optional args -for i = 1:2:(nargin - 2) - switch varargin{i} - case 'nodelabels', nodelabels = varargin{i + 1}; - case 'nodeshapes', nodeshapes = varargin{i + 1}; - case 'nodestyles', nodestyles = varargin{i + 1}; - case 'arclabels', arclabels = varargin{i + 1}; - case 'arcstyles', arcstyles = varargin{i + 1}; - case 'arccolors', arccolors = varargin{i + 1}; - case 'width', width = varargin{i + 1}; - case 'height', height = varargin{i + 1}; - case 'leftright', leftright = varargin{i + 1}; - case 'graphvizpath', graphvizpath = varargin{i+1}; - case 'nodecolors', nodecolors = varargin{i+1}; - end -end - -w = cd; -cd(graphvizpath); - -% Construct a format string for nodes -nodeformat = ' %d [label = "%s", shape = %s, style = %s, color = "%s", fontcolor="white"];\n'; - -% Construct a format string for edges -if isempty(arclabels) - attributes = 'style = %s, color = "%s"'; -else - attributes = 'style = %s, color = "%s", label = "%s"'; -end - -if isoctave % no need/support for brackets - edgeformat = strcat(' %d -> %d [', attributes, '];\n'); -else - edgeformat = strcat([' %d -> %d [', attributes, '];\n']); -end - -% Write the file -fid = fopen(filename, 'w'); - -fprintf(fid, 'digraph G {\n'); -fprintf(fid, ' center = 1;\n'); -fprintf(fid, ' size = \"%d, %d\";\n', width, height); -% fprintf(fid, ' ratio=\"compress\";\n'); -% fprintf(fid, ' raio = fill;\n'); -fprintf(fid, ' node[fontsize=24]'); - -if leftright - fprintf(fid, ' rankdir = LR;\n'); -end - -% Write nodes -for node = 1:nnodes - fprintf(fid, nodeformat, node, nodelabels{node}, nodeshapes{node}, ... - nodestyles{node}, nodecolors{node}); -end - -% Write edges -for node1 = 1:nnodes - arcs = find(adj(node1, :)); - for node2 = arcs - style = arcstyles{node1, node2}; - color = arccolors{node1, node2}; - if isempty(arclabels) - fprintf(fid, edgeformat, node1, node2, style, color); - else - fprintf(fid, edgeformat, node1, node2, style, color, ... - arclabels{node1, node2}); - end - end -end - -fprintf(fid, '}'); -fclose(fid); -fclose('all'); % enfore closing the file anyway - -cd(w); diff --git a/matlab/graph/graphvis2.m b/matlab/graph/graphvis2.m deleted file mode 100644 index fbda391..0000000 --- a/matlab/graph/graphvis2.m +++ /dev/null @@ -1,108 +0,0 @@ -function graphvis2(adj_mat, linkage_file_name, inliers, graphviz_path, npops) -% Filename: graphvis2.m -% graphvis2(adj_mat, linkage_file_name) -% -% Description: -% Graph visualization given the adjacency matrix. - -% Author: Jing Tang -% Modified date: 01/06/2005 - -% Input: -% adj_mat: The adjacency matrix. -% linkage_file_name: - -% Output: -% - -% Ensure adj_mat is symmetric and square. - -[n,m] = size(adj_mat); -if n ~= m, error ('Adjacency matrix must be square'), end; -%if ~all(diag(adj_mat)), error('The diagonal must be nonzero'), end; - -npops_in = length(inliers); -groupnames = cell(1,npops_in); -nodecolors = cell(npops_in,1); -allnode_color = giveColors(npops); -arccolors = cell(npops_in); -adjmat = adj_mat ~= 0; -for i=1:npops_in - groupnames{i} = sprintf('Cluster %d',inliers(i)); - nodecolors{i} = num2str(rgb2hsv(allnode_color(inliers(i),:))); - arccolors(i,adjmat(i,:)) = {nodecolors{i}}; -end -nodestyles = cell(npops_in, 1); -nodestyles(:) = {'filled'}; -arcstyles = cell(npops_in); -arcstyles(adjmat) = {'filled'}; - -handle = plotmodel(adj_mat',[1:npops_in],'graphvizpath', graphviz_path, ... - 'nodecolors', nodecolors, ... - 'nodestyles', nodestyles, ... - 'arccolors',arccolors, ... - 'arcstyles',arcstyles, ... - 'varnames', groupnames); - -% set(handle,'menubar','none','numbertitle','off','toolbar','figure'); -set(handle,'numbertitle','off','toolbar','figure'); -m = findall(gcf,'type','uimenu'); -set(m([1:7]),'Visible', 'off'); - -h1 = uimenu('Parent',handle, ... - 'Label','Attributes', ... - 'Tag','attr_menu'); -h2 = uimenu('Parent',h1, ... - 'Label','Rename clusters', ... - 'callback', 'plotflow rename', ... - 'Tag','clustername_menu'); -h3 = uimenu('Parent',h1, ... - 'Label','Prune edges', ... - 'callback','plotflow prune', ... - 'Tag','edge_menu'); -h4 = uimenu('Parent',handle, ... - 'Label','Help', ... - 'callback', 'plotflow help', ... - 'Tag','help_menu'); -% h5 = uimenu('Parent',h1, ... -% 'Callback','baps4cbf about', ... -% 'Enable','on', ... -% 'Label','About', ... -% 'Tag','about_menu'); -set(handle,'Name',[' Gene flow - ' linkage_file_name ]); - -% save the parameters -g.handle = handle; -g.adjmat = adj_mat'; -g.adjmat2 = g.adjmat; -g.k = [1:npops_in]; -g.graphvizpath = graphviz_path; -g.nodecolors = nodecolors; -g.nodestyles = nodestyles; -g.arccolors = arccolors; -g.arcstyles = arcstyles; -g.varnames = groupnames; -g.type = 'GENEFLOW'; -set(h1,'Userdata',g); % store in the attribute menu - - - -% Old version - -% Decide the coordinate of each node. -% [p,p,r] = dmperm(adj_mat); -% nnodes = length(adj_mat); -% nblocks = length(r)-1; -% [B,ix]=sort(p,2); -% Coordinates=[[ix'],zeros(nnodes,1)]; -% -% % Plot the graph structure -% clf reset -% set(gcf, 'color', 'white', 'menubar', 'none', 'numbertitle','off','name', 'Graphical model') -% gplot(adj_mat, Coordinates, '-ob'); -% h=findobj(gca, 'type','line'); -% set(h,'markersize',10) -% xlim([0 nnodes+1]); -% axis off -% text(Coordinates(:,1),Coordinates(:,2)-0.1, int2str(B(:))); -%drawnow diff --git a/matlab/graph/ksdensity_myown.m b/matlab/graph/ksdensity_myown.m deleted file mode 100644 index 49cb3ac..0000000 --- a/matlab/graph/ksdensity_myown.m +++ /dev/null @@ -1,623 +0,0 @@ -function [fout0,xout,u]=ksdensity(yData,varargin) -%KSDENSITY Compute kernel density or distribution estimate -% [F,XI]=KSDENSITY(X) computes a probability density estimate of the sample -% in the vector X. KSDENSITY evaluates the density estimate at 100 points -% covering the range of the data. F is the vector of density values and XI -% is the set of 100 points. The estimate is based on a normal kernel -% function, using a window parameter (bandwidth) that is a function of the -% number of points in X. -% -% F=KSDENSITY(X,XI) specifies the vector XI of values where the density -% estimate is to be evaluated. -% -% [F,XI,U]=KSDENSITY(...) also returns the bandwidth of the kernel smoothing -% window. -% -% KSDENSITY(...) without output arguments produces a plot of the results. -% -% KSDENSITY(AX,...) plots into axes AX instead of GCA. -% -% [...]=KSDENSITY(...,'PARAM1',val1,'PARAM2',val2,...) specifies parameter -% name/value pairs to control the density estimation. Valid parameters -% are the following: -% -% Parameter Value -% 'censoring' A logical vector of the same length of X, indicating which -% entries are censoring times (default is no censoring). -% 'kernel' The type of kernel smoother to use, chosen from among -% 'normal' (default), 'box', 'triangle', and -% 'epanechnikov'. -% 'npoints' The number of equally-spaced points in XI. -% 'support' Either 'unbounded' (default) if the density can extend -% over the whole real line, or 'positive' to restrict it to -% positive values, or a two-element vector giving finite -% lower and upper limits for the support of the density. -% 'weights' Vector of the same length as X, giving the weight to -% assign to each X value (default is equal weights). -% 'width' The bandwidth of the kernel smoothing window. The default -% is optimal for estimating normal densities, but you -% may want to choose a smaller value to reveal features -% such as multiple modes. -% 'function' The function type to estimate, chosen from among 'pdf', -% 'cdf', 'icdf', 'survivor', or 'cumhazard' for the density, -% cumulative probability, inverse cumulative probability, -% survivor, or cumulative hazard functions, respectively. -% -% In place of the kernel functions listed above, you can specify another -% kernel function by using @ (such as @normpdf) or quotes (such as 'normpdf'). -% The function must take a single argument that is an array of distances -% between data values and places where the density is evaluated, and -% return an array of the same size containing corresponding values of -% the kernel function. When the 'function' parameter value is 'pdf', -% this kernel function should return density values, otherwise it should -% return cumulative probability values. Specifying a custom kernel when the -% 'function' parameter value is 'icdf' is an error. -% -% If the 'support' parameter is 'positive', KSDENSITY transforms X using -% a log function, estimates the density of the transformed values, and -% transforms back to the original scale. If 'support' is a vector [L U], -% KSDENSITY uses the transformation log((X-L)/(U-X)). The 'width' parameter -% and U outputs are on the scale of the transformed values. -% -% Example: -% x = [randn(30,1); 5+randn(30,1)]; -% [f,xi] = ksdensity(x); -% plot(xi,f); -% This example generates a mixture of two normal distributions, and -% plots the estimated density. -% -% See also HIST, @. - -% If there is any censoring, we would like to estimate the density up -% to the last non-censored observation. Say this is XMAX. Without -% censoring, the density estimate near XMAX would consist of contributions -% from kernels centered above and below XMAX. We can't compute the -% contributions above XMAX, though, because we have no data. Using only -% the kernels centered below XMAX makes the density estimate biased. -% -% In an attempt to reduce bias, we will compute the contributions -% from kernels centered below XMAX, and fold their values around XMAX. -% The result should be good if the density is nearly flat in this area. -% If the density is increasing then the estimate will still be biased -% downward, and if the density is decreasing it will still be biased -% upward, but the bias will be reduced. - -% Reference: -% A.W. Bowman and A. Azzalini (1997), "Applied Smoothing -% Techniques for Data Analysis," Oxford University Press. - -% Copyright 1993-2006 The MathWorks, Inc. -% $Revision: 1.9.6.8.2.1 $ $Date: 2006/07/13 16:53:59 $ - -if (nargin > 0) && isscalar(yData) && ishandle(yData) ... - && isequal(get(yData,'type'),'axes') - axarg = {yData}; - if nargin>1 - yData = varargin{1}; - varargin(1) = []; - else - yData = []; % error to be dealt with below - end -else - axarg = {}; -end - -% Get y vector and its dimensions -if ~isvector(yData) - error('stats:ksdensity:VectorRequired','X must be a vector.'); -end -yData = yData(:); -yData(isnan(yData)) = []; -n = length(yData); -ymin = min(yData); -ymax = max(yData); - -% Maybe xi was specified, or maybe not -xispecified = false; -if ~isempty(varargin) - if ~ischar(varargin{1}) - xi = varargin{1}; - varargin(1) = []; - xispecified = true; - end -end - -% Process additional name/value pair arguments -okargs={'width' 'npoints' 'kernel' 'support' ... - 'weights' 'censoring' 'cutoff' 'function' }; -defaults = {[] [] 'normal' 'unbounded' ... - 1/n false(n,1) [] 'pdf'}; -[eid,emsg,u,m,kernelname,support,weight,cens,cutoff,ftype] = ... - statgetargs(okargs, defaults, varargin{:}); -if ~isempty(eid) - error(sprintf('stats:ksdensity:%s',eid),emsg); -end - -if isnumeric(support) - if numel(support)~=2 - error('stats:ksdensity:BadSupport',... - 'Value of ''support'' parameter must have two elements.'); - end - if support(1)>=ymin || support(2)<=ymax - error('stats:ksdensity:BadSupport',... - 'Data values must be between lower and upper ''support'' values.'); - end - L = support(1); - U = support(2); -elseif ischar(support) && length(support)>0 - okvals = {'unbounded' 'positive'}; - rownum = strmatch(support,okvals); - if isempty(rownum) - error('stats:ksdensity:BadSupport',... - 'Invalid value of ''support'' parameter.') - end - support = okvals{rownum}; - if isequal(support,'unbounded') - L = -Inf; - U = Inf; - else - L = 0; - U = Inf; - end - if isequal(support,'positive') && ymin<=0 - error('stats:ksdensity:BadSupport',... - 'Cannot set support to ''positive'' with non-positive data.') - end -else - error('stats:ksdensity:BadSupport',... - 'Invalid value of ''support'' parameter.') -end -if isempty(weight) - weight = ones(1,n); -elseif numel(weight)==1 - weight = repmat(weight,1,n); -elseif numel(weight)~=n || numel(weight)>length(weight) - error('stats:ksdensity:InputSizeMismatch',... - 'Value of ''weight'' must be a vector of the same length as X.'); -else - weight = weight(:)'; -end -weight = weight / sum(weight); -if isempty(cens) - cens = false(1,n); -elseif ~all(ismember(cens(:),0:1)) - error('stats:ksdensity:BadCensoring',... - 'Value of ''censoring'' must be a logical vector.'); -elseif numel(cens)~=n || numel(cens)>length(cens) - error('stats:ksdensity:InputSizeMismatch',... - 'Value of ''censoring'' must be a vector of the same length as X.'); -end - -% Kernel can be the name of a function local to here, or an external function -% Kernel numbers are used below: -% 1 2 3 4 5 -kernelnames = {'normal' 'epanechinikov' 'epanechnikov' 'box' 'triangle'}; -kernelhndls = {@normal @epanechnikov @epanechnikov @box @triangle}; -cdfhndls = {@cdf_nl @cdf_ep @cdf_ep @cdf_bx @cdf_tr}; -kernelcuts = [4 sqrt(5) sqrt(5) sqrt(3) sqrt(6)]; -kernelcutoff = Inf; - -% Check function type -okvals = {'pdf' 'cdf' 'survivor' 'cumhazard' 'icdf'}; -if ischar(ftype) - ftype = lower(ftype); -end -rownum = strmatch(ftype,okvals); -if isempty(rownum) - error('stats:ksdensity:BadFunction',... - 'Invalid value of ''function'' parameter.') -elseif length(rownum)>1 - error('stats:ksdensity:BadFunction',... - 'Ambiguous value of ''function'' parameter.') -end -ftype = okvals{rownum}; - -% Set a flag indicating we are to compute the cdf; later on -% we may transform to another function that is a transformation -% of the cdf -iscdf = isequal(ftype,'cdf') | isequal(ftype,'survivor') ... - | isequal(ftype,'cumhazard'); -kernel = kernelname; -if isempty(kernelname) - if iscdf - kernel = cdfhndls{1}; - else - kernel = kernelhndls{1}; - end - kernelname = kernelnames{1}; - kernelcutoff = kernelcuts(1); -elseif ~(isa(kernelname,'function_handle') || isa(kernelname,'inline')) - if ~ischar(kernelname) - error('stats:ksdensity:BadKernel',... - 'Smoothing kernel must be a function.'); - end - - % If this is an abbreviation of our own methods, expand the name now. - % If the string matches the start of both variants of the Epanechnikov - % spelling, that is not an error so pretend it matches just one. - knum = strmatch(lower(kernelname), kernelnames); - if all(ismember(2:3,knum)) % kernel number used here - knum(knum==3) = []; - end - if (length(knum) == 1) - if iscdf - kernel = cdfhndls{knum}; - else - kernel = kernelhndls{knum}; - end - kernelcutoff = kernelcuts(knum); - elseif isequal(ftype,'icdf') - error('stats:ksdensity:IcdfNotAllowed',... - 'Cannot compute inverse cdf for a custom kernel.'); - end -elseif isequal(ftype,'icdf') - error('stats:ksdensity:IcdfNotAllowed',... - 'Cannot compute inverse cdf for a custom kernel.'); -end -if ~isempty(cutoff) - kernelcutoff = cutoff; -end - -if isequal(ftype,'icdf') - % Inverse cdf is special, so deal with it here - if xispecified - p = xi; - else - p = (1:99)/100; - end - - % To start, evaluate the cdf at a range of values - sy = sort(yData); - xi = linspace(sy(1), sy(end), 100); - [yi,xi,u] = ksdensity(yData,xi, 'censoring',cens, 'kernel',kernelname, ... - 'support',support, 'weights',weight,... - 'width',u, 'function','cdf'); - - % If there's a gap of about 0 density, we have to adjust things - gap = find(diff(sy) > 4*u); - if ~isempty(gap) - sy = sy(:)'; - xi = sort([xi, sy(gap)+2*u, sy(gap+1)-2*u]); - [yi,xi,u] = ksdensity(yData,xi, 'censoring',cens, 'kernel',kernelname, ... - 'support',support, 'weights',weight,... - 'width',u, 'function','cdf'); - end - - % Perturb duplicates a small amount to allow interpolation - t = 1 + find(diff(yi)==0); - bigger = 1+eps(class(yi)); - for j=1:length(t) - yi(t(j)) = bigger*yi(t(j)-1); - end - - % Get some starting values - x1 = interp1(yi,xi,p); % interpolate for p in a good range - x1(isnan(x1) & p0 - x1(isnan(x1) & p>max(yi)) = max(xi); % use highest x if p too high, but <1 - x1(p<=0) = L; % use lower bound if p<=0 - x1(p>=1) = U; % and upper bound if p>=1 - notdone = find(p>0 & p<1); % refine the ones with 0 1e-6*abs(x0) & ... - abs(p(notdone)-f0) > 1e-8); - end - - % Plot or return these values - if nargout==0 - plot(axarg{:},x1,p); - else - fout0 = x1; - xout = p; - end - return -end - -% Compute transformed values of data -if isequal(support,'unbounded') - ty = yData; -elseif isequal(support,'positive') - ty = log(yData); -else - ty = log(yData-L) - log(U-yData); % same as log((y-L)./(U-y)) -end - -% Deal with censoring -iscensored = any(cens); -if iscensored - % Compute empirical cdf and create an equivalent weighted sample - [F,XF] = ecdf(ty, 'censoring',cens, 'frequency',weight); - weight = diff(F(:)'); - ty = XF(2:end); - n = length(ty); - N = sum(~cens); - issubdist = (F(end)<1); % sub-distribution, integrates to less than 1 - ymax = max(yData(~cens)); -else - N = n; - issubdist = false; -end - -% Get bandwidth if not already specified -if (isempty(u)), - if ~iscensored - % Get a robust estimate of sigma - med = median(ty); - sig = median(abs(ty-med)) / 0.6745; - else - % Estimate sigma using quantiles from the empirical cdf - Xquant = interp1(F,XF,[.25 .5 .75]); - if ~any(isnan(Xquant)) - % Use interquartile range to estimate sigma - sig = (Xquant(3) - Xquant(1)) / (2*0.6745); - elseif ~isnan(Xquant(2)) - % Use lower half only, if upper half is not available - sig = (Xquant(2) - Xquant(1)) / 0.6745; - else - % Can't easily estimate sigma, just get some indication of spread - sig = ty(end) - ty(1); - end - end - if sig<=0, sig = max(ty)-min(ty); end - if sig>0 - % Default window parameter is optimal for normal distribution - u = sig * (4/(3*N))^(1/5); - else - u = 1; - end -end - -% Get XI values at which to evaluate the density -foldwidth = min(kernelcutoff,3); -if ~xispecified - % Compute untransformed values of lower and upper evaluation points - ximin = min(ty) - foldwidth*u; - if issubdist - ximax = max(ty); - else - ximax = max(ty) + foldwidth*u; - end - - if isequal(support,'positive') - ximin = exp(ximin); - ximax = exp(ximax); - elseif ~isequal(support,'unbounded') - ximin = (U*exp(ximin)+L) / (exp(ximin)+1); - ximax = (U*exp(ximax)+L) / (exp(ximax)+1); - end - - if isempty(m) - m=100; - end - - xi = linspace(ximin, ximax, m); - -elseif (numel(xi) > length(xi)) - error('stats:ksdensity:VectorRequired','XI must be a vector'); -end - -% Compute transformed values of evaluation points that are in bounds -xisize = size(xi); -fout = zeros(xisize); -if iscdf && isfinite(U) - fout(xi>=U) = 1; -end -xout = xi; -xi = xi(:); -if isequal(support,'unbounded') - inbounds = true(size(xi)); - txi = xi; - foldpoint = ymax; -elseif isequal(support,'positive') - inbounds = (xi>0); - xi = xi(inbounds); - txi = log(xi); - foldpoint = log(ymax); -else - inbounds = (xi>L) & (xi= foldpoint - foldwidth*u); - nkeep = length(txi); - nfold = sum(needfold); - txifold = (2*foldpoint) - txi(needfold); - txi(end+1:end+nfold) = txifold; - m = length(txi); -else - nkeep = length(txi); - nfold = 0; -end - -% Now compute density estimate at selected points -blocksize = 3e4; -if n*m<=blocksize && ~iscdf - % For small problems, compute kernel density estimate in one operation - z = (repmat(txi',n,1)-repmat(ty,1,m))/u; - f = weight * feval(kernel, z); -else - % For large problems, try more selective looping - - % First sort y and carry along weights - [ty,idx] = sort(ty); - weight = weight(idx); - - % Loop over evaluation points - f = zeros(1,m); - - if isinf(kernelcutoff) - for k=1:m - % Sum contributions from all - z = (txi(k)-ty)/u; - f(k) = weight * feval(kernel,z); - end - else - % Sort evaluation points and remember their indices - [stxi,idx] = sort(txi); - - jstart = 1; % lowest nearby point - jend = 1; % highest nearby point - for k=1:m - % Find nearby data points for current evaluation point - halfwidth = kernelcutoff*u; - lo = stxi(k) - halfwidth; - while(ty(jstart)0 - % Fold back over the censoring point to give a crisp upper limit - ffold = f(nkeep+1:end); - f = f(1:nkeep); - f(needfold) = f(needfold) + ffold; - txi = txi(1:nkeep); - f(txi>foldpoint) = 0; - - % Include a vertical line at the end - if ~xispecified - xi(end+1) = xi(end); - f(end+1) = 0; - inbounds(end+1) = true; - end -end - - -if iscdf - % Guard against roundoff. Lower boundary of 0 should be no problem. - f = min(1,f); -else - % Apply reverse transformation and create return value of proper size - f = f(:) ./ u; - if isequal(support,'positive') - f = f ./ xi; - elseif isnumeric(support) - f = f * (U-L) ./ ((xi-L) .* (U-xi)); - end -end -fout(inbounds) = f; -xout(inbounds) = xi; - - -% If another function based on the cdf, compute it now -if isequal(ftype,'survivor') - fout = 1-fout; -elseif isequal(ftype,'cumhazard') - fout = 1-fout; - t = (fout>0); - fout(~t) = NaN; - fout(t) = -log(fout(t)); -end - -% Plot the results if they are not requested as return values -if nargout==0 - plot(axarg{:},xout,fout) -else - fout0 = fout; -end - -% ----------------------------- -% The following are functions that define smoothing kernels k(z). -% Each function takes a single input Z and returns the value of -% the smoothing kernel. These sample kernels are designed to -% produce outputs that are somewhat comparable (differences due -% to shape rather than scale), so they are all probability -% density functions with unit variance. -% -% The density estimate has the form -% f(x;k,h) = mean over i=1:n of k((x-y(i))/h) / h - -function f = normal(z) -%NORMAL Normal density kernel. -%f = normpdf(z); -f = exp(-0.5 * z .^2) ./ sqrt(2*pi); - -function f = epanechnikov(z) -%EPANECHNIKOV Epanechnikov's asymptotically optimal kernel. -a = sqrt(5); -z = max(-a, min(z,a)); -f = max(0,.75 * (1 - .2*z.^2) / a); - -function f = box(z) -%BOX Box-shaped kernel -a = sqrt(3); -f = (abs(z)<=a) ./ (2 * a); - -function f = triangle(z) -%TRIANGLE Triangular kernel. -a = sqrt(6); -z = abs(z); -f = (z<=a) .* (1 - z/a) / a; - -% ----------------------------- -% The following are functions that define cdfs for smoothing kernels. - -function f = cdf_nl(z) -%CDF_NL Normal kernel, cdf version -f = normcdf(z); - -function f = cdf_ep(z) -%CDF_EP Epanechnikov's asymptotically optimal kernel, cdf version -a = sqrt(5); -z = max(-a, min(z,a)); -f = ((z+a) - (z.^3+a.^3)/15) * 3 / (4*a); - -function f = cdf_bx(z) -%CDF_BX Box-shaped kernel, cdf version -a = sqrt(3); -f = max(0, min(1,(z+a)/(2*a))); - -function f = cdf_tr(z) -%CDF_TR Triangular kernel, cdf version -a = sqrt(6); -denom = 12; % 2*a^2 -f = zeros(size(z)); % -Inf < z < -a -t = (z>-a & z<0); -f(t) = (a + z(t)).^2 / denom; % -a < z < 0 -t = (z>=0 & za); -f(t) = 1; % a < z < Inf diff --git a/matlab/graph/linkage.m b/matlab/graph/linkage.m deleted file mode 100644 index 89e9a10..0000000 --- a/matlab/graph/linkage.m +++ /dev/null @@ -1,117 +0,0 @@ -function Z = linkage(Y, method) -%LINKAGE Create hierarchical cluster tree. -% Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single -% linkage algorithm. The input Y is a distance matrix such as is -% generated by PDIST. Y may also be a more general dissimilarity -% matrix conforming to the output format of PDIST. -% -% Z = LINKAGE(Y, method) creates a hierarchical cluster tree using -% the specified algorithm. The available methods are: -% -% 'single' --- nearest distance -% 'complete' --- furthest distance -% 'average' --- average distance -% 'centroid' --- center of mass distance (the output Z is meaningful -% only if Y contains Euclidean distances) -% 'ward' --- inner squared distance -% -% Cluster information will be returned in the matrix Z with size m-1 -% by 3, where m is the number of observations in the original data. -% Column 1 and 2 of Z contain cluster indices linked in pairs -% to form a binary tree. The leaf nodes are numbered from 1 to -% m. They are the singleton clusters from which all higher clusters -% are built. Each newly-formed cluster, corresponding to Z(i,:), is -% assigned the index m+i, where m is the total number of initial -% leaves. Z(i,1:2) contains the indices of the two component -% clusters which form cluster m+i. There are n-1 higher clusters -% which correspond to the interior nodes of the output clustering -% tree. Z(i,3) contains the corresponding linkage distances between -% the two clusters which are merged in Z(i,:), e.g. if there are -% total of 30 initial nodes, and at step 12, cluster 5 and cluster 7 -% are combined and their distance at this time is 1.5, then row 12 -% of Z will be (5,7,1.5). The newly formed cluster will have an -% index 12+30=42. If cluster 42 shows up in a latter row, that means -% this newly formed cluster is being combined again into some bigger -% cluster. -% -% The centroid method can produce a cluster tree that is not monotonic. -% This occurs when the distance from the union of two clusters to a third -% cluster is less than the distance from either individual cluster to -% that third cluster. In such a case, sections of the dendrogram change -% direction. This is an indication that another method should be used. -% -% See also PDIST, INCONSISTENT, COPHENET, DENDROGRAM, CLUSTER, CLUSTERDATA, -% KMEANS, SILHOUETTE. - -% Copyright 1993-2002 The MathWorks, Inc. -% $Revision: 1.16 $ - -[k, n] = size(Y); - - -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end - -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end - -method = lower(method(1:2)); % simplify the switch string. - -% a flag for non-monotonic distances in tree. this can only happen with -% the centroid method -monotonic = 1; - -Z = zeros(m-1,3); % allocate the output matrix. - -% during updating clusters, cluster index is constantly changing, R is -% a index vector mapping the original index to the current (row, column) -% index in Y. N denotes how many points are contained in each cluster. - -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; - -for s = 1:(n-1) - X = Y; - - [v, k] = min(X); - - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - - % Update Y. In order to vectorize the computation, we need to compute all - % the indices corresponding to cluster i and j in Y, denoted by I and J. - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end \ No newline at end of file diff --git a/matlab/graph/mutationPlot.m b/matlab/graph/mutationPlot.m deleted file mode 100644 index 2a9c15b..0000000 --- a/matlab/graph/mutationPlot.m +++ /dev/null @@ -1,330 +0,0 @@ -function mutationPlot(tietue) - -global COUNTS; global PARTITION; global SUMCOUNTS; -clearGlobalVars; - -h0 = findobj('Tag','load_menu'); -c = get(h0,'UserData'); -h0 = findobj('Tag','filename1_text'); -filename1 = get(h0,'String'); -disp('---------------------------------------------------'); -disp('Viewing the mutation plot.'); -disp(['Load the mixture result from: ',[filename1],'...']); - -if (~isstruct(tietue)) -% [filename, pathname] = uigetfile('*.mat', 'Load mixture result file'); -% if (filename==0 & pathname==0) return; -% else -% %display('---------------------------------------------------'); -% %display(['Reading mixture result from: ',[pathname filename],'...']); -% end -% pause(0.0001); -% h0 = findobj('Tag','filename1_text'); -% set(h0,'String',filename); clear h0; -% -% struct_array = load([pathname filename]); -% if isfield(struct_array,'c') %Matlab versio -% c = struct_array.c; -% if ~isfield(c,'PARTITION') | ~isfield(c,'rowsFromInd') -% disp('Incorrect file format'); -% return -% end -% else -% disp('Incorrect file format'); -% return; -% end - PARTITION = c.PARTITION; COUNTS = c.COUNTS; SUMCOUNTS = c.SUMCOUNTS; - alleleCodes = c.alleleCodes; adjprior = c.adjprior; popnames = c.popnames; - rowsFromInd = c.rowsFromInd; data = c.data; npops = c.npops; noalle = c.noalle; -else - PARTITION = tietue.PARTITION; - COUNTS = tietue.COUNTS; - SUMCOUNTS = tietue.SUMCOUNTS; - alleleCodes = tietue.alleleCodes; - adjprior = tietue.adjprior; - popnames = tietue.popnames; - rowsFromInd = tietue.rowsFromInd; - data = double(tietue.data); - npops = tietue.npops; - noalle = tietue.noalle; -end - -nloci = size(COUNTS,2); -ninds = size(data,1)/rowsFromInd; - -if isfield(c,'CQ_COUNTS') - % Linked data - if isfield(c,'gene_lengths') - gene_lengths = c.gene_lengths; - else - [filename, pathname] = uigetfile('*.txt', 'Load file with gene lengths.'); - if (filename==0 & pathname==0) - return - end - gene_lengths = load([pathname filename]); - end - - component_mat = zeros(length(gene_lengths), max(gene_lengths)); - cum_length = cumsum(gene_lengths); - component_mat(1,1:gene_lengths(1))=1:gene_lengths(1); - for i = 2:length(gene_lengths) - component_mat(i,1:gene_lengths(i)) = cum_length(i-1)+1:cum_length(i); - end -else - component_mat = 1:nloci; - gene_lengths = nloci; -end - -if isfield(c,'CQ_COUNTS') - answers = inputdlg({'Index of the individual of interest',... - 'BF limit (log)',... - 'Genes to analyze'},... - 'INPUT',[1; 1; 1],... - {' ','2.3',['1:' num2str(length(gene_lengths))]}); - ind = str2num(answers{1}); - BF = str2num(answers{2}); - genes = str2num(answers{3}); - %ind = input('Input individual: '); - %BF = input('Input BF: '); - %genes = input('Input genes to analyze: '); - all_right = check_inputs(ind,BF,genes, ninds, gene_lengths); - if all_right==0 - return - end - n_genes = length(genes); - nameText = ['ind: ' num2str(ind) ', genes: ' num2str(genes)]; -else - answers = inputdlg({'Index of the individual of interest',... - 'BF limit (log)'},'INPUT',[1; 1],{' ','2.3'}); - ind = str2num(answers{1}); - BF = str2num(answers{2}); - %ind = input('Input individual: '); - %BF = input('Input BF: '); - all_right = check_inputs(ind,BF,1, ninds, gene_lengths); - if all_right==0 - return - end - genes = 1; - n_genes = 1; - nameText = ['ind: ' num2str(ind)]; -end - -origin = PARTITION(ind); -varit = giveColors(npops); - -figure('NumberTitle','off','Name',nameText); - -mutationList = repmat(... - struct('gene',[],'site',[],... - 'row',[],'pops',[],'bf_vals',[]), [10 1]); -n_mutations = 0; - -for i = 1:n_genes - % Loop over different genes - gene = genes(i); - - for j = 1:rowsFromInd - % Loop over different "haplotypes" - left = 0.05; - bottom = 1 - (1/n_genes)*i + (1/n_genes)/(rowsFromInd*2+1)*(2*j-1); - width = 0.9; - height = 1/n_genes/(rowsFromInd*2+1); - axes('Position',[left bottom width height]); - set(gca, 'Xlim', [-.5 , gene_lengths(gene)+.5], 'YLim', [0,1], ... - 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); - - for k=1:gene_lengths(gene) - % Loop over different sites - site = component_mat(gene,k); - allele = data((ind-1)*rowsFromInd+j, site); - %disp(num2str((ind-1)*rowsFromInd+j)); - counts = squeeze(COUNTS(:,site,:)); - - logml = zeros(npops,1); - if allele>0 - for m=1:npops - % Calculate the predictive probabilities of different - % origins - counts(allele,origin) = counts(allele,origin)-1; - counts(allele,m) = counts(allele,m)+1; - logml(m) = calculateLogml(counts, noalle(site)); - counts(allele,m) = counts(allele,m)-1; - counts(allele,origin) = counts(allele,origin)+1; - end - end - - logml = logml-max(logml); - probs = exp(logml) ./ sum(exp(logml)); - - cumprobs = cumsum(probs); - - if log(max(probs)/probs(origin)) >= BF - h0=patch([k-1 k k k-1], [0 0 cumprobs(1) cumprobs(1)], varit(1,:)); - set(h0,'EdgeColor','none'); - for m=2:npops - h0=patch([k-1 k k k-1], [cumprobs(m-1) cumprobs(m-1) cumprobs(m) cumprobs(m)], varit(m,:)); - set(h0,'EdgeColor','none'); - end - n_mutations = n_mutations+1; - if n_mutations>length(mutationList) - mutationList = [mutationList; repmat(... - struct('gene',[],'site',[],... - 'row',[],'pops',[],'bf_vals',[]), [length(mutationList) 1])]; - end - mutationList(n_mutations).gene = gene; - mutationList(n_mutations).site = k; % The location in gene i! - mutationList(n_mutations).row = j; - aux = log(probs ./ probs(origin)); - mutationList(n_mutations).pops = find(aux>=BF); - mutationList(n_mutations).bf_vals = aux(find(aux>=BF)); - end - end - end -end -mlst_data = isfield(c,'CQ_COUNTS'); -writeResults(mutationList, n_mutations, ind, origin, mlst_data); - - -%-------------------------------------------------------------------- - - -function all_right = check_inputs(ind,BF,genes, ninds, gene_lengths) - -all_right = 0; -if length(ind)~=1 - disp('ERROR: index of one individual must be given.'); - return; -end -if ind<=0 | ind >ninds - disp('ERROR: Index of the given individual is out of range.'); - return -end -if length(BF)~=1 - disp('ERROR: one BF value must be given.'); - return -end -if BF<0 - disp('ERROR: BF must be positive.'); - return -end -if length(genes)<1 | length(genes)>length(gene_lengths) - disp('ERROR: input for the genes was incorrect.'); - return -end -if any(genes<1) | any(genes>length(gene_lengths)) - disp('ERROR: input for the genes was incorrect.'); - return -end -all_right = 1; - -%------------------------------------------------------------------ - -function writeResults(mutationList, n_mutations, ind, home, mlst_data) - -h0 = findobj('Tag','filename2_text'); -outf = get(h0,'String'); clear h0; - -if length(outf)>0 - fid = fopen(outf,'a'); -else - fid = -1; -end - -disp(' '); -disp('--------------------------'); -disp(['Individual: ' num2str(ind) '.']); -disp(['Origin of the individual ' num2str(home) '.']); - -if fid ~= -1 - fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Individual: ' num2str(ind) '.']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Origin of the individual ' num2str(home) '.']); fprintf(fid, '\n'); -end - - -if n_mutations==0 - disp('No putative locations for mutations detected'); - if fid ~= -1 - fprintf(fid,'%s \n', 'No putative locations for mutations detected'); fprintf(fid, '\n'); - end - return -end - -disp(' '); -if fid ~= -1 - fprintf(fid,'%s \n', [' ']); fprintf(fid, '\n'); -end - -if mlst_data==1 - disp('Putative locations for mutations:'); - disp('gene, site, possible origins'); - if fid ~= -1 - fprintf(fid,'%s \n', 'Putative locations for mutations:'); fprintf(fid, '\n'); - fprintf(fid,'%s \n', 'gene, site, possible origins'); fprintf(fid, '\n'); - end - for i=1:n_mutations - gene = mutationList(i).gene; - site = mutationList(i).site; - text_line = [num2str(gene) blanks(6-floor(log10(gene))) num2str(site) blanks(7-floor(log10(site)))]; - pops = mutationList(i).pops; - bf_vals = mutationList(i).bf_vals; - for j=1:length(pops) - text_line = [text_line num2str(pops(j)) '(' num2str(bf_vals(j)) ') ']; - end - disp(text_line); - if fid ~= -1 - fprintf(fid,'%s \n', [text_line]); fprintf(fid, '\n'); - end - end -else - disp('Putative locations for mutations:'); - disp('locus, haplotype, possible origins'); - if fid ~= -1 - fprintf(fid,'%s \n', 'Putative locations for mutations:'); fprintf(fid, '\n'); - fprintf(fid,'%s \n', 'locus, haplotype, possible origins'); fprintf(fid, '\n'); - end - for i=1:n_mutations - locus = mutationList(i).site; - row = mutationList(i).row; - text_line = [num2str(locus) blanks(7-floor(log10(locus))) num2str(row) blanks(10-floor(log10(row)))]; - pops = mutationList(i).pops; - bf_vals = mutationList(i).bf_vals; - for j=1:length(pops) - text_line = [text_line num2str(pops(j)) '(' num2str(bf_vals(j)) ') ']; - end - disp(text_line); - if fid ~= -1 - fprintf(fid,'%s \n', [text_line]); fprintf(fid, '\n'); - end - end -end - -if fid ~= -1 - fclose(fid); -end - -%------------------------------------------------------------------ - -function val = calculateLogml(counts, noalle) -% counts corresponds to counts of a SINGLE locus. It is two-dimensional -% with as many columns as there are populations. - -npops = size(counts,2); - -prior = ones(noalle,npops)./noalle; -counts = counts(1:noalle,:); - -val = sum(gammaln(sum(prior,1)),2) ... - - sum(gammaln(sum(counts+prior,1)),2) ... - + sum(sum(gammaln(counts+prior))) ... - - sum(sum(gammaln(prior))); - - -%--------------------------------------------------------------- - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; \ No newline at end of file diff --git a/matlab/graph/phyTreeRead.m b/matlab/graph/phyTreeRead.m deleted file mode 100644 index 228e9ba..0000000 --- a/matlab/graph/phyTreeRead.m +++ /dev/null @@ -1,197 +0,0 @@ -function tr = phyTreeRead(filename) -%PHYTREEREAD reads a NEWICK tree formatted file. -% -% TREE = PHYTREEREAD(FILENAME) reads a NEWICK tree formatted file -% FILENAME, returning the data in the file as a PHYTREE object. FILENAME -% can also be a URL or MATLAB character array that contains the text of a -% NEWICK format file. -% -% The NEWICK tree format is found at: -% http://evolution.genetics.washington.edu/phylip/newicktree.html -% -% Note: This implementation only allows binary trees, non-binary trees -% will be translated into a binary tree with extra branches of length 0. -% -% Example: -% -% tr = phytreeread('pf00002.tree') -% -% See also GETHMMTREE, PHYTREE, PHYTREETOOL, PHYTREEWRITE. - -% Copyright 2003-2004 The MathWorks, Inc. -% $Revision: 1.1.6.10 $ $Author: batserve $ $Date: 2004/04/14 23:57:16 $ - - -if nargin==0 - [filename, pathname] = uigetfile({'*.tree';'*.dnd'},'Select Phylogenetic Tree File'); - if ~filename - disp('Canceled, file not read.'); - tr=[]; - return; - end - filename = [pathname, filename]; -end - -% check input is char -% in a future version we may accept also cells -if ~ischar(filename) - error('Bioinfo:InvalidInput','Input must be a character array') -end - -if size(filename,1)>1 % is padded string ? - strin = cellstr(filename); - strin = [strin{:}]; -elseif (strfind(filename(1:min(10,end)), '://')) % is an url ? - if (~usejava('jvm')) - error('Bioinfo:NoJava','Reading from a URL requires Java.') - end - try - strin = urlread(filename); - catch - error('Bioinfo:CannotReadURL','Cannot read URL "%s".', filename); - end - strin = strread(strin,'%c','delimiter','\n')'; -elseif (exist(filename,'file') || ... - exist(fullfile(cd,filename),'file') ) % is a valid filename ? - strin = textread(filename,'%c','delimiter','\n')'; -else % must be single a string with '\n' - strin = strread(filename,'%c','delimiter','\n')'; -end - -% characterizing the string -numBranches = sum(strin==','); -numLeaves = numBranches + 1; -numLabels = numBranches + numLeaves; - -if (numBranches == 0) - error('Bioinfo:NoCommaInInputString', ... - ['There is not any comma in the data,\ninput string may not '... - 'be in Newick style or is not a valid filename.']) -end - -% find the string features: open and close parentheses and leaves -leafPositions = regexp(strin,'[(,][^(,)]')+1; -parenthesisPositions = regexp(strin,'[()]'); -strFeatures = strin(sort([leafPositions parenthesisPositions])); - -% some consistency checking on the parenthesis -temp = cumsum((strFeatures=='(') - (strFeatures==')')); -if any(temp(1:end-1)<1) || (temp(end)~=0) - error('Bioinfo:InconsistentParentheses','The parentheses structure is inconsistent,\ninput string may not be in Newick style or is not a valid filename.') -end - -dist = zeros(numLabels,1); % allocating space for distances -tree = zeros(numBranches,2); % allocating space for tree pointers -names = cell(numLabels,1); % allocating space for tree labels - -try - -% extract label information for the leaves -leafData = regexp(strin,'[(,][^(,);\[\]]+','match'); -for j=1:numel(leafData) - coi = find(leafData{j}==':',1,'last'); - if isempty(coi) % if no colon no length, the whole label is the name - dist(j) = 0; - names{j} = leafData{j}(2:end); - else % if there is colon, get name and length - dist(j) = strread(leafData{j}(coi+1:end),'%f'); - names{j} = leafData{j}(2:coi-1); - end -end -% uniformizing empty cells, value inside the brackets can never be empty -% because branch names will always be empty -[names{cellfun('isempty',names)}] = deal(''); - -% extract label information for the parenthesis -parenthesisData = regexp(strin,')[^(,);\[\]]*','match'); -parenthesisDist = zeros(numel(parenthesisData),1); -for j=1:numel(parenthesisData) - coi = find(parenthesisData{j}==':',1,'last'); - if isempty(coi) % if no colon no length, the whole label is the name - parenthesisDist(j) = 0; - parenthesisData{j} = parenthesisData{j}(2:end); - else % if there is colon, get name and length - parenthesisDist(j) = strread(parenthesisData{j}(coi+1:end),'%f'); - parenthesisData{j} = parenthesisData{j}(2:coi-1); - end -end -% uniformizing empty cells, value inside brackes may be empty -if any(cellfun('isempty',parenthesisData)) - [parenthesisData{cellfun('isempty',parenthesisData)}] = deal(''); -end - -li = 1; bi = 1; pi = 1; % indexes for leaf, branch and parentheses -queue = zeros(1,2*numLeaves); qp = 0; % setting the queue (worst case size) - -j = 1; - -while j <= numel(strFeatures) - switch strFeatures(j) - case ')' % close parenthesis, pull values from the queue to create - % a new branch and push the new branch # into the queue - lastOpenPar = find(queue(1:qp)==0,1,'last'); - numElemInPar = min(3,qp-lastOpenPar); - switch numElemInPar - case 2 % 99% of the cases, two elements in the parenthesis - bp = bi + numLeaves; - names{bp} = parenthesisData{pi}; % set name - dist(bp) = parenthesisDist(pi); % set length - tree(bi,:) = queue(qp-1:qp); - qp = qp - 2; % writes over the open par mark - queue(qp) = bp; - bi = bi + 1; - pi = pi + 1; - case 3 % find in non-binary trees, create a phantom branch - bp = bi + numLeaves; - names{bp} = ''; % set name - dist(bp) = 0; % set length - tree(bi,:) = queue(qp-1:qp); - qp = qp - 1; % writes over the left element - queue(qp) = bp; - bi = bi + 1; - j = j - 1; %repeat this closing branch to get the rest - case 1 % parenthesis with no meaning (holds one element) - qp = qp - 1; - queue(qp) = queue(qp+1); - pi = pi + 1; - case 0 % an empty parenthesis pair - error('Bioinfo:ParenthesisPairWithNoData', ... - ['Found parenthesis pair with no data,\n', ... - 'input string may not be in Newick style or',... - 'is not a valid filename.']) - end % switch numElemInPar - - case '(' % an open parenthesis marker (0) pushed into the queue - qp = qp + 1; - queue(qp) = 0; - - otherwise % a new leaf pushed into the queue - qp = qp + 1; - queue(qp) = li; - li = li + 1; - end % switch strFeatures - j = j + 1; -end % while j ... - -catch - le = lasterror; - if strcmp(le.identifier,'Bioinfo:ParenthesisPairWithNoData') - rethrow(le) - else - error('Bioinfo:IncorrectString',... - ['An error occurred while trying to interpret the data,\n'... - 'input string may not be in Newick style or is not a '... - 'valid filename.']) - end -end - -% make sure all dists are greater than 0 -dist = max(0,dist); - -if sum(dist) == 0 % there was no distance information so force to an unitary ultrametric tree - tr = phyTree(tree,names); -elseif sum(dist(1:numLeaves)) == 0 % no dist infor for leaves, so force an ultrametric tree - tr = phyTree(tree,dist(numLeaves+1:end),names); -else % put all info into output object - tr = phyTree(tree,dist,names); -end diff --git a/matlab/graph/plotPhytree.m b/matlab/graph/plotPhytree.m deleted file mode 100644 index 0fe2192..0000000 --- a/matlab/graph/plotPhytree.m +++ /dev/null @@ -1,687 +0,0 @@ -function plotPhytree(tr,varargin) -%PLOTPHYTREE renders a phylogenetic tree. -% -% PLOTPHYTREE(TREE) renders a phylogenetic tree object into a MATLAB figure as a -% phylogram. -% -% plotphytree(...,'ROTATION',value) will orient the phylogenetic tree within -% the figure window. Positive angles cause counterclockwise rotation, -% otherwise clockwise rotation. -% -% plotphytree(...,'FONTSIZE',value) will set the lable font size.A value -% specifying the font size to use for text in units determined by the FontUnits -% property (1 point = 1/72 inch). The default is calculated from the data -% according to the number of nodes. -% -% plotphytree(...,'LINESTYLE',value) will set the color of the tree. The -% default color is blue. -% b blue . point - solid -% g green o circle : dotted -% r red x x-mark -. dashdot -% c cyan + plus -- dashed -% m magenta * star (none) no line -% y yellow s square -% k black d diamond -% v triangle (down) -% ^ triangle (up) -% < triangle (left) -% > triangle (right) -% p pentagram -% h hexagram -% -% plotphytree(...,'FONTCOLOR',value) will set the color of lable. It is a -% vector includes [R, G, B] components. Each with the range of 0 to 1. -% The default setting is [.2 .2 .2]. -% -% -% Example: -% -% tr = phytreeread('pf00002.tree'); -% plotphytree(tr,'ROTATION',-pi/2, 'FONTSIZE', 8, 'LINESTYLE', 'b', 'FONTCOLOR', [0.2 0.2 0.2]); -% -% - -if numel(tr)~=1 - error('plotphytree:NoMultielementArrays',... - 'Phylogenetic tree must be an 1-by-1 object.'); -end - -% set defaults -rotation = 0; -fontsize = 1; -fontsizeset = false; -linestyle = '-b'; -fontcolor = [.2 .2 .2]; - - -if nargin>1 && islogical(varargin{1}) - activeBranches = varargin{1}; - argStart = 2; -else - argStart = 1; -end - -if nargin - argStart > 0 - if rem(nargin - argStart,2) == 1 - error('plotphytree:IncorrectNumberOfArguments',... - 'Incorrect number of arguments to %s.',mfilename); - end - okargs = {'rotation', 'fontsize', 'linestyle', 'fontcolor'}; - - for j = argStart:2:nargin-argStart - pname = varargin{j}; - pval = varargin{j+1}; - k = find(strncmpi(pname,okargs,numel(pname))); - if isempty(k) - error('plotphytree:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('plotphytree:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - switch(k) - case 1 % rotation - if isreal(pval(1)) - rotation = double(pval(1)); - else - error('plotphytree:NotValidType',... - 'ROTATION must be numeric and real'); - end - case 2 % fontsize - if isreal(pval(1)) - fontsize = uint8(pval(1)); - fontsizeset = true; - else - error('plotphytree:NotValidType',... - 'fontsize must be numeric'); - end - case 3 % linecolor - linestyle = pval(1); - case 4 % Fontcolor - fontcolor = [pval(1) pval(2) pval(3)]; - end - end - end -end - -orgtr=tr; -treeinfo = GET(tr); - -%Get tree structure -tr = struct(tr); -tr.numLeaves = treeinfo.NumLeaves; -tr.numNodes = treeinfo.NumNodes; -tr.numBranches = treeinfo.NumBranches; -tr.LeafNames = treeinfo.LeafNames; -tr.BranchNames = treeinfo.BranchNames; -tr.numTree = size(tr.tree, 1); - -% obtain parents for every node -tr.par(tr.tree(:)) = tr.numLeaves + [1:tr.numBranches 1:tr.numBranches]; - -% obtain numbers of leaves for each nodes -tr.numChildrenofNode = ChildCount(tr.par, tr.numLeaves, tr.numNodes); -tr.numChildrenofNode(tr.numChildrenofNode == 0) = 1; - -% find angle of each node -unitDegree = 2 * pi / tr.numLeaves; - -tr.NodeSector = tr.numChildrenofNode * unitDegree; -tr.NodeSector(tr.NodeSector == 0) = unitDegree; -tr.NodeSector(tr.numNodes) = 0; - -tr.NodeAngle = zeros(tr.numNodes, 1); -tr.NodeAngle(tr.tree(tr.numTree, 2)) = tr.NodeSector(tr.tree(tr.numTree, 2)) / 2; -tr.NodeAngle(tr.tree(tr.numTree, 1)) = tr.NodeSector(tr.tree(tr.numTree, 2)) + tr.NodeSector(tr.tree(tr.numTree, 1)) / 2; -for i = tr.numTree - 1 : -1 :1 - tr.NodeAngle(tr.tree(i, 2)) = tr.NodeAngle(tr.par(tr.tree(i, 2))) - tr.NodeSector(tr.par(tr.tree(i, 2))) / 2 + tr.NodeSector(tr.tree(i, 2)) / 2; - tr.NodeAngle(tr.tree(i, 1)) = tr.NodeAngle(tr.par(tr.tree(i, 1))) + tr.NodeSector(tr.par(tr.tree(i, 1))) / 2 - tr.NodeSector(tr.tree(i, 1)) / 2; -end - -% Rotation -tr.NodeAngle = tr.NodeAngle + rotation; - -% find (x, y) coordinates of nodes -tr.d = tr.dist; -tr.x = tr.d .* cos(tr.NodeAngle); -tr.y = tr.d .* sin(tr.NodeAngle); - -for i = tr.numNodes - 1: -1 : 1 - tr.x(i) = tr.x(i) + tr.x(tr.par(i)); - tr.y(i) = tr.y(i) + tr.y(tr.par(i)); -end - -nodeIndex = 1 : tr.numNodes; -X = tr.x([nodeIndex;[tr.par(1:tr.numNodes-1) tr.numNodes]]); -Y = tr.y([nodeIndex;[tr.par(1:tr.numNodes-1) tr.numNodes]]); -tr.txtAngle = cal_textAngle(tr); - -fig = gcf; -% fig = figure('Renderer','ZBuffer'); -h.fig = fig; -h.axes = axes; hold on; -sepUnit = max(tr.x)*[-1/20 21/20]; - -set(h.axes,'XTick',[],'YTick',[]); -set(h.axes,'Position',[.05 .05 .9 .9]) -dispTerminalLabels = false; -axis equal - -h.BranchLines = plot(X,Y,linestyle); - -% resize figure if needed -temp = 10/pi*tr.numLeaves; -correctFigureSize(fig,temp,temp); -fontRatio = max(get(fig,'Position').*[0 0 1 0])/tr.numLeaves; -set(h.axes, 'Fontsize', fontsize); - -% set leaf nodes labels -leafIndex = 1 : tr.numLeaves; -X = tr.x(leafIndex); -Y = tr.y(leafIndex); - -txtangle = tr.txtAngle; - -% % Rotate Label -h.leafNodeLabels = zeros(1, tr.numLeaves); -for i = 1:tr.numLeaves - h.leafNodeLabels(i) = text(X(i),Y(i),tr.names(i), 'Rotation', txtangle(i)); -end - -set(h.leafNodeLabels,'color',fontcolor,'clipping','on') -if fontsizeset - set(h.leafNodeLabels, 'Fontsize', fontsize); -else - set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio*1.2))); -end - -textHeight = mean(cell2mat(get(h.leafNodeLabels,'Extent')))*[0 0 0 1]'; -for ind = 1:numel(h.leafNodeLabels) - if X(ind) - tr.x(tr.par(ind)) < 0 - if txtangle(ind) < 90.0 - 0.001 || txtangle(ind) > 90.0 + 0.001 - set(h.leafNodeLabels(ind),'horizontal','right') - set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')+[sepUnit(1)*cos(txtangle(ind) * 2 * pi/360) sepUnit(1)*sin(txtangle(ind) * 2 * pi/360) 0]) - else - if Y(ind) > 0 - set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')-[0 sepUnit(1) 0]) - else - set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')+[0 sepUnit(1) 0]) - end - end - else - if txtangle(ind) < 90.0 - 0.001 || txtangle(ind) > 90.0 + 0.001 - set(h.leafNodeLabels(ind),'horizontal','left') - set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')-[sepUnit(1)*cos(txtangle(ind) * 2 * pi/360) sepUnit(1)*sin(txtangle(ind) * 2 * pi/360) 0]) - else - if Y(ind) > 0 - set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')-[0 sepUnit(1) 0]) - else - set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')+[0 sepUnit(1) 0]) - end - end - end -end - -dispLeafLabels = 1; -% correct axis limits given the extent of labels -if dispLeafLabels - E = cell2mat(get(h.leafNodeLabels,'Extent')); - if strcmp(get(gca,'XDir'),'reverse') - E(:,1) = E(:,1) - E(:,3); - end - if strcmp(get(gca,'YDir'),'reverse') - E(:,2) = E(:,2) - E(:,4); - end - E=[E;[xlim*[1;0] ylim*[1;0] diff(xlim) diff(ylim)]]; - mins = min(E(:,[1 2])); - maxs = max([sum(E(:,[1 3]),2) sum(E(:,[2 4]),2)]); - axis([mins(1) maxs(1) mins(2) maxs(2)]) -end - -if dispTerminalLabels - set(h.terminalNodeLabels,'Fontsize',min(9,ceil(fontRatio/1.5))); -end - -box off -hold off - -% store handles -set(fig,'UserData',h) -if nargout - handles = h; -end - - -%************************************************************** -% Count number of children for each node -function numChildren = ChildCount(par, numleafs, numnodes) -n = length(par); -numChildren = zeros(1, numnodes); -NoBranchStart = numleafs + 1; - -for i = 1 : numleafs - numChildren(par(i)) = numChildren(par(i)) + 1; -end - -for i = NoBranchStart : n - numChildren(par(i)) = numChildren(par(i)) + numChildren(i); -end -%************************************************************** - -%********************************************************************* -function correctFigureSize(fig,recommendedHeight,recommendedWidth) -% helper function to increase initial figure size depending on the screen & -% tree sizes -screenSize = diff(reshape(get(0,'ScreenSize'),2,2),[],2)-[0;100]; - % 100 gives extra space for the figure header and win toolbar -position = get(fig,'Position'); -if recommendedHeight > position(4) - if recommendedHeight < sum(position([2 4])) - position(2) = sum(position([2 4])) - recommendedHeight; - position(4) = recommendedHeight; - elseif recommendedHeight < screenSize(2) - position(2) = 30; - position(4) = recommendedHeight; - else - position(2) = 30; - position(4) = screenSize(2); - end -end -if recommendedWidth > position(3) - if recommendedWidth < sum(position([1 3])) - position(1) = sum(position([1 3])) - recommendedWidth; - position(3) = recommendedWidth; - elseif recommendedWidth < screenSize(1) - position(1) = 0; - position(3) = recommendedHeight; - else - position(1) = 0; - position(3) = screenSize(1); - end -end -set(fig,'Position',position) -%********************************************************************* - -%********************************************************************* -function [lefttree, righttree, thirdtree] = findsubtreeofnode(tr) - -lefttree = zeros(tr.numBranches, tr.numNodes); -righttree = zeros(tr.numBranches, tr.numNodes); -thirdtree = zeros(tr.numBranches, tr.numNodes); - -% thirdtree_left = zeros(tr.numBranches, tr.numNodes); -% thirdtree_right = zeros(tr.numBranches, tr.numNodes); - -lefttree(1, 1) = tr.tree(1, 1); -righttree(1,1) = tr.tree(1, 2); -lefttree(1, tr.numNodes) = 1; -righttree(1, tr.numNodes) = 1; - -for i = 2 : tr.numBranches - j = 0; - tmp = tr.tree(i, 1); - lefttree(i, 1) = tmp; - lefttree(i, tr.numNodes) = 1; - if tmp > tr.numLeaves - lefttrlen = lefttree(tmp - tr.numLeaves, tr.numNodes); - sub = lefttree(tmp - tr.numLeaves, 1 : lefttrlen); - lefttree(i, 2 : lefttrlen + 2 - 1) = sub; - - righttrlen = righttree(tmp - tr.numLeaves, tr.numNodes); - sub = righttree(tmp - tr.numLeaves, 1 : righttrlen); - lefttree(i, lefttrlen + 3 - 1: lefttrlen + 3 - 1 + righttrlen - 1) = sub; - lefttree(i, tr.numNodes) = lefttrlen + righttrlen + lefttree(i, tr.numNodes); - end - - j = 0; - tmp = tr.tree(i, 2); - righttree(i, 1) = tmp; - righttree(i, tr.numNodes) = 1; - if tmp > tr.numLeaves - lefttrlen = lefttree(tmp - tr.numLeaves, tr.numNodes); - sub = lefttree(tmp - tr.numLeaves, 1 : lefttrlen); - righttree(i, 2 : lefttrlen + 2 - 1) = sub; - - righttrlen = righttree(tmp - tr.numLeaves, tr.numNodes); - sub = righttree(tmp - tr.numLeaves, 1 : righttrlen); - righttree(i, lefttrlen + 3 - 1: lefttrlen + 3 - 1 + righttrlen - 1) = sub; - righttree(i, tr.numNodes) = lefttrlen + righttrlen + righttree(i, tr.numNodes); - end -end - -% Third tree -node = 1 : tr.numLeaves; -for i = 1 : tr.numBranches - tmp = [lefttree(i, 1:lefttree(i, tr.numNodes)), righttree(i, 1:righttree(i, tr.numNodes))]; - tf = ismember(node, tmp); - tmp = node(find(tf == 0)); - tmp = tmp(find(tmp <= tr.numLeaves)); - nlength = length(tmp); - thirdtree(i, 1 : nlength) = tmp; - thirdtree(i, tr.numNodes) = nlength; -end - -%********************************************************************* -function [treeangle, leftnode, rightnode] = caltreeangle(treenode, root, tr) -tmp = find(treenode <= tr.numLeaves); -leftnode = treenode(tmp(1)); -rightnode = treenode(tmp(length(tmp))); - -x0 = tr.x(root); -y0 = tr.y(root); -x1 = tr.x(leftnode); -y1 = tr.y(leftnode); -x2 = tr.x(rightnode); -y2 = tr.y(rightnode); - -treeangle = cal_angle(x0, y0, x1, y1, x2, y2); - -%********************************************************************* -function [subtree1, subtree2] = findsubtree(node, tr) -subtree1 = []; -subtree2 = []; - -nodeindex = 1 : tr.numNodes; - -i1 = 0; -i2 = 0; - -flag1 = 0; -flag2 = 0; - -while ~flag1 || ~flag2 - nodetmp = find(tr.par == node); - - if nodetmp(1) <= tr.numLeaves - subtree1 = [subtree1, nodetmp(1)]; - i1 = i1 + 1; - flag1 = 1; - else - if i1 == 0 - subtree1 = [subtree1, nodetmp(1)]; - end - [subtr1, subtr2] = findsubtree(nodetmp(1), tr); - flag1 = 1; - subtree1 = [subtree1, subtr1, subtr2]; - end - - if nodetmp(2) <= tr.numLeaves - subtree2 = [subtree2, nodetmp(2)]; - i2 = i2 + 1; - flag2 = 1; - else - if i2 == 0 - subtree2 = [subtree2, nodetmp(2)]; - end - [subtr1, subtr2] = findsubtree(nodetmp(2), tr); - flag2 = 1; - subtree2 = [subtree2, subtr1, subtr2]; - end -end - -%********************************************************************* -function leavenodes = findanothersubtree(node, tr) -nextnode = node; -cutpoint = tr.par(length(tr.par)); -subtree = []; -tmpnode = nextnode; -nextnode = tr.par(nextnode); - -while nextnode <= cutpoint - [subtree1, subtree2] = findsubtree(nextnode, tr); - if sum(find(subtree1 == node)) == 0 - subtree = [subtree, subtree1]; - else - subtree = [subtree, subtree2]; - end - tmpnode = nextnode; - if nextnode < cutpoint - nextnode = tr.par(nextnode); - else - break; - end -end -leavenodes = subtree(find(subtree <= tr.numLeaves)); - -%********************************************************************* -function slope = cal_slope(x1, y1, x2, y2) -if x1 == x2 - slope = tan(pi/2); -else - slope = (y2 - y1)/(x2 - x1); -end - -%********************************************************************* -function xangle = cal_angle(x0, y0, x1, y1, x2, y2) -k1 = cal_slope(x0, y0, x1, y1); -k2 = cal_slope(x0, y0, x2, y2); -xangle = atan(abs((k2 - k1)/(1 + k1 * k2))); - -%********************************************************************* -function [leftnode, rightnode, xangle] = cal_thr_angle(trnode, node, tr) -leftnode = 0; -rightnode = 0; -xangle = 0; - -n = length(trnode); -if n == 1 - leftnode = trnode(1); - rightnode = trnode(1); - xangle = 0; - return; -end - -x0 = tr.x(node); -y0 = tr.y(node); - -for i = 1 : n - 1 - x1 = tr.x(trnode(i)); - y1 = tr.y(trnode(i)); - k1 = cal_slope(x0, y0, x1, y1); - for j = i + 1 : n - x2 = tr.x(trnode(j)); - y2 = tr.y(trnode(j)); - k2 = cal_slope(x0, y0, x2, y2); - theta = atan(abs((k2 - k1)/(1 + k1 * k2))); - if xangle < theta - xangle = theta; - leftnode = trnode(i); - rightnode = trnode(j); - end - end -end - -%********************************************************************* -function [left, right] = leftnrightnode(subtree, tr) -lefttree = tr.tree(:, 1); -righttree = tr.tree(:, 2); - -left = 0; -right = 0; -numindex = length(lefttree); -i = 1; -for i = 1 : numindex - [tf, index] = ismember(lefttree(i), subtree); - if tf == 1 - left = lefttree(i); - break; - end -end - -for i = 1 : numindex - [tf, index] = ismember(righttree(i), subtree); - if tf == 1 - right = righttree(i); - break; - end -end - -if left == 0 - left = right; -end -if right == 0 - right = left; -end - -%********************************************************************* -function nodeangle = cal_nodeangle(y, x) -nodeangle = atan2(y, x); -if nodeangle < 0 - nodeangle = 2*pi + nodeangle; -end - -%********************************************************************* -function tree = adjustsubtreeangle(subtr1, subtr2, subtr3, orgnode, tr) - -tr1node = subtr1(find(subtr1 <= tr.numLeaves)); -tr2node = subtr2(find(subtr2 <= tr.numLeaves)); - -tr.x(subtr1) = tr.x(subtr1) - tr.x(orgnode); -tr.y(subtr1) = tr.y(subtr1) - tr.y(orgnode); - -tr.x(subtr2) = tr.x(subtr2) - tr.x(orgnode); -tr.y(subtr2) = tr.y(subtr2) - tr.y(orgnode); - -tr.x(subtr3) = tr.x(subtr3) - tr.x(orgnode); -tr.y(subtr3) = tr.y(subtr3) - tr.y(orgnode); - -[tree1left, tree1right] = leftnrightnode(tr1node, tr); -[tree2left, tree2right] = leftnrightnode(tr2node, tr); -tr3node = subtr3;%leftnrightnode(subtr3, tr); -tree3left = tr3node(1); -tree3right = tr3node(2); - -a1left = cal_nodeangle(tr.y(tree1left), tr.x(tree1left)); -a1right = cal_nodeangle(tr.y(tree1right), tr.x(tree1right)); - -a2left = cal_nodeangle(tr.y(tree2left), tr.x(tree2left)); -a2right = cal_nodeangle(tr.y(tree2right), tr.x(tree2right)); - -a3left = cal_nodeangle(tr.y(tree3left), tr.x(tree3left)); -a3right = cal_nodeangle(tr.y(tree3right), tr.x(tree3right)); - -daylight = [a1right - a3left, a2right - a1left, a3right - a2left]; -equaldaylight = sum(abs(daylight))/3; - -adjust1n3 = equaldaylight - daylight(1); -adjust2n3 = daylight(3) - equaldaylight; - -% Rotation --- subtr1 -a = tr.x(subtr1); -b = tr.y(subtr1); -if adjust1n3 ~= 0 - tr.x(subtr1) = a * cos(adjust1n3) - b * sin(adjust1n3); - tr.y(subtr1) = a * sin(adjust1n3) + b * cos(adjust1n3); -end - -% Rotation --- subtr2 -a = tr.x(subtr2); -b = tr.y(subtr2); -if adjust2n3 ~= 0 - tr.x(subtr2) = a * cos(adjust2n3) - b * sin(adjust2n3); - tr.y(subtr2) = a * sin(adjust2n3) + b * cos(adjust2n3); -end - -tr.x(subtr1) = tr.x(subtr1) + tr.x(orgnode); -tr.y(subtr1) = tr.y(subtr1) + tr.y(orgnode); - -tr.x(subtr2) = tr.x(subtr2) + tr.x(orgnode); -tr.y(subtr2) = tr.y(subtr2) + tr.y(orgnode); - -tr.x(subtr3) = tr.x(subtr3) + tr.x(orgnode); -tr.y(subtr3) = tr.y(subtr3) + tr.y(orgnode); - -tree = tr; - -%********************************************************************* -function tree = adjusttree(lefttree, righttree, thirdtree, node, tr) - -tr1 = lefttree(node - tr.numLeaves, 1 : lefttree(node - tr.numLeaves, tr.numNodes)); -tr2 = righttree(node - tr.numLeaves, 1 : righttree(node - tr.numLeaves, tr.numNodes)); -tr3 = thirdtree(node - tr.numLeaves, 1 : thirdtree(node - tr.numLeaves, tr.numNodes)); -[lefttreeangle, l_leftnode, l_rightnode] = caltreeangle(tr1, node, tr); -[righttreeangle, r_leftnode, r_rightnode] = caltreeangle(tr2, node, tr); -[thr_leftnode, thr_rightnode, xangle] = cal_thr_angle(tr3, node, tr); - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -d1 = sqrt((tr.x(l_rightnode) - tr.x(thr_leftnode)) * (tr.x(l_rightnode) - tr.x(thr_leftnode)) + (tr.y(l_rightnode) -tr.y(thr_leftnode)) * (tr.y(l_rightnode) -tr.y(thr_leftnode))); -d2 = sqrt((tr.x(l_rightnode) - tr.x(thr_rightnode)) * (tr.x(l_rightnode) - tr.x(thr_rightnode)) + (tr.y(l_rightnode) -tr.y(thr_rightnode)) * (tr.y(l_rightnode) -tr.y(thr_rightnode))); - -if d2 > d1 - tmp = thr_leftnode; - thr_leftnode = thr_rightnode; - thr_rightnode = tmp; -end - -% daylight = 2 * pi - lefttreeangle - righttreeangle - xangle; -% equaldaylight = daylight / 3; - -[angle1to3, angle2to3, angle1to2] = cal_daylight(l_leftnode, l_rightnode, r_leftnode, r_rightnode, thr_leftnode, thr_rightnode, node, tr); - -daylight = angle1to3 + angle2to3 + angle1to2; -equaldaylight = daylight / 3; - -adjust1n3 = equaldaylight - angle1to3; -adjust2n3 = angle2to3 - equaldaylight; - -tr.x(tr1) = tr.x(tr1) - tr.x(node); -tr.y(tr1) = tr.y(tr1) - tr.y(node); - -tr.x(tr2) = tr.x(tr2) - tr.x(node); -tr.y(tr2) = tr.y(tr2) - tr.y(node); - -% Rotation --- subtr1 -a = tr.x(tr1); -b = tr.y(tr1); -if adjust1n3 ~= 0 - tr.x(tr1) = a * cos(adjust1n3) - b * sin(adjust1n3); - tr.y(tr1) = a * sin(adjust1n3) + b * cos(adjust1n3); -end - -% Rotation --- subtr2 -a = tr.x(tr2); -b = tr.y(tr2); -if adjust2n3 ~= 0 - tr.x(tr2) = a * cos(adjust2n3) - b * sin(adjust2n3); - tr.y(tr2) = a * sin(adjust2n3) + b * cos(adjust2n3); -end - -tr.x(tr1) = tr.x(tr1) + tr.x(node); -tr.y(tr1) = tr.y(tr1) + tr.y(node); - -tr.x(tr2) = tr.x(tr2) + tr.x(node); -tr.y(tr2) = tr.y(tr2) + tr.y(node); - -tree = tr; - -%********************************************************************* -function [angle1to3, angle2to3, angle1to2] = cal_daylight(l_leftnode, l_rightnode, r_leftnode, r_rightnode, thr_leftnode, thr_rightnode, node, tr) - -x0 = tr.x(node); -y0 = tr.y(node); - -llx = tr.x(l_leftnode); -lly = tr.y(l_leftnode); -lrx = tr.x(l_rightnode); -lry = tr.y(l_rightnode); -rlx = tr.x(r_leftnode); -rly = tr.y(r_leftnode); -rrx = tr.x(r_rightnode); -rry = tr.y(r_rightnode); - -tlx = tr.x(thr_leftnode); -tly = tr.y(thr_leftnode); -trx = tr.x(thr_rightnode); -tryy = tr.y(thr_rightnode); - -angle1to3 = cal_angle(x0, y0, llx, lly, trx, tryy); -angle2to3 = cal_angle(x0, y0, rrx, rry, tlx, tly); -angle1to2 = cal_angle(x0, y0, lrx, lry, rlx, rly); - -%********************************************************************* -function txtAngle = cal_textAngle(tr) -nodeIndex = 1 : tr.numLeaves; -X = tr.x(nodeIndex) - tr.x(tr.par(nodeIndex)); -Y = tr.y(nodeIndex) - tr.y(tr.par(nodeIndex)); - -txtAngle = atan(Y./X) * 360/(2*pi); \ No newline at end of file diff --git a/matlab/graph/plotVarmuus.m b/matlab/graph/plotVarmuus.m deleted file mode 100644 index 072bcb5..0000000 --- a/matlab/graph/plotVarmuus.m +++ /dev/null @@ -1,102 +0,0 @@ -function plotVarmuus(V, C, pointers, varmuus, coordinates, partition, tekstit) - -if nargin < 7 - tekstit = pointers; -end - -notEmptyPops = length(unique(partition)); -if notEmptyPops>30 - disp(['Number of populations: ' num2str(notEmptyPops)]); - disp(' '); - disp('Figure can be drawn only if the number of populations'); - disp('is less or equal to 30.'); - disp(' '); - return; -end - - -h1 = figure; -hold on - -colors=giveColors(notEmptyPops); - -[I, J] = find(coordinates>0 | coordinates<0); -I=unique(I); -xmin = min(coordinates(I,1)); -xmax = max(coordinates(I,1)); -xdiff = (xmax-xmin); -xmean = xmin + xdiff/2; - -ymin = min(coordinates(I,2)); -ymax = max(coordinates(I,2)); -ydiff = (ymax-ymin); -ymean = ymin + ydiff/2; - -pituus = max(ydiff,xdiff)*1.1/2; - -zmax = 0.8*max(varmuus) + 0.2; - -axis([xmean-pituus xmean+pituus ymean-pituus ymean+pituus 0 zmax]); -grid(gca); -d = [1 2 3 1]; - - - -for i=1:length(C) - koko = length(C{i}); - soluPisteet = V(C{i},:); - center = [mean(soluPisteet(:,1)) mean(soluPisteet(:,2))]; - center = repmat(center, [koko 1]); - soluPisteet = soluPisteet + (center - soluPisteet)./1000; - - - apu = zeros(2*koko, 3); - apu(1:koko, 1:2) = soluPisteet; - apu(koko+1:end, 1:2) = soluPisteet; - apu(koko+1:end, 3) = varmuus(i); - - taulu = pointers{i}; - if length(taulu)>0 - color = colors(partition(taulu(1)),:); - pisteet =[1:koko 1]; - patch('XData', apu(pisteet,1), 'YData', apu(pisteet,2), ... - 'ZData', apu(pisteet,3), 'FaceColor',color, 'Clipping', ... - 'on', 'EdgeColor','k', 'LineWidth', 1); - - pisteet = pisteet+koko; - patch('XData', apu(pisteet,1), 'YData', apu(pisteet,2), ... - 'ZData', apu(pisteet,3), 'FaceColor',color, 'Clipping', ... - 'on', 'EdgeColor','k', 'LineWidth', 1); - - for j = 1:koko-1 - pisteet = [j j+1 j+koko+1 j+koko j]; - patch('XData', apu(pisteet,1), 'YData', apu(pisteet,2), ... - 'ZData', apu(pisteet,3), 'FaceColor',color, 'Clipping', ... - 'on', 'EdgeColor','k', 'LineWidth', 1); - end - - pisteet = [koko 1 koko+1 2*koko koko]; - patch('XData', apu(pisteet,1), 'YData', apu(pisteet,2), ... - 'ZData', apu(pisteet,3), 'FaceColor',color, 'Clipping', ... - 'on', 'EdgeColor','k', 'LineWidth', 1); - end -end - -if ~isequal(tekstit, -1) - for i=1:length(pointers) - taulu = pointers{i}; - teksti = tekstit{i}; - if isnumeric(teksti) - teksti = num2str(teksti); - end - if length(taulu)>0 - text(coordinates(taulu(1),1),coordinates(taulu(1),2), ... - varmuus(i) + zmax/100, teksti, 'FontSize', 10); - end - end -end - -view(3); -hold off - - diff --git a/matlab/graph/plotflow.m b/matlab/graph/plotflow.m deleted file mode 100644 index 119b653..0000000 --- a/matlab/graph/plotflow.m +++ /dev/null @@ -1,282 +0,0 @@ -function plotflow(action) -switch action - case 'rename' - rename; - case 'prune' - prune; - case 'edit_pop_name' - changePopNumber(0); - case 'cancel_pop_name' - closereq; - case 'load_pop_names' - loadPopNames; - case 'ok_pop_name' - okPopName; - case 'five_back' - changePopNumber(-5); - case 'one_back' - changePopNumber(-1); - case 'one_ahead' - changePopNumber(1); - case 'five_ahead' - changePopNumber(5); - case 'help' - openHelp; -end -return - -% ------------------------------------------------------------------------- -function rename -h0 = gcf; -h1 = findobj(h0, 'Tag','attr_menu'); -g = get(h1,'Userdata'); -varnames = g.varnames; -openInputPopNamesFigure(varnames); % rename clusters - -% ------------------------------------------------------------------------- -function prune -h0 = gcf; -h1 = findobj(h0, 'Tag','attr_menu'); -g = get(h1,'Userdata'); -adjmat = g.adjmat; -if isfield(g,'adjmat2') - adjmat2 = g.adjmat2; - min_strength = min(adjmat2(logical(adjmat2>0))); -else - min_strength = min(adjmat(logical(adjmat>0))); -end - -%waitALittle; -answer = inputdlg( ['Specify the minimal strength of the gene flow graph such that'... - ' edges with lower weight will be pruned'],... - 'Prune the graph',1, {num2str(min_strength)}); -if isempty(answer) % cancel has been pressed - return -else - min_strength = str2num(answer{1}); - fprintf('Minimal flow strength: %4.5f\n', min_strength); - adjmat(adjmat num_of_pops - disable('five_ahead_button'); -else enable('five_ahead_button'); -end; -if (num_of_curr + 1) > num_of_pops - disable('one_ahead_button'); -else enable('one_ahead_button'); -end; - - -function loadPopNames -%Loads the population names to UserData of 'population_ -%names_figure'. Gets the names from a file specified by -%the user. - -[filename,pathname] = uigetfile('*.txt','Load Population Names'); -if (filename == 0) & (pathname == 0) - %Cancel was pushed. - return, -end; -input_file = [pathname filename]; -%Read population names from the file to a variable 'names': -fid = fopen(input_file); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) && ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); -%Check that the number of lines is the same as npops: -h0 = findobj('Tag','data_info_button'); -tiedot = get(h0,'UserData'); -counts = tiedot.countsUD; -sizc = size(counts); npops = sizc(3); -length_names = length(names); -if npops ~= length_names - msgbox(['Loading of names was unsuccessful.' ... - 'The number of lines in a file that contains the names ' ... - 'must be same as the number of observed ' ... - 'sampling units in the data.'] ,'Error', ... - 'error'); - return; -end; -%Save names to the UserData of the 'population_names_figure': -h0 = findobj('Tag','population_names_figure'); -tiedot = get(h0,'UserData'); -tiedot.tempnamesUD = names; -set(h0,'UserData',tiedot); -%Update text-field to initial state: -h0 = findobj('Tag','input_pop_name_text'); -set(h0,'String','Name of Cluster 1:'); -%Set the name of the first population to the screen: -h0 = findobj('Tag','pop_name_edit'); -set(h0,'String',names{1}); - - -function okPopName -%Saves the names of populations and closes -%'population_names_figure'. - -h0 = findobj('Tag','population_names_figure'); -tiedot = get(h0,'UserData'); -names = tiedot.tempnamesUD; -%Close the figure: -% closereq; -close(h0); - -handle = gcf; -h0 = findobj(handle,'Tag','attr_menu'); -g = get(h0,'Userdata'); -g.varnames = names; -set(h0,'UserData',g); - -% redraw the figure -switch g.type - case 'GENEFLOW' - graphvizpath = g.graphvizpath; - nodecolors = g.nodecolors; - nodestyles = g.nodestyles; - arccolors = g.arccolors; - arcstyles = g.arcstyles; - groupnames = g.varnames; - - d = cd; - plotmodel(g.adjmat2,g.k,'graphvizpath', graphvizpath, ... - 'nodecolors', nodecolors, ... - 'nodestyles', nodestyles, ... - 'arccolors',arccolors, ... - 'arcstyles',arcstyles, ... - 'varnames', groupnames, ... - 'target', ['matlab:',num2str(handle)]); - cd(d); - case {'nj', 'upgma'} - switch g.visualtype - case {'square', 'angular'} - viewDendrogram(g.visualtype) - case {'radial', 'phylogram'} - viewUnrooted(g.visualtype) - end - otherwise - return -end - -function enable(obj_tag) -h0 = findobj('Tag',obj_tag); -set(h0,'Enable','on'); - - -function disable(obj_tag) -h0 = findobj('Tag',obj_tag); -set(h0,'Enable','off'); - - -function openHelp -info{1}='Gene Flow Between Populations'; -info{2}=''; -info{3}='Produced using GraphViz'; -info{4}='Source: gene flow matrix produced by BAPS'; -info{5}=''; -info{6}='Author: Jing Tang'; -info{7}=''; -helpdlg(info,'Help'); diff --git a/matlab/graph/plotmodel.m b/matlab/graph/plotmodel.m deleted file mode 100644 index c76b0dc..0000000 --- a/matlab/graph/plotmodel.m +++ /dev/null @@ -1,342 +0,0 @@ -function handle = plotmodel(B, k, varargin) -% PLOTMODEL Plot a causal model. -% -% Syntax: plotmodel(B, k, ...) -% -% Plots a causal model as a directed acyclic graph. The graph -% layout is done with Graphviz, see www.graphviz.org for details. -% You need either Graphviz installed in your system or a Java -% interpreter with internet connection. -% -% Intended usage is to first call LiNGAM to do the causal discovery -% and then to call 'plotmodel' to visualize the estimated model. -% -% Example: -% [B stde ci k] = lingam(data); -% plotmodel(B, k); -% -% Also latent models can be plotted with dashed nodes and incoming/ -% outgoing arcs. Use 'latent' argument with logical column array -% indicating the latent variables (in causal order). -% -% Example: -% latent = logical([0 1 0 0]'); -% plotmodel(B, k, 'latent', latent, 'target', 'psviewer'); -% -% To plot on a specific Matlab figure, use the 'target' argument -% with value 'matlab:[handle]', e.g. 'matlab:3' to plot on figure -% no. 3. -% -% Required input arguments: -% B - the weight matrix. -% k - the causal order of variables. -% -% Optional input arguments in name, value pairs [default]: -% 'target' - target of output ['matlab' (Octave: 'psviewer')]: -% 'matlab' - plot graph on Matlab figure window -% 'java' - plot graph with Grappa. Uses a remote layout server -% if 'dot' can't be found. In that case, needs an -% internet connection. -% 'psviewer' - plot graph with a PS viewer. Default for Octave. -% Edit file 'settings.m' to set your PS viewer. -% '[filename]' - plot graph on eps-file '[filename]' -% 'varnames' - names of variables in cell array of strings [{'x1' 'x2' ...}] -% 'plotarcs' - plot labels for arcs (boolean) [true] -% 'layout' - layout of the graph ['td']: -% 'td' - plot nodes in hierarhial layers, top-down -% 'lr' - plot nodes in hierarhial layers, left to right -% 'circle' - plot nodes in cirle (not possible with -% 'target', 'java') -% 'nodeshapes' - cell array of node shapes ['ellipse'] -% 'nodestyles' - cell array of node styles ['solid'] -% 'arcstyles' - cell array of arc styles ['solid'] -% 'arccolors' - cell array of arc colors ['black'] -% 'arclabels' - cell array of arc labels ['B(j, i)'] -% 'latent' - a logical column vector of latent variables [[]] -% - -% Check if we are running Octave -isoctave = exist('OCTAVE_VERSION'); - -% Check number of input arguments -if nargin < 2 || rem(nargin, 2) ~= 0 - error('number of input arguments must be >= 2 and even'); -end - -% Initialize variables -dims = size(B, 1); -adj = B(k, k)'; - -if isoctave - target = 'psviewer'; -else - target = 'matlab'; -end -handle = []; % Matlab figure handle, [] = create a new figure - -varnames = cell([dims 1]); -for i = 1:dims - varnames{i} = strcat('x', int2str(k(i))); -end - -plotarcs = true; -layout = 'td'; -nodeshapes = cell(dims, 1); -nodeshapes(:) = {'ellipse'}; -nodestyles = cell(dims, 1); -nodestyles(:) = {'solid'}; -adjmat = adj ~= 0; -arcstyles = cell(dims); -arcstyles(adjmat) = {'solid'}; -arccolors = cell(dims); -arccolors(adjmat) = {'black'}; -latent = []; - -% Handle optional input arguments -for i = 1:2:(nargin - 2) - switch varargin{i} - case 'target', [target handle] = parsetarget(varargin{i + 1}); - case 'plotarcs', plotarcs = varargin{i + 1}; - case 'layout', layout = varargin{i + 1}; - case 'nodeshapes', nodeshapes = varargin{i + 1}; - case 'nodestyles', nodestyles = varargin{i + 1}; - case 'nodecolors', nodecolors = varargin{i + 1}; - case 'arcstyles', arcstyles = varargin{i + 1}; - case 'arccolors', arccolors = varargin{i + 1}; - case 'arclabels', arclabels = varargin{i + 1}; - case 'latent', latent = varargin{i + 1}; - case 'varnames' - tempnames = varargin{i + 1}; - for i = 1:dims - varnames{i} = tempnames{k(i)}; - end - case 'graphvizpath', graphvizpath = varargin{i+1}; - otherwise, warning('unknown input argument: %s\n', varargin{i}); - end -end - -% If the latent variables are given, force the drawing attributes -% of nodes and incoming/outgoing arcs to dashed -nodestyles(latent) = {'dashed'}; -latentindices = find(latent)'; -for i = latentindices - in = adjmat(:, i); - out = adjmat(i, :); - arcstyles(i, out) = {'dashed'}; - arcstyles(in, i) = {'dashed'}; -end - -% Set filenames -dotfile = 'plotmodel_temp.dot'; -switch target - case 'matlab' - if isoctave - format = 'ps'; - imgfile = 'plotmodel_temp.ps'; - else - format = 'png'; - imgfile = 'plotmodel_temp.png'; - end - case 'java' - % no need for image file - case 'psviewer' - format = 'ps'; - imgfile = newtempfile('ps'); - settings % find out the PS viewer - otherwise - format = 'ps'; - imgfile = target; -end - -% Generate labels for arcs -if ~exist('arclabels') - if plotarcs - arclabels = double2labels(adj); - else - arclabels = double2labels(zeros(dims)); - end -end - -% Call graph2dot to write the graph to a dot-file -graph2dot(adj, dotfile, ... - 'nodelabels', varnames, ... - 'nodeshapes', nodeshapes, ... - 'nodestyles', nodestyles, ... - 'nodecolors', nodecolors, ... - 'arclabels', arclabels, ... - 'arcstyles', arcstyles, ... - 'arccolors', arccolors, ... - 'arclabels', arclabels, ... - 'leftright', strcmp(layout, 'lr'),... - 'graphvizpath', graphvizpath); - -if strcmp(target, 'java') - % Call Grappa to produce the picture - libstr = '-cp ../lib/grappa1_2.jar:../lib/lingam.jar'; - classstr = ' lingam.GraphPlotter '; - if isoctave - progcall = strcat('java ', libstr, classstr, dotfile); - else - progcall = strcat(['java ', libstr, classstr, dotfile]); - end -else - % Call Graphviz to produce the picture - if strcmp(layout, 'circle') - prog = 'circo'; - else - prog = 'dot'; - end - - if isoctave - progcall = strcat(prog, ' -T', format, ' ', dotfile, ' -o ', imgfile); - else - progcall = strcat([prog, ' -T', format, ' ', dotfile, ... - ' -o ', imgfile]); - end -end - -% Call Graphviz/Grappa -if ispc - shell = 'dos'; -else - shell = 'unix'; -end - -cd(graphvizpath); % change the directory to use dot.exe -shellcall = strcat(shell, '(''', progcall, ''')'); -[status msg] = eval(shellcall); - -if status - if strcmp(target, 'java') - error('calling Java caused an error: %s\n', msg); - else - error('calling "%s" caused an error: %s\n', prog, msg); - end -end - -switch target - case 'matlab' - % Read the image file - if isoctave - options = '-antialias'; - [img cmap] = imread(imgfile, options); - else - [img cmap] = imread(imgfile, 'png'); - end - - % Create a new window / set the current - if isempty(handle) - handle = figure; % Create a new figure - figx = 100; - figy = 100; - else - figure(handle); % Draw on the given figure - if ~isoctave - position = get(handle, 'Position'); - figx = position(1); - figy = position(2); - end - end - - % Resize the figure window - if ~isoctave - set(handle, 'Position', [figx figy size(img, 2) size(img, 1)]); - end - - % Show the image and set the color map - image(img); - if isoctave - toolow = find(cmap < 0.0); - toohigh = find(cmap > 1.0); - if any(toolow) || any(toohigh) - fprintf('Color map values are out of range [0 1]!\n'); - else - colormap(cmap); - end - else - colormap(cmap); - set(gca, 'Position', [0 0 1 1]); - set(gca, 'XTick', []); - set(gca, 'YTick', []); - end - % delete(imgfile); - case 'java' - % nothing to do - case 'psviewer' - fprintf('graph plotted to file "%s"\n', imgfile); - if isoctave - progcall = strcat(PSVIEWER, ' ', imgfile, ' &'); - else - progcall = strcat([PSVIEWER, ' ', imgfile, ' &']); - end - shellcall = strcat(shell, '(''', progcall, ''')'); - [status msg] = eval(shellcall); - if status - error('calling "%s" caused an error: %s\n', prog, msg); - end - otherwise - fprintf('graph plotted to file "%s"\n', imgfile); -end - -% added at 25.5.2009 for saving the raw dot and png from graphviz. -save_preproc = questdlg('Do you wish to save the graph?',... - 'Save graph in png and dot format?',... - 'Yes','No','Yes'); -if isequal(save_preproc,'Yes') - waitALittle; - [filename, pathname] = uiputfile('*.png','Save the graph as'); - if filename~=0 - [path,name,suffix,version] = fileparts(filename); - copyfile(dotfile, [pathname '\' name '.dot']); - copyfile(imgfile, [pathname '\' filename]); - end - -end -% h1 = findobj('Tag','filename1_text'); -% [pathstr, name] = fileparts(get(h1,'string')); - -delete(dotfile); -delete(imgfile); -figure(handle); - -% ----------------------------------------------------------------------------- -function name = newtempfile(suffix) -% NEWTEMPFILE Create a new temporary file. - -prefix = 'plotmodel_temp_'; % common suffix for all temp files -maxnumber = 0; - -files = dir('.'); -for i = 1:length(files) - filename = files(i).name; - - dotindex = find(filename == '.'); % works for Matlab and Octave - if isempty(dotindex) - continue; - end - - suff = filename(dotindex + 1:length(filename)); - if strcmp(suff, suffix) & findstr(filename, 'plotmodel_temp_') == 1 - number = str2num(filename(16:dotindex - 1)); - if number > maxnumber - maxnumber = number; - end - end -end - -name = strcat('plotmodel_temp_', num2str(maxnumber + 1), '.', suffix); - - -% ----------------------------------------------------------------------------- -function [target, handle] = parsetarget(s) -% PARSETARGET Parse target and a possible figure handle. - -colonind = find(s == ':'); % works for Matlab and Octave -if isempty(colonind) - target = s; - handle = []; -else - colonind = colonind(1); - target = s(1:colonind - 1); - handle = str2num(s(colonind + 1:length(s))); -end diff --git a/matlab/graph/population_names_figure.m b/matlab/graph/population_names_figure.m deleted file mode 100644 index 2f508bd..0000000 --- a/matlab/graph/population_names_figure.m +++ /dev/null @@ -1,100 +0,0 @@ -function fig = population_names_figure() -% This is the machine-generated representation of a Handle Graphics object -% and its children. Note that handle values may change when these objects -% are re-created. This may cause problems with any callbacks written to -% depend on the value of the handle at the time the object was saved. -% -% To reopen this object, just type the name of the M-file at the MATLAB -% prompt. The M-file and its associated MAT-file must be on your path. - -% load population_names_figure -waitALittle; -load baps4 -h0 = figure('Color',[0.8 0.8 0.8], ... - 'Colormap',mat0, ... - 'CloseRequestFcn','plotflow cancel_pop_name', ... - 'MenuBar','none', ... - 'Name','Cluster Names', ... - 'NumberTitle','off', ... - 'Position',[533 448 365 197], ... - 'Resize','off', ... - 'Tag','population_names_figure'); -h1 = uimenu('Parent',h0, ... - 'Label','Files', ... - 'Tag','input_pop_name_file_menu'); -h2 = uimenu('Parent',h1, ... - 'Callback','plotflow load_pop_names', ... - 'Label','Load Names', ... - 'Tag','load_pop_names_menu'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','plotflow cancel_pop_name', ... - 'ListboxTop',0, ... - 'Position',[122.25 12 59 19], ... - 'String','Cancel', ... - 'Tag','cancel_pop_name_button'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'Callback','plotflow ok_pop_name', ... - 'ListboxTop',0, ... - 'Position',[192.75 12 59 19], ... - 'String','Ok', ... - 'Tag','ok_pop_name_button'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[0.753 0.753 0.753], ... - 'ListboxTop',0, ... - 'Position',[18.75 47.25 233.25 82.5], ... - 'Style','frame', ... - 'Tag','input_pop_name_frame'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[0.753 0.753 0.753], ... - 'HorizontalAlignment','left', ... - 'ListboxTop',0, ... - 'Position',[30.75 107.25 203.25 13.5], ... - 'String','Name of Cluster 1:', ... - 'Style','text', ... - 'Tag','input_pop_name_text'); -h1 = uicontrol('Parent',h0, ... - 'Units','points', ... - 'BackgroundColor',[1 1 1], ... - 'Callback','plotflow edit_pop_name', ... - 'HorizontalAlignment','left', ... - 'ListboxTop',0, ... - 'Position',[32.25 86.25 201.75 17.25], ... - 'Style','edit', ... - 'Tag','pop_name_edit'); -h1 = uicontrol('Parent',h0, ... - 'FontWeight','bold', ... - 'Units','points', ... - 'Callback','plotflow five_back', ... - 'ListboxTop',0, ... - 'Position',[159.75 60 15 15], ... - 'String','<<', ... - 'Tag','five_back_button'); -h1 = uicontrol('Parent',h0, ... - 'FontWeight','bold', ... - 'Units','points', ... - 'Callback','plotflow one_back', ... - 'ListboxTop',0, ... - 'Position',[179.25 60 15 15], ... - 'String','<', ... - 'Tag','one_back_button'); -h1 = uicontrol('Parent',h0, ... - 'FontWeight','bold', ... - 'Units','points', ... - 'Callback','plotflow one_ahead', ... - 'ListboxTop',0, ... - 'Position',[199.5 60 15 15], ... - 'String','>', ... - 'Tag','one_ahead_button'); -h1 = uicontrol('Parent',h0, ... - 'FontWeight','bold', ... - 'Units','points', ... - 'Callback','plotflow five_ahead', ... - 'ListboxTop',0, ... - 'Position',[219.75 60 15 15], ... - 'String','>>', ... - 'Tag','five_ahead_button'); -if nargout > 0, fig = h0; end diff --git a/matlab/graph/seqNeighJoin.m b/matlab/graph/seqNeighJoin.m deleted file mode 100644 index d63fe0c..0000000 --- a/matlab/graph/seqNeighJoin.m +++ /dev/null @@ -1,250 +0,0 @@ -function t = seqNeighJoin(D, method, names, varargin) -%SEQNEIGHJOIN neighbor-joining method for phylogenetic tree reconstruction. -% -% TREE = SEQNEIGHJOIN(DIST) computes a phylogenetic tree object from the -% pairwise distances DIST between the species or products applying the -% neighbor-joining method. The input DIST is a matrix (or vector) such as -% is generated by SEQPDIST. -% -% TREE = SEQNEIGHJOIN(DIST,METHOD) selects the method to compute the -% distances of the new nodes to all other nodes at every iteration. The -% general expression to calculate the distances between the new node (n) -% (after joining i and j) and all others nodes (k) is given by: -% -% D(n,k) = a*D(i,k) + (1-a)*D(j,k) - a*D(n,i) - (1-a)*D(n,j) -% -% This expression guarantees to find the correct tree with additive data -% (a.k.a. minimum variance reduction). The options for METHOD are: -% -% 'equivar' --- Assumes equal variance and independence of -% (default) evolutionary distance estimates (a = 1/2). Such as -% in Studier and Keppler, JMBE (1988). -% 'firstorder' --- Assumes a first order model of the variances and -% covariances of evolutionary distance estimates, 'a' -% is adjusted at every iteration to a value between 0 -% and 1. Such as in Gascuel, JMBE (1997). -% 'average' --- New distances are the weighted average of previous -% distances, the branch distances are ignored: -% D(n,k) = [ D(i,k) + D(j,k) ] /2 -% As in the original neighbor-joining algorithm by -% Saitou and Nei, JMBE (1987) -% -% TREE = SEQNEIGHJOIN(DIST,METHOD,NAMES) passes a list of names to label -% the leaf nodes (e.g. species or products) in the phylogenetic tree -% object. NAMES can be a vector of structures with the fields 'Header' or -% 'Name' or a cell array of strings. In both cases the number of elements -% provided must comply with the number of samples used to generate the -% pairwise distances in DIST. -% -% TREE = SEQNEIGHJOIN(...,'REROOT',false) excludes rerooting the resulting -% tree. This is useful to observe the original linkage order followed by -% the algorithm. By default SEQNEIGHJOIN reroots the resulting tree using -% the mid-point method. -% -% Example: -% -% % Load a multiple alignment of amino acids: -% seqs = fastaread('pf00002.fa'); -% -% % Measure the 'Jukes-Cantor' pairwise distances: -% dist = seqpdist(seqs,'method','jukes-cantor','indels','pair'); -% -% % Build the phylogenetic using the neighbor-joining algorithm -% tree = seqneighjoin(dist,'equivar',seqs) -% view(tree) -% -% See also MULTIALIGN, PHYTREE, PHYTREE/REROOT, PHYTREE/VIEW, SEQLINKAGE, -% SEQPDIST. - -% References: -% [1] Saitou N, Nei M.The neighbor-joining method: a new method for -% reconstructing phylogenetic trees. Mol Biol Evol.(1987) 4(4):406-25 -% [2] Gascuel O. BIONJ: An improved version of the NJ algorithm based on a -% simple model of sequence data. Mol. Biol. Evol. (1997) 14:685-695 -% [3] Studier JA, Keppler KJ. A note on the neighbor-joining algorithm of -% Saitou and Nei. Mol Biol Evol. (1988) 5(6):729-31. - -% Copyright 2003-2006 The MathWorks, Inc. -% $Revision: 1.1.8.2 $ $Date: 2006/05/17 20:48:43 $ - -rerootTree = true; -checkForNames = true; - -% check the input distances -if isnumeric(D) - [m, n] = size(D); - if isvector(D) - n = numel(D); - m = (1+sqrt(1+8*n))/2; % number of leaves - if m ~= fix(m) - error('Bioinfo:seqneighjoin:DbadSize',... - 'Size of DIST not compatible with the output of the SEQPDIST function.'); - end - D = squareform(D); - elseif m~=n - error('Bioinfo:seqneighjoin:DnotSquare',... - 'Size of DIST not compatible with the output of the SEQPDIST function.'); - end -else - error('Bioinfo:seqneighjoin:DnotNumeric',... - 'DIST must be a numeric vector compatible with the output of the SEQPDIST function.'); -end - -% Selects appropiate method -if nargin == 1 - method = 'e'; % set default method - checkForNames = false; -else - okmethods = {'equivar','firstorder','average','reroot'}; - methodkeys = {'e','f','a'}; - k = find(strncmpi(method,okmethods,numel(method))); - if isempty(k) - error('Bioinfo:seqneighjoin:UnknownMethod',... - 'Unknown method name: %s.',method); - elseif length(k)>1 - error('Bioinfo:seqneighjoin:IncorrectMethod',... - 'Ambiguous method name: %s.',method); - elseif k<4 - method = methodkeys{k}; - else % case that second and third input are optional paired inputs - if numel(varargin) || nargin==2 - error('Bioinfo:seqneighjoin:IncorrectInputArguments',... - 'Incorrect format of input arguments.') - end - varargin = {method,names}; - checkForNames = false; - method = 'e'; % set default method - end -end - -% detects the names -if checkForNames && (nargin >2) % names were supplied, check validity - if iscell(names) || isfield(names,'Header') || isfield(names,'Name') || isfield(names,'LocusName') - if isfield(names,'Header') % if struct put them in a cell - names = {names(:).Header}; - elseif isfield(names,'Name') % if struct put them in a cell - names = {names(:).Name}; - elseif isfield(names,'LocusName') % if struct put them in a cell - names = {names(:).LocusName}; - end - names = names(:); - namesSupplied = true; - if numel(names)~=m - error('Bioinfo:seqneighjoin:IncorrectSize',... - 'NAMES must have the same size as number of leaves in the tree') - end - elseif strncmpi(names,{'reroot'},numel(names)) - varargin = {names varargin{:}}; - namesSupplied = false; - else - error('Bioinfo:seqneighjoin:IncorrectInputType',... - 'NAMES must be a cell with char arrays or a vector of structures.') - end -else - namesSupplied = false; -end - -% check optional input arguments -nvarargin = numel(varargin); -if nvarargin - if rem(nvarargin,2) - error('Bioinfo:seqneighjoin:IncorrectNumberOfArguments',... - 'Incorrect number of arguments to %s.',mfilename); - end - okargs = {'reroot',''}; - for j=1:2:nvarargin - pname = varargin{j}; - pval = varargin{j+1}; - k = find(strncmpi(pname,okargs,numel(pname))); - if isempty(k) - error('Bioinfo:seqneighjoin:UnknownParameterName',... - 'Unknown parameter name: %s.',pname); - elseif length(k)>1 - error('Bioinfo:seqneighjoin:AmbiguousParameterName',... - 'Ambiguous parameter name: %s.',pname); - else - rerootTree = opttf(pval); - if isempty(rerootTree) - error('Bioinfo:seqneighjoin:rerootInputOptionNotLogical',... - '%s must be a logical value, true or false.',... - upper(char(okargs(k)))); - end - end - end -end - -% ------------------------------------------------------------------------ -% Algorithm starts here: -% -% D - pairwise distance matrix (full form) -% method - 'a' for Saitou & Nei, 'e' for Studier & Keppler, and 'f' for -% Gascuel -% names - labels for leaf nodes - -N = size(D,1); -Z = zeros(N-1,2); -bd = zeros(N*2-1,1); - -p = 1:N; % pointers in matrix -bc=1; % branch counter -if method=='f' - V=D; %initialize the variance matrix for Gascuel method -end - -for n = N:-1:3 - R = sum(D)/(n-2); % sums of columns - Q = D-repmat(R,n,1)-repmat(R',1,n)+diag(inf(n,1)); %Studier & Keppler optimized - % S = (sum(sum(D))/(n-2)+Q)/2 % original total sum in Saitou & Nei - [m,g] = min(Q(:)); %#ok - [i,j] = ind2sub(n,g); % find minimum - if i>j - k=i;i=j;j=k; % j>i always - end - pp = p([i j]); % pointers to join - bl = (R([i j])*[1 -1;-1 1] + D(j,i))/2; % branch lengths - bl = max(bl,0); - bd(pp) = bl; % save branch lengths - Z(bc,:) = pp; % save pointers - h = [1:i-1 i+1:j-1 j+1:n]; - switch method % distances to new node - case 'a' % Saitou & Nei method - d = (sum(D(h,[i j]),2))/2; - D = [[D(h,h) d];[d' 0]]; % update distance matrix - case 'e' % Studier & Keppler method - d = (sum(D(h,[i j]),2)-D(j,i))/2; - if any(d<0) - d = max(d,0); - end - D = [[D(h,h) d];[d' 0]]; % update distance matrix - case 'f' % Gascuel method - if V(i,j) - lambda = max(0,min(1,(1+sum(V(h,i)-V(h,j))/(n-2)/V(i,j))/2)); - else - lambda = 1/2; - end - d = D(h,[i j])*[lambda;1-lambda] - bl*[lambda;1-lambda]; - if any(d<0) - d = max(d,0); - end - D = [[D(h,h) d];[d' 0]]; % update distance matrix - v = V(h,[i j])*[lambda;1-lambda] - V(i,j)*lambda*(1-lambda); - if any(v<0) - v = max(v,0); - end - V = [[V(h,h) v];[v' 0]]; % update variance matrix - end - p = [p(h),N+bc]; % update pointers - bc = bc+1; % update branch counter -end -Z(bc,:) = p; % pointers of last branch -bd(p) = D(2)/2; % lengths of last branch - -% convert data to a phylogenetic tree object -if namesSupplied - t = phyTree(Z,bd,names); -else - t = phyTree(Z,bd); -end -if rerootTree - t = reRoot(t); % reroot with mid-point method -end diff --git a/matlab/graph/seqlinkage.m b/matlab/graph/seqlinkage.m deleted file mode 100644 index 6d7ddf6..0000000 --- a/matlab/graph/seqlinkage.m +++ /dev/null @@ -1,123 +0,0 @@ -function t = seqlinkage(d, method, names) -%SEQLINKAGE constructs a phylogenetic tree from pairwise distances. -% -% TREE = SEQLINKAGE(DIST) computes a phylogenetic tree object from the -% pairwise distances DIST between the species or products. The input DIST -% is a matrix (or vector) such as is generated by SEQPDIST. -% -% TREE = SEQLINKAGE(DIST,METHOD) creates a phylogenetic tree object using -% a specified distance method. The available methods are: -% -% 'single' --- nearest distance (single linkage method) -% 'complete' --- furthest distance (complete linkage method) -% 'average' (default) --- unweighted average distance (UPGMA) (also -% known as group average) -% 'weighted' --- weighted average distance (WPGMA) -% 'centroid' --- unweighted center of mass distance (UPGMC) -% 'median' --- weighted center of mass distance (WPGMC) -% -% TREE = SEQLINKAGE(DIST,METHOD,NAMES) passes a list of names to label the -% leaf nodes (e.g. species or products) in the phylogenetic tree object. -% NAMES can be a vector of structures with the fields 'Header' or 'Name' -% or a cell array of strings. In both cases the number of elements -% provided must comply with the number of samples used to generate the -% pairwise distances in DIST. -% -% Example: -% -% % Load a multiple alignment of amino acids: -% seqs = fastaread('pf00002.fa'); -% -% % Measure the 'Jukes-Cantor' pairwise distances: -% dist = seqpdist(seqs,'method','jukes-cantor','indels','pair'); -% -% % Build the phylogenetic tree with the single linkage method and pass -% % the names of the sequences: -% tree = seqlinkage(dist,'single',seqs) -% view(tree) -% -% See also PHYTREE, PHYTREE/PLOT, PHYTREE/VIEW, PHYTREEWRITE, -% SEQNEIGHJOIN, SEQPDIST. - -% Copyright 2003-2005 The MathWorks, Inc. -% $Revision: 1.1.6.6 $ $Date: 2005/06/09 21:57:42 $ - -% check the input distances -[m, n] = size(d); - -if m==n && m>1 % transform to vector form - D = zeros(m*(m-1)/2,1); - k=1; - for j = 1:m-1 - for i = j+1:m - D(k) = d(i,j); - k = k+1; - end - end -else - D=d(:); -end - -D=D'; % linkage uses row form -n = numel(D); -m = (1+sqrt(1+8*n))/2; % number of leaves -if m ~= fix(m) - error('Bioinfo:BadSize',... - 'Size of DIST not compatible with the output of the SEQPDIST function.'); -end - -% Selects appropiate method -if nargin == 1 % set default switch to be 's' - method = 'av'; -else - okmethods = {'single','nearest',... - 'complete','farthest',... - 'average','upgma',... - 'weighted','wpgma',... - 'centroid','upgmc',... - 'median','wpgmc'}; - methodkeys = {'si','si','co','co','av','av','we','we','ce','ce',... - 'me','me','wa','wa'}; - s = strmatch(lower(method), okmethods); %#ok - if isempty(s) - error('Bioinfo:UnknownMethod','Unknown method name: %s.',method); - elseif length(s)>1 - error('Bioinfo:IncorrectMethod','Ambiguous method name: %s.',method); - else - method = methodkeys{s}; - end -end - -% detects the names -if nargin == 3 % names were supplied, check validity - if iscell(names) || isfield(names,'Header') || isfield(names,'Name') || isfield(names,'LocusName') - if isfield(names,'Header') % if struct put them in a cell - names = {names(:).Header}; - elseif isfield(names,'Name') % if struct put them in a cell - names = {names(:).Name}; - elseif isfield(names,'LocusName') % if struct put them in a cell - names = {names(:).LocusName}; - end - names = names(:); - namesSupplied = true; - if numel(names)~=m - error('Bioinfo:IncorrectSize',... - 'NAMES must have the same size as number of leaves in the tree') - end - else - error('Bioinfo:IncorrectInputType',... - 'NAMES must be a cell with char arrays or a vector of structures.') - end -else - namesSupplied = false; -end - -% call the stats linkage program -T = linkage(D,method); - -% convert data to a phylogenetic tree object -if namesSupplied - t = phyTree(T,names); -else - t = phyTree(T); -end diff --git a/matlab/graph/showColors.m b/matlab/graph/showColors.m deleted file mode 100644 index 1499a7b..0000000 --- a/matlab/graph/showColors.m +++ /dev/null @@ -1,16 +0,0 @@ -function showColors(n_col) - -if n_col>36 - error('Maximum number of colors 36'); -end - -figure('NumberTitle','off','Name','Colors'); - -set(gca, 'Xlim', [-.5 , n_col+.5], 'YLim', [0,1], ... - 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); - -varit = giveColors(n_col); -for k=1:length(varit) - h0=patch([k-1 k k k-1], [0 0 1 1], varit(k,:)); - set(h0,'EdgeColor','none'); -end \ No newline at end of file diff --git a/matlab/graph/showColors2.m b/matlab/graph/showColors2.m deleted file mode 100644 index 1499a7b..0000000 --- a/matlab/graph/showColors2.m +++ /dev/null @@ -1,16 +0,0 @@ -function showColors(n_col) - -if n_col>36 - error('Maximum number of colors 36'); -end - -figure('NumberTitle','off','Name','Colors'); - -set(gca, 'Xlim', [-.5 , n_col+.5], 'YLim', [0,1], ... - 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); - -varit = giveColors(n_col); -for k=1:length(varit) - h0=patch([k-1 k k k-1], [0 0 1 1], varit(k,:)); - set(h0,'EdgeColor','none'); -end \ No newline at end of file diff --git a/matlab/graph/statgetargs.m b/matlab/graph/statgetargs.m deleted file mode 100644 index db021cc..0000000 --- a/matlab/graph/statgetargs.m +++ /dev/null @@ -1,83 +0,0 @@ -function [eid,emsg,varargout]=statgetargs(pnames,dflts,varargin) -%STATGETARGS Process parameter name/value pairs for statistics functions -% [EID,EMSG,A,B,...]=STATGETARGS(PNAMES,DFLTS,'NAME1',VAL1,'NAME2',VAL2,...) -% accepts a cell array PNAMES of valid parameter names, a cell array -% DFLTS of default values for the parameters named in PNAMES, and -% additional parameter name/value pairs. Returns parameter values A,B,... -% in the same order as the names in PNAMES. Outputs corresponding to -% entries in PNAMES that are not specified in the name/value pairs are -% set to the corresponding value from DFLTS. If nargout is equal to -% length(PNAMES)+1, then unrecognized name/value pairs are an error. If -% nargout is equal to length(PNAMES)+2, then all unrecognized name/value -% pairs are returned in a single cell array following any other outputs. -% -% EID and EMSG are empty if the arguments are valid. If an error occurs, -% EMSG is the text of an error message and EID is the final component -% of an error message id. STATGETARGS does not actually throw any errors, -% but rather returns EID and EMSG so that the caller may throw the error. -% Outputs will be partially processed after an error occurs. -% -% This utility is used by some Statistics Toolbox functions to process -% name/value pair arguments. -% -% Example: -% pnames = {'color' 'linestyle', 'linewidth'} -% dflts = { 'r' '_' '1'} -% varargin = {{'linew' 2 'nonesuch' [1 2 3] 'linestyle' ':'} -% [eid,emsg,c,ls,lw] = statgetargs(pnames,dflts,varargin{:}) % error -% [eid,emsg,c,ls,lw,ur] = statgetargs(pnames,dflts,varargin{:}) % ok - -% Copyright 1993-2004 The MathWorks, Inc. -% $Revision: 1.4.2.1 $ $Date: 2003/11/01 04:28:41 $ - -% We always create (nparams+2) outputs: -% one each for emsg and eid -% nparams varargs for values corresponding to names in pnames -% If they ask for one more (nargout == nparams+3), it's for unrecognized -% names/values - -% Initialize some variables -emsg = ''; -eid = ''; -nparams = length(pnames); -varargout = dflts; -unrecog = {}; -nargs = length(varargin); - -% Must have name/value pairs -if mod(nargs,2)~=0 - eid = 'WrongNumberArgs'; - emsg = 'Wrong number of arguments.'; -else - % Process name/value pairs - for j=1:2:nargs - pname = varargin{j}; - if ~ischar(pname) - eid = 'BadParamName'; - emsg = 'Parameter name must be text.'; - break; - end - i = strmatch(lower(pname),pnames); - if isempty(i) - % if they've asked to get back unrecognized names/values, add this - % one to the list - if nargout > nparams+2 - unrecog((end+1):(end+2)) = {varargin{j} varargin{j+1}}; - - % otherwise, it's an error - else - eid = 'BadParamName'; - emsg = sprintf('Invalid parameter name: %s.',pname); - break; - end - elseif length(i)>1 - eid = 'BadParamName'; - emsg = sprintf('Ambiguous parameter name: %s.',pname); - break; - else - varargout{i} = varargin{j+1}; - end - end -end - -varargout{nparams+1} = unrecog; diff --git a/matlab/graph/viewDendrogram.m b/matlab/graph/viewDendrogram.m deleted file mode 100644 index df37e65..0000000 --- a/matlab/graph/viewDendrogram.m +++ /dev/null @@ -1,56 +0,0 @@ -function viewDendrogram(action) -% VIEWDENDROGRAM function to called by VIEWNJ to draw dendrogram -% trees - -handle = gcf; -h0 = findobj(handle,'Tag','attr_menu'); -g = get(h0,'Userdata'); -g.visualtype = action; -set(h0,'Userdata',g); % store in the attribute menu -cla -axis off -% clf -% close(g.handle); -% h0 = figure('NumberTitle','off'); -% g.handle = h0; -% set(h0,'menubar','none','toolbar','figure'); -% set(h0,'Tag','nj_plot'); -% set(h0,'Name',['Neighbor-Joining tree - ' g.filename]);; -if strcmp(g.type,'NJ') - t = seqNeighJoin(g.D, 'equivar', g.varnames); -else - t = seqlinkage(g.D, 'average', g.varnames); -end - -switch action - case 'square' - % plotNJ(g.D, 1, char(g.varnames)); - Plot(t,'type','square'); - case 'angular' - % plotNJ(g.D, 0, char(g.varnames)); - Plot(t,'type','angular'); -end - -% h1 = uimenu('Parent',handle, ... -% 'Label','Attributes', ... -% 'Tag','attr_menu'); -% h2 = uimenu('Parent',h1, ... -% 'Label','Rename clusters', ... -% 'callback', 'plotflow rename', ... -% 'Tag','clustername_menu'); -% h2 = uimenu('Parent',h1, ... -% 'Label','Visual type', ... -% 'Tag','visualtype_menu'); -% h3 = uimenu('Parent',h2, ... -% 'Label','Square', ... -% 'callback', 'viewDendrogram(''square'')', ... -% 'Tag','viewsquare_menu'); -% h3 = uimenu('Parent',h2, ... -% 'Label','Angular', ... -% 'callback', 'viewDendrogram(''angular'')', ... -% 'Tag','viewangular_menu'); -% h3 = uimenu('Parent',h2, ... -% 'Label','Radial', ... -% 'callback', 'viewUnrooted', ... -% 'Tag','viewradial_menu'); -%set(h1,'Userdata',g); % store in the attribute menu \ No newline at end of file diff --git a/matlab/graph/viewMixPartition.m b/matlab/graph/viewMixPartition.m deleted file mode 100644 index 150e19b..0000000 --- a/matlab/graph/viewMixPartition.m +++ /dev/null @@ -1,88 +0,0 @@ -function viewMixPartition(partition, popnames) - -notEmptyPops = length(unique(partition)); -if notEmptyPops>30 - disp(['Number of populations: ' num2str(notEmptyPops)]); - disp(' '); - disp('Figure can be drawn only if the number of populations'); - disp('is less or equal to 30.'); - disp(' '); - return; -end - -nind = length(partition); -%npops = max(partition); -npops = notEmptyPops; - -varit = giveColors(npops); -korkeinviiva = 1.05; -pieninarvo = -korkeinviiva; - -h0 = figure; -set(h0, 'NumberTitle', 'off'); %image_figure; %Muutettu -tiedot.popnames = popnames; -tiedot.info = partition; -set(h0,'UserData',tiedot); - -set(gca, 'Xlim', [-.5 ,nind+.5], 'YLim', [pieninarvo ,korkeinviiva], ... - 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); - -eiTyhjatPopulaatiot = unique(partition); - -for i=1:nind - % Suhteellisten osuuksien laskeminen - pop = partition(i); - pop = find(eiTyhjatPopulaatiot==pop); - - % Pylvään piirtäminen - h0 =patch([i-1, i, i, i-1], [0, 0, 1, 1], varit(pop,:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - -end - - - -if ~isempty(popnames) - npops = size(popnames,1); - for i=1:npops - firstInd = popnames{i,2}; - if size(popnames,1) ~=nind - line([firstInd-1, firstInd-1], [0,1], 'Color', 'k'); %Populaatioiden rajat - end - if i30 - disp(' '); - disp('Figure can be drawn only if the number of populations'); - disp('is less or equal to 30.'); - disp(' '); - return; -end - - -varit = giveColors(npops); -korkeinviiva = 1.05; -pieninarvo = -korkeinviiva; - - -h0 = figure; -set(h0, 'NumberTitle', 'off'); %image_figure; %Muutettu -tiedot.popnames = popnames; -tiedot.info = osuudet; -set(h0,'UserData',tiedot); - -set(gca, 'Xlim', [-.5 ,nind+.5], 'YLim', [pieninarvo ,korkeinviiva], ... - 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); - -for i=1:nind - - if any(osuudet(i,:)>0) - cumOsuudet = cumsum(osuudet(i,:)); - - % Pylvään piirtäminen - for j=1:npops - if j==1 - if cumOsuudet(1)>0 - h0 =patch([i-1, i, i, i-1], [0, 0, cumOsuudet(1), cumOsuudet(1)], varit(j,:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - end - else - if (cumOsuudet(j)>cumOsuudet(j-1)) - h0 = patch([i-1, i, i, i-1], [cumOsuudet(j-1), cumOsuudet(j-1), ... - cumOsuudet(j), cumOsuudet(j)], varit(j,:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - end - end - end - end -end - - - -if ~isempty(popnames) - npops = size(popnames,1); - for i=1:npops - firstInd = popnames{i,2}; - line([firstInd-1, firstInd-1], [0,1], 'Color', 'k'); %Populaatioiden rajat - - if i30 - disp(' '); - disp('Figure can be drawn only if the number of populations'); - disp('is less or equal to 30.'); - disp(' '); - return; -end - -varit = giveColors(npops); -korkeinviiva = 1.05; -pieninarvo = -korkeinviiva; - -h0 = figure; -set(h0, 'NumberTitle', 'off'); %image_figure; %Muutettu -set(h0,'Tag','admixture_result'); -set(h0,'Name',['Admixture Result - ' filename]); -tiedot.popnames = popnames; -tiedot.info = osuudet; -set(h0,'UserData',tiedot); - -set(gca, 'Xlim', [-.5 ,nind+.5], 'YLim', [pieninarvo ,korkeinviiva], ... - 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); - -for i=1:nind - - if any(osuudet(i,:)>0) - cumOsuudet = cumsum(osuudet(i,:)); - - % Pylvään piirtäminen - for j=1:npops - if j==1 - if cumOsuudet(1)>0 - h0 =patch([i-1, i, i, i-1], [0, 0, cumOsuudet(1), cumOsuudet(1)], varit(j,:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - end - else - if (cumOsuudet(j)>cumOsuudet(j-1)) - h0 = patch([i-1, i, i, i-1], [cumOsuudet(j-1), cumOsuudet(j-1), ... - cumOsuudet(j), cumOsuudet(j)], varit(j,:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - end - end - end - end -end - - - -if ~isempty(popnames) - npops = size(popnames,1); - for i=1:npops - firstInd = popnames{i,2}; - if size(popnames,1)~=nind - line([firstInd-1, firstInd-1], [0,1], 'Color', 'k'); %Populaatioiden rajat - end - - % The determination of x_paikka is changed, since popnames could be - % non-sequential. - Jing - if i0); -% end -% % 'adjacency matrix' -% adj_mat = cell(ncluster,ncluster); -% k = 2; -% for i = 1:ncluster -% for j = k:ncluster -% adj_mat{i,j} = intersect(clusters{i},clusters{j}); -% end -% k = k + 1; -% end -% -% adj_mat = zeros(ncluster,ncluster); -% k = 2; -% for i = 1:ncluster -% for j = k:ncluster -% adj_mat(i,j) = length(intersect(clusters{i},clusters{j})); -% end -% k = k + 1; -% end -% adj_mat = adj_mat+adj_mat'; -% admix_count = sum(adj_mat,1); -% order = zeros(1,ncluster); -% middle_cluster = find(recomb_count==max(admix_count)); -% reference_cluster = middle_cluster; -% middle_point = floor(ncluster/2); -% order(middle_point) = middle_cluster(1); -% for i = middle_point+1:ncluster -% order(i) = find(adj_mat(:,reference_cluster)==max(adj_mat(:,reference_cluster))); -% adj_mat(referecen_cluster,order(i)) = 0; -% adj_mat(order(i),reference_cluster) = 0; -% reference_cluster = order(i); -% end -% [ordered_cluster,ix] = sort(size_cluster,'descend'); - -% disp(['Number of populations: ' num2str(npops)]); -% if npops>30 -% disp(' '); -% disp('Figure can be drawn only if the number of populations'); -% disp('is less or equal to 30.'); -% disp(' '); -% return; -% end - - -varit = giveColors(npops); -korkeinviiva = 1.05; -pieninarvo = -korkeinviiva; - -h0 = figure; -set(h0, 'NumberTitle', 'off'); %image_figure; %Muutettu -set(h0,'Tag','admixture_result'); -set(h0,'Name',['Admixture Result After Permutation - ' filename]); - -if size(popnames,1) > 150 % display only groups - for i=1:nind - popnames{i,1} = cell({'|'}); % CELL in the CELL - end -end - -for i = inliers - pool = find(partition==i); - if isempty(pool) - continue - end - chosen_ix = 1 + floor(length(pool)/2); -% if ~find(pool==chosen_one) % gap, find the other way -% chosen_one = pool(end) - floor(length(pool)/2); -% if ~find(pool==chosen_one) % then the algorithm really sucks -% chosen_one = pool(1); -% end -% end - chosen_one = pool(chosen_ix); - popnames{chosen_one,1} = {[popnames{chosen_one,1}{1} sprintf('Cluster%d',i)]}; -end - - -tiedot.popnames = popnames; -tiedot.info = data; -set(h0,'UserData',tiedot); - -set(gca,'Xlim', [-.5 ,nind+.5], 'YLim', [pieninarvo ,korkeinviiva], ... - 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); - -% Display the color bars -for i=1:nind - - if any(data(i,:)>0) - cumOsuudet = cumsum(data(i,:)); - - % Pylvään piirtäminen - for j=1:admixnpops - if j==1 - if cumOsuudet(1)>0 - h0 =patch([i-1, i, i, i-1], [0, 0, cumOsuudet(1), cumOsuudet(1)], varit(inliers(j),:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - end - else - if (cumOsuudet(j)>cumOsuudet(j-1)) - h0 = patch([i-1, i, i, i-1], [cumOsuudet(j-1), cumOsuudet(j-1), ... - cumOsuudet(j), cumOsuudet(j)], varit(inliers(j),:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - end - end - end - end -end - - -% Display the texts -if ~isempty(popnames) - npops = size(popnames,1); % NB! npops now stands for the number of individuals. - for i=1:npops - firstInd = popnames{i,2}; - if size(popnames,1)~=nind - line([firstInd-1, firstInd-1], [0,1], 'Color', 'k'); %Populaatioiden rajat - end - - % The determination of x_paikka is changed, since popnames could be - % non-sequential. - Jing - if i36 - disp(' '); - disp('Figure can be drawn only if the number of populations'); - disp('is less or equal to 36.'); - disp(' '); - return; -end - -varit = giveColors(npops); -korkeinviiva = 1.05; -pieninarvo = -korkeinviiva; - -h0 = figure; -set(h0, 'NumberTitle', 'off'); %image_figure; %Muutettu -set(h0,'Tag','admixture_result'); -set(h0,'Name',['Admixture Result - ' filename]); -tiedot.popnames = popnames; -tiedot.info = osuudet; -set(h0,'UserData',tiedot); - -set(gca, 'Xlim', [-.5 ,nind+.5], 'YLim', [pieninarvo ,korkeinviiva], ... - 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); - -for i=1:nind - - if any(osuudet(i,:)>0) - cumOsuudet = cumsum(osuudet(i,:)); - - % Pylvään piirtäminen - for j = 1:admixnpops - if j == 1 - if cumOsuudet(1)>0 - h0 =patch([i-1, i, i, i-1], [0, 0, cumOsuudet(1), cumOsuudet(1)], varit(inliers(j),:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - end - else - if (cumOsuudet(j)>cumOsuudet(j-1)) - h0 = patch([i-1, i, i, i-1], [cumOsuudet(j-1), cumOsuudet(j-1), ... - cumOsuudet(j), cumOsuudet(j)], varit(inliers(j),:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - end - end - end - end -end - - - -if ~isempty(popnames) - npops = size(popnames,1); % NB! npops now stands for the number of individuals. - for i = 1:npops - firstInd = popnames{i,2}; - if size(popnames,1)~=nind - line([firstInd-1, firstInd-1], [0,1], 'Color', 'k'); %Populaatioiden rajat - end - - % The determination of x_paikka is changed, since popnames could be - % non-sequential. - Jing - if i30 - disp(' '); - disp('Figure can be drawn only if the number of populations'); - disp('is less or equal to 30.'); - disp(' '); - return; -end - -nind = length(partition); -totalNumRows = 0; -for ind = 1:nind - totalNumRows = totalNumRows+rows(ind,2)-rows(ind,1)+1; - %totalNumRows = totalNumRows+length(rows{ind}); -end -%npops = max(partition); -npops = notEmptyPops; - -varit = giveColors(npops); -korkeinviiva = 1.05; -pieninarvo = -korkeinviiva; - -h0 = figure('NumberTitle', 'off'); %image_figure; %Muutettu -tiedot.rows = rows; -tiedot.info = partition; -tiedot.popnames = popnames; -set(h0,'UserData',tiedot); - -set(gca, 'Xlim', [-.5 ,totalNumRows+.5], 'YLim', [pieninarvo ,korkeinviiva], ... - 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); - -eiTyhjatPopulaatiot = unique(partition); - -for i=1:nind - pop = partition(i); - pop = find(eiTyhjatPopulaatiot==pop); - - % Pylväiden piirtäminen - for rivi = rows(i,1):rows(i,2) - h0 =patch([rivi-1, rivi, rivi, rivi-1], [0, 0, 1, 1], varit(pop,:)); - set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! - end -end - - -npops = size(rows,1); -for i=1:npops - firstRow = rows(i,1); - line([firstRow-1, firstRow-1], [0,1], 'Color', 'k'); %Populaatioiden rajat -end - -if ~isempty(popnames) - for i=1:npops - %rivi = rows(i}; - %x_paikka = (rivi(1)-1+rivi(end))/2; - x_paikka = (rows(i,1) - 1 + rows(i,2)) / 2; - - korkeuskerroin = pieninarvo / -0.2; - suhdekerroin = npops/6; - for letter_num = 1:length(popnames{i,1}{1}) - letter= popnames{i,1}{1}(letter_num);%alter .004| - text(x_paikka+korjaus(letter)*suhdekerroin, ... - 0.0005*korkeuskerroin-0.02*letter_num*korkeuskerroin, ... - letter, 'Interpreter','none'); - end - end -end - -line([totalNumRows,totalNumRows],[0,1],'Color','k'); - -%------------------------------------------------------------------------------------- - -function extra = korjaus(letter) - if any(letter == 'ijlI') - extra = 0.022; - elseif any(letter == 'r') - extra = 0.016; - elseif any(letter == 'k') - extra = 0.009; - elseif any(letter == 'f') - extra = 0.013; - elseif any(letter == 't') - extra = 0.014; - elseif any(letter == 'w') - extra = -0.003; - else - extra = 0; -end; diff --git a/matlab/graph/viewUnrooted.m b/matlab/graph/viewUnrooted.m deleted file mode 100644 index 85a5a12..0000000 --- a/matlab/graph/viewUnrooted.m +++ /dev/null @@ -1,61 +0,0 @@ -function viewUnrooted(action) -% VIEWUNROOTED function to called by VIEWNJ to draw unrooted -% neighbor-joining trees - -handle = gcf; -h0 = findobj(handle,'Tag','attr_menu'); -g = get(h0,'Userdata'); -g.visualtype = action; -set(h0,'Userdata',g); % store in the attribute menu -cla reset -axis off -% close(g.handle); -% h0 = figure('NumberTitle','off'); -% g.handle = h0; -% set(h0,'menubar','none','toolbar','figure'); -% set(h0,'Tag','nj_plot'); -% set(h0,'Name',['Neighbor-Joining tree - ' g.filename]);; - -if strcmp(g.type,'NJ') - t = seqNeighJoin(g.D, 'equivar', g.varnames); -else - t = seqlinkage(g.D, 'average', g.varnames); -end - -switch action - case 'radial' - % plotNJ(g.D, 1, char(g.varnames)); - Plot(t,'type','radial'); - case 'phylogram' - % plotNJ(g.D, 0, char(g.varnames)); - plotPhytree(t); -end - -% h1 = uimenu('Parent',handle, ... -% 'Label','Attributes', ... -% 'Tag','attr_menu'); -% h2 = uimenu('Parent',h1, ... -% 'Label','Rename clusters', ... -% 'callback', 'plotflow rename', ... -% 'Tag','clustername_menu'); -% h2 = uimenu('Parent',h1, ... -% 'Label','Visual type', ... -% 'Tag','visualtype_menu'); -% h3 = uimenu('Parent',h2, ... -% 'Label','Square', ... -% 'callback', 'viewDendrogram(''square'')', ... -% 'Tag','viewsquare_menu'); -% h3 = uimenu('Parent',h2, ... -% 'Label','Angular', ... -% 'callback', 'viewDendrogram(''angular'')', ... -% 'Tag','viewangular_menu'); -% h3 = uimenu('Parent',h2, ... -% 'Label','Radial', ... -% 'callback', 'viewUnrooted(''radial'')', ... -% 'Tag','viewradial_menu'); -% h3 = uimenu('Parent',h2, ... -% 'Label','Phylogram', ... -% 'callback', 'viewUnrooted(''phylogram'')', ... -% 'Tag','viewphylogram_menu'); -% set(h1,'Userdata',g); % store in the attribute menu - diff --git a/matlab/graph/view_admixture.m b/matlab/graph/view_admixture.m deleted file mode 100644 index 41e51e3..0000000 --- a/matlab/graph/view_admixture.m +++ /dev/null @@ -1,254 +0,0 @@ -function view_admixture(proportionsIt,npops,admixnpops,... - popnames,partition,pvalue,source) - -% Find the outlier individuals -% removed_inds = find(~any(proportionsIt,2)); -removed_inds = logical(~any(proportionsIt,2)); -outliers = unique(partition(removed_inds)); -if ~isempty(outliers) - fprintf('Outlier clusters: '); - for i = 1:length(outliers) - fprintf(['%s' blanks(2)], num2str(outliers(i))); - end - fprintf('\n'); -end - -% total_clusters = [1:max(partition)]; -total_clusters = [1:npops]; -inliers = setdiff(total_clusters, outliers); -if length(inliers)~=admixnpops - disp('*** ERROR: in view linkage admixture.'); - return -end - -if isempty(popnames) || size(popnames,1)==size(partition,1) - - groupnames = cell(1,admixnpops); - for i=1:admixnpops - groupnames{i} = sprintf('Cluster %d',inliers(i)); - end - waitALittle; - [s,v] = listdlg('PromptString',[sprintf('%d ',admixnpops) 'Clusters available:'],... - 'SelectionMode','multiple',... - 'Name','Select Clusters',... - 'ListString',groupnames); - if isempty(s) || ~v - disp('*** WARNING: Viewing admixture cancelled.'); - return - else - fprintf('Viewing admixture in cluster(s): %s\n',num2str(inliers(s))); - end - - waitALittle; - answer = inputdlg( ['Input the p value for the strain admixture. Strains with '... - 'nonsignificant admixture will be displayed as a single color bar'],... - 'Input the upper bound p value',1,{'0.05'}); - if isempty(answer) % cancel has been pressed - return - else - maxp = str2num(answer{1}); - fprintf('P value: %4.2f\n', maxp); - - % nonsignificant strains are forced to be single color bar. - nonsignificant_ix = find( (pvalue>maxp & pvalue <=1) ); - if ~isempty(nonsignificant_ix) - for i = nonsignificant_ix' - % incluster = find(proportionsIt(i,:)==max(proportionsIt(i,:))); - incluster = logical(proportionsIt(i,:)==max(proportionsIt(i,:)) & proportionsIt(i,:)~=0); - proportionsIt(i,:) = zeros(1, admixnpops); - proportionsIt(i,incluster) = 1; - end - end - - ix = find(ismember(partition(:),inliers(s))); - % if isempty(ix) - % disp('*** ERROR: No significant data. Try again with a higher p value.'); - % return - % end - proportionsIt_raw = proportionsIt; - partition_raw = partition; - proportionsIt = proportionsIt(ix,:); - if isempty(popnames) || size(popnames,1)==size(partition,1) - popnames = popnames(ix,:); - end - partition = partition(ix); - pvalue = pvalue(ix); - end -else - waitALittle; - answer = inputdlg( ['Input the p value for the strain admixture. Strains with '... - 'nonsignificant admixture will be displayed as a single color bar'],... - 'Input the upper bound p value',1,{'0.05'}); - if isempty(answer) % cancel has been pressed - return - else - maxp = str2num(answer{1}); - fprintf('P value: %4.2f\n', maxp); - - % nonsignificant strains are forced to be single color bar. - nonsignificant_ix = find( (pvalue>maxp & pvalue <=1) ); - if ~isempty(nonsignificant_ix) - for i = nonsignificant_ix' - % incluster = find(proportionsIt(i,:)==max(proportionsIt(i,:))); - incluster = logical(proportionsIt(i,:)==max(proportionsIt(i,:)) & proportionsIt(i,:)~=0); - proportionsIt(i,:) = zeros(1, admixnpops); - proportionsIt(i,incluster) = 1; - end - end - - % ix = find(ismember(partition(:),inliers(s))); - % if isempty(ix) - % disp('*** ERROR: No significant data. Try again with a higher p value.'); - % return - % end - % proportionsIt_raw = proportionsIt; - % partition_raw = partition; - % proportionsIt = proportionsIt(ix,:); -% if isempty(popnames) || size(popnames,1)==size(partition,1) -% popnames = popnames(ix,:); -% end -% partition = partition(ix); -% pvalue = pvalue(ix); - end - -end - - - -talle = questdlg(['Do you want names to be visible in the admixture ' ... - 'result graphics?'], 'Names visible?', 'Yes', 'No', 'Yes'); - -if isequal(talle,'No') - if isempty(popnames) || size(popnames,1)==size(partition,1) - viewPartition4(proportionsIt, [], npops, ... - admixnpops, inliers, partition, source); - else - viewPartition2(proportionsIt, [], admixnpops, partition, source); - end -else - if isempty(popnames) || size(popnames,1)==size(partition,1) - viewPartition4(proportionsIt, popnames, npops, ... - admixnpops, inliers, partition, source); - else - viewPartition2(proportionsIt, popnames, admixnpops, partition, source); - end -end - -if isempty(popnames) || size(popnames,1)==size(partition,1) - talle = questdlg(['Permutate the labelling orders to get a more structured graphics? '], 'Reorder the sample?', 'Yes', 'No', 'Yes'); - - if isequal(talle,'Yes') - h0 = findobj('Tag','admixture_result'); - if ~isempty(h0) - close(h0); - % drawnow; - waitALittle; - [partition_ordered, proportionsIt,popnames,pvalue] = ... - viewPartition3(proportionsIt, popnames, npops, admixnpops, ... - inliers, partition, pvalue, source); - % c.mixtureType = 'linkage_mix'; - % c.npops = npops; - talle = questdlg('Save the ordered admixture result in text?', ... - 'Save results?','Yes','No','Yes'); - if isequal(talle,'Yes') - waitALittle; - [filename, pathname] = uiputfile('*.txt','Save results as'); - if isempty(filename) && isempty(pathname) - % Cancel was pressed - return - else - fprintf(1,'Saving the admixture result...\n'); - dest = [pathname filename]; - clusters = inliers(s); - tulostaAdmixtureTiedot(popnames, proportionsIt, pvalue, source, dest,... - maxp,clusters); - end - end - - else - disp('***ERROR: In plotting the admixture results.'); - return; - end - drawnow; - end -end - - -%-------------------------------------------------------------------------- - - -function tulostaAdmixtureTiedot(popnames, proportions, uskottavuus, ... - source, dest, maxp,clusters) - -fid = fopen(dest,'w'); -ninds = length(uskottavuus); -npops = size(proportions,2); - -if fid ~= -1 - fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['RESULTS OF ADMIXTURE ANALYSIS BASED']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['ON MIXTURE CLUSTERING OF INDIVIDUALS']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Source file: ' source]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Total number of clusters: ' npops]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Viewing admixture in cluster(s): ',num2str(clusters)]);fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Number of individuals in selection: ' num2str(ninds)]); fprintf(fid, '\n'); - fprintf(fid,'P value: %4.2f \n', maxp); fprintf(fid, '\n'); - fprintf(fid, '\n'); -end - -namelength = zeros(1,ninds); -for ind = 1:ninds - namelength(ind) = length(popnames{ind,1}{1}); -end -maxlength = max(namelength); -ekaRivi = ['Index' blanks(2) blanks(maxlength-4) 'Name' blanks(2)]; -for pop = 1:npops - ekaRivi = [ekaRivi blanks(3-floor(log10(pop))) num2str(pop) blanks(2)]; -end -ekaRivi = [ekaRivi blanks(2) 'p']; % Added on 29.08.06 -disp(ekaRivi); -fprintf(fid, '%s \n',ekaRivi); fprintf(fid,'\n'); -for ind = 1:ninds - index = num2str(popnames{ind,2}); - namelength=['%' num2str(2+maxlength) 's']; - rivi = [sprintf('%5s',index) sprintf(namelength,popnames{ind,1}{1})... - blanks(2)]; - if any(proportions(ind,:)>0) - for pop = 1:npops-1 - rivi = [rivi proportion2str(proportions(ind,pop)) blanks(2)]; - end - rivi = [rivi proportion2str(proportions(ind,npops))]; - rivi = [rivi blanks(2) ownNum2Str(uskottavuus(ind))]; - end - disp(rivi); - if fid ~= -1 - fprintf(fid,'%s \n',rivi); fprintf(fid,'\n'); - end -end -fclose(fid); -fprintf(1,'finished.\n'); - -%-------------------------------------------------------------------------- -% function dispLine -% disp('---------------------------------------------------'); - -%-------------------------------------------------------------------------- -function str = proportion2str(prob) -%prob belongs to [0.00, 0.01, ... ,1]. -%str is a 4-mark presentation of proportion. - -if abs(prob)<1e-3 - str = '0.00'; -elseif abs(prob-1) < 1e-3; - str = '1.00'; -else - prob = round(100*prob); - if prob<10 - str = ['0.0' num2str(prob)]; - else - str = ['0.' num2str(prob)]; - end; -end; - - diff --git a/matlab/graph/view_energy.m b/matlab/graph/view_energy.m deleted file mode 100644 index a88dcc2..0000000 --- a/matlab/graph/view_energy.m +++ /dev/null @@ -1,488 +0,0 @@ -function view_energy() - -%waitALittle; -% [filename1, pathname1] = uigetfile('*.mat', 'Load mixture results.'); -% if (sum(filename1)==0) || (sum(pathname1)==0) -% return; -% end -% struct_array = load([pathname1 filename1]); - -h0 = findobj('Tag','load_menu'); -c = get(h0,'UserData'); -h0 = findobj('Tag','filename1_text'); -filename1 = get(h0,'String'); -disp('---------------------------------------------------'); -disp('Viewing the energy lanscape (Kolmogorov–Smirnov test).'); -disp(['Load the mixture result from: ',[filename1],'...']); - - if isfield(c,'c') %Matlab versio - c = c.c; - if ~isfield(c,'mixtureType') - disp('*** ERROR: Incorrect file format'); - return - end - %else - % disp('*** ERROR: Incorrect file format'); - % return; - end -if ~isfield(c,'changesInLogml') - disp('*** WARNING: Old mixture format detected.'); - if 0 % Disabled 12/06/07, Jukka Siren - switch c.mixtureType - case 'linkage_mix' - %waitALittle; - [filename, pathname] = uigetfile('*.mat', 'Load the corresponding preprocessed data.'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - struct_array = load([pathname filename]); - disp('The corresponding preprocessed data is needed.'); - disp(['Load the preprocessed data from: ',[pathname filename], '...']); - if isfield(struct_array,'c') - c_raw = struct_array.c; - if ~all(size(c_raw.adjprior)==size(c.adjprior)) ||... - ~all(size(c_raw.data)==size(c.data)) - disp('Mixture result and preprocessed data do not match.'); - return; - end - - if ~isfield(c_raw,'linkage_model') - model_entry = input('Specify the linkage model(1-linear 2-codon):'); - switch model_entry, - case 1 - linkage_model = 'linear'; - case 2 - linkage_model = 'codon'; - end - else - linkage_model = c_raw.linkage_model; - end - else - disp('*** ERROR: Incorrect file format'); - return; - end - disp('Start calculating log likelihood. Please wait...'); - npops = c.npops; - partition = c.PARTITION; - cq_counts = c.CQ_COUNTS; - cq_sumcounts = c.CQ_SUMCOUNTS; - sp_counts = c.SP_COUNTS; - sp_sumcounts = c.SP_SUMCOUNTS; - - % Shrink the raw data - noalle_cq = size(c.adjprior_cq,1); - informative_cq = logical(sum(c.adjprior_cq)~=noalle_cq); - noalle_sp = size(c.adjprior_sp,1); - informative_sp = logical(sum(c.adjprior_sp)~=noalle_sp); - cq_counts = uint16(cq_counts(:,informative_cq,:)); - cq_sumcounts = uint16(cq_sumcounts(:,informative_cq)); - sp_counts = uint16(sp_counts(:,informative_sp,:)); - sp_sumcounts = uint16(sp_sumcounts(:,informative_sp)); - - data = c_raw.data; - ninds = size(data,1); - component_mat = c_raw.component_mat; - clear c_raw; - index = data(:,end); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - - if length(noalle_clique)~=length(find(informative_cq)) - disp('*** ERROR: The linkage model is not consistent with the data.'); - return - end - - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - [counts_cq, nalleles_cq, prior_cq, adjprior_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - clear data_clique; - [counts_sp, nalleles_sp, prior_sp, adjprior_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - clear data_separator; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - - pop_logml = computePopulationLogml(double(cq_counts), double(cq_sumcounts),... - double(sp_counts), double(sp_sumcounts),... - [1:npops], adjprior_cq, adjprior_sp); - - changesInLogml = zeros(npops,ninds); - for ind = 1:ninds - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - changesInLogml(:,ind) = computeChanges(cq_counts, cq_sumcounts,... - sp_counts, sp_sumcounts,... - partition, pop_logml,... - ind, adjprior_cq, ... - adjprior_sp, indCqCounts, indSpCounts); - end - case 'mix' % Independence model. - %waitALittle; - [filename, pathname] = uigetfile('*.mat', 'Load the corresponding preprocessed data.'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - struct_array = load([pathname filename]); - disp('The corresponding preprocessed data is needed.'); - disp(['Load the preprocessed data from: ',[pathname filename], '...']); - if isfield(struct_array,'c') - c_raw = struct_array.c; - if ~all(size(c_raw.adjprior)==size(c.adjprior)) ||... - ~all(size(c_raw.data(:,[1:end-1]))==size(c.data)) - disp('Mixture result and preprocessed data do not match.'); - return; - end - else - disp('*** ERROR: Incorrect file format'); - return; - end - - - npops = c.npops; - pops = 1:npops; - rowsFromInd = c.rowsFromInd; - data = c.data; - ninds = size(data,1)/rowsFromInd; - adjprior = c.adjprior; - priorTerm = c_raw.priorTerm; - COUNTS = c.COUNTS; - SUMCOUNTS = c.SUMCOUNTS; - PARTITION = c.PARTITION; - POP_LOGML = computePopulationLogml2(pops, adjprior, priorTerm, ... - COUNTS, SUMCOUNTS); - changesInLogml = zeros(npops,ninds); - for ind = 1:ninds - [muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, data, ... - adjprior, priorTerm, COUNTS, SUMCOUNTS, PARTITION, POP_LOGML); - changesInLogml(:,ind) = muutokset; - end - otherwise - disp('This model is under construction.'); - return - end - - - c.changesInLogml = changesInLogml; - - fprintf(1,'Saving the result...') - %waitALittle; - save_preproc = questdlg('Do you wish to save the updated mixture result?',... - 'Save mixture results?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save mixture result as'); - if isempty(filename) && isempty(pathname) - return; - else - kokonimi = [pathname filename]; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - filename1 = filename; - end - - end; - fprintf(1,'Finished.\n'); - end - return -end - -if isequal(c.mixtureType,'spatialPop') || isequal(c.mixtureType,'popMix') - % Group level clustering, using the correct partition - view_density(c.changesInLogml, c.groupPartition, filename1); -else - view_density(c.changesInLogml, c.PARTITION, filename1); -end - - -% ------------------------------------------------------------------------- -function view_density(changesInLogml, partition, filename) -npops = size(changesInLogml,1); -groupnames = cell(1,npops+1); -for i=1:npops - groupnames{i} = sprintf('Cluster %d',i); -end - -disp('Genetic affinity matrix (row=source col=target): test statistic(p value) '); -ekarivi = 'Cluster '; -for i = 1:npops - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; -end -disp(ekarivi); - - -for i=1:npops - indsi = logical(partition==i); % choose cluster i - nindsi = sum(indsi); - rivi = [blanks(4-floor(log10(i))) num2str(i) ':']; - - h = zeros(npops,1); % indicator 0 same 1 different - h1 = h; - p = zeros(npops,1); % p values - k = zeros(npops,1); % test statistic - k1 = k; - p2 = zeros(npops,1); - for j=1:npops - indsj = logical(partition==j); - [h(j), p(j), k(j)] = kstest2(zscore(changesInLogml(j,indsi)), zscore(changesInLogml(i,indsj))); - [h1(j), p1(j), k1(j)] = kstest2(zscore(changesInLogml(j,indsi)), zscore(changesInLogml(i,indsj)),0.05,'larger'); - [h2(j), p2(j), k2(j)] = kstest2(zscore(changesInLogml(j,indsi)), zscore(changesInLogml(i,indsj)),0.05,'smaller'); - - - if k(j)==k2(j) - k(j) = -k(j); - - end - % [h2(j),p2(j),k2(j)] = kstest2(changesInLogml(j,indsi), changesInLogml(i,indsj),0.05,'smaller'); - % nonzero(i,j) = k1(j)+k2(j); - % nonzero(i,j) = - % (k(j)*(h1(j)-0.5))*(mean(changesInLogml(j,indsi)-mean(changesInLogml(i,indsj)))); - nonzero(i,j) = k(j); - rivi = [rivi ' ' num2str(nonzero(i,j),'%5.4f') '(' num2str(p(j),'%5.4f') ')']; - end - disp(rivi); - - % sibling = setdiff(find(h==0),i); - - % parent = find(p==min(p)); -% fprintf(1,'Cluster %d ',i); -% fprintf(1,'%s(%f) ',mat2str(parent),min(p)); -% -% offspring = find(p2==min(p2)); -% fprintf(1,'%s(%f)\n',mat2str(offspring), min(p2)); -end - -% groupnames{npops+1} = sprintf('All clusters'); -% %waitALittle; -% [s1,v1] = listdlg('PromptString','Select one source cluster:',... -% 'SelectionMode','single',... -% 'Name','Select source cluster',... -% 'ListString',groupnames); -% if isempty(s1) || ~v1 -% disp('*** WARNING: Viewing loglikelihood cancelled.'); -% return -% elseif s1==npops+1 -% view_all_density(changesInLogml, partition, filename); -% else -% remain_pop = logical((1:npops)~=s1); -% %waitALittle; -% [s2,v2] = listdlg('PromptString', 'Select target clusters:',... -% 'SelectionMode','multiple',... -% 'Name','Select target cluster',... -% 'ListString',groupnames(remain_pop)); -% if isempty(s2) || ~v2 -% disp('*** WARNING: Viewing loglikelihood cancelled.'); -% return -% end -% inds = logical(partition==s1); -% ninds = sum(inds); % individuals in the source cluster -% remain = find(remain_pop); -% fprintf('Source cluster: %d\n', s1); -% fprintf('Number of strains: %d\n',ninds); -% fprintf('Target cluster(s): %s\n', num2str(remain(s2))); -% -% f = zeros(length(s2),100); -% xi = zeros(length(s2),100); -% for i = 1:length(s2) -% [f(i,:),xi(i,:)] = ksdensity_myown(changesInLogml(remain(s2(i)),inds)'); -% end -% -% map = giveColors(npops); -% h0 = figure('NumberTitle', 'off'); % density plot; -% set(h0,'Tag','density_plot'); -% set(h0,'Name',['Density of log likelihood changes - ' filename]); -% set(gca,'ColorOrder',map(remain(s2),:)); -% hold on; -% plot(xi',f'); -% set(gca,'xlim',[min(min(xi)),max(max(xi))]); -% legend(groupnames(remain(s2))); -% xlabel('Change of log likelihood'); -% ylabel('Estimated density'); -% title(sprintf('Cluster %d',s1)); -% -% % h1 = figure('NumberTitle', 'off'); % histogram plot; -% % set(h1,'Tag','histogram_plot'); -% % set(h1,'Name',['Histogram of log likelihood changes - ' filename]); -% % hist(changesInLogml(remain(s2),inds)'); -% % colormap(map(remain(s2),:)); -% % % histfit(changesInLogml(remain(s2),inds)'); % need statistical toolbox -% % legend(groupnames(remain(s2))); -% % xlabel('Change of log likelihood'); -% % ylabel('Frequency'); -% % title(sprintf('Cluster %d',s1)); -% % rose(changesInLogml(remain(s2),inds)); -% end - - -%--------------------------------------% -%%% functions for linkage model %%% -%--------------------------------------- - -%-------------------------------------------------------------------------- - -function changes = computeChanges(cq_counts, cq_sumcounts,... - sp_counts, sp_sumcounts,... - partition, pop_logml,... - ind, adjprior_cq, adjprior_sp, ... - indCqCounts, indSpCounts) -% Computes changes in log-marginal likelihood if individual ind is -% moved to another population -% -% Input: -% ind - the individual to be moved -% adjprior_cq & _sp - adjpriors for cliques and separators -% indCqCounts, indSpCounts - counts for individual ind -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% ind is move to population i. - -npops = size(cq_counts,3); -changes = zeros(npops,1); - -i1 = partition(ind); -i1_logml = pop_logml(i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -cq_counts(:,:,i1) = cq_counts(:,:,i1)-indCqCounts; -cq_sumcounts(i1,:) = cq_sumcounts(i1,:)-sumCq; -sp_counts(:,:,i1) = sp_counts(:,:,i1)-indSpCounts; -sp_sumcounts(i1,:) = sp_sumcounts(i1,:)-sumSp; - -new_i1_logml = computePopulationLogml(double(cq_counts), double(cq_sumcounts),... - double(sp_counts), double(sp_sumcounts),... - i1, adjprior_cq, adjprior_sp); - -cq_counts(:,:,i1) = cq_counts(:,:,i1)+indCqCounts; -cq_sumcounts(i1,:) = cq_sumcounts(i1,:)+sumCq; -sp_counts(:,:,i1) = sp_counts(:,:,i1)+indSpCounts; -sp_sumcounts(i1,:) = sp_sumcounts(i1,:)+sumSp; - - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = pop_logml(i2); - -cq_counts(:,:,i2) = cq_counts(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -cq_sumcounts(i2,:) = cq_sumcounts(i2,:)+repmat(sumCq,[npops-1 1]); -sp_counts(:,:,i2) = sp_counts(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -sp_sumcounts(i2,:) = sp_sumcounts(i2,:) + repmat(sumSp,[npops-1 1]); - -new_i2_logml = computePopulationLogml(double(cq_counts), double(cq_sumcounts),... - double(sp_counts), double(sp_sumcounts),... - i2, adjprior_cq, adjprior_sp); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%-------------------------------------------------------------------------- - -function popLogml = computePopulationLogml(cq_counts, cq_sumcounts,... - sp_counts, sp_sumcounts,... - pops, adjprior_cq, adjprior_sp) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - - -nall_cq = size(cq_counts,1); -nall_sp = size(sp_counts, 1); -ncliq = size(cq_counts,2); -nsep = size(sp_counts, 2); - -z = length(pops); - -popLogml_cq = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_cq,[1 1 length(pops)]) + cq_counts(:,:,pops)) ... - ,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_cq))); - -popLogml_sp = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_sp,[1 1 length(pops)]) + sp_counts(:,:,pops)) ... - ,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_sp))); - -popLogml = popLogml_cq - popLogml_sp; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%-------------------------------------------------------------------------- - - -%--------------------------------------% -%%% functions for independence model %%% -%--------------------------------------- -%-------------------------------------------------------------------------- -function [muutokset, diffInCounts] = ... - laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm, ... - COUNTS, SUMCOUNTS, PARTITION, POP_LOGML) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?ind siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); - -rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml2(i1, adjprior, priorTerm,COUNTS, SUMCOUNTS); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml2(i2, adjprior, priorTerm,COUNTS, SUMCOUNTS); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - -%-------------------------------------------------------------------------- -function popLogml = computePopulationLogml2(pops, adjprior, priorTerm, ... - COUNTS, SUMCOUNTS) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - - -%-------------------------------------------------------------------------- -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - diff --git a/matlab/graph/view_geneflow.m b/matlab/graph/view_geneflow.m deleted file mode 100644 index 21e8631..0000000 --- a/matlab/graph/view_geneflow.m +++ /dev/null @@ -1,183 +0,0 @@ -function view_geneflow - -%waitALittle; -% [filename, pathname] = uigetfile('*.mat', 'Load admixture results.'); -% if (sum(filename)==0) || (sum(pathname)==0) -% return; -% end - -h0 = findobj('Tag','load_menu'); -c = get(h0,'UserData'); -h0 = findobj('Tag','filename1_text'); -filename = get(h0,'String'); - -% struct_array = load([pathname filename]); -disp('---------------------------------------------------'); -disp('Viewing the gene flow.'); -disp(['Load the admixture result from: ',[filename],'...']); -% if isfield(struct_array,'c') %Matlab versio -% c = struct_array.c; -% if ~isfield(c,'proportionsIt') -% disp('*** ERROR: Incorrect file format'); -% return -% end -% elseif isfield(struct_array,'proportionsIt') %Mideva versio -% c = struct_array; -% if ~isfield(c,'proportionsIt') -% disp('*** ERROR: Incorrect file format'); -% return -% end -% else -% disp('*** ERROR: Incorrect file format'); -% return; -% end - -proportionsIt = c.proportionsIt; -popnames = c.popnames; partition = c.PARTITION; - -% if isempty(popnames) || size(popnames,1)==size(partition,1) % bacteria/spatial - if isempty(popnames) - ninds = size(partition,1); - popnames=cell(ninds,2); - for ind=1:ninds - popnames{ind,1}=cellstr(num2str(ind)); - end - popnames(:,2)=num2cell((1:ninds)'); - end - - npops = c.npops; - if isfield(c,'admixnpops') - admixnpops = c.admixnpops; - else % if the admixture result is based on pre-defined partitions. - admixnpops = npops; - end - - if ~isfield(c,'pvalue') % compatiable with old data - disp('*** WARNING: Old admixture format detected.'); - disp('*** WARNING: pvalue is not found in the admixture result.'); - disp('*** WARNING: all the admixture will be significant.'); - pvalue = ones(size(partition,1),1); - else - pvalue = c.pvalue; - end - - removed_inds = logical(~any(proportionsIt,2)); - outliers = unique(partition(removed_inds)); - total_clusters = [1:npops]; - if ~isempty(outliers) - fprintf('Outlier clusters: '); - for i = 1:length(outliers) - fprintf(['%s' blanks(2)], num2str(outliers(i))); - end - fprintf('\n'); - inliers = setdiff(total_clusters, outliers); - else - inliers = total_clusters; - end - - if length(inliers)~=admixnpops - disp('*** ERROR: in view linkage admixture.'); - return - end - - groupnames = cell(1,admixnpops); - for i=1:admixnpops - groupnames{i} = sprintf('Cluster %d',inliers(i)); - end - - %waitALittle; - answer = inputdlg( ['Input the p value for the strain admixture. Strains with '... - 'nonsignificant admixture will be displayed as a single color bar'],... - 'Input the upper bound p value',1,{'0.05'}); - if isempty(answer) % cancel has been pressed - return - else - maxp = str2num(answer{1}); - fprintf('P value: %4.2f\n', maxp); - - % nonsignificant strains are forced to be single color bar. - nonsignificant_ix = find( (pvalue>maxp & pvalue <=1) ); - if ~isempty(nonsignificant_ix) - for i = nonsignificant_ix' - incluster = logical(proportionsIt(i,:)==max(proportionsIt(i,:))); - proportionsIt(i,:) = zeros(1, admixnpops); - proportionsIt(i,incluster) = 1; - end - end - - ix = find(ismember(partition(:),inliers)); - proportionsIt = proportionsIt(ix,:); - partition = partition(ix); - end - - % Calculate the gene flow weight. - % W[j,i]: the gene flow from cluster j to i. - W = ones(admixnpops,admixnpops); - for i=1:admixnpops - inds = logical(partition==inliers(i)); - ninds = sum(inds); - for j = 1:admixnpops - W(j,i) = sum(proportionsIt(inds,j))/ninds; - end - end - disp('Gene flow matrix (row=source col=target): '); - ekarivi = 'Cluster '; - for i = 1:admixnpops - ekarivi = [ekarivi ownNum2Str(inliers(i)) blanks(8-floor(log10(i)))]; - end - disp(ekarivi); - for i = 1:admixnpops - rivi = [blanks(4-floor(log10(i))) num2str(inliers(i)) ':']; - for j = 1:admixnpops - rivi = [rivi ' ' num2str(omaRound(W(i,j)),'%5.4f')]; - end - disp(rivi); - end - - disp('Generating the gene flow graph. Please wait...'); - % Show graph -% %waitALittle; -% talle = questdlg(['Do you want to view the gene flow graph?' ... -% ], 'Visualization?', 'Yes', 'No', 'Yes'); -% if isequal(talle,'No') -% return -% else - h0 = findobj('Tag','geneflow_menu'); - graphviz_path = get(h0,'Userdata'); - if isempty(graphviz_path) - %waitALittle; - graphviz_path = uigetdir('','Specify the location of dot.exe in the GraphViz package: '); - if graphviz_path == 0 - return; - else - h0 = findobj('Tag','geneflow_menu'); - set(h0,'Userdata',graphviz_path); - end - end - % store the current directory. - d = cd; - try - graphvis2(W,filename,inliers,graphviz_path,npops); - disp('Finished.'); - catch - disp('*** ERROR: dot.exe was not found.'); - h_errdlg = errordlg('dot.exe was not found in the specified path.'); - set(h_errdlg,'WindowStyle','Modal') - set(h0,'Userdata',[]); - end - cd(d); -% end -% else -% disp('This module is under developpment.'); -% end - -% ------------------------------------------------------------------------- -function num2 = omaRound(num) -% Pyöristää luvun num 1 desimaalin tarkkuuteen -num = num*10000; -num = round(num); -num2 = num/10000; - - - - diff --git a/matlab/graph/view_loglikelihood.m b/matlab/graph/view_loglikelihood.m deleted file mode 100644 index 9c0c1dd..0000000 --- a/matlab/graph/view_loglikelihood.m +++ /dev/null @@ -1,476 +0,0 @@ -function view_loglikelihood -%waitALittle; -% [filename1, pathname1] = uigetfile('*.mat', 'Load mixture results.'); -% if (sum(filename1)==0) || (sum(pathname1)==0) -% return; -% end -% struct_array = load([pathname1 filename1]); - -h0 = findobj('Tag','load_menu'); -c = get(h0,'UserData'); -h0 = findobj('Tag','filename1_text'); -filename1 = get(h0,'String'); -disp('---------------------------------------------------'); -disp('Viewing the change of log likelihood.'); -disp(['Load the mixture result from: ',[filename1],'...']); - - if isfield(c,'c') %Matlab versio - c = c.c; - if ~isfield(c,'mixtureType') - disp('*** ERROR: Incorrect file format'); - return - end - %else - % disp('*** ERROR: Incorrect file format'); - % return; - end -if ~isfield(c,'changesInLogml') - disp('*** WARNING: Old mixture format detected.'); - if 0 % Disabled 12/06/07, Jukka Siren - switch c.mixtureType - case 'linkage_mix' - %waitALittle; - [filename, pathname] = uigetfile('*.mat', 'Load the corresponding preprocessed data.'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - struct_array = load([pathname filename]); - disp('The corresponding preprocessed data is needed.'); - disp(['Load the preprocessed data from: ',[pathname filename], '...']); - if isfield(struct_array,'c') - c_raw = struct_array.c; - if ~all(size(c_raw.adjprior)==size(c.adjprior)) ||... - ~all(size(c_raw.data)==size(c.data)) - disp('Mixture result and preprocessed data do not match.'); - return; - end - - if ~isfield(c_raw,'linkage_model') - model_entry = input('Specify the linkage model(1-linear 2-codon):'); - switch model_entry, - case 1 - linkage_model = 'linear'; - case 2 - linkage_model = 'codon'; - end - else - linkage_model = c_raw.linkage_model; - end - else - disp('*** ERROR: Incorrect file format'); - return; - end - disp('Start calculating log likelihood. Please wait...'); - npops = c.npops; - partition = c.PARTITION; - cq_counts = c.CQ_COUNTS; - cq_sumcounts = c.CQ_SUMCOUNTS; - sp_counts = c.SP_COUNTS; - sp_sumcounts = c.SP_SUMCOUNTS; - - % Shrink the raw data - noalle_cq = size(c.adjprior_cq,1); - informative_cq = logical(sum(c.adjprior_cq)~=noalle_cq); - noalle_sp = size(c.adjprior_sp,1); - informative_sp = logical(sum(c.adjprior_sp)~=noalle_sp); - cq_counts = uint16(cq_counts(:,informative_cq,:)); - cq_sumcounts = uint16(cq_sumcounts(:,informative_cq)); - sp_counts = uint16(sp_counts(:,informative_sp,:)); - sp_sumcounts = uint16(sp_sumcounts(:,informative_sp)); - - data = c_raw.data; - ninds = size(data,1); - component_mat = c_raw.component_mat; - clear c_raw; - index = data(:,end); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - - if length(noalle_clique)~=length(find(informative_cq)) - disp('*** ERROR: The linkage model is not consistent with the data.'); - return - end - - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - [counts_cq, nalleles_cq, prior_cq, adjprior_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - clear data_clique; - [counts_sp, nalleles_sp, prior_sp, adjprior_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - clear data_separator; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - - pop_logml = computePopulationLogml(double(cq_counts), double(cq_sumcounts),... - double(sp_counts), double(sp_sumcounts),... - [1:npops], adjprior_cq, adjprior_sp); - - changesInLogml = zeros(npops,ninds); - for ind = 1:ninds - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - changesInLogml(:,ind) = computeChanges(cq_counts, cq_sumcounts,... - sp_counts, sp_sumcounts,... - partition, pop_logml,... - ind, adjprior_cq, ... - adjprior_sp, indCqCounts, indSpCounts); - end - case 'mix' % Independence model. - %waitALittle; - [filename, pathname] = uigetfile('*.mat', 'Load the corresponding preprocessed data.'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - struct_array = load([pathname filename]); - disp('The corresponding preprocessed data is needed.'); - disp(['Load the preprocessed data from: ',[pathname filename], '...']); - if isfield(struct_array,'c') - c_raw = struct_array.c; - if ~all(size(c_raw.adjprior)==size(c.adjprior)) ||... - ~all(size(c_raw.data(:,[1:end-1]))==size(c.data)) - disp('Mixture result and preprocessed data do not match.'); - return; - end - else - disp('*** ERROR: Incorrect file format'); - return; - end - - - npops = c.npops; - pops = 1:npops; - rowsFromInd = c.rowsFromInd; - data = c.data; - ninds = size(data,1)/rowsFromInd; - adjprior = c.adjprior; - priorTerm = c_raw.priorTerm; - COUNTS = c.COUNTS; - SUMCOUNTS = c.SUMCOUNTS; - PARTITION = c.PARTITION; - POP_LOGML = computePopulationLogml2(pops, adjprior, priorTerm, ... - COUNTS, SUMCOUNTS); - changesInLogml = zeros(npops,ninds); - for ind = 1:ninds - [muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, data, ... - adjprior, priorTerm, COUNTS, SUMCOUNTS, PARTITION, POP_LOGML); - changesInLogml(:,ind) = muutokset; - end - otherwise - disp('This model is under construction.'); - return - end - - - c.changesInLogml = changesInLogml; - - fprintf(1,'Saving the result...') - %waitALittle; - save_preproc = questdlg('Do you wish to save the updated mixture result?',... - 'Save mixture results?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save mixture result as'); - if isempty(filename) && isempty(pathname) - return; - else - kokonimi = [pathname filename]; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - filename1 = filename; - end - - end; - fprintf(1,'Finished.\n'); - end - return -end - -if isequal(c.mixtureType,'spatialPop') || isequal(c.mixtureType,'popMix') - % Group level clustering, using the correct partition - view_density(c.changesInLogml, c.groupPartition, filename1); -else - view_density(c.changesInLogml, c.PARTITION, filename1); -end - -% ------------------------------------------------------------------------- -function view_all_density(changesInLogml, partition, filename) -npops = size(changesInLogml,1); -groupnames = cell(1,npops); -for i=1:npops - groupnames{i} = sprintf('Cluster %d',i); -end -nrows = npops*(npops-1); -f = zeros(nrows,100); -xi = zeros(nrows,100); -map = giveColors(npops); -h = zeros(npops,1); - -k = 1; -for i = 1:npops - inds = logical(partition==i); - % ninds = sum(inds); % individuals in the source cluster - target_pop = find(logical((1:npops)~=i)); - for j = target_pop - [f(k,:),xi(k,:)] = ksdensity_myown(changesInLogml(j,inds)'); - k = k+1; - end - h(i) = figure('NumberTitle', 'off', 'menubar','none','toolbar','figure'); % density plot; - set(h(i),'Tag','density_plot'); - set(h(i),'Name',sprintf('%d Density of log likelihood changes - %s',i, filename)); - set(gca,'ColorOrder',map(target_pop,:)); - hold on; - plot(xi([(i-1)*(npops-1)+1:i*(npops-1)],:)',f([(i-1)*(npops-1)+1:i*(npops-1)],:)'); - - legend(groupnames(target_pop)); - xlabel('Change of log likelihood'); - ylabel('Estimated density'); - title(sprintf('Cluster %d',i)); -end - -x_min = min(min(xi)); -x_max = max(max(xi)); -for i = 1:npops - set(get(h(i),'CurrentAxes'),'xlim',[x_min, x_max]); -end - -% ------------------------------------------------------------------------- -function view_density(changesInLogml, partition, filename) -npops = size(changesInLogml,1); -groupnames = cell(1,npops+1); -for i=1:npops - groupnames{i} = sprintf('Population %d',i); -end -groupnames{npops+1} = sprintf('All clusters'); -%waitALittle; -[s1,v1] = listdlg('PromptString','Select one source cluster:',... - 'SelectionMode','single',... - 'Name','Select source cluster',... - 'ListString',groupnames); -if isempty(s1) || ~v1 - disp('*** WARNING: Viewing loglikelihood cancelled.'); - return -elseif s1==npops+1 - view_all_density(changesInLogml, partition, filename); -else - remain_pop = logical((1:npops)~=s1); - %waitALittle; - [s2,v2] = listdlg('PromptString', 'Select target clusters:',... - 'SelectionMode','multiple',... - 'Name','Select target cluster',... - 'ListString',groupnames(remain_pop)); - if isempty(s2) || ~v2 - disp('*** WARNING: Viewing loglikelihood cancelled.'); - return - end - inds = logical(partition==s1); - ninds = sum(inds); % individuals in the source cluster - remain = find(remain_pop); - fprintf('Source cluster: %d\n', s1); - fprintf('Number of strains: %d\n',ninds); - fprintf('Target cluster(s): %s\n', num2str(remain(s2))); - - f = zeros(length(s2),100); - xi = zeros(length(s2),100); - for i = 1:length(s2) - [f(i,:),xi(i,:)] = ksdensity_myown(-changesInLogml(remain(s2(i)),inds)'); - end - - map = giveColors(npops); - h0 = figure('NumberTitle', 'off'); % density plot; - set(h0,'Tag','density_plot'); - set(h0,'Name',['Density of log likelihood changes - ' filename]); - set(gca,'ColorOrder',map(remain(s2),:)); - hold on; - plot(xi',f'); - set(gca,'xlim',[min(min(xi)),max(max(xi))]); - legend(groupnames(remain(s2))); - xlabel('Change of log likelihood'); - ylabel('Estimated density'); - title(sprintf('Population %d',s1)); - - % h1 = figure('NumberTitle', 'off'); % histogram plot; - % set(h1,'Tag','histogram_plot'); - % set(h1,'Name',['Histogram of log likelihood changes - ' filename]); - % hist(changesInLogml(remain(s2),inds)'); - % colormap(map(remain(s2),:)); - % % histfit(changesInLogml(remain(s2),inds)'); % need statistical toolbox - % legend(groupnames(remain(s2))); - % xlabel('Change of log likelihood'); - % ylabel('Frequency'); - % title(sprintf('Cluster %d',s1)); - % rose(changesInLogml(remain(s2),inds)); -end - - -%--------------------------------------% -%%% functions for linkage model %%% -%--------------------------------------- - -%-------------------------------------------------------------------------- - -function changes = computeChanges(cq_counts, cq_sumcounts,... - sp_counts, sp_sumcounts,... - partition, pop_logml,... - ind, adjprior_cq, adjprior_sp, ... - indCqCounts, indSpCounts) -% Computes changes in log-marginal likelihood if individual ind is -% moved to another population -% -% Input: -% ind - the individual to be moved -% adjprior_cq & _sp - adjpriors for cliques and separators -% indCqCounts, indSpCounts - counts for individual ind -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% ind is move to population i. - -npops = size(cq_counts,3); -changes = zeros(npops,1); - -i1 = partition(ind); -i1_logml = pop_logml(i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -cq_counts(:,:,i1) = cq_counts(:,:,i1)-indCqCounts; -cq_sumcounts(i1,:) = cq_sumcounts(i1,:)-sumCq; -sp_counts(:,:,i1) = sp_counts(:,:,i1)-indSpCounts; -sp_sumcounts(i1,:) = sp_sumcounts(i1,:)-sumSp; - -new_i1_logml = computePopulationLogml(double(cq_counts), double(cq_sumcounts),... - double(sp_counts), double(sp_sumcounts),... - i1, adjprior_cq, adjprior_sp); - -cq_counts(:,:,i1) = cq_counts(:,:,i1)+indCqCounts; -cq_sumcounts(i1,:) = cq_sumcounts(i1,:)+sumCq; -sp_counts(:,:,i1) = sp_counts(:,:,i1)+indSpCounts; -sp_sumcounts(i1,:) = sp_sumcounts(i1,:)+sumSp; - - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = pop_logml(i2); - -cq_counts(:,:,i2) = cq_counts(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -cq_sumcounts(i2,:) = cq_sumcounts(i2,:)+repmat(sumCq,[npops-1 1]); -sp_counts(:,:,i2) = sp_counts(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -sp_sumcounts(i2,:) = sp_sumcounts(i2,:) + repmat(sumSp,[npops-1 1]); - -new_i2_logml = computePopulationLogml(double(cq_counts), double(cq_sumcounts),... - double(sp_counts), double(sp_sumcounts),... - i2, adjprior_cq, adjprior_sp); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%-------------------------------------------------------------------------- - -function popLogml = computePopulationLogml(cq_counts, cq_sumcounts,... - sp_counts, sp_sumcounts,... - pops, adjprior_cq, adjprior_sp) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - - -nall_cq = size(cq_counts,1); -nall_sp = size(sp_counts, 1); -ncliq = size(cq_counts,2); -nsep = size(sp_counts, 2); - -z = length(pops); - -popLogml_cq = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_cq,[1 1 length(pops)]) + cq_counts(:,:,pops)) ... - ,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_cq))); - -popLogml_sp = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_sp,[1 1 length(pops)]) + sp_counts(:,:,pops)) ... - ,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_sp))); - -popLogml = popLogml_cq - popLogml_sp; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%-------------------------------------------------------------------------- - - -%--------------------------------------% -%%% functions for independence model %%% -%--------------------------------------- -%-------------------------------------------------------------------------- -function [muutokset, diffInCounts] = ... - laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm, ... - COUNTS, SUMCOUNTS, PARTITION, POP_LOGML) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?ind siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); - -rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml2(i1, adjprior, priorTerm,COUNTS, SUMCOUNTS); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml2(i2, adjprior, priorTerm,COUNTS, SUMCOUNTS); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - -%-------------------------------------------------------------------------- -function popLogml = computePopulationLogml2(pops, adjprior, priorTerm, ... - COUNTS, SUMCOUNTS) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - - -%-------------------------------------------------------------------------- -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - diff --git a/matlab/graph/vorPlot.m b/matlab/graph/vorPlot.m deleted file mode 100644 index 58c88f5..0000000 --- a/matlab/graph/vorPlot.m +++ /dev/null @@ -1,88 +0,0 @@ -function vorPlot(V,C,partition, pointers, coordinates, tekstit) - -if nargin < 6 - tekstit = pointers; -end - -notEmptyPops = length(unique(partition)); -if notEmptyPops>30 - disp(['Number of populations: ' num2str(notEmptyPops)]); - disp(' '); - disp('Figure can be drawn only if the number of populations'); - disp('is less or equal to 30.'); - disp(' '); - return; -end - -npops = length(unique(partition)); - -if npops > 30 - return -end - -colors=giveColors(npops); - -h1 = figure('NumberTitle', 'off', 'Name', 'Colored Voronoi tessellation'); -hold on - - -[I, J] = find(coordinates>0 | coordinates<0); -I=unique(I); -xmin = min(coordinates(I,1)); -xmax = max(coordinates(I,1)); -xdiff = (xmax-xmin); -xmean = xmin + xdiff/2; - -ymin = min(coordinates(I,2)); -ymax = max(coordinates(I,2)); -ydiff = (ymax-ymin); -ymean = ymin + ydiff/2; - -pituus = max(ydiff,xdiff)*1.1/2; - -axis([xmean-pituus xmean+pituus ymean-pituus ymean+pituus]); - - -for i=1:length(C) - X=V(C{i},:); - %k=convhull(X(:,1),X(:,2),{'QJ', 'Pp'}); - k=convhull(X(:,1),X(:,2)); - taulu = pointers{i}; - if length(taulu)>0 - color=colors(partition(taulu(1)),:); - patch(X(k,1),X(k,2),color); - plot(coordinates(taulu(1),1),coordinates(taulu(1),2),'Color',[1 1 1], 'MarkerSize', 50); - %text(coordinates(taulu(1),1),coordinates(taulu(1),2),num2str(taulu), 'FontSize', 8); - end -end - -if ~isequal(tekstit, -1) - for i=1:length(pointers) - taulu = pointers{i}; - teksti = tekstit{i}; - if isnumeric(teksti) - teksti = num2str(teksti); - end - if length(taulu)>0 - text(coordinates(taulu(1),1),coordinates(taulu(1),2),teksti, ... - 'Interpreter', 'none', 'FontSize', 8); - end - end -end -%[I,J] = find(coordinates(:,1) > 0); - -%plot(coordinates(I,1), coordinates(I,2), 'k.'); - -%hold off -%{ -%h0 = image_figure; -%hold on; -for i=1:length(partition) - if coordinates(i,1)>=0 - %plot(coordinates(i,1),coordinates(i,2),'k.'); - plot(coordinates(i,1),coordinates(i,2),'Color',[1 1 1], 'MarkerSize', 40); - text(coordinates(i,1),coordinates(i,2),num2str(i), 'FontSize', 8); - end -end -%} -hold off; \ No newline at end of file diff --git a/matlab/graph/waitALittle.m b/matlab/graph/waitALittle.m deleted file mode 100644 index 2d2571b..0000000 --- a/matlab/graph/waitALittle.m +++ /dev/null @@ -1,3 +0,0 @@ -function waitALittle -A = rand(500); -gammaln(A); \ No newline at end of file diff --git a/matlab/graph/winontop.m b/matlab/graph/winontop.m deleted file mode 100644 index 5c19f37..0000000 --- a/matlab/graph/winontop.m +++ /dev/null @@ -1,8 +0,0 @@ -%WINONTOP -% WINONTOP(FH) set "topmost" property of the figure specified by handle -% FH to state "on". -% -% WINONTOP(FH,1) same as WINONTOP(FH) -% -% WINONTOP(FH,0) set "topmost" property of the figure specified by handle -% FH to state "off". \ No newline at end of file diff --git a/matlab/independent/greedyMix.m b/matlab/independent/greedyMix.m deleted file mode 100644 index 2bc5cdf..0000000 --- a/matlab/independent/greedyMix.m +++ /dev/null @@ -1,1788 +0,0 @@ -function greedyMix(tietue) - -% check whether fixed k mode is selected -h0 = findobj('Tag','fixk_menu'); -fixedK = get(h0, 'userdata'); - -if fixedK - if ~(fixKWarning == 1) % call function fixKWarning - return - end -end - -% check whether partition compare mode is selected -h1 = findobj('Tag','partitioncompare_menu'); -partitionCompare = get(h1, 'userdata'); - - -if isequal(tietue,-1) - - input_type = questdlg('Specify the format of your data: ',... - 'Specify Data Format', ... - 'BAPS-format', 'GenePop-format', 'Preprocessed data', ... - 'BAPS-format'); - - switch input_type - - case 'BAPS-format' - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load data in BAPS-format'); - if filename==0 - return; - end - - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - - data = load([pathname filename]); - ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS - if (ninds==0) - disp('Incorrect Data-file.'); - return; - end - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - - input_pops = questdlg(['When using data which are in BAPS-format, '... - 'you can specify the sampling populations of the individuals by '... - 'giving two additional files: one containing the names of the '... - 'populations, the other containing the indices of the first '... - 'individuals of the populations. Do you wish to specify the '... - 'sampling populations?'], ... - 'Specify sampling populations?',... - 'Yes', 'No', 'No'); - if isequal(input_pops,'Yes') - waitALittle; - [namefile, namepath] = uigetfile('*.txt', 'Load population names'); - if namefile==0 - kysyToinen = 0; - else - kysyToinen = 1; - end - if kysyToinen==1 - waitALittle; - [indicesfile, indicespath] = uigetfile('*.txt', 'Load population indices'); - if indicesfile==0 - popnames = []; - else - popnames = initPopNames([namepath namefile],[indicespath indicesfile]); - end - else - popnames = []; - end - else - popnames = []; - end - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rowsFromInd); - - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - c.data = data; c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.popnames = popnames; c.Z = Z; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - clear c; - end; - - case 'GenePop-format' - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load data in GenePop-format'); - if filename==0 - return; - end - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - kunnossa = testaaGenePopData([pathname filename]); - if kunnossa==0 - return - end - [data,popnames]=lueGenePopData([pathname filename]); - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rowsFromInd); - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - c.data = data; c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.popnames = popnames; c.Z = Z; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - clear c; - end; - - case 'Preprocessed data' - waitALittle; - [filename, pathname] = uigetfile('*.mat', 'Load pre-processed data'); - if filename==0 - return; - end - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - - struct_array = load([pathname filename]); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'dist') - disp('Incorrect file format'); - return - end - elseif isfield(struct_array,'dist') %Mideva versio - c = struct_array; - else - disp('Incorrect file format'); - return; - end - data = double(c.data); rowsFromInd = c.rowsFromInd; alleleCodes = c.alleleCodes; - noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; - dist = c.dist; popnames = c.popnames; Z = c.Z; - clear c; - otherwise - return - end - -else - data = double(tietue.data); rowsFromInd = tietue.rowsFromInd; alleleCodes = tietue.alleleCodes; - noalle = tietue.noalle; adjprior = tietue.adjprior; priorTerm = tietue.priorTerm; - dist = tietue.dist; popnames = tietue.popnames; Z = tietue.Z; - clear tietue; -end - - -global PARTITION; global COUNTS; -global SUMCOUNTS; global POP_LOGML; -clearGlobalVars; - -c.data=data; -c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -c.dist=dist; c.Z=Z; c.rowsFromInd = rowsFromInd; - -ninds = length(unique(data(:,end))); -ekat = (1:rowsFromInd:ninds*rowsFromInd)'; -c.rows = [ekat ekat+rowsFromInd-1]; - -% partition compare -if ~isempty(partitionCompare) - nsamplingunits = size(c.rows,1); - partitions = partitionCompare.partitions; - npartitions = size(partitions,2); - partitionLogml = zeros(1,npartitions); - for i = 1:npartitions - % number of unique partition lables - npops = length(unique(partitions(:,i))); - - partitionInd = zeros(ninds*rowsFromInd,1); - partitionSample = partitions(:,i); - for j = 1:nsamplingunits - partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j); - end - partitionLogml(i) = ... - initialCounts(partitionInd, data(:,1:end-1), npops, c.rows, noalle, adjprior); - - end - % return the logml result - partitionCompare.logmls = partitionLogml; - set(h1, 'userdata', partitionCompare); - return -end - -if fixedK - [logml, npops, partitionSummary]=indMix_fixK(c); -else - [logml, npops, partitionSummary]=indMix(c); -end - -if logml==1 - return -end - -data = data(:,1:end-1); - -h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outp = get(h0,'String'); -changesInLogml = writeMixtureInfo(logml, rowsFromInd, data, adjprior, priorTerm, ... - outp,inp,partitionSummary, popnames, fixedK); - -viewMixPartition(PARTITION, popnames); - -talle = questdlg(['Do you want to save the mixture populations ' ... - 'so that you can use them later in admixture analysis?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save results as'); - - % ------------------------------------------- - % Added by Jing, 26.12.2005 - if (sum(filename)==0) || (sum(pathname)==0) - % Cancel was pressed - return; - else - % copy 'baps4_output.baps' into the text file with the same name. - if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename '.txt']) - delete('baps4_output.baps') - end - end; - % ------------------------------------------- - - c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; - c.alleleCodes = alleleCodes; c.adjprior = adjprior; c.popnames = popnames; - c.rowsFromInd = rowsFromInd; c.data = data; c.npops = npops; - c.noalle = noalle; c.mixtureType = 'mix'; - c.logml = logml; c.changesInLogml = changesInLogml; -% save([pathname filename], 'c'); - save([pathname filename], 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 -else - if exist('baps4_output.baps','file') - delete('baps4_output.baps') - end -end - - - - - -%------------------------------------------------------------------------------------- -%------------------------------------------------------------------------------------- - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - - -%------------------------------------------------------------------------------------- - - -function rows = computeRows(rowsFromInd, inds, ninds) -% On annettu yksilöt inds. Funktio palauttaa vektorin, joka -% sisältää niiden rivien numerot, jotka sisältävät yksilöiden -% dataa. - -rows = inds(:, ones(1,rowsFromInd)); -rows = rows*rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]); -rows = rows - miinus; -rows = reshape(rows', [1,rowsFromInd*ninds]); - - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - -%-------------------------------------------------------------------------- - - -function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -% Suorittaa yksilön seuraavan tilan arvonnan - -y = logml + muutokset; % siirron jälkeiset logml:t -y = y - max(y); -y = exp(y); -summa = sum(y); -y = y/summa; -y = cumsum(y); - -i2 = rand_disc(y); % uusi kori -suurin = muutokset(i2); - - -%-------------------------------------------------------------------------------------- - - -function svar=rand_disc(CDF) -%returns an index of a value from a discrete distribution using inversion method -slump=rand; -har=find(CDF>slump); -svar=har(1); - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ... - adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksil?ind -% on siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2( ... - i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ... - adjprior, priorTerm, i2); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm) -% Palauttaa yksilöt järjestyksess?siten, ett?ensimmäisen?on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = ... - laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?ind siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); - -rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2( ... - i1, rowsFromInd, data, adjprior, priorTerm); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli korin i1 kaikki yksilöt siirretään -% koriin i. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1_logml = POP_LOGML(i1); - -inds = find(PARTITION==i1); -ninds = length(inds); - -if ninds==0 - diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); - return; -end - -rows = computeRows(rowsFromInd, inds, ninds); - -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - - -%------------------------------------------------------------------------------------ - - -function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ... - data, adjprior, priorTerm, i1) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -i1_logml = POP_LOGML(i1); - -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = computeRows(rowsFromInd, inds, ninds); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; - COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - - muutokset(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ... - priorTerm, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?i vaihtaisi koria i1:n ja i2:n välill? - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %mist? - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - PARTITION(ind) = pop2; - - new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); - - muutokset(i) = sum(new_logmls); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - - PARTITION(ind) = pop1; -end - -muutokset = muutokset - i1_logml - i2_logml; - -%-------------------------------------------------------------------------- - - - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - - -%----------------------------------------------------------------------------------- - - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%---------------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - - -%--------------------------------------------------------------------------------------- - - -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); % Tält?yksilölt?puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%---------------------------------------------------------------------------------------- - - -function [Z, dist] = newGetDistances(data, rowsFromInd) - -ninds = max(data(:,end)); -nloci = size(data,2)-1; -riviLkm = nchoosek(ninds,2); - -empties = find(data<0); -data(empties)=0; -data = uint8(data); % max(noalle) oltava <256 - -pariTaulu = zeros(riviLkm,2); -aPointer=1; -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a; - pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)'; - aPointer = aPointer+ninds-a; -end - -eka = pariTaulu(:,ones(1,rowsFromInd)); -eka = eka * rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -eka = eka - miinus; - -toka = pariTaulu(:,ones(1,rowsFromInd)*2); -toka = toka * rowsFromInd; -toka = toka - miinus; - -%eka = uint16(eka); -%toka = uint16(toka); - -summa = zeros(riviLkm,1); -vertailuja = zeros(riviLkm,1); - -clear pariTaulu; clear miinus; - -x = zeros(size(eka)); x = uint8(x); -y = zeros(size(toka)); y = uint8(y); - -for j=1:nloci; - - for k=1:rowsFromInd - x(:,k) = data(eka(:,k),j); - y(:,k) = data(toka(:,k),j); - end - - for a=1:rowsFromInd - for b=1:rowsFromInd - vertailutNyt = double(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa+double(lisays); - end - end -end - -clear x; clear y; clear vertailutNyt; -nollat = find(vertailuja==0); -dist = zeros(length(vertailuja),1); -dist(nollat) = 1; -muut = find(vertailuja>0); -dist(muut) = summa(muut)./vertailuja(muut); -clear summa; clear vertailuja; - -Z = linkage(dist'); - - -%---------------------------------------------------------------------------------------- - - -function [Z, distances]=getDistances(data_matrix,nclusters) - -%finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance -%gives partition in 8-bit format -%allocates all alleles of a single individual into the same basket -%data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row, -%i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row -%missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -distances=zeros(nchoosek(n,2),1); -pointer=1; -for i=1:n-1 - i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci); - for j=i+1:n - d_ij=0; - j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci); - vertailuja = 0; - for k=1:size(i_data,1) - for l=1:size(j_data,1) - here_i=find(i_data(k,:)>=0); - here_j=find(j_data(l,:)>=0); - here_joint=intersect(here_i,here_j); - vertailuja = vertailuja + length(here_joint); - d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint))); - end - end - d_ij = d_ij / vertailuja; - distances(pointer)=d_ij; - pointer=pointer+1; - end -end - -Z=linkage(distances'); - - - -%---------------------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile, indexFile) -%Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto -% eivät olleet yht?pitki? - -popnames = []; -indices = load(indexFile); - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) & ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -if length(names) ~= length(indices) - disp('The number of population names must be equal to the number of '); - disp('entries in the file specifying indices of the first individuals of '); - disp('each population.'); - return; -end - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = indices(i); -end - - -%----------------------------------------------------------------------------------- - - -%-------------------------------------------------------------------------- - -function logml = ... - initialCounts(partition, data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -ninds = size(rows, 1); - -koot = rows(:,1) - rows(:,2) + 1; -maxSize = max(koot); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%initializeGammaln(ninds, maxSize, max(noalle)); - -logml = laskeLoggis(counts, sumcounts, adjprior); - - -%----------------------------------------------------------------------- - - -function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd) -nloci = size(counts,2); -npops = size(counts,3); -adjnoalle = zeros(max(noalle),nloci); -for j=1:nloci - adjnoalle(1:noalle(j),j)=noalle(j); - if (noalle(j)0 - fid = fopen(outPutFile,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Data file: ' inputFile]); -disp(['Model: independent']); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp(['Best Partition: ']); -if (fid ~= -1) - fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',[new_line]); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',[text]); - fprintf(fid,'\n'); - end - end; -end - -if npops > 1 - disp(' '); - disp(' '); - disp('Changes in log(marginal likelihood) if indvidual i is moved to group j:'); - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if indvidual i is moved to group j:']); fprintf(fid, '\n'); - end - - if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku ' ind' blanks(6+erotus)]; - else - ekarivi = ' ind '; - end - for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; - end - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); - end - - %ninds = size(data,1)/rowsFromInd; - changesInLogml = LOGDIFF'; - for ind = 1:ninds - %[muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, data, ... - % adjprior, priorTerm); - %changesInLogml(:,ind) = muutokset; - muutokset = changesInLogml(:,ind); - - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - end - end - - disp(' '); disp(' '); - disp('KL-divergence matrix in PHYLIP format:'); - - dist_mat = zeros(npops, npops); - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); - end - - maxnoalle = size(COUNTS,1); - nloci = size(COUNTS,2); - d = zeros(maxnoalle, nloci, npops); - prior = adjprior; - prior(find(prior==1))=0; - nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. - prior(1,nollia)=1; - for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); - end -% ekarivi = blanks(7); -% for pop = 1:npops -% ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; -% end - ekarivi = num2str(npops); - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); - end - - for pop1 = 1:npops - % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - dist_mat(pop1,pop2) = div; - end - % disp(rivi); - % if (fid ~= -1) - % fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - % end - end - - - dist_mat = dist_mat + dist_mat'; % make it symmetric - for pop1 = 1:npops - rivi = ['Cluster_' num2str(pop1) ' ']; - for pop2 = 1:npops - rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); - end - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n'); -end - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end -end - -if ~fixedK - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end - end - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - -%-------------------------------------------------------------- - -function num2 = omaRound(num) -% Pyöristää luvun num 1 desimaalin tarkkuuteen -num = num*10; -num = round(num); -num2 = num/10; - -%--------------------------------------------------------- - - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:in? -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmill? -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n 1)); -end - -%-------------------------------------------------------------------- - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimmäinen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) | isequal(line2,-1) | isequal(line3,-1)) - disp('Incorrect file format 1168'); fclose(fid); - return -end -if (testaaPop(line1)==1 | testaaPop(line2)==1) - disp('Incorrect file format 1172'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi tällöin lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format 1180'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin nelj?täytyy sisältää pilkku. - disp('Incorrect file format 1185'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1195'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 & ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format 1206'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan jälkeen - if isequal(line4,-1) - disp('Incorrect file format 1212'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin täytyy sisältää pilkku. - disp('Incorrect file format 1217'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1228'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - -%------------------------------------------------------ - - -function [data, popnames] = lueGenePopData(tiedostonNimi) - -fid = fopen(tiedostonNimi); -line = fgetl(fid); %ensimmäinen rivi -line = fgetl(fid); %toinen rivi -count = rivinSisaltamienMjonojenLkm(line); - -line = fgetl(fid); -lokusRiveja = 1; -while (testaaPop(line)==0) - lokusRiveja = lokusRiveja+1; - line = fgetl(fid); -end - -if lokusRiveja>1 - nloci = lokusRiveja; -else - nloci = count; -end - -popnames = cell(10,2); -data = zeros(100, nloci+1); -nimienLkm=0; -ninds=0; -poimiNimi=1; -digitFormat = -1; -while line ~= -1 - line = fgetl(fid); - - if poimiNimi==1 - %Edellinen rivi oli 'pop' - nimienLkm = nimienLkm+1; - ninds = ninds+1; - if nimienLkm>size(popnames,1); - popnames = [popnames; cell(10,2)]; - end - nimi = lueNimi(line); - if digitFormat == -1 - digitFormat = selvitaDigitFormat(line); - divider = 10^digitFormat; - end - popnames{nimienLkm, 1} = {nimi}; %Näin se on greedyMix:issäkin?!? - popnames{nimienLkm, 2} = ninds; - poimiNimi=0; - - data = addAlleles(data, ninds, line, divider); - - elseif testaaPop(line) - poimiNimi = 1; - - elseif line ~= -1 - ninds = ninds+1; - data = addAlleles(data, ninds, line, divider); - end -end - -data = data(1:ninds*2,:); -popnames = popnames(1:nimienLkm,:); -fclose(fid); - -%-------------------------------------------------------- - - -function data = addAlleles(data, ind, line, divider) -% Lisaa BAPS-formaatissa olevaan datataulukkoon -% yksilöä ind vastaavat rivit. Yksilön alleelit -% luetaan genepop-formaatissa olevasta rivist? -% line. Jos data on 3 digit formaatissa on divider=1000. -% Jos data on 2 digit formaatissa on divider=100. - -nloci = size(data,2)-1; -if size(data,1) < 2*ind - data = [data; zeros(100,nloci+1)]; -end - -k=1; -merkki=line(k); -while ~isequal(merkki,',') - k=k+1; - merkki=line(k); -end -line = line(k+1:end); -clear k; clear merkki; - -alleeliTaulu = sscanf(line,'%d'); - -if length(alleeliTaulu)~=nloci - disp('Incorrect data format.'); -end - -for j=1:nloci - ekaAlleeli = floor(alleeliTaulu(j)/divider); - if ekaAlleeli==0 ekaAlleeli=-999; end; - tokaAlleeli = rem(alleeliTaulu(j),divider); - if tokaAlleeli==0 tokaAlleeli=-999; end - - data(2*ind-1,j) = ekaAlleeli; - data(2*ind,j) = tokaAlleeli; -end - -data(2*ind-1,end) = ind; -data(2*ind,end) = ind; - -%------------------------------------------------------ - - -function count = rivinSisaltamienMjonojenLkm(line) -% Palauttaa line:n sisältämien mjonojen lukumäärän. -% Mjonojen väliss?täytyy olla välilyönti. -count = 0; -pit = length(line); -tila = 0; %0, jos odotetaan välilyöntej? 1 jos odotetaan muita merkkej? -for i=1:pit - merkki = line(i); - if (isspace(merkki) & tila==0) - %Ei tehd?mitään. - elseif (isspace(merkki) & tila==1) - tila = 0; - elseif (~isspace(merkki) & tila==0) - tila = 1; - count = count+1; - elseif (~isspace(merkki) & tila==1) - %Ei tehd?mitään - end -end - - -%------------------------------------------------------- - -function nimi = lueNimi(line) -%Palauttaa line:n alusta sen osan, joka on ennen pilkkua. -n = 1; -merkki = line(n); -nimi = ''; -while ~isequal(merkki,',') - nimi = [nimi merkki]; - n = n+1; - merkki = line(n); -end - -%------------------------------------------------------- - -function df = selvitaDigitFormat(line) -% line on ensimmäinen pop-sanan jälkeinen rivi -% Genepop-formaatissa olevasta datasta. funktio selvittää -% rivin muodon perusteella, ovatko datan alleelit annettu -% 2 vai 3 numeron avulla. - -n = 1; -merkki = line(n); -while ~isequal(merkki,',') - n = n+1; - merkki = line(n); -end - -while ~any(merkki == '0123456789'); - n = n+1; - merkki = line(n); -end -numeroja = 0; -while any(merkki == '0123456789'); - numeroja = numeroja+1; - n = n+1; - merkki = line(n); -end - -df = numeroja/2; - - - -function loggis = laskeLoggis(counts, sumcounts, adjprior) -npops = size(counts,3); - -logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior))) - ... - sum(sum(gammaln(1+sumcounts))); -loggis = logml2; \ No newline at end of file diff --git a/matlab/independent/greedyPopMix.m b/matlab/independent/greedyPopMix.m deleted file mode 100644 index 02d388a..0000000 --- a/matlab/independent/greedyPopMix.m +++ /dev/null @@ -1,252 +0,0 @@ -function greedyPopMix - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -clearGlobalVars; - -% check whether fixed k mode is selected -h0 = findobj('Tag','fixk_menu'); -fixedK = get(h0, 'userdata'); - -if fixedK - if ~(fixKWarning == 1) % call function fixKWarning - return - end -end - -% check whether partition compare mode is selected -h1 = findobj('Tag','partitioncompare_menu'); -partitionCompare = get(h1, 'userdata'); - -% LASKENNAN ALKUARVOJEN MÄÄRITTÄMINEN - -input_type = questdlg('Specify the format of your data: ',... - 'Specify Data Format', ... - 'BAPS-format', 'GenePop-format', 'Preprocessed data', ... - 'BAPS-format'); - -if isempty(input_type) - return -end - -if isequal(input_type,'BAPS-format') %Raakadata - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load data in BAPS-format'); - if filename==0 - return; - end - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - data = load([pathname filename]); - ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS - if (ninds==0) - disp('Incorrect Data-file.'); - return; - end - [data, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(data); - rowsFromInd = 0; %Ei tiedet? - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - - load_names = questdlg('Do you wish to specify the names of the groups?',... - 'Input group names?','Yes','No','Yes'); - if isequal(load_names,'Yes') - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load group names'); - popnames = initPopNames([pathname filename]); - if (size(popnames,1)~=ninds) - disp('Incorrect name-file.'); - popnames = []; - end - else - popnames = []; - end - -elseif isequal(input_type,'GenePop-format') - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load data in GenePop-format'); - if filename==0 - return; - end - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - kunnossa = testaaGenePopData([pathname filename]); - if kunnossa==0 - return - end - - [data, popnames]=lueGenePopDataPop([pathname filename]); - [data, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(data); - rowsFromInd = 2; %Tiedetään GenePop:in tapauksessa. - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; -end - -if ~isequal(input_type, 'Preprocessed data') - 2 - - npops = size(rows,1); - PARTITION = 1:npops'; %Jokainen "yksil? eli populaatio on oma ryhmäns? - [sumcounts, counts, logml] = ... - initialPopCounts(a_data, npops, rows, noalle, adjprior); - COUNTS = counts; SUMCOUNTS = sumcounts; - POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - - clear('counts', 'sumcounts','pathname','filename','vast2',... - 'vast3','vast4'); - [Z,dist] = getPopDistancesByKL(adjprior); %Saadaan COUNTS:in avulla. - - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - c.data = data; c.rows = rows; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.Z = Z; c.popnames = popnames; c.rowsFromInd = rowsFromInd; - c.npops = npops; c.logml = logml; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % Lu Cheng, 08.06.2012 - clear c; - end; -end - -if isequal(input_type, 'Preprocessed data') - waitALittle; - [filename, pathname] = uigetfile('*.mat', 'Load pre-processed data'); - if filename==0 - return; - end - - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - %load([pathname filename],'c'); - %if ~exist('c') %TESTAUS - % disp('Incorrect file format.'); - % return - %elseif ~isfield(c,'rows') - % disp('Incorrect file format.'); - % return - %end - struct_array = load([pathname filename]); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'rows') - disp('Incorrect file format'); - return - end - elseif isfield(struct_array,'rows') %Mideva versio - c = struct_array; - else - disp('Incorrect file format'); - return; - end - data = double(c.data); rows = c.rows; alleleCodes = c.alleleCodes; - noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; - dist = c.dist; Z = c.Z; popnames = c.popnames; rowsFromInd = c.rowsFromInd; - clear c; -end - -c.data=data; c.rows = rows; c.alleleCodes = alleleCodes; -c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -c.dist=dist; c.Z=Z; c.rowsFromInd = rowsFromInd; - -% partition compare -if ~isempty(partitionCompare) - nsamplingunits = size(rows,1); - partitions = partitionCompare.partitions; - npartitions = size(partitions,2); - partitionLogml = zeros(1,npartitions); - for i = 1:npartitions - % number of unique partition lables - npops = length(unique(partitions(:,i))); - try - partitionInd = zeros(rows(end),1); - partitionSample = partitions(:,i); - for j = 1: nsamplingunits - partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j); - end - partitionLogml(i) = ... - initialCounts(partitionInd, data(:,1:end-1), npops, c.rows, noalle, adjprior); - catch - disp('*** ERROR: unmatched data.'); - return - end - end - % return the logml result - partitionCompare.logmls = partitionLogml; - set(h1, 'userdata', partitionCompare); - return -end - -if fixedK - [logml, npops, partitionSummary]=indMix_fixK(c); -else - [logml, npops, partitionSummary]=indMix(c); -end - -if logml==1 - return -end - -data = data(:,1:end-1); -viewPopMixPartition(PARTITION, rows, popnames); -%npops = poistaTyhjatPopulaatiot(npops); -%POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - -h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outp = get(h0,'String'); -changesInLogml = writeMixtureInfoPop(logml, rows, data, adjprior, priorTerm, ... - outp,inp,partitionSummary, popnames, fixedK); - -talle = questdlg(['Do you want to save the mixture populations ' ... - 'so that you can use them later in admixture analysis?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save results as'); - - if (filename == 0) & (pathname == 0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename '.txt']) - delete('baps4_output.baps') - end - end - - if rowsFromInd==0 - %Käytettiin BAPS-formaattia, eik?rowsFromInd ole tunnettu. - [popnames, rowsFromInd] = findOutRowsFromInd(popnames, rows); - end - - groupPartition = PARTITION; - - fiksaaPartitioYksiloTasolle(rows, rowsFromInd); - - c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; - c.alleleCodes = alleleCodes; c.adjprior = adjprior; - c.rowsFromInd = rowsFromInd; c.popnames = popnames; - c.data = data; c.npops = npops; c.noalle = noalle; - c.mixtureType = 'popMix'; c.groupPartition = groupPartition; - c.rows = rows; c.logml = logml; c.changesInLogml = changesInLogml; -% save([pathname filename], 'c'); - save([pathname filename], 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 -else - if exist('baps4_output.baps','file') - delete('baps4_output.baps') - end -end diff --git a/matlab/independent/indMix.m b/matlab/independent/indMix.m deleted file mode 100644 index 98ea2be..0000000 --- a/matlab/independent/indMix.m +++ /dev/null @@ -1,1234 +0,0 @@ -function [logml, npops, partitionSummary]=indMix(c,npops,dispText) -% Greedy search algorithm with unknown number of classes for regular -% clustering. -% Input npops is not used if called by greedyMix or greedyPopMix. - -logml = 1; - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global LOGDIFF; -clearGlobalVars; - -noalle = c.noalle; rows = c.rows; data = c.data; -adjprior = c.adjprior; priorTerm = c.priorTerm; rowsFromInd = c.rowsFromInd; - -if isfield(c,'dist') - dist = c.dist; Z = c.Z; -end - -clear c; - -if nargin < 2 - dispText = 1; - npopstext = []; - ready = false; - teksti = 'Input upper bound to the number of populations (possibly multiple values): '; - while ready == false - npopstextExtra = inputdlg(teksti ,... - 'Input maximum number of populations',1,{'20'}); - drawnow - if isempty(npopstextExtra) % Painettu Cancel:ia - return - end - npopstextExtra = npopstextExtra{1}; - if length(npopstextExtra)>=255 - npopstextExtra = npopstextExtra(1:255); - npopstext = [npopstext ' ' npopstextExtra]; - teksti = 'The input field length limit (255 characters) was reached. Input more values: '; - else - npopstext = [npopstext ' ' npopstextExtra]; - ready = true; - end - end - clear ready; clear teksti; - if isempty(npopstext) | length(npopstext)==1 - return - else - npopsTaulu = str2num(npopstext); - ykkoset = find(npopsTaulu==1); - npopsTaulu(ykkoset) = []; % Mikäli ykkösiä annettu ylärajaksi, ne poistetaan. - if isempty(npopsTaulu) - logml = 1; partitionSummary=1; npops=1; - return - end - clear ykkoset; - end -else - npopsTaulu = npops; -end - -nruns = length(npopsTaulu); - -initData = data; -data = data(:,1:end-1); - -logmlBest = -1e50; -partitionSummary = -1e50*ones(30,2); % Tiedot 30 parhaasta partitiosta (npops ja logml) -partitionSummary(:,1) = zeros(30,1); -worstLogml = -1e50; worstIndex = 1; - -for run = 1:nruns - npops = npopsTaulu(run); - if dispText - dispLine; - disp(['Run ' num2str(run) '/' num2str(nruns) ... - ', maximum number of populations ' num2str(npops) '.']); - end - ninds = size(rows,1); - - initialPartition = admixture_initialization(initData, npops, Z); - [sumcounts, counts, logml] = ... - initialCounts(initialPartition, data, npops, rows, noalle, adjprior); - PARTITION = zeros(ninds, 1); - for i=1:ninds - apu = rows(i); - PARTITION(i) = initialPartition(apu(1)); - end - - COUNTS = counts; SUMCOUNTS = sumcounts; - POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - LOGDIFF = repmat(-Inf,ninds,npops); - clear initialPartition; clear counts; clear sumcounts; - - % PARHAAN MIXTURE-PARTITION ETSIMINEN - nRoundTypes = 7; - kokeiltu = zeros(nRoundTypes, 1); - roundTypes = [1 1]; %Ykkösvaiheen sykli kahteen kertaan. - ready = 0; vaihe = 1; - - if dispText - disp(' '); - disp(['Mixture analysis started with initial ' num2str(npops) ' populations.']); - end - - while ready ~= 1 - muutoksia = 0; - - if dispText - disp(['Performing steps: ' num2str(roundTypes)]); - end - - for n = 1:length(roundTypes) - - round = roundTypes(n); - kivaluku=0; - - if kokeiltu(round) == 1 %Askelta kokeiltu viime muutoksen jälkeen - - elseif round==0 | round==1 %Yksilön siirtäminen toiseen populaatioon. - inds = 1:ninds; - aputaulu = [inds' rand(ninds,1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - muutosNyt = 0; - - for ind = inds - i1 = PARTITION(ind); - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm); - - if round==1, [maxMuutos, i2] = max(muutokset); - end - - if (i1~=i2 & maxMuutos>1e-5) - % Tapahtui muutos - muutoksia = 1; - if muutosNyt == 0 - muutosNyt = 1; - if dispText - disp('Action 1'); - end - end - kokeiltu = zeros(nRoundTypes,1); - kivaluku = kivaluku+1; - updateGlobalVariables(ind, i2, diffInCounts,... - adjprior, priorTerm); - logml = logml+maxMuutos; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - end - end - if muutosNyt == 0 - kokeiltu(round) = 1; - end - - elseif round==2 %Populaation yhdistäminen toiseen. - maxMuutos = 0; - for pop = 1:npops - [muutokset, diffInCounts] = laskeMuutokset2(pop, rows, ... - data, adjprior, priorTerm); - [isoin, indeksi] = max(muutokset); - if isoin>maxMuutos - maxMuutos = isoin; - i1 = pop; - i2 = indeksi; - diffInCountsBest = diffInCounts; - end - end - - if maxMuutos>1e-5 - muutoksia = 1; - kokeiltu = zeros(nRoundTypes,1); - updateGlobalVariables2(i1,i2, diffInCountsBest, ... - adjprior, priorTerm); - logml = logml + maxMuutos; - if dispText - disp('Action 2'); - end - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - else - kokeiltu(round) = 1; - end - - - elseif round==3 || round==4 %Populaation jakaminen osiin. - maxMuutos = 0; - ninds = size(rows,1); - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>2 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - if round==3 - npops2 = max(min(20, floor(ninds2/5)),2); - elseif round==4 - npops2 = 2; %Moneenko osaan jaetaan - end - T2 = cluster_own(Z2, npops2); - muutokset = laskeMuutokset3(T2, inds2, rows, data, ... - adjprior, priorTerm, pop); - [isoin, indeksi] = max(muutokset(1:end)); - if isoin>maxMuutos - maxMuutos = isoin; - muuttuvaPop2 = rem(indeksi,npops2); - if muuttuvaPop2==0, muuttuvaPop2 = npops2; end - muuttuvat = inds2(find(T2==muuttuvaPop2)); - i2 = ceil(indeksi/npops2); - end - end - end - if maxMuutos>1e-5 - muutoksia = 1; - kokeiltu = zeros(nRoundTypes,1); - %rows = computeRows(rowsFromInd, muuttuvat, length(muuttuvat)); - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit; lisa']; - %rivit = [rivit; rows(ind)']; - end - diffInCounts = computeDiffInCounts(rivit', size(COUNTS,1), ... - size(COUNTS,2), data); - i1 = PARTITION(muuttuvat(1)); - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2); - logml = logml + maxMuutos; - if dispText - if round==3 - disp('Action 3'); - else - disp('Action 4'); - end - end - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - - else - kokeiltu(round)=1; - end - elseif round == 5 || round == 6 - j=0; - muutettu = 0; - poplogml = POP_LOGML; - partition = PARTITION; - counts = COUNTS; - sumcounts = SUMCOUNTS; - logdiff = LOGDIFF; - - pops = randperm(npops); - while (j < npops & muutettu == 0) - j = j+1; - pop = pops(j); - totalMuutos = 0; - inds = find(PARTITION==pop); - if round == 5 - aputaulu = [inds rand(length(inds),1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - elseif round == 6 - inds = returnInOrder(inds, pop, ... - rows, data, adjprior, priorTerm); - end - - i = 0; - - while (length(inds) > 0 & i < length(inds)) - i = i+1; - ind =inds(i); - - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm); - muutokset(pop) = -1e50; % Varmasti ei suurin!!! - [maxMuutos, i2] = max(muutokset); - updateGlobalVariables(ind, i2, diffInCounts,... - adjprior, priorTerm); - - totalMuutos = totalMuutos+maxMuutos; - logml = logml+maxMuutos; - if round == 6 - % Lopetetaan heti kun muutos on positiivinen. - if totalMuutos > 1e-5 - i=length(inds); - end - end - end - - if totalMuutos>1e-5 - kokeiltu = zeros(nRoundTypes,1); - muutettu=1; - if muutoksia==0 - muutoksia = 1; % Ulompi kirjanpito. - if dispText - if round==5 - disp('Action 5'); - else - disp('Action 6'); - end - end - end - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - else - % Missään vaiheessa tila ei parantunut. - % Perutaan kaikki muutokset. - PARTITION = partition; - SUMCOUNTS = sumcounts; - POP_LOGML = poplogml; - COUNTS = counts; - logml = logml - totalMuutos; - LOGDIFF = logdiff; - kokeiltu(round)=1; - end - end - clear partition; clear sumcounts; clear counts; clear poplogml; - - elseif round == 7 - emptyPop = findEmptyPop(npops); - j = 0; - pops = randperm(npops); - muutoksiaNyt = 0; - if emptyPop == -1 - j = npops; - end - while (j < npops) - j = j +1; - pop = pops(j); - inds2 = find(PARTITION == pop); - ninds2 = length(inds2); - if ninds2 > 5 - partition = PARTITION; - sumcounts = SUMCOUNTS; - counts = COUNTS; - poplogml = POP_LOGML; - logdiff = LOGDIFF; - - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - T2 = cluster_own(Z2, 2); - muuttuvat = inds2(find(T2 == 1)); - - muutokset = laskeMuutokset3(T2, inds2, rows, data, ... - adjprior, priorTerm, pop); - totalMuutos = muutokset(1, emptyPop); - - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit lisa]; - end - diffInCounts = computeDiffInCounts(rivit, size(COUNTS,1), ... - size(COUNTS,2), data); - - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, emptyPop); - - muutettu = 1; - while (muutettu == 1) - muutettu = 0; - % Siirretään yksilöitä populaatioiden välillä - muutokset = laskeMuutokset5(inds2, rows, data, ... - adjprior, priorTerm, pop, emptyPop); - - [maxMuutos, indeksi] = max(muutokset); - - muuttuva = inds2(indeksi); - if (PARTITION(muuttuva) == pop) - i2 = emptyPop; - else - i2 = pop; - end - - if maxMuutos > 1e-5 - rivit = rows(muuttuva,1):rows(muuttuva,2); - diffInCounts = computeDiffInCounts(rivit, size(COUNTS,1), ... - size(COUNTS,2), data); - updateGlobalVariables3(muuttuva,diffInCounts, ... - adjprior, priorTerm, i2); - muutettu = 1; - totalMuutos = totalMuutos + maxMuutos; - end - - end - - if totalMuutos > 1e-5 - muutoksia = 1; - logml = logml + totalMuutos; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - if muutoksiaNyt == 0 - if dispText - disp('Action 7'); - end - muutoksiaNyt = 1; - end - kokeiltu = zeros(nRoundTypes,1); - j = npops; - else - %palutetaan vanhat arvot - PARTITION = partition; - SUMCOUNTS = sumcounts; - COUNTS = counts; - POP_LOGML = poplogml; - LOGDIFF = logdiff; - end - - end - - end - - if muutoksiaNyt == 0 - kokeiltu(round)=1; - end - - end - end - - if muutoksia == 0 - if vaihe==1 - vaihe = 2; - elseif vaihe==2 - vaihe = 3; - elseif vaihe==3 - vaihe = 4; - elseif vaihe==4; - vaihe = 5; - elseif vaihe==5 - ready = 1; - end - else - muutoksia = 0; - end - - if ready==0 - if vaihe==1 - roundTypes=[1]; - elseif vaihe==2 - roundTypes = [2 1]; - elseif vaihe==3 - roundTypes=[5 5 7]; - elseif vaihe==4 - roundTypes=[4 3 1]; - elseif vaihe==5 - roundTypes=[6 7 2 3 4 1]; - end - end - end - - % TALLENNETAAN - - npops = poistaTyhjatPopulaatiot(npops); - POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - if dispText - disp(['Found partition with ' num2str(npops) ' populations.']); - disp(['Log(ml) = ' num2str(logml)]); - disp(' '); - end - - if logml>logmlBest - % Päivitetään parasta löydettyä partitiota. - logmlBest = logml; - npopsBest = npops; - partitionBest = PARTITION; - countsBest = COUNTS; - sumCountsBest = SUMCOUNTS; - pop_logmlBest = POP_LOGML; - logdiffbest = LOGDIFF; - end - -end - -logml = logmlBest; -npops = npopsBest; -PARTITION = partitionBest; -COUNTS = countsBest; -SUMCOUNTS = sumCountsBest; -POP_LOGML = pop_logmlBest; -LOGDIFF = logdiffbest; - -%-------------------------------------------------------------------------- - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; -global LOGDIFF; LOGDIFF = []; - -%-------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------- - - -function [sumcounts, counts, logml] = ... - initialPopCounts(data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); - -for i=1:npops - for j=1:nloci - i_rivit = rows(i,1):rows(i,2); - havainnotLokuksessa = find(data(i_rivit,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(i_rivit,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -logml = laskeLoggis(counts, sumcounts, adjprior); - -%----------------------------------------------------------------------- - - -function loggis = laskeLoggis(counts, sumcounts, adjprior) -npops = size(counts,3); - -logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior))) - ... - sum(sum(gammaln(1+sumcounts))); -loggis = logml2; - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; -%-------------------------------------------------------------------------- - - -function [muutokset, diffInCounts] = ... - laskeMuutokset(ind, globalRows, data, adjprior, priorTerm) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi -% muutos logml:ssä, mikäli yksilö ind siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättävä -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. -% -% Lisäys 25.9.2007: -% Otettu käyttöön globaali muuttuja LOGDIFF, johon on tallennettu muutokset -% logml:ssä siirrettäessä yksilöitä toisiin populaatioihin. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -npops = size(COUNTS,3); -muutokset = LOGDIFF(ind,:); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); -muutokset(i1) = 0; - -rows = globalRows(ind,1):globalRows(ind,2); -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = find(muutokset==-Inf); % Etsitään populaatiot jotka muuttuneet viime kerran jälkeen. -i2 = setdiff(i2,i1); -i2_logml = POP_LOGML(i2); - -ni2 = length(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 ni2]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[ni2 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 ni2]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[ni2 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; -LOGDIFF(ind,:) = muutokset; - - -%---------------------------------------------------------------------- - - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveillä rows. rows pitää olla vaakavektori. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - -%------------------------------------------------------------------------ - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, diffInCounts, ... - adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksilö ind -% on siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - - -%-------------------------------------------------------------------------- -%-- - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2( ... - i1, globalRows, data, adjprior, priorTerm); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi -% muutos logml:ssä, mikäli korin i1 kaikki yksilöt siirretään -% koriin i. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1_logml = POP_LOGML(i1); - -inds = find(PARTITION==i1); -ninds = length(inds); - -if ninds==0 - diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); - return; -end - -rows = []; -for i = 1:ninds - ind = inds(i); - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; -end - -diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2( ... - i1, i2, diffInCounts, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global LOGDIFF; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - - -%-------------------------------------------------------------------------- -%---- - -function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... - data, adjprior, priorTerm, i1) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mikä olisi muutos logml:ssä, jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -i1_logml = POP_LOGML(i1); -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = []; - for i = 1:ninds - ind = inds(i); - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; - end - diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; - COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - - muutokset(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset5(inds, globalRows, data, adjprior, ... - priorTerm, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mikä olisi -% muutos logml:ssä, mikäli yksilö i vaihtaisi koria i1:n ja i2:n välillä. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %mistä - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - rows = globalRows(ind,1):globalRows(ind,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); - muutokset(i) = sum(new_logmls); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; -end - -muutokset = muutokset - i1_logml - i2_logml; - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäärä. - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - - -%----------------------------------------------------------------------------------- - - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global LOGDIFF; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); -LOGDIFF = LOGDIFF(:,notEmpty); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - -%-------------------------------------------------------------- - -function num2 = omaRound(num) -% Pyöristää luvun num 1 desimaalin tarkkuuteen -num = num*10; -num = round(num); -num2 = num/10; - -%--------------------------------------------------------- -function mjono = logml2String(logml) -% Palauttaa logml:n string-esityksen. - -mjono = ' '; -if abs(logml)<10000 - %Ei tarvita e-muotoa - mjono(7) = palautaYks(abs(logml),-1); - mjono(6) = '.'; - mjono(5) = palautaYks(abs(logml),0); - mjono(4) = palautaYks(abs(logml),1); - mjono(3) = palautaYks(abs(logml),2); - mjono(2) = palautaYks(abs(logml),3); - pointer = 2; - while mjono(pointer)=='0' & pointer<7 - mjono(pointer) = ' '; - pointer=pointer+1; - end - if logml<0 - mjono(pointer-1) = '-'; - end -else - suurinYks = 4; - while abs(logml)/(10^(suurinYks+1)) >= 1 - suurinYks = suurinYks+1; - end - if suurinYks<10 - mjono(7) = num2str(suurinYks); - mjono(6) = 'e'; - mjono(5) = palautaYks(abs(logml),suurinYks-1); - mjono(4) = '.'; - mjono(3) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(2) = '-'; - end - elseif suurinYks>=10 - mjono(6:7) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(logml),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(1) = '-'; - end - end -end - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:inä -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmillä -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - - -%-------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters, Z) - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function [sumcounts, counts, logml] = ... - initialCounts(partition, data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -ninds = size(rows, 1); - -koot = rows(:,1) - rows(:,2) + 1; -maxSize = max(koot); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%initializeGammaln(ninds, maxSize, max(noalle)); - -logml = laskeLoggis(counts, sumcounts, adjprior); - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, että annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ssä ei vielä ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyistä partitiota vastaava nclusters:in arvo. Muutoin ei tehdä mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole vielä kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- - -function inds = returnInOrder(inds, pop, globalRows, data, ... - adjprior, priorTerm) -% Palauttaa yksilöt järjestyksessä siten, että ensimmäisenä on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind =inds(i); - rows = globalRows(i,1):globalRows(i,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhjiä -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end diff --git a/matlab/independent/indMix_fixK.m b/matlab/independent/indMix_fixK.m deleted file mode 100644 index 568c741..0000000 --- a/matlab/independent/indMix_fixK.m +++ /dev/null @@ -1,1337 +0,0 @@ -function [logml, npops, partitionSummary]=indMix_fixK(c,npops,nruns,dispText) -% Greedy search algorithm with fixed number of classes for regular -% clustering. -% Input npops is only used if result is needed for spatialMixture. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global LOGDIFF; -clearGlobalVars; - -noalle = c.noalle; rows = c.rows; data = c.data; -adjprior = c.adjprior; priorTerm = c.priorTerm; rowsFromInd = c.rowsFromInd; - -if isfield(c,'dist') - dist = c.dist; Z = c.Z; -end - -clear c; - -if nargin < 2 - dispText = 1; - npopstext = []; - teksti = {'Number of populations:', ... - 'Number of runs:'}; - def = {'20', '1'}; - - npopstextExtra = inputdlg(teksti ,... - 'Input parameters for the computation algorithm',1,def); - - if isempty(npopstextExtra) % cancel has been pressed - dispCancel - logml = 1; partitionSummary=1; npops=1; - return - end - npopstext = npopstextExtra{1}; - nrunstext = npopstextExtra{2}; - - clear teksti npopstextExtra; - if isempty(npopstext) - return - else - npopsTable = str2num(npopstext); - npops = npopsTable(1); - - if npops==1 - logml = 1; partitionSummary=1; npops=1; - return - end - nrunsTable = str2num(nrunstext); - nruns = nrunsTable(1); - end -else - dispText = 0; - npopsTaulu = npops; - nruns = 1; -end - -initData = data; -data = data(:,1:end-1); - -logmlBest = -1e50; -partitionSummary = -1e50*ones(30,2); % Tiedot 30 parhaasta partitiosta (npops ja logml) -partitionSummary(:,1) = zeros(30,1); -worstLogml = -1e50; worstIndex = 1; - -logml = 1; - -%summary = cell(nruns,5); -for run = 1:nruns - if dispText - dispLine; - disp(['Run ' num2str(run) '/' num2str(nruns) ... - ', maximum number of populations ' num2str(npops) '.']); - end - ninds = size(rows,1); - - initialPartition = admixture_initialization(initData, npops, Z); - [sumcounts, counts, logml] = ... - initialCounts(initialPartition, data, npops, rows, noalle, adjprior); - PARTITION = zeros(ninds, 1); - for i=1:ninds - apu = rows(i); - PARTITION(i) = initialPartition(apu(1)); - end - COUNTS = counts; SUMCOUNTS = sumcounts; - POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - LOGDIFF = repmat(-Inf,ninds,npops); - - clear initialPartition; clear counts; clear sumcounts; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - - % PARHAAN MIXTURE-PARTITION ETSIMINEN - nRoundTypes = 7; - kokeiltu = zeros(nRoundTypes, 1); - roundTypes = [1 1]; %Ykkösvaiheen sykli kahteen kertaan. - ready = 0; vaihe = 1; - - if dispText - disp(' '); - disp(['Mixture analysis started with initial ' num2str(npops) ' populations.']); - end - - while ready ~= 1 - muutoksia = 0; - if dispText - disp(['Performing steps: ' num2str(roundTypes)]); - end - for n = 1:length(roundTypes) - - round = roundTypes(n); - - if kokeiltu(round) == 1 %Askelta kokeiltu viime muutoksen jälkeen - - elseif round==0 | round==1 %Yksilön siirtäminen toiseen populaatioon. - inds = randperm(ninds); - muutosNyt = 0; - - for ind = inds - i1 = PARTITION(ind); - if length(find(PARTITION==i1))>1 - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm); - [maxMuutos, i2] = max(muutokset); - - if (i1~=i2 & maxMuutos>1e-5) - % Tapahtui muutos - muutoksia = 1; - if muutosNyt == 0 - if dispText - disp('action 1'); - end - muutosNyt = 1; - kokeiltu = zeros(nRoundTypes,1); - end - - updateGlobalVariables(ind, i2, diffInCounts,... - adjprior, priorTerm); - - logml = logml+maxMuutos; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - end - end - end - if muutosNyt == 0 - kokeiltu(round) = 1; - end - - elseif round==2 %Populaatioiden yhdistäminen ja jakaminen. - maxMuutos = -1e50; - poplogml = POP_LOGML; - partition = PARTITION; - counts = COUNTS; - sumcounts = SUMCOUNTS; - logdiff = LOGDIFF; - - % Yhdistetään ensin kaksi populaatiota - for pop = 1:npops - [muutokset, diffInCounts] = laskeMuutokset2(pop, rows, ... - data, adjprior, priorTerm); - muutokset(pop)=-1e50; - [isoin, indeksi] = max(muutokset); - if isoin>maxMuutos - maxMuutos = isoin; - i1 = pop; - i2 = indeksi; - diffInCountsBest = diffInCounts; - end - end - - totalMuutos = maxMuutos; - updateGlobalVariables2(i1,i2, diffInCountsBest, ... - adjprior, priorTerm); - - % Siirretään osa jostain populaatiosta tyhjentyneen - % populaation tilalle. - - emptyPop = i1; - maxMuutos = -1e50; - ninds = size(rows,1); - muuttuvat = []; % added by Lu Cheng, 11.11.2012 - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>1 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - npops2 = 2; %Moneenko osaan jaetaan - T2 = cluster_own(Z2, npops2); - muutokset = laskeMuutokset3(T2, inds2, rows, data, ... - adjprior, priorTerm, pop); - isoin = muutokset(1,emptyPop); - if isoin>maxMuutos - maxMuutos = isoin; - muuttuvat = inds2(find(T2==1)); - end - end - end - - % added by Lu Cheng, 11.11.2012 - if isempty(muuttuvat) - PARTITION = partition; - SUMCOUNTS = sumcounts; - POP_LOGML = poplogml; - COUNTS = counts; - LOGDIFF = logdiff; - kokeiltu(round) = 1; - continue; - end - - - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit; lisa']; - %rivit = [rivit; rows(ind)']; - end - diffInCounts = computeDiffInCounts(rivit', size(COUNTS,1), ... - size(COUNTS,2), data); - pop = PARTITION(muuttuvat(1)); - inds2 = find(PARTITION == pop); - - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, emptyPop); - totalMuutos = totalMuutos + maxMuutos; - - % Siirretään yksilöit?populaatioiden välill? - inds = randperm(ninds); - for ind = inds - i1 = PARTITION(ind); - if length(find(PARTITION==i1))>1 - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm); - - [maxMuutos, i2] = max(muutokset); - if (i1~=i2 & maxMuutos>1e-5) - updateGlobalVariables(ind, i2, diffInCounts,... - adjprior, priorTerm); - - totalMuutos = totalMuutos+maxMuutos; - end - end - end - - if totalMuutos>1e-5 - if dispText - disp('Action 2'); - end - kokeiltu = zeros(nRoundTypes,1); - logml = logml + totalMuutos; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - - else - PARTITION = partition; - SUMCOUNTS = sumcounts; - POP_LOGML = poplogml; - COUNTS = counts; - LOGDIFF = logdiff; - kokeiltu(round) = 1; - end - - - elseif round==3 || round==4 %Populaation jakaminen osiin. - maxMuutos = 0; - ninds = size(rows,1); - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>2 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - if round==3 - npops2 = max(min(20, floor(ninds2/2)),2); - elseif round==4 - npops2 = 2; %Moneenko osaan jaetaan - end - T2 = cluster_own(Z2, npops2); - muutokset = laskeMuutokset3(T2, inds2, rows, data, ... - adjprior, priorTerm, pop); - [isoin, indeksi] = max(muutokset(1:end)); - if isoin>maxMuutos - maxMuutos = isoin; - muuttuvaPop2 = rem(indeksi,npops2); - if muuttuvaPop2==0, muuttuvaPop2 = npops2; end - muuttuvat = inds2(find(T2==muuttuvaPop2)); - i2 = ceil(indeksi/npops2); - end - end - end - if maxMuutos>1e-5 - muutoksia = 1; - kokeiltu = zeros(nRoundTypes,1); - %rows = computeRows(rowsFromInd, muuttuvat, length(muuttuvat)); - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit; lisa']; - %rivit = [rivit; rows(ind)']; - end - diffInCounts = computeDiffInCounts(rivit', size(COUNTS,1), ... - size(COUNTS,2), data); - i1 = PARTITION(muuttuvat(1)); - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2); - logml = logml + maxMuutos; - if dispText - if round==3 - disp('action 3'); - else - disp('action 4'); - end - end - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - else - kokeiltu(round)=1; - end - - elseif round == 5 || round == 6 - j=0; - muutettu = 0; - poplogml = POP_LOGML; - partition = PARTITION; - counts = COUNTS; - sumcounts = SUMCOUNTS; - logdiff = LOGDIFF; - - pops = randperm(npops); - while (j < npops & muutettu == 0) - j = j+1; - pop = pops(j); - totalMuutos = 0; - inds = find(PARTITION==pop); - if round == 5 - aputaulu = [inds rand(length(inds),1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - elseif round == 6 - inds = returnInOrder(inds, pop, ... - rows, data, adjprior, priorTerm); - end - - i = 0; - - while (length(inds) > 0 & i < length(inds) - 1) - i = i+1; - ind =inds(i); - - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm); - muutokset(pop) = -1e50; % Varmasti ei suurin!!! - [maxMuutos, i2] = max(muutokset); - - updateGlobalVariables(ind, i2, diffInCounts,... - adjprior, priorTerm); - - totalMuutos = totalMuutos+maxMuutos; - logml = logml+maxMuutos; - if round == 6 - % Lopetetaan heti kun muutos on positiivinen. - if totalMuutos > 1e-5 - i=length(inds); - end - end - end - - if totalMuutos>1e-5 - kokeiltu = zeros(nRoundTypes,1); - if dispText - disp('Action 6'); - end - muutettu=1; - muutoksia = 1; % Ulompi kirjanpito. - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - - else - % Missään vaiheessa tila ei parantunut. - % Perutaan kaikki muutokset. - PARTITION = partition; - SUMCOUNTS = sumcounts; - POP_LOGML = poplogml; - COUNTS = counts; - logml = logml - totalMuutos; - LOGDIFF = logdiff; - kokeiltu(round)=1; - end - end - clear partition; clear sumcounts; clear counts; clear poplogml; - - elseif round == 7 % Jaetaan populaatio kahtia ja yhdistetään - emptyPop = npops + 1; - j = 0; - pops = randperm(npops); - muutoksiaNyt = 0; - while (j < npops) - j = j +1; - pop = pops(j); - inds2 = find(PARTITION == pop); - ninds2 = length(inds2); - if ninds2 > 5 - partition = PARTITION; - sumcounts = SUMCOUNTS; - counts = COUNTS; - poplogml = POP_LOGML; - logdiff = LOGDIFF; - - % Luodaan väliaikaisesti uusi populaatio - npops = npops + 1; - POP_LOGML(npops) = 0; - COUNTS(:,:,npops) = zeros(size(COUNTS(:,:,1))); - SUMCOUNTS(npops,:) = zeros(size(SUMCOUNTS(1,:))); - - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - T2 = cluster_own(Z2, 2); - muuttuvat = inds2(find(T2 == 1)); - - muutokset = laskeMuutokset3(T2, inds2, rows, data, ... - adjprior, priorTerm, pop); - totalMuutos = muutokset(1, emptyPop); - - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit lisa]; - end - diffInCounts = computeDiffInCounts(rivit, size(COUNTS,1), ... - size(COUNTS,2), data); - - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, emptyPop); - - % Siirretään yksilöit?populaatioiden välill? - inds = randperm(ninds); - for ind = inds - i1 = PARTITION(ind); - if length(find(PARTITION==i1))>1 - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm); - - [maxMuutos, i2] = max(muutokset); - if (i1~=i2 & maxMuutos>1e-5) - updateGlobalVariables(ind, i2, diffInCounts,... - adjprior, priorTerm); - totalMuutos = totalMuutos+maxMuutos; - end - end - end - - % Yhdistetään kaksi populaatiota, jos populaatioita - % liikaa - if length(find(any(SUMCOUNTS,2))) == npops - maxMuutos = -1e50; - for pop = 1:npops - [muutokset, diffInCounts] = laskeMuutokset2(pop, rows, ... - data, adjprior, priorTerm); - muutokset(pop)=-1e50; - [isoin, indeksi] = max(muutokset); - if isoin>maxMuutos - maxMuutos = isoin; - i1 = pop; - i2 = indeksi; - diffInCountsBest = diffInCounts; - end - end - updateGlobalVariables2(i1,i2, diffInCountsBest, ... - adjprior, priorTerm); - totalMuutos = totalMuutos + maxMuutos; - end - - if totalMuutos > 1e-5 - if dispText - disp('Action 7'); - end - muutoksia = 1; - % Poistetaan tyhj?populaatio - npops = poistaTyhjatPopulaatiot(npops); - POP_LOGML(abs(POP_LOGML)<1e-5) = []; - logml = logml + totalMuutos; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - if muutoksiaNyt == 0 - muutoksiaNyt = 1; - end - kokeiltu = zeros(nRoundTypes,1); - j = npops; - else - %palutetaan vanhat arvot - PARTITION = partition; - SUMCOUNTS = sumcounts; - COUNTS = counts; - POP_LOGML = poplogml; - LOGDIFF = logdiff; - npops = npops - 1; - end - - end - - end - - if muutoksiaNyt == 0 - kokeiltu(round)=1; - end - - end - end - - if muutoksia == 0 - if vaihe==1 - vaihe = 2; - elseif vaihe==2 - vaihe = 3; - elseif vaihe==3 - vaihe = 4; - elseif vaihe==4; - vaihe = 5; - elseif vaihe==5 - ready = 1; - end - else - muutoksia = 0; - end - - if ready==0 - if vaihe==1 - roundTypes=[1]; - elseif vaihe==2 - roundTypes=[2]; - elseif vaihe==3 - roundTypes=[6 7]; - elseif vaihe==4 - roundTypes=[4 3 1]; - elseif vaihe==5 - roundTypes=[6 7 2 3 4 1]; - end - end - end - - % TALLENNETAAN - - npops = poistaTyhjatPopulaatiot(npops); - POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - if dispText - disp(['Found partition with ' num2str(npops) ' populations.']); - disp(['Log(ml) = ' num2str(logml)]); - disp(' '); - end - - %summary{run,1} =npops; - %summary{run,2} =POP_LOGML; - %summary{run,3} =logml; - %summary{run,4} =PARTITION; - %summary{run,5} = COUNTS; - - if logml>logmlBest - % Päivitetään parasta löydetty?partitiota. - logmlBest = logml; - npopsBest = npops; - partitionBest = PARTITION; - countsBest = COUNTS; - sumCountsBest = SUMCOUNTS; - pop_logmlBest = POP_LOGML; - logdiffbest = LOGDIFF; - end - -end - -logml = logmlBest; -npops = npopsBest; -PARTITION = partitionBest; -COUNTS = countsBest; -SUMCOUNTS = sumCountsBest; -POP_LOGML = pop_logmlBest; -LOGDIFF = logdiffbest; - -%-------------------------------------------------------------------------- - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; -global LOGDIFF; LOGDIFF = []; - -%-------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------- - - -function [sumcounts, counts, logml] = ... - initialPopCounts(data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); - -for i=1:npops - for j=1:nloci - i_rivit = rows(i,1):rows(i,2); - havainnotLokuksessa = find(data(i_rivit,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(i_rivit,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -logml = laskeLoggis(counts, sumcounts, adjprior); - -%----------------------------------------------------------------------- - - -function loggis = laskeLoggis(counts, sumcounts, adjprior) -npops = size(counts,3); - -logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior))) - ... - sum(sum(gammaln(1+sumcounts))); -loggis = logml2; - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - -%-------------------------------------------------------------------------- - - -function [muutokset, diffInCounts] = ... - laskeMuutokset(ind, globalRows, data, adjprior, priorTerm) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?ind siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -npops = size(COUNTS,3); -muutokset = LOGDIFF(ind,:); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); -muutokset(i1) = 0; - -rows = globalRows(ind,1):globalRows(ind,2); -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = find(muutokset==-Inf); % Etsitään populaatiot jotka muuttuneet viime kerran jälkeen. -i2 = setdiff(i2,i1); -i2_logml = POP_LOGML(i2); - -ni2 = length(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 ni2]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[ni2 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 ni2]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[ni2 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; -if length(muutokset)>npops - keyboard -end - -LOGDIFF(ind,1:npops) = muutokset(1:npops); - - - -%---------------------------------------------------------------------- - - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. rows pitää olla vaakavektori. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - -%------------------------------------------------------------------------ - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, diffInCounts, ... - adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksil?ind -% on siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - - -%-------------------------------------------------------------------------- -%-- - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2( ... - i1, globalRows, data, adjprior, priorTerm); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli korin i1 kaikki yksilöt siirretään -% koriin i. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -% muutokset = zeros(npops,1); -muutokset = ones(npops,1)*-Inf; - -i1_logml = POP_LOGML(i1); - -inds = find(PARTITION==i1); -ninds = length(inds); - -if ninds==0 - diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); - return; -end - -rows = []; -for i = 1:ninds - ind = inds(i); - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; -end - -diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2( ... - i1, i2, diffInCounts, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global LOGDIFF; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - - -%-------------------------------------------------------------------------- -%---- - -function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... - data, adjprior, priorTerm, i1) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -i1_logml = POP_LOGML(i1); -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = []; - for i = 1:ninds - ind = inds(i); - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; - end - diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; - COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - - muutokset(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset5(inds, globalRows, data, adjprior, ... - priorTerm, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?i vaihtaisi koria i1:n ja i2:n välill? - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %mist? - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - rows = globalRows(ind,1):globalRows(ind,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); - muutokset(i) = sum(new_logmls); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; -end - -muutokset = muutokset - i1_logml - i2_logml; - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - - -%----------------------------------------------------------------------------------- - - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global LOGDIFF; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); -LOGDIFF = LOGDIFF(:,notEmpty); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - -%-------------------------------------------------------------- - -function num2 = omaRound(num) -% Pyöristää luvun num 1 desimaalin tarkkuuteen -num = num*10; -num = round(num); -num2 = num/10; - -%--------------------------------------------------------- -function mjono = logml2String(logml) -% Palauttaa logml:n string-esityksen. - -mjono = ' '; -if abs(logml)<10000 - %Ei tarvita e-muotoa - mjono(7) = palautaYks(abs(logml),-1); - mjono(6) = '.'; - mjono(5) = palautaYks(abs(logml),0); - mjono(4) = palautaYks(abs(logml),1); - mjono(3) = palautaYks(abs(logml),2); - mjono(2) = palautaYks(abs(logml),3); - pointer = 2; - while mjono(pointer)=='0' & pointer<7 - mjono(pointer) = ' '; - pointer=pointer+1; - end - if logml<0 - mjono(pointer-1) = '-'; - end -else - suurinYks = 4; - while abs(logml)/(10^(suurinYks+1)) >= 1 - suurinYks = suurinYks+1; - end - if suurinYks<10 - mjono(7) = num2str(suurinYks); - mjono(6) = 'e'; - mjono(5) = palautaYks(abs(logml),suurinYks-1); - mjono(4) = '.'; - mjono(3) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(2) = '-'; - end - elseif suurinYks>=10 - mjono(6:7) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(logml),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(1) = '-'; - end - end -end - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:in? -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmill? -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - - -%-------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters, Z) - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function [sumcounts, counts, logml] = ... - initialCounts(partition, data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -ninds = size(rows, 1); - -%koot = rows(:,1) - rows(:,2) + 1; -%maxSize = max(koot); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%initializeGammaln(ninds, maxSize, max(noalle)); - -logml = laskeLoggis(counts, sumcounts, adjprior); - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- - -function inds = returnInOrder(inds, pop, globalRows, data, ... - adjprior, priorTerm) -% Palauttaa yksilöt järjestyksess?siten, ett?ensimmäisen?on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind =inds(i); - rows = globalRows(i,1):globalRows(i,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end diff --git a/matlab/independent/myxlsread.m b/matlab/independent/myxlsread.m deleted file mode 100644 index 06040ea..0000000 --- a/matlab/independent/myxlsread.m +++ /dev/null @@ -1,49 +0,0 @@ -function [A B] = myxlsread(file) -% This function read a tab ('\t') seperated txt file -% input file structure: -% first row: title -% sencond to end row: first column, sample ID -% second column, cluster label -% other columns, gene sequences -% Lu Cheng -% 26.06.2010 - -% there can be multiple numeric columns in the input file -% Lu Cheng, 25.11.2010 - -delimiter = '\t'; - -if exist(file,'file')~=2 - error('The input file %s does not exist!', file); -end - -lines = textread(file,'%s','delimiter','\n'); - -title = strread(lines{1},'%s','delimiter',delimiter); -nRow = length(lines); -nCol = length(title); - -% determine numeric Columns -tmp = strread(lines{2},'%s','delimiter',delimiter); -numCols = []; -for i = 1:length(tmp) - if ~isnan(str2double(tmp{i})) - numCols(end+1) = i; %#ok - end -end - -A = cell(nRow-1, length(numCols)); -B = cell(nRow, nCol); - -B(1,:) = title; -for i=2:nRow - if isempty(lines{i}) - B(i,:) = []; - A(i-1,:) = []; - else - B(i,:) = strread(lines{i},'%s','delimiter',delimiter); - A(i-1,:) = B(i,numCols); - end -end - -A = cellfun(@str2double,A); \ No newline at end of file diff --git a/matlab/independent/myxlswrite.m b/matlab/independent/myxlswrite.m deleted file mode 100644 index 34aa408..0000000 --- a/matlab/independent/myxlswrite.m +++ /dev/null @@ -1,19 +0,0 @@ -function myxlswrite(file, A) -% A is a cell matrix, each element is a string -% Lu Cheng, 25.11.2010 - -h = fopen(file,'w+'); -[nRow nCol] = size(A); - -for i=1:nRow - %tmpLine = ''; - for j=1:nCol-1 - if isnumeric(A{i,j}) - A{i,j} = num2str(A{i,j}); - end - fprintf(h,'%s\t',A{i,j}); - end - fprintf(h,'%s\n',A{i,nCol}); -end - -fclose(h); \ No newline at end of file diff --git a/matlab/independent/preprocessXLS.m b/matlab/independent/preprocessXLS.m deleted file mode 100644 index 8b094ef..0000000 --- a/matlab/independent/preprocessXLS.m +++ /dev/null @@ -1,177 +0,0 @@ -function c = preprocessXLS(xlsfile,varargin) -% This function preprocesses the input xlsfile -% File structure: first line - title -% else - first column, name of individuals -% - second to end column, sequences of the given genes - -% Lu Cheng, 16.02.2010 - -%% check file names - -file_suff = '.xls'; % endings of the input file -if ~(exist(xlsfile,'file')==2) - fprintf('Input file %s does not exists, quit!\n',xlsfile); - return; -end - -if ~strcmp(xlsfile(end-length(file_suff)+1:end),file_suff) - fprintf('Input file %s does not end with %s, quit!\n',xlsfile,file_suff); - return; -end - -%% process the xls file - -% Here we assume there is no missing values, if so, the missing values are -% indicated by 0 -[data, component_mat, popnames] = processxls(xlsfile); - -% missing data 0 is transformed to |alphabet|+1, in the [ACGT] case, '-' is 5 -[data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - -c.data = data; c.rowsFromInd = rowsFromInd; -c.alleleCodes = alleleCodes; c.noalle=noalle; -c.adjprior = adjprior; -% c.priorTerm = c.priorTerm; - -c.component_mat = component_mat; -c.popnames = popnames; - -%% count the cliques and separators - -index = data(:,end); - -if isempty(varargin) - [data_clique, data_separator, noalle_clique, noalle_separator, codes_cq, codes_sp, info_cq_loci, info_sp_loci] = ... - transform5(data, component_mat); -else - c_train = varargin{1}; - - if ~all(all(c_train.component_mat == component_mat)) - disp('The gene lengths are different between the training data and the test data!'); - return; - end - - [data_clique, data_separator, noalle_clique, noalle_separator, codes_cq, codes_sp, info_cq_loci, info_sp_loci] = ... - transform5(data, component_mat, c_train.info_cq_loci,c_train.info_sp_loci); -end -data_clique = [data_clique index]; -data_separator = [data_separator index]; - -% Count the data, note that the order of the alphabets keeps the same -[counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq] ... - = allfreqsnew2(data_clique, double(noalle_clique)); -[counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp] ... - = allfreqsnew2(data_separator, double(noalle_separator)); - -clear prior_cq prior_sp nalleles_cq nalleles_sp genotypes_cq genotypes_sp; - -counts_cq = uint16(counts_cq); -counts_sp = uint16(counts_sp); - -c.counts_cq = counts_cq; -c.counts_sp = counts_sp; - -c.adjprior_cq = adjprior_cq; -c.adjprior_sp = adjprior_sp; - -c.codes_cq = codes_cq; -c.codes_sp = codes_sp; - -c.info_cq_loci = info_cq_loci; -c.info_sp_loci = info_sp_loci; - -%-------------------------------------------------------------------------- -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, miltä yksilöltä -% kyseinen rivi on peräisin. Funktio tutkii ensin, että montako -% riviä maksimissaan on peräisin yhdeltä yksilöltä, jolloin saadaan -% tietää onko kyseessä haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhjiä rivejä niille yksilöille, joilta on peräisin vähemmän -% rivejä kuin maksimimäärä. -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytössä oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, että yhden lokuksen j -% koodit saavat arvoja välillä 1,...,noalle(j). - -% English Comments added -% Small modifications have been added -% Lu Cheng, 17.02.2010 - -% Last column are the indexes of the samples, the raw_data is supposed to -% be unit16 type, 0 indicates missing value -data = raw_data; -nloci=size(raw_data,2)-1; - -% Replace missing value with the |alphabet|+1, thus 0 is replaced by 5 for -% DNA dataset -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end - -% stores all different alleles at each loci, construct the allle codes matrix -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(logical(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -%-----------------modified by Lu Cheng 17.02.2010--------------------------% -% NOTE: Here we do not want to change the alpahbets, thus the following -% lines are commented - -% replace the index of an allele to replace the allele -% for loc = 1:nloci -% for all = 1:noalle(loc) -% data(logical(data(:,loc)==alleleCodes(all,loc)), loc)=all; -% end; -% end; -%-----------------modified end.....----------------------------------------% - -% handle diploid situation -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivejä - miss = maxRowsFromInd-rowsFromInd(ind); % Tältä yksilöltä puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -% calculate the prior for each loci, priorTerm is a constant term in the -% formula, which is precalclulateed for speeding up the program -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end \ No newline at end of file diff --git a/matlab/independent/processxls.m b/matlab/independent/processxls.m deleted file mode 100644 index be7a375..0000000 --- a/matlab/independent/processxls.m +++ /dev/null @@ -1,81 +0,0 @@ -function [data, component_mat, popnames] = processxls(filename) -% -% a bug in line 64-68 was fixed -data = []; -component_mat = []; -popnames = []; -try - if ispc - [A,B] = xlsread(filename); - else - [A,B] = myxlsread(filename); - end -catch - display('*** ERROR: Wrong Excel format'); - return -end - -if size(A,2)~=1 % more than one columns containing numeric ST values - display('*** ERROR: multiple columns of numeric values'); - data = []; component_mat = []; popnames = []; - return -end - -if size(A,1)~=size(B,1)-1 - display('*** ERROR: Wrong format'); - data = []; component_mat = []; popnames = []; - return -end - -B = deblank(B); % remove any trailing blanks -nstrains = size(B,1)-1; -nheader = size(B,2); -for i = 1:nheader - if strcmpi('ST',B{1,i}) ix_ST = i; end - if strcmpi('Strain', B{1,i}) || strcmpi('Isolate',B{1,i}) - ix_Strain = i; - end -end -if ~exist('ix_ST') - display('*** ERROR: ST column needed'); - data = []; component_mat = []; popnames = []; - return -end - -if ~exist('ix_Strain') - ix_gene = setdiff([1:nheader],ix_ST); -else - ix_gene = setdiff([1:nheader],[ix_ST ix_Strain]); -end - -ngenes = length(ix_gene); - -C = cell(nstrains,ngenes); -if ~isempty(A) - for i=1:nstrains - B{i+1,ix_ST}=num2str(A(i)); - for j=1:ngenes - C{i,j}=uint16(i_encode_n(B{i+1,ix_gene(j)})); % save the memory. - end - end -end -genesize=cellfun('size',C(1,:),2); -data=cell2mat(C); -data=[data uint16([1:nstrains]')]; -component_mat = zeros(ngenes,max(genesize)); -cum = cumsum(genesize); -component_mat(1,[1:genesize(1)]) = [1:cum(1)]; -for i=2:ngenes - component_mat(i,[1:genesize(i)]) = [(cum(i-1)+1):cum(i)]; -end - -if ~exist('ix_Strain') - popnames = num2cell(B([2:end],ix_ST)); -else % store the strain names only - popnames = num2cell(B([2:end],ix_Strain)); -end -popnames(:,2)=num2cell([1:nstrains]'); - -display('---------------------------------------------------'); -display(['# of strains: ', num2str(nstrains)]); -display(['# of genes: ', num2str(ngenes)]); \ No newline at end of file diff --git a/matlab/independent/semiReadScript.m b/matlab/independent/semiReadScript.m deleted file mode 100644 index 29ae812..0000000 --- a/matlab/independent/semiReadScript.m +++ /dev/null @@ -1,128 +0,0 @@ -function paras = semiReadScript(script_file) -% This function extracts parameter information from the script file -% Script Command Table -% datafile('train|test','c:\BAPS5\DATA.xls'); Here only .xls and .mat file -% input file is supported. -% savePreproFile('train|test','c:\BAPS5\predata.mat'); -% setK('16 17 18'); -% outputmat('c:\BAPS5\output.mat') -% Lu Cheng, 11.03.2010 - -paras.train_file_format = []; -paras.train_file_name = []; - -paras.save_prepro_train_data = []; paras.save_prepro_train_data = 'No'; -paras.train_prepro_file = []; - -paras.test_file_format = []; -paras.test_file_name = []; - -paras.save_prepro_test_data = []; paras.save_prepro_test_data = 'No'; -paras.test_prepro_file = []; - -paras.cluster_num_upperbounds = []; - -paras.save_results = []; paras.save_results = 'No'; -paras.result_file = []; - -T = readfile(script_file); - -n = length(T); -for i=1:n - %line = regexprep(T{i},'\s+',''); - line = T{i}; - [res toks] = regexp(line,'(.+)\((.+)\)','once','match','tokens'); - - if isempty(res) - continue; - else - %toks - paras = parseCmd(toks{1}, toks{2}, paras); - end -end - -% ------------------------------------------------------------------------- -function prog_paras = parseCmd(cmd, paras, prog_paras) -% cmd is the script command -% paras are the parameters of the script command -% prog_paras is a stucture of the global parameters - -switch cmd - case 'datafile' - paras = regexprep(paras,'\s+',''); - toks = regexp(paras,'''([^,]+)''','tokens'); - option = toks{1}{:}; - filename = toks{2}{:}; - if exist(filename,'file')~=2 - error(cat(2,'File not exist! File: ',filename)); - end - filetype = getFileType(filename); - if isequal(option,'train') - prog_paras.train_file_format = filetype; - prog_paras.train_file_name = filename; - elseif isequal(option,'test') - prog_paras.test_file_format = filetype; - prog_paras.test_file_name = filename; - else - error(cat(2,'Unkown option: ',option,'! Expect train or test.')); - end - - case 'savePreprocFile' - paras = regexprep(paras,'\s+',''); - toks = regexp(paras,'''([^,]+)''','tokens'); - option = toks{1}{:}; - filename = toks{2}{:}; - - filetype = getFileType(filename); - if ~isequal(filetype,'.mat') - error(cat(2,'The saved file should end with .mat! ',filename)); - end - - if isequal(option,'train') - prog_paras.save_prepro_train_data = 'Yes'; - prog_paras.train_prepro_file = filename; - elseif isequal(option,'test') - prog_paras.save_prepro_test_data = 'Yes'; - prog_paras.test_prepro_file = filename; - else - error(cat(2,'Unkown option: ',option,'! Expect train or test.')); - end - case 'setK' - prog_paras.cluster_num_upperbounds = paras(2:end-1); - case 'outputmat' - filename = paras(2:end-1); - filetype = getFileType(filename); - if ~isequal(filetype,'.mat') - error(cat(2,'The saved file should end with .mat! ',filename)); - end - prog_paras.save_results = 'Yes'; - prog_paras.result_file = filename; - otherwise - error('Can not parse the cmd: %s in the script!', cmd); -end - -% ------------------------------------------------------------------------- -function filetype = getFileType(filename) -filetype = filename(end-3:end); -if ~isequal(filetype,'.xls') && ~isequal(filetype,'.mat') - error(cat(2,'Unknown option: ', filename, '! Expect .xls or .mat file')); -end - -% ------------------------------------------------------------------------- -function T = readfile(filename) -f = fopen(filename,'r'); -if f == -1 - error(cat(2,'*** ERROR: invalid input file: ',filename)); - T = []; - return -end - -i = 1; -while 1 - clear line; - line = fgetl(f); - if ~ischar(line), break, end - T{i} = line; - i = i+1; -end -fclose(f); \ No newline at end of file diff --git a/matlab/independent/semi_linkageMix.m b/matlab/independent/semi_linkageMix.m deleted file mode 100644 index 639301b..0000000 --- a/matlab/independent/semi_linkageMix.m +++ /dev/null @@ -1,1470 +0,0 @@ -function c_result = semi_linkageMix(c_train, c_test, npopsTable) -% Greedy search algorithm with unknown number of classes for linkage -% clustering. - -% Modified from linkageMix.m by Lu Cheng, 16.02.2010 -% Input: preprocessed training data and test data, as well as the -% population number, npopsTable is a vector -% NOTE, here we assume that c_train contains all the correct priors - -% IS_SINGLE_SAMPLE is introduced to handle the case of only one test sample -% Lu Cheng, 07.03.2011 - -global POP_LOGML; global PARTITION; -global CQ_COUNTS; global SP_COUNTS; %These counts are for populations -global CQ_SUMCOUNTS; global SP_SUMCOUNTS; %not for individuals -global LOGDIFF; - -global TRAIN_CQ_COUNTS; -global TRAIN_SP_COUNTS; -global TRAIN_CQ_SUMCOUNTS; -global TRAIN_SP_SUMCOUNTS; - -clearGlobalVars; - -% noalle = c_train.noalle; -% adjprior = c_train.adjprior; %priorTerm = c.priorTerm; -% rowsFromInd = c_train.rowsFromInd; -adjprior_cq = c_train.adjprior_cq; -adjprior_sp = c_train.adjprior_sp; - -counts_cq = c_test.counts_cq; -counts_sp = c_test.counts_sp; - -% UPDATE the priors by the following constants from the training data -% Lu Cheng, 17.02.2010 ------------------------ % -clusters = unique(c_train.cluster_labels); -n_train_clusters = length(clusters); -if ~all(clusters'==1:n_train_clusters) %labels should cover 1 to n_train_clusters - error(cat(2,'Error! Cluster labels should range from 1 to ',n_train_clusters,'!')); -end -[d1_cq d2_cq d3_cq] = size(c_train.counts_cq); -[d1_sp d2_sp d3_sp] = size(c_train.counts_sp); -TRAIN_CQ_COUNTS = zeros(d1_cq,d2_cq,n_train_clusters); -TRAIN_SP_COUNTS = zeros(d1_sp,d2_sp,n_train_clusters); -for i = 1:n_train_clusters - cluster_inds = (c_train.cluster_labels == clusters(i)); - TRAIN_CQ_COUNTS(:,:,i) = sum(c_train.counts_cq(:,:,cluster_inds),3); - TRAIN_SP_COUNTS(:,:,i) = sum(c_train.counts_sp(:,:,cluster_inds),3); -end - -train_counts_cq = TRAIN_CQ_COUNTS; -train_counts_sp = TRAIN_SP_COUNTS; - -clear clusters cluster_inds i -clear d1_cq d2_cq d3_cq d1_sp d2_sp d3_sp cluster_inds -% --------------------------------------------- % - -IS_SINGLE_SAMPLE = false; -if isfield(c_test,'dist') - dist = c_test.dist; Z = c_test.Z; -else - IS_SINGLE_SAMPLE = true; -end -clear c_test c_train; - -nruns = length(npopsTable); -logmlBest = -1e50; -partitionSummary = -1e50*ones(100,2); % 100 best partitions (npops and logml) -partitionSummary(:,1) = zeros(100,1); -worstLogml = -1e50; worstIndex = 1; - -for run = 1:nruns - if IS_SINGLE_SAMPLE && (npopsTable(run)>n_train_clusters+1) - npops = n_train_clusters + 1; - else - npops = npopsTable(run); - end - - dispLine; - disp(['Run ' num2str(run) '/' num2str(nruns) ... - ', maximum number of populations ' num2str(npops) '.']); - - %----- added by Lu Cheng, 23.02.2010------% - % modify the training data so it fits the dimension of given population - % number - %n_train_clusters = size(train_counts_cq,3); - if npopsworstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex, n_train_clusters); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - - %%%% - % Finding the best partition with the greedy search algorithm - %%%% - nRoundTypes = 7; - tested = zeros(nRoundTypes,1); - roundTypes = [1 1]; - ready = 0; phase = 1; - ninds = length(PARTITION); % number of individuals - LOGDIFF = repmat(-Inf,ninds,npops); - - disp(' '); - disp(['Mixture analysis started with initial ' num2str(npops) ' populations.']); - - while ready ~= 1 - changesMade = 0; - - disp(['Performing steps: ' num2str(roundTypes)]); - - for n = 1:length(roundTypes) - round = roundTypes(n); - % pack; - if tested(round) == 1 - - elseif round==1 % Moving one individual to another population - inds = randperm(ninds); % random order - changesMadeNow = 0; - for ind = inds - i1 = PARTITION(ind); - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - changesInLogml = computeChanges(ind, adjprior_cq, ... - adjprior_sp, indCqCounts, indSpCounts); - - [maxChange, i2] = max(changesInLogml); - - if (i1~=i2 && maxChange>1e-5) - - %disp(changesInLogml); - - % Individual is moved - changesMade = 1; - if changesMadeNow == 0 - disp('action 1'); - changesMadeNow = 1; - tested = zeros(nRoundTypes,1); - end - updateGlobalVariables(ind, i2, indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - logml = logml+maxChange; - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex, n_train_clusters); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - end - end - - if changesMadeNow == 0 - tested(round) = 1; - end - - - %disp(PARTITION'); % for test - - elseif round==2 % Combining two populations - maxChange = 0; - - %n_train_clusters % see line 32, Lu Cheng, 04.03.2010 - -% for pop = 1:npops -% changesInLogml = computeChanges2(pop, adjprior_cq, adjprior_sp); % all inds in 'pop' is moved to other clusters -% [biggest, index] = max(changesInLogml); -% if biggest>maxChange -% maxChange = biggest; -% i1 = pop; -% i2 = index; -% fprintf('moving population %d to population %d.\n',i1,i2); -% end -% disp(changesInLogml'); -% end - - % modified by Lu Cheng, 04.03.2010 - % here we only combine the outer clusters with other - % outerclusters or training clusters. In case there is only - % one outer cluster in the PARTITION, it will be combined with - % some training cluster, depending on the logml - if npops>n_train_clusters - for pop = n_train_clusters+1:npops - if any(PARTITION==pop) - changesInLogml = computeChanges2(pop, adjprior_cq, adjprior_sp); % all inds in 'pop' is moved to other clusters - [biggest, index] = max(changesInLogml); - if biggest>maxChange - maxChange = biggest; - i1 = pop; - i2 = index; - %fprintf('moving population %d to population %d.\n',i1,i2); - end - end - end - - %fprintf('maxChange: %d\n',maxChange); - end - - - if maxChange>1e-5 - disp('action 2'); - changesMade = 1; - tested = zeros(nRoundTypes,1); - - updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp); %all inds in i1 are moved to i2 - logml = logml + maxChange; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex, n_train_clusters); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - - %disp(PARTITION'); % for test - - else - tested(round) = 1; - end - - - elseif (round==3 || round==4) && ~IS_SINGLE_SAMPLE % Splitting population into smaller groups - maxChange = 0; - - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>5 - % Computing the distance between individuals inds2 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = computeLinkage(dist2'); - - % Number of groups: - if round==3 - npops2 = min(20, floor(ninds2 / 5)); - elseif round==4 - npops2 = 2; - end - T2 = cluster_own(Z2, npops2); - - changesInLogml = computeChanges3(T2, inds2, pop, ... - counts_cq, counts_sp, adjprior_cq, adjprior_sp); % npops2 * npops matrix - [biggest, index] = max(changesInLogml(1:end)); - if biggest > maxChange - maxChange = biggest; - movingGroup = rem(index,npops2); % The group, which is moved - if movingGroup==0, movingGroup = npops2; end - movingInds = inds2(logical(T2==movingGroup)); - i2 = ceil(index/npops2); % pop where movingGroup would be moved - end - end - end - if maxChange>1e-5 - changesMade = 1; - tested = zeros(nRoundTypes,1); - if round==3 - disp('action 3'); - else - disp('action 4'); - end - indCqCounts = uint16(sum(counts_cq(:,:,movingInds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,movingInds),3)); - updateGlobalVariables3(movingInds, i2,indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - logml = logml + maxChange; - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex, n_train_clusters); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - else - tested(round) = 1; - end - - elseif (round == 5 || round == 6) && ~IS_SINGLE_SAMPLE - %Moving individuals out of population until positive change - %in logml has occured - pop=0; - changesMadeNow = 0; - %Saving old values - poplogml = POP_LOGML; - partition = PARTITION; - cq_counts = CQ_COUNTS; - sp_counts = SP_COUNTS; - cq_sumcounts = CQ_SUMCOUNTS; - sp_sumcounts = SP_SUMCOUNTS; - logdiff = LOGDIFF; - - while (pop < npops && changesMadeNow == 0) - pop = pop+1; - totalChangeInLogml = 0; - inds = find(PARTITION==pop); - if round == 5 - %Random order - aputaulu = [inds rand(length(inds),1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - elseif round == 6 - inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp); - end - - i=0; - - while (length(inds)>0 && i 1e-5 - i=length(inds); - end - end - end - - if totalChangeInLogml>1e-5 - if round == 5 - disp('action 5'); - elseif round == 6 - disp('action 6'); - end - tested = zeros(nRoundTypes,1); - changesMadeNow=1; - changesMade = 1; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex, n_train_clusters); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - else - % No better partition was found, restoring the old - % values - PARTITION = partition; - POP_LOGML = poplogml; - CQ_COUNTS = cq_counts; - SP_COUNTS = sp_counts; - CQ_SUMCOUNTS = cq_sumcounts; - SP_SUMCOUNTS = sp_sumcounts; - LOGDIFF = logdiff; - logml = logml - totalChangeInLogml; - end - end - clear partition; clear poplogml; - if changesMadeNow == 0 - tested(round) = 1; - end - - elseif round == 7 && ~IS_SINGLE_SAMPLE - emptyPop = findEmptyPop(npops); - j = 0; - pops = randperm(npops); - % totalChangeInLogml = 0; - if emptyPop == -1 - j = npops; - end - changesMadeNow = 0; - while (j < npops) - j = j +1; - pop = pops(j); - inds2 = find(PARTITION == pop); - ninds2 = length(inds2); - if ninds2 > 5 - partition = PARTITION; - cq_sumcounts = CQ_SUMCOUNTS; - cq_counts = CQ_COUNTS; - sp_sumcounts = SP_SUMCOUNTS; - sp_counts = SP_COUNTS; - poplogml = POP_LOGML; - logdiff = LOGDIFF; - % pack; - - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = computeLinkage(dist2'); - T2 = cluster_own(Z2, 2); - % movingInds = inds2(find(T2 == 1)); - movingInds = inds2(logical(T2 == 1)); - changesInLogml = computeChanges3(T2, inds2, pop, ... - counts_cq, counts_sp, adjprior_cq, adjprior_sp); - totalChangeInLogml = changesInLogml(1, emptyPop); - - indCqCounts = uint16(sum(counts_cq(:,:,movingInds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,movingInds),3)); - updateGlobalVariables3(movingInds, emptyPop,indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - - changed = 1; - - while (changed == 1) - changed = 0; - - changesInLogml = computeChanges5(inds2, pop, emptyPop, ... - counts_cq, counts_sp, adjprior_cq, adjprior_sp); - [maxChange, index] = max(changesInLogml); - moving = inds2(index); - if (PARTITION(moving) == pop) - i2 = emptyPop; - else - i2 = pop; - end - - if maxChange > 1e-5 - indCqCounts = uint16(counts_cq(:,:,moving)); - indSpCounts = uint16(counts_sp(:,:,moving)); - updateGlobalVariables3(moving, i2,indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - changed = 1; - totalChangeInLogml = totalChangeInLogml + maxChange; - end - end - - if totalChangeInLogml > 1e-5 - changesMade = 1; - changesMadeNow = 1; - logml = logml + totalChangeInLogml; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex, n_train_clusters); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - disp('action 7'); - tested = zeros(nRoundTypes, 1); - j = npops; - else - % No better partition was found, restoring the old - % values - PARTITION = partition; - POP_LOGML = poplogml; - CQ_COUNTS = cq_counts; - SP_COUNTS = sp_counts; - CQ_SUMCOUNTS = cq_sumcounts; - SP_SUMCOUNTS = sp_sumcounts; - LOGDIFF = logdiff; - %logml = logml - totalChangeInLogml; - end - end - end - if changesMadeNow == 0 - tested(round) = 1; - end - end - - end - - - if changesMade == 0 - if phase==1 - phase = 2; - elseif phase==2 - phase = 3; - elseif phase==3 - phase = 4; - elseif phase==4; - phase = 5; - elseif phase==5 - ready = 1; - end - else - changesMade = 0; - end - - if ready==0 - if phase==1 - roundTypes=[1]; - elseif phase==2 - roundTypes=[2]; - elseif phase==3 - roundTypes=[5 5 7]; - elseif phase==4 - roundTypes=[4 3 1 1]; - elseif phase==5 - roundTypes=[6 2 7 3 4 1]; - end - end - - end - % Saving results - - npops = removeEmptyPops(n_train_clusters,npops); - POP_LOGML = computePopulationLogml(1:npops, adjprior_cq, adjprior_sp); - - disp(['Found partition with ' num2str(npops) ' populations.']); - disp(['Log(ml) = ' num2str(logml)]); - disp(' '); - - if logml>logmlBest - % Updating the best found partition - logmlBest = logml; - npopsBest = npops; - partitionBest = PARTITION; - cq_countsBest = CQ_COUNTS; - sp_countsBest = SP_COUNTS; - cq_sumcountsBest = CQ_SUMCOUNTS; - sp_sumcountsBest = SP_SUMCOUNTS; - pop_logmlBest = POP_LOGML; - logdiffbest = LOGDIFF; - end -end - -c_result.logml = logmlBest; -c_result.npops = npopsBest; -c_result.PARTITION = partitionBest; -c_result.CQ_COUNTS = cq_countsBest; -c_result.SP_COUNTS = sp_countsBest; -c_result.CQ_SUMCOUNTS = cq_sumcountsBest; -c_result.SP_SUMCOUNTS = sp_sumcountsBest; -c_result.POP_LOGML = pop_logmlBest; -c_result.LOGDIFF = logdiffbest; -c_result.partitionSummary = partitionSummary; - -% Calculate the posterior probabilities if a sample i is moved to cluster -% j, from LOGDIFF (nsample*npops matrix) -% Each row of LOGDIFF represents logml changes if the sample is moved to -% the corresponding cluster -% See page 24 in BAPS manual -% added by Lu Cheng, 29.03.2010 -[ninds npops] = size(c_result.LOGDIFF); -clusterProbTable = zeros(ninds, npops); -for i=1:ninds - tmpRow = exp(c_result.LOGDIFF(i,:)); - clusterProbTable(i,:) = tmpRow/sum(tmpRow); -end -c_result.clusterProbTable = clusterProbTable; -% ------------ - -%-------------------------------------------------------------------------- -% The next three functions are for computing the initial partition -% according to the distance between the individuals - -function initial_partition = admixture_initialization(nclusters,Z) -initial_partition = cluster_own(Z,nclusters); - -%-------------------------------------------------------------------------- -function T = cluster_own(Z,nclust) -% true=logical(1); -% false=logical(0); - -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); -% maximum number of clusters based on inconsistency -if m <= maxclust - T = (1:m)'; -elseif maxclust==1 - T = ones(m,1); -else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end -end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäärä. - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - -%-------------------------------------------------------------------------- - -function Z = computeLinkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 || m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -% monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges(ind, adjprior_cq, adjprior_sp, ... - indCqCounts, indSpCounts) -% Computes changes in log-marginal likelihood if individual ind is -% moved to another population -% -% Input: -% ind - the individual to be moved -% adjprior_cq & _sp - adjpriors for cliques and separators -% indCqCounts, indSpCounts - counts for individual ind -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% ind is move to population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -%npops = size(CQ_COUNTS,3); deleted by Lu Cheng, 24.20.2010 -changes = LOGDIFF(ind,:); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); -changes(i1) = 0; - -sumCq = uint16(sum(indCqCounts,1)); %n_loci * 1 vector -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - -i2 = find(changes==-Inf); -% i2 = setdiff(i2,i1); % deleted by Lu Cheng, 24.02.2010, since changes(i1) = 0 -i2_logml = POP_LOGML(i2); - -ni2 = length(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 ni2]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[ni2 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 ni2]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) + repmat(sumSp,[ni2 1]); - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 ni2]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[ni2 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 ni2]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) - repmat(sumSp,[ni2 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; -LOGDIFF(ind,:) = changes; - -%------------------------------------------------------------------------------------ - -function changes = computeChanges2(i1, adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if population i1 is combined -% with another population -% -% Input: -% i1 - the population to be combined -% adjprior_cq & _sp - adjpriors for cliques and separators -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% i1 is combined with population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; - -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); - -i1_logml = POP_LOGML(i1); -indCqCounts = CQ_COUNTS(:,:,i1); -indSpCounts = SP_COUNTS(:,:,i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -new_i1_logml = 0; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); -% a = repmat(sumSp,[npops-1 1]); -% if ~any(sumSp) -% a(:,[1:size(a,2)])=[]; -% end -% SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ a ; - - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml + new_i2_logml - i2_logml; - -%------------------------------------------------------------------------------------ - - -function changes = computeChanges3(T2, inds2, i1, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if subpopulation of i2 is -% moved to another population -% -% Input: -% T2 - partition of inds2 to subpopulations -% inds2 - individuals in population i1 -% i2 -% counts_cq, counts_sp - counts for individuals -% -% Output: -% changes - table of size length(unique(T2))*npops. -% changes(i,j) = difference in logml if subpopulation inds2(find(T2==i)) of -% i2 is moved to population j - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; - -npops = size(CQ_COUNTS,3); -npops2 = length(unique(T2)); -changes = zeros(npops2,npops); - -%cq_counts = CQ_COUNTS; -%sp_counts = SP_COUNTS; -%cq_sumcounts = CQ_SUMCOUNTS; -%sp_sumcounts = SP_SUMCOUNTS; - - -i1_logml = POP_LOGML(i1); - -for pop2 = 1:npops2 - % inds = inds2(find(T2==pop2)); - inds = inds2(T2==pop2); - ninds = length(inds); - if ninds>0 - indCqCounts = uint16(sum(counts_cq(:,:,inds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,inds),3)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - - new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); - - new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - - changes(pop2,i2) = new_i1_logml - i1_logml + new_i2_logml - i2_logml; - end -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges5(inds, i1, i2, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes change in logml if individual of inds is moved between -% populations i1 and i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; global PARTITION; - -ninds = length(inds); -changes = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %from - pop2 = i2; %to - else - pop1 = i2; - pop2 = i1; - end - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)-indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)-sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)-indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:) - sumSp; - - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)+indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)+sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)+indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:) + sumSp; - - new_logmls = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - changes(i) = sum(new_logmls); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)+indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)+sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)+indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:)+sumSp; - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)-indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)-sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)-indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:)-sumSp; -end - -changes = changes - i1_logml - i2_logml; - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individual ind is moved to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - -LOGDIFF(:,[i1 i2]) = -Inf; -%inx = [find(PARTITION==i1); find(PARTITION==i2)]; -inx = (PARTITION==i1 | PARTITION==i2); % modified by Lu Cheng, 24.02.2010 -LOGDIFF(inx,:) = -Inf; - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp) -% Updates global variables when all individuals from population i1 are moved -% to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -% inds = find(PARTITION==i1); -% PARTITION(inds) = i2; -PARTITION(PARTITION==i1) = i2; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+CQ_COUNTS(:,:,i1); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+CQ_SUMCOUNTS(i1,:); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+SP_COUNTS(:,:,i1); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+SP_SUMCOUNTS(i1,:); - -CQ_COUNTS(:,:,i1) = 0; -CQ_SUMCOUNTS(i1,:) = 0; -SP_COUNTS(:,:,i1) = 0; -SP_SUMCOUNTS(i1,:) = 0; - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -LOGDIFF(:,[i1 i2]) = -Inf; -%inx = [find(PARTITION==i1); find(PARTITION==i2)]; -inx = (PARTITION==i1 | PARTITION==i2); % modified by Lu Cheng, 24.02.2010 -LOGDIFF(inx,:) = -Inf; - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individuals muuttuvat are moved to -% population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(muuttuvat(1)); %Why only one individual is moved? Lu Cheng, 24.02.2010 -PARTITION(muuttuvat) = i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Returns individuals inds in order according to the change in the logml if -% they are moved out of the population pop - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)-indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)-sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)-indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)-sumSp; - - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)+indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)+sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)+indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)+sumSp; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - - -%-------------------------------------------------------------------------- - -function clearGlobalVars - -global CQ_COUNTS; CQ_COUNTS = []; -global CQ_SUMCOUNTS; CQ_SUMCOUNTS = []; -global SP_COUNTS; SP_COUNTS = []; -global SP_SUMCOUNTS; SP_SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; -global LOGDIFF; LOGDIFF = []; - -global TRAIN_CQ_COUNTS; TRAIN_CQ_COUNTS=[]; -global TRAIN_CQ_SUMCOUNTS; TRAIN_CQ_SUMCOUNTS = []; -global TRAIN_SP_COUNTS; TRAIN_SP_COUNTS = []; -global TRAIN_SP_SUMCOUNTS; TRAIN_SP_SUMCOUNTS = []; - - -%-------------------------------------------------------------------------- - -function new_npops = removeEmptyPops(n_train_clusters, npops) -% Removes empty pops from all global COUNTS variables. -% Updates PARTITION and npops - -% This function is modified by Lu Cheng, 08.03.2010 -% n_train_clusters: number of clusters in the training data -% npops: the user defined max potential population number - -global CQ_COUNTS; -global CQ_SUMCOUNTS; -global SP_COUNTS; -global SP_SUMCOUNTS; -global PARTITION; -global LOGDIFF; - -global TRAIN_CQ_COUNTS; -global TRAIN_CQ_SUMCOUNTS; -global TRAIN_SP_COUNTS; -global TRAIN_SP_SUMCOUNTS; - -%---------ORIGINAL CODES START------------------% -% notEmpty = find(any(CQ_SUMCOUNTS,2)); -% CQ_COUNTS = CQ_COUNTS(:,:,notEmpty); -% CQ_SUMCOUNTS = CQ_SUMCOUNTS(notEmpty,:); -% SP_COUNTS = SP_COUNTS(:,:,notEmpty); -% SP_SUMCOUNTS = SP_SUMCOUNTS(notEmpty,:); -% LOGDIFF = LOGDIFF(:,notEmpty); -% -% for n=1:length(notEmpty) -% PARTITION(PARTITION==notEmpty(n)) = n; -% end -% npops = length(notEmpty); -%---------ORIGINAL CODES END---------------------% - -pops = unique(PARTITION); -pops = pops(pops>n_train_clusters); -if isempty(pops) && npops>n_train_clusters - CQ_COUNTS(:,:,n_train_clusters+1:npops) = []; - CQ_SUMCOUNTS(n_train_clusters+1:npops,:) = []; - SP_COUNTS(:,:,n_train_clusters+1:npops) = []; - SP_SUMCOUNTS(n_train_clusters+1:npops,:) = []; - - TRAIN_CQ_COUNTS(:,:,n_train_clusters+1:npops) = []; - TRAIN_CQ_SUMCOUNTS(n_train_clusters+1:npops,:) = []; - TRAIN_SP_COUNTS(:,:,n_train_clusters+1:npops) = []; - TRAIN_SP_SUMCOUNTS(n_train_clusters+1:npops,:) = []; - - LOGDIFF(:,n_train_clusters+1:npops) = []; - new_npops = n_train_clusters; - return -elseif isempty(pops) - % do nothing - - new_npops = npops; - return -else - n_nonempty = length(pops); %the pops should be in ascending order - for i=1:n_nonempty - PARTITION(PARTITION==pops(i))= n_train_clusters+i; - CQ_COUNTS(:,:,n_train_clusters+i) = CQ_COUNTS(:,:,pops(i)); - CQ_SUMCOUNTS(n_train_clusters+i,:) = CQ_SUMCOUNTS(pops(i),:); - SP_COUNTS(:,:,n_train_clusters+i) = SP_COUNTS(:,:,pops(i)); - SP_SUMCOUNTS(n_train_clusters+i,:) = SP_SUMCOUNTS(pops(i),:); - - LOGDIFF(:,n_train_clusters+i) = LOGDIFF(:,pops(i)); - end - - CQ_COUNTS(:,:,n_train_clusters+n_nonempty+1:npops) = []; - CQ_SUMCOUNTS(n_train_clusters+n_nonempty+1:npops,:) = []; - SP_COUNTS(:,:,n_train_clusters+n_nonempty+1:npops) = []; - SP_SUMCOUNTS(n_train_clusters+n_nonempty+1:npops,:) = []; - - TRAIN_CQ_COUNTS(:,:,n_train_clusters+n_nonempty+1:npops) = []; - TRAIN_CQ_SUMCOUNTS(n_train_clusters+n_nonempty+1:npops,:) = []; - TRAIN_SP_COUNTS(:,:,n_train_clusters+n_nonempty+1:npops) = []; - TRAIN_SP_SUMCOUNTS(n_train_clusters+n_nonempty+1:npops,:) = []; - - LOGDIFF(:,n_train_clusters+n_nonempty+1:npops) = []; - - new_npops = n_train_clusters+n_nonempty; -end - - - -%-------------------------------------------------------------------------- - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex, n_train_pops) -% Tiedetään, että annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ssä ei vielä ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyistä partitiota vastaava nclusters:in arvo. Muutoin ei tehdä mitään. - -% global PARTITION; -% apu = isempty(find(abs(partitionSummary(:,2)-logml)<1e-5,1)); -% if apu -% % Nyt löydetty partitio ei ole vielä kirjattuna summaryyn. -% npops = length(unique(PARTITION)); -% partitionSummary(worstIndex,1) = npops; -% partitionSummary(worstIndex,2) = logml; -% added = 1; -% else -% added = 0; -% end - -% Modified by Lu Cheng, 11.03.2010 -% The population number should be greater than or equal to the training -% population number -global PARTITION; -apu = isempty(find(abs(partitionSummary(:,2)-logml)<1e-5,1)); -if apu - npops = sum(unique(PARTITION)>n_train_pops)+n_train_pops; - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- - -function [counts, sumcounts] = initialCounts(ind_counts) - -global PARTITION; - -pops = unique(PARTITION); -npops = max(pops); - -counts = zeros(size(ind_counts,1), size(ind_counts,2), npops,'uint16'); -sumcounts = zeros(npops, size(ind_counts,2),'uint16'); - -for i = 1:npops - inds = (PARTITION == i); - counts(:,:,i) = sum(ind_counts(:,:,inds), 3); - sumcounts(i,:) = sum(counts(:,:,i),1); -end - -%-------------------------------------------------------------------------- - -function [counts, sumcounts] = calCounts(ind_counts, npops) -% This function calculates the counts for each cluster from the sample data -% 'ind_counts', which are the counts for each individual -% The input 'npops' is a user defined value, which means the max number of -% potential populations in the data. Usually it is bigger than the number -% of populations given by 'PARTITION'. - -% modified by Lu Cheng, 08.03.2010 - -global PARTITION; % n_samples * 1 vector, indicate the cluster_id for each sample - -pops = unique(PARTITION); -pops_len = length(pops); - -[n_alle n_loci n_inds] = size(ind_counts); -counts = zeros(n_alle, n_loci, npops,'uint16'); -sumcounts = zeros(npops, n_loci,'uint16'); - -for i = 1:pops_len - cluster = pops(i); - inds = (PARTITION == cluster); - counts(:,:,cluster) = sum(ind_counts(:,:,inds), 3); - sumcounts(cluster,:) = squeeze(sum(counts(:,:,cluster),1)); -end - -%-------------------------------------------------------------------------- - -function logml = computeLogml(adjprior_cq, adjprior_sp) - -% calculate the sum logml for all the populations - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -global TRAIN_CQ_COUNTS; -global TRAIN_SP_COUNTS; -global TRAIN_CQ_SUMCOUNTS; -global TRAIN_SP_SUMCOUNTS; - -npops = size(TRAIN_CQ_COUNTS,3); % Modified by Lu Cheng, 08.03.2010 -cq_counts = TRAIN_CQ_COUNTS + double(CQ_COUNTS); -cq_sumcounts = TRAIN_CQ_SUMCOUNTS + double(CQ_SUMCOUNTS); -sp_counts = TRAIN_SP_COUNTS + double(SP_COUNTS); -sp_sumcounts = TRAIN_SP_SUMCOUNTS + double(SP_SUMCOUNTS); - -cq_logml = sum(sum(sum(gammaln(cq_counts+repmat(adjprior_cq,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_cq))) - ... - sum(sum(gammaln(1+cq_sumcounts))); - -sp_logml = sum(sum(sum(gammaln(sp_counts+repmat(adjprior_sp,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_sp))) - ... - sum(sum(gammaln(1+sp_sumcounts))); - -logml = cq_logml - sp_logml; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%-------------------------------------------------------------------------- - -function popLogml = computePopulationLogml(pops, adjprior_cq, adjprior_sp) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -% calculate logml for each population given by pops -% returns a length(pops)*1 vector - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -global TRAIN_CQ_COUNTS; -global TRAIN_SP_COUNTS; -global TRAIN_CQ_SUMCOUNTS; -global TRAIN_SP_SUMCOUNTS; - -%npops = size(TRAIN_CQ_COUNTS,3); % Modified by Lu Cheng, 08.03.2010 -cq_counts = TRAIN_CQ_COUNTS + double(CQ_COUNTS); -cq_sumcounts = TRAIN_CQ_SUMCOUNTS + double(CQ_SUMCOUNTS); -sp_counts = TRAIN_SP_COUNTS + double(SP_COUNTS); -sp_sumcounts = TRAIN_SP_SUMCOUNTS + double(SP_SUMCOUNTS); - -nall_cq = size(CQ_COUNTS,1); -nall_sp = size(SP_COUNTS,1); -ncliq = size(CQ_COUNTS,2); -nsep = size(SP_COUNTS, 2); - -z = length(pops); - -popLogml_cq = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_cq,[1 1 z]) + cq_counts(:,:,pops)) ... - ,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_cq))); - -popLogml_sp = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_sp,[1 1 z]) + sp_counts(:,:,pops)) ... - ,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_sp))); - -popLogml = popLogml_cq - popLogml_sp; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%------------------------------------------------------------------- - - - - -%-------------------------------------------------------------- -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -% newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n= 1 - suurinYks = suurinYks+1; - end - if suurinYks<10 - mjono(7) = num2str(suurinYks); - mjono(6) = 'e'; - mjono(5) = palautaYks(abs(logml),suurinYks-1); - mjono(4) = '.'; - mjono(3) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(2) = '-'; - end - elseif suurinYks>=10 - mjono(6:7) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(logml),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(1) = '-'; - end - end -end - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:inä -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmillä -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhjiä -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - - diff --git a/matlab/independent/semi_linkageMixture_speed.m b/matlab/independent/semi_linkageMixture_speed.m deleted file mode 100644 index 07083d7..0000000 --- a/matlab/independent/semi_linkageMixture_speed.m +++ /dev/null @@ -1,651 +0,0 @@ -function semi_linkageMixture_speed(c_train, c_test) -% This function process adjusts the priors of the training data accoring to -% the test data. Based on the adjusted priors, the test data is clustered. - -% modified from linkageMixture_speed.m by Lu Cheng, 16.02.2010 - -% Update by Lu Cheng, 07.03.2011 -% case of only 1 sample in the test data has been handled - -% added by Lu Cheng, 11.03.2010 -global SCRIPT_MODE; -global PARAMETERS; -if isempty(SCRIPT_MODE) - SCRIPT_MODE = false; -end -% ----------------- - - -%% compare the training data and test data, adjust priors - -%1% Compare the training data and test data to adjust the prior -if ~all(all(c_train.component_mat == c_test.component_mat)) - disp('The gene lengths are different between the training data and the test data!'); - return; -end - -flag = false; % whether the trained priors should be adjusted -n_loci = size(c_train.alleleCodes,2); - -if c_train.rowsFromInd ~= c_test.rowsFromInd - error('Inconsistant rows from each individual. Train: %d Test: %d. Quit! \n', ... - c_train.rowsFromInd, c_test.rowsFromInd); - return; -elseif c_train.rowsFromInd > 1 - error('Data must be haploid. Quit! rowsFromInd: %d.\n', c_train.rowsFromInd); - return; -end - -for i=1:n_loci - if flag; break; end - a = setdiff(c_test.alleleCodes(:,i),c_train.alleleCodes(:,i)); - a = a(a~=0); - if ~isempty(a) - flag = true; - fprintf('New alleles are detected in the test data at loci: %d\n',i); - fprintf('The processing time will be much longer than usual. \n'); - end -end - -%2% reprocess the data for clustering -if flag - - % combing the training data and test data, adjust the priors - combine_data = [c_train.data; c_test.data]; - n_train = size(c_train.data,1); - n_samples = size(combine_data,1); - combine_data(:,end) = (1:n_samples)'; - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(combine_data); - - index = data(:,end); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, c_train.component_mat,'codon'); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - % Count the data - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - - counts_cq = uint16(counts_cq); - counts_sp = uint16(counts_sp); - - c_train.adjprior = adjprior; - - c_train.counts_cq = counts_cq(:,:,1:n_train); - c_train.counts_sp = counts_sp(:,:,1:n_train); - - c_test.counts_cq = counts_cq(:,:,n_train+1:end); - c_test.counts_sp = counts_sp(:,:,n_train+1:end); - - c_train.adjprior_cq = adjprior_cq; - c_train.adjprior_sp = adjprior_sp; - - c_train.alleleCodes = alleleCodes; - c_train.noalle = noalle; - - clear data rowsFromInd alleleCodes noalle adjprior priorTerm index - clear data_clique data_separator noalle_clique noalle_separator - clear counts_cq counts_sp nalleles_cq nalleles_sp prior_cq prior_sp adjprior_cq adjprior_sp - -else - - % KEY: adjust the test data to fit the configuration of the training data - % the 'codes_cq' and 'codes_sp' are directly translated from DNA sequence - % SEE 'i_encode_n.m' under the linkage folder - - num_cq = size(c_train.counts_cq, 1); num_sp = size(c_train.counts_sp, 1); - n_loci_cq = size(c_train.counts_cq, 2); n_loci_sp = size(c_train.counts_sp, 2); - n_inds = size(c_test.counts_cq, 3); - - counts_cq = zeros(num_cq, n_loci_cq, n_inds); - counts_sp = zeros(num_sp, n_loci_sp, n_inds); - - % mapping the indexes of cliques and separators of the test data to the - % indexes of the training data - for k = 1:n_inds - for j = 1:n_loci_cq - [c, ia, ib] = intersect(c_test.codes_cq{j}, c_train.codes_cq{j},'rows'); - counts_cq(ib,j,k) = c_test.counts_cq(ia,j,k); - end - - for j = 1:n_loci_sp - [c, ia, ib] = intersect(c_test.codes_sp{j}, c_train.codes_sp{j},'rows'); - counts_sp(ib,j,k) = c_test.counts_sp(ia,j,k); - end - end - - c_test.counts_cq = counts_cq; - c_test.counts_sp = counts_sp; - - clear c ia ib k j i; - -end - -%% cluster the test data - -% case of only 1 sample in the test data, added by Lu Cheng, 07.03.2011 -if size(c_test.data,1)~=1 - [Z,dist] = newGetDistances(c_test.data, c_test.rowsFromInd); - c_test.Z = Z; - c_test.dist = dist; -end - -clear Z dist; - -message = cat(2,'There are currently ',num2str(length(unique(c_train.cluster_labels))),' clusters in the training data, please input upper bounds of cluster numbers in the test data.'); - -if SCRIPT_MODE - cluster_nums = str2num(PARAMETERS.cluster_num_upperbounds); -else - cluster_nums = inputdlg(message); - if isempty(cluster_nums) == 1 - return; - else - cluster_nums = str2num(cluster_nums{:}); - end -end - -% % Test purpose, Check the input data, there should be 1 allele in each loci (column) -% % Lu Cheng, 25.02.2010 -% if ~all(all(squeeze(sum(c_train.counts_cq,1)))) -% disp('Missing cq value of some sample in counts_cq of the training data'); -% return; -% elseif ~all(all(squeeze(sum(c_train.counts_sp,1)))) -% disp('Missing sp value of some sample in counts_sp of the training data'); -% return; -% elseif ~all(all(squeeze(sum(c_test.counts_cq,1)))) -% disp('Missing cq value of some sample in counts_cq of the test data'); -% return; -% elseif ~all(all(squeeze(sum(c_test.counts_sp,1)))) -% disp('Missing sp value of some sample in counts_sp of the test data'); -% return; -% end - -tic -semi_res = semi_linkageMix(c_train, c_test, cluster_nums); -toc - -semi_res.popnames = c_test.popnames; - -writeMixtureInfo(semi_res); - -% save the results -if SCRIPT_MODE - save_results = PARAMETERS.save_results; -else - save_results = questdlg('Do you wish to save the results?',... - 'Save Results','Yes','No','Yes'); -end - -if isequal(save_results,'Yes') - if SCRIPT_MODE - save(PARAMETERS.result_file,'semi_res','-v7.3'); - else - [filename, pathname] = uiputfile('*.mat','Save the results as'); - if (sum(filename)==0) || (sum(pathname)==0) - % do nothing - else - save(strcat(pathname,filename),'semi_res','-v7.3'); - end - end -end; - -% ----------------------------------------------------------------------- - - - -%-------------------------------------------------------------------------- -%% The next three functions are for computing the initial partition -% according to the distance between the individuals - -function initial_partition=admixture_initialization(nclusters,Z) -T=cluster_own(Z,nclusters); -initial_partition=T; - -function T = cluster_own(Z,nclust) -% true=logical(1); -% false=logical(0); - -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); -% maximum number of clusters based on inconsistency -if m <= maxclust - T = (1:m)'; -elseif maxclust==1 - T = ones(m,1); - -else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end -end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function Z = computeLinkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 || m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -% monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - -%-------------------------------------------------------------------------- - -%------------------------------------------------------------------------- - -function [Z, dist] = newGetDistances(data, rowsFromInd) - -ninds = max(data(:,end)); -nloci = size(data,2)-1; -riviLkm = nchoosek(double(ninds),2); - -% empties = find(data<0); -% data(empties)=0; -data(logical(data<0)) = 0; -data = uint16(data); - -pariTaulu = zeros(riviLkm,2); -aPointer=1; - -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+double(ninds-1-a),1) = ones(ninds-a,1,'uint16')*a; - pariTaulu(aPointer:aPointer+double(ninds-1-a),2) = uint16((a+1:ninds)'); - aPointer = aPointer+double(ninds-a); -end - -eka = pariTaulu(:,ones(1,rowsFromInd)); -eka = eka * rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -eka = eka - miinus; - -toka = pariTaulu(:,ones(1,rowsFromInd)*2); -toka = toka * rowsFromInd; -toka = toka - miinus; - -eka = uint16(eka); -toka = uint16(toka); - -clear pariTaulu; clear miinus; - -summa = uint16(zeros(riviLkm,1)); -vertailuja = uint16(zeros(riviLkm,1)); - -x = zeros(size(eka)); x = uint16(x); -y = zeros(size(toka)); y = uint16(y); -% fprintf(1,'%%10'); -for j=1:nloci; - - for k=1:rowsFromInd - x(:,k) = data(eka(:,k),j); - y(:,k) = data(toka(:,k),j); - end - - for a=1:rowsFromInd - for b=1:rowsFromInd - vertailutNyt = uint16(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa + uint16(lisays); - end - end - % fprintf(1,'\b\b'); - % fprintf(1,'%d',floor(10+80*j/nloci)); -end - -clear x; clear y; clear vertailutNyt; -clear eka; clear toka; clear data; clear lisays; -dist = zeros(length(vertailuja),1); -% nollat = find(vertailuja==0); -% dist(nollat) = 1; -dist(logical(vertailuja==0)) = 1; -muut = find(vertailuja>0); -dist(muut) = double(summa(muut))./double(vertailuja(muut)); -clear summa; clear vertailuja; clear muut; - -Z = computeLinkage(dist'); -% fprintf(1,'\b\b'); -% fprintf(1,'%d\n',100); -%-------------------------------------------------------------------------- - -function writeMixtureInfo(c) - -outputFile = 'baps5_semi_output.txt'; - -% output the semi-supervised clustering results to the outputFile -% modified by Lu Cheng, 28.03.2010 - -ninds = length(c.PARTITION); -npops = c.npops; -popnames = c.popnames; -logml = c.logml; -partition = c.PARTITION; -partitionSummary = c.partitionSummary; - -if ~isempty(outputFile) - fid = fopen(outputFile,'w'); -else - fid = -1; - %diary('baps5_semi_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%10s\n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); - fprintf(fid,'%20s\n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); - fprintf(fid,'%20s\n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); - fprintf(fid,'%20s\n\n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -end - -disp('Best Partition: '); -if (fid ~= -1) - fprintf(fid,'%s \n','Best Partition: '); -end -for m=1:npops - indsInM = find(partition==m); - - if isempty(indsInM) - continue; - end - - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - text = [text '}']; - - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',new_line); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',text); - end - end; -end - -names = true; - -clusterProbTable = c.clusterProbTable; -if npops == 1 - clusterProbTable = []; -else - disp(''); - disp('Posterior probability of assignment into clusters:'); - - if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'Posterior probability of assignment into clusters: '); fprintf(fid, '\n'); - end - - text = sprintf('%10s','ind'); - for ii = 1:npops - tmpstr = sprintf('%10s',num2str(ii)); - text = [text tmpstr]; - end - - disp(text); - if (fid ~= -1) - fprintf(fid, '%s \n', text); - end - - for ii = 1:ninds - text = sprintf('%10s',popnames{ii}{:}); - for jj = 1:npops - tmpstr = sprintf('%10s',num2str(clusterProbTable(ii,jj),'%10.6f')); - text = [text tmpstr]; - end - - if ii<100 - disp(text); - elseif ii==101 - disp('.......................................'); - disp('..........see output file..............'); - end - if (fid ~= -1) - fprintf(fid, '%s \n', text); - end - text = []; - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n\n', ' '); - fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n'); -end - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1),'%20d') ' ' num2str(partitionSummary(part,2),'%20.6f')]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', line); - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - -%-------------------------------------------------------------- -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -% newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n=0)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(logical(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - % data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - data(logical(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %K�y l�pi ne yksil�t, joilta puuttuu rivejï¿? - miss = maxRowsFromInd-rowsFromInd(ind); % T�ltï¿?yksil�ltï¿?puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksil�iden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - -%-------------------------------------------------------------------------- diff --git a/matlab/independent/tmpscript.txt b/matlab/independent/tmpscript.txt deleted file mode 100644 index 9937a8e..0000000 --- a/matlab/independent/tmpscript.txt +++ /dev/null @@ -1,5 +0,0 @@ -datafile('train','C:\BAPS5\burk_test\train_data.mat') -datafile('test','C:\BAPS5\burk_test\testdata\testdata_51_5.xls') -savePreproFile('test','C:\BAPS5\burk_test\testpreproc\testpreproc_51_5.mat') -setK('16') -outputmat('C:\BAPS5\burk_test\testres\testres_51_5.mat') diff --git a/matlab/independent/trainedMix.m b/matlab/independent/trainedMix.m deleted file mode 100644 index 0869457..0000000 --- a/matlab/independent/trainedMix.m +++ /dev/null @@ -1,2293 +0,0 @@ -function trainedMix - -% LASKENNAN ALKUARVOJEN M��RITT�MINEN - -global SCRIPT_MODE; - -if isempty(SCRIPT_MODE) - SCRIPT_MODE = false; -end - -if SCRIPT_MODE - input_type = 'MLST-format'; -else - input_type = questdlg('Specify the format of your data: ',... - 'Specify Data Format', ... - 'MLST-format', 'GenePop-format','MLST-format'); -end - -switch input_type - case 'MLST-format' - disp('MLST-format'); - processMLST - case 'GenePop-format' - disp('GenePop-format'); - processGenePop -end - -%-------------------------------------------------------------------------- - -function processMLST -% note that this version only works for windows with Excel installed -% Lu Cheng, 02.02.2010 -% lu.cheng@helsinki.fi - -% added by Lu Cheng, 11.03.2010 -global SCRIPT_MODE; -global PARAMETERS; -if isempty(SCRIPT_MODE) - SCRIPT_MODE = false; -end -% ---------- - -tmp_train_file = 'tmp8972_train.xls'; -if exist(tmp_train_file,'file')==2 - delete(tmp_train_file); -end - -%% process both the training data and test data - -% Format of the training excel file -% column 1: sample ID -% column 2: cluster label of each sample, an integer from 1 to K -% column 3-n: sequences of each gene -format1 = 'MS EXCEL FORMAT'; -format2 = 'PREPROCESSED FORMAT'; - -if SCRIPT_MODE - if isequal(PARAMETERS.train_file_format,'.xls') - input_type = format1; - elseif isequal(PARAMETERS.train_file_format,'.mat') - input_type = format2; - end -else - input_type = questdlg('Specify the format of your training data: ',... - 'Specify Data Format', format1, format2, format1); -end - -switch input_type - case format1 - - if SCRIPT_MODE - trained_file = PARAMETERS.train_file_name; - else - [filename, pathname] = uigetfile('*.xls', strcat('Load training data in',' ',format1)); - if filename==0 - return; - end - trained_file = strcat(pathname,filename); - end - - [A B] = xlsread(trained_file); - if size(B,1) == length(A)+1 - B(2:end,1) = num2cell(A(:,1)); - else - B(:,1) = num2cell(A(:,1)); - end - - train_xls = B(:,[1 3:end]); - cluster_labels = A(:,2); - - % the unique labels should be tightly from 1 to K - % added by Lu Cheng, 22.06.2010 - unique_labels = unique(cluster_labels); - if max(unique_labels)~=length(unique_labels) - error('The cluster labels are wrong, should be from 1 to %s !', num2str(length(unique_labels))); - end - - xlswrite(tmp_train_file,train_xls); - clear A B trained_file unique_labels - - c_train = preprocessXLS(tmp_train_file); - c_train.cluster_labels = cluster_labels; - delete(tmp_train_file); - - if SCRIPT_MODE - save_preproc = PARAMETERS.save_prepro_train_data; - else - save_preproc = questdlg('Do you wish to save the pre-processed training data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - end - - if isequal(save_preproc,'Yes'); - if SCRIPT_MODE -% save(PARAMETERS.train_prepro_file,'c_train'); - save(PARAMETERS.train_prepro_file,'c_train', '-v7.3'); % added by Lu Cheng, 08.06.2012 - else - [filename, pathname] = uiputfile('*.mat','Save pre-processed training data as'); - if (sum(filename)==0) || (sum(pathname)==0) - % do nothing - else -% save(strcat(pathname,filename,'.mat'),'c_train'); - save(strcat(pathname,filename,'.mat'),'c_train','-v7.3'); % added by Lu Cheng, 08.06.2012 - end - end - end; - - case format2 - disp(format2); - - if SCRIPT_MODE - trained_file = PARAMETERS.train_file_name; - else - [filename, pathname] = uigetfile('*.mat', strcat('Load training data in',' ',format2)); - if filename==0 - return; - end - trained_file = strcat(pathname,filename); - end - - clear c_train - load('-mat',trained_file); - - otherwise - return; -end - -%% process with test data - -if SCRIPT_MODE - if isequal(PARAMETERS.test_file_format,'.xls') - input_type = format1; - elseif isequal(PARAMETERS.test_file_format,'.mat') - input_type = format2; - end -else - input_type = questdlg('Specify the format of your test data: ',... - 'Specify Data Format', format1, format2, format1); -end - -switch input_type - case format1 - if SCRIPT_MODE - test_file = PARAMETERS.test_file_name; - else - [filename, pathname] = uigetfile('*.xls', 'Load test data (unlabeled) in MLST-format'); - if filename==0 - return; - end - test_file = strcat(pathname,filename); - end - c_test = preprocessXLS(test_file,c_train); - - if SCRIPT_MODE - save_preproc = PARAMETERS.save_prepro_test_data; - else - save_preproc = questdlg('Do you wish to save the pre-processed test data?',... - 'Save pre-processed data?','Yes','No','Yes'); - end - - if isequal(save_preproc,'Yes'); - if SCRIPT_MODE -% save(PARAMETERS.test_prepro_file,'c_test'); - save(PARAMETERS.test_prepro_file,'c_test','-v7.3'); % added by Lu Cheng, 08.06.2012 - else - [filename, pathname] = uiputfile('*.mat','Save pre-processed test data as'); - if (sum(filename)==0) || (sum(pathname)==0) - % do nothing - else -% save(strcat(pathname,filename,'.mat'),'c_test'); - save(strcat(pathname,filename,'.mat'),'c_test','-v7.3'); % added by Lu Cheng, 08.06.2012 - end - end - end; - - case format2 - if SCRIPT_MODE - test_file = PARAMETERS.test_file_name; - else - [filename, pathname] = uigetfile('*.mat', cat(2,'Load test data (unlabeled) in ',format2)); - if filename==0 - return; - end - test_file = strcat(pathname,filename); - end - load('-mat',test_file,'c_test'); - - otherwise - return; -end - -%% compare the preprocessed training and test data and further steps - -semi_linkageMixture_speed(c_train, c_test); - -%-------------------------------------------------------------------------- -function processGenePop -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global ADJPRIOR; -global PRIORTERM; -global SUMPRIOR; -global LOGDIFF; -clearGlobalVars; - -[filename, pathname] = uigetfile('*.txt', 'Load prior data in GenePop-format'); -if filename==0 - return; -end -kunnossa = testaaGenePopData([pathname filename]); -if kunnossa==0 - return -end - -waitALittle; -[filename2, pathname2] = uigetfile('*.txt', 'Load sampling units in GenePop-format'); -if filename2==0 - return; -end -kunnossa = testaaGenePopData([pathname2 filename2]); -if kunnossa==0 - return -end -clear kunnossa; - -[pData, pNames, pIndNames]=lueGenePopDataPop([pathname filename]); -[suData, suNames, suIndNames] = lueGenePopDataPop([pathname2 filename2]); -if size(pData,2) ~= size(suData,2) - disp('Incorrect input'); - return -end -inp = [filename ' & ' filename2]; -h0 = findobj('Tag','filename1_text'); -set(h0,'String',inp); -clear h0; clear inp; -clear filename; clear filename2; clear pathname; clear pathname2; - -[alleleCodes, noalle, suData, pData] = examineAlleles(suData, pData); - -rows = initializeRows(suData); % Samplin unit:ien rivit kertova muuttuja. -rowsFromInd = 2; %Tiedet��n GenePop:in tapauksessa. -data = suData(:,1:end-1); %Klusteroitavat "yksil�t" -priorLastCol = pData(:,end); -priorPartition = priorLastCol(1:rowsFromInd:end); % Prioriyksil�iden partitio -clear suData; clear priorLastCol; %Ei tarvita. Kai...? - -npopstext = []; -ready = false; -teksti = 'Input upper bound to the number of populations (possibly multiple values): '; -while ready == false - npopstextExtra = inputdlg(teksti ,... - 'Input maximum number of populations',1,{'20'}); - if isempty(npopstextExtra) % Painettu Cancel:ia - return - end - npopstextExtra = npopstextExtra{1}; - if length(npopstextExtra)>=255 - npopstextExtra = npopstextExtra(1:255); - npopstext = [npopstext ' ' npopstextExtra]; - teksti = 'The input field length limit (255 characters) was reached. Input more values: '; - else - npopstext = [npopstext ' ' npopstextExtra]; - ready = true; - end -end -clear ready; clear teksti; -if isempty(npopstext) | length(npopstext)==1 - return -else - npopsTaulu = str2num(npopstext); - clear npopstext; - if length(npopsTaulu)<1 - disp('Incorrect input'); - return - end - if any(npopsTaulu < size(pNames,1)) - disp('Incorrect input'); - return - end -end - -nruns = length(npopsTaulu); - -logmlBest = -1e50; -partitionSummary = -1e50*ones(30,2); % Tiedot 30 parhaasta partitiosta (npops ja logml) -partitionSummary(:,1) = zeros(30,1); -worstLogml = -1e50; worstIndex = 1; -Z = []; -for run = 1:nruns - npops = npopsTaulu(run); - dispLine; - disp(['Run ' num2str(run) '/' num2str(nruns) ... - ', maximum number of populations ' num2str(npops) '.']); - - disp(['Simulation started with ' num2str(npops) ' initial populations.']); - adjprior = computePriors(pData, npops, noalle); %adjprior on yhden populaation, jossa ei havaintoja. - COUNTS = zeros(size(ADJPRIOR)); - SUMCOUNTS = zeros(size(SUMPRIOR)); - POP_LOGML = zeros(npops,1); - - POP_LOGML = computePopulationLogml(1:npops); - logml = initialPopCounts(data, npops, rows, noalle); %Alustetaan COUNTS, PARTITION ... - - if isempty(Z) % Lasketaan vain ensimm�isellï¿?kierroksella. - if size(rows,1)==1 - Z = []; - dist = []; - else - [Z,dist] = getPopDistancesByKL(data, rows, noalle, adjprior); %Lasketaan sampling unit:ien v�liset et�isyydet. - end - end - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - - % PARHAAN MIXTURE-PARTITION ETSIMINEN - nRoundTypes = 7; - kokeiltu = zeros(nRoundTypes, 1); - - roundTypes = [1 1]; %Ykk�svaiheen sykli kahteen kertaan. - ready = 0; vaihe = 1; - ninds = length(PARTITION); % num of sampling units - LOGDIFF = repmat(-Inf,ninds,npops); - - disp(' '); - while ready ~= 1 - muutoksia = 0; - - disp(['Performing steps: ' num2str(roundTypes)]); - - for n = 1:length(roundTypes) - - round = roundTypes(n); - kivaluku=0; - - if kokeiltu(round) == 1 - - elseif round==0 | round==1 %Yksil�n siirt�minen toiseen populaatioon. - inds = 1:ninds; - aputaulu = [inds' rand(ninds,1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - - muutosNyt = 0; - for ind = inds - i1 = PARTITION(ind); - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data); - - if round==1, [maxMuutos, i2] = max(muutokset); - end - - if (i1~=i2 & maxMuutos>1e-5) - % Tapahtui muutos - if muutosNyt == 0 - disp('Action 1'); - muutosNyt = 1; - kokeiltu = zeros(nRoundTypes,1); - end - muutoksia = 1; - kivaluku = kivaluku+1; - updateGlobalVariables(ind, i2, diffInCounts); - logml = logml+maxMuutos; - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - end - end - if muutosNyt == 0 - kokeiltu(round) = 1; - end - - elseif round==2 & ~isempty(dist) %Populaation yhdist�minen toiseen. - maxMuutos = 0; - for pop = 1:npops - [muutokset, diffInCounts] = laskeMuutokset2(pop, rows, ... - data); - [isoin, indeksi] = max(muutokset); - if isoin>maxMuutos - maxMuutos = isoin; - i1 = pop; - i2 = indeksi; - diffInCountsBest = diffInCounts; - end - end - - if maxMuutos>1e-5 - muutoksia = 1; - disp('Action 2'); - kokeiltu = zeros(nRoundTypes,1); - updateGlobalVariables2(i1,i2, diffInCountsBest); - logml = logml + maxMuutos; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - else - kokeiltu(round) = 1; - end - - - elseif (round==3 | round==4) & ~isempty(dist)%Populaation jakaminen osiin. - maxMuutos = 0; - ninds = size(rows,1); - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>2 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - if round==3 - npops2 = min(20, floor(ninds2 / 5)); - elseif round==4 - npops2 = 2; %Moneenko osaan jaetaan - end - T2 = cluster_own(Z2, npops2); - muutokset = laskeMuutokset3(T2, inds2, rows, data, pop); - [isoin, indeksi] = max(muutokset(1:end)); - if isoin>maxMuutos - maxMuutos = isoin; - muuttuvaPop2 = rem(indeksi,npops2); - if muuttuvaPop2==0, muuttuvaPop2 = npops2; end - muuttuvat = inds2(find(T2==muuttuvaPop2)); - i2 = ceil(indeksi/npops2); - end - end - end - if maxMuutos>1e-5 - muutoksia = 1; - disp(['Action ' num2str(round)]); - kokeiltu = zeros(nRoundTypes,1); - %rows = computeRows(rowsFromInd, muuttuvat, length(muuttuvat)); - rivit = []; - for ind = muuttuvat - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit; lisa']; - %rivit = [rivit; rows(ind)']; - end - diffInCounts = computeDiffInCounts(rivit', size(COUNTS,1), ... - size(COUNTS,2), data); - i1 = PARTITION(muuttuvat(1)); - updateGlobalVariables3(muuttuvat, diffInCounts, i2); - logml = logml + maxMuutos; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - else - kokeiltu(round)=1; - end - - - elseif round == 5 & ~isempty(dist) - % K�y l�pi populaatioita. - % Yritï¿?poistaa niistï¿?yksil�itï¿?yksi - % kerrallaan. Lopeta heti, kun jonkin - % yksil�iden joukon poistaminen jostain - % populaatiosta aiheuttaa positiivisen - % muutoksen logml:��n. - - pop=0; - muutettu = 0; - poplogml = POP_LOGML; partition = PARTITION; - counts = COUNTS; sumcounts = SUMCOUNTS; - logdiff = LOGDIFF; - - while (pop < npops & muutettu == 0) - pop = pop+1; - totalMuutos = 0; - inds = find(PARTITION==pop)'; - inds = returnInOrder(inds, pop, rows, data); - - i=0; - while (length(inds)>0 & i 0 - i = i+1; - ind = inds(i); - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, data); - muutokset(pop) = -1e50; % Varmasti ei suurin!!! - [maxMuutos, i2] = max(muutokset); - updateGlobalVariables(ind, i2, diffInCounts); - totalMuutos = totalMuutos+maxMuutos; - logml = logml+maxMuutos; - end - - if totalMuutos>1e-5 - disp('action 5'); - muutettu=1; - kokeiltu = zeros(nRoundTypes,1); - muutoksia = 1; % Ulompi kirjanpito. - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) [worstLogml, worstIndex] = min(partitionSummary(:,2)); end - end - else - % Miss��n vaiheessa tila ei parantunut. - % Perutaan kaikki muutokset. - PARTITION = partition; - SUMCOUNTS = sumcounts; - POP_LOGML = poplogml; - COUNTS = counts; - LOGDIFF = logdiff; - logml = logml - totalMuutos; - kokeiltu(round)=1; - end - end - clear partition; clear sumcounts; clear counts; clear poplogml; - end - end - - if muutoksia == 0 - if vaihe==1 - vaihe = 2; - elseif vaihe==2 - vaihe = 3; - elseif vaihe==3 - ready = 1; - end - else - muutoksia = 0; - end - - if ready==0 - if vaihe==1 - roundTypes=[1]; - elseif vaihe==2 - roundTypes=[2 1]; - elseif vaihe==3 - roundTypes=[5 4 3 1 2]; - end - end - end - - - % TALLENNETAAN - - prioriPopLkm = size(pNames,1); - npops = poistaTyhjatPopulaatiot(prioriPopLkm); - POP_LOGML = computePopulationLogml(1:npops); - - n_clust_with_su = length(unique(PARTITION)); - disp(['Found partition with sampling units in ' num2str(n_clust_with_su) ' clusters.']); - disp(['Log(ml) = ' num2str(logml)]); - disp(' '); - - if logml>logmlBest - % P�ivitet��n parasta l�ydettyï¿?partitiota. - logmlBest = logml; - npopsBest = npops; - partitionBest = PARTITION; - countsBest = COUNTS; - sumCountsBest = SUMCOUNTS; - pop_logmlBest = POP_LOGML; - adjPriorBest = ADJPRIOR; - priorTermBest = PRIORTERM; - sumPriorBest = SUMPRIOR; - logdiffbest = LOGDIFF; - end -end - -logml = logmlBest; -npops = npopsBest; -PARTITION = partitionBest; -COUNTS = countsBest; -SUMCOUNTS = sumCountsBest; -POP_LOGML = pop_logmlBest; -ADJPRIOR = adjPriorBest; -PRIORTERM = priorTermBest; -SUMPRIOR = sumPriorBest; -LOGDIFF = logdiffbest; - -h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); outp = get(h0,'String'); -writeTrainedMixtureInfo(logml, rows, data, outp, inp, ... - suIndNames, suNames, pIndNames, pNames, partitionSummary); - -fiksaaPartitioYksiloTasolle(rows, rowsFromInd); -[data, popnames] = muokkaaMuuttujat(adjprior, rowsFromInd, ... - pNames, suNames, priorPartition, pData, data); -viewMixPartition(PARTITION, popnames); - -talle = questdlg(['Do you want to save the mixture populations ' ... - 'so that you can use them later in admixture analysis?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save results as'); - - if (filename == 0) & (pathname == 0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename '.txt']) - delete('baps4_output.baps') - end - end - - c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; - c.alleleCodes = alleleCodes; c.adjprior = adjprior; - c.rowsFromInd = rowsFromInd; c.popnames = popnames; - c.data = data; c.npops = npops; c.noalle = noalle; - c.mixtureType = 'trained'; -% save([pathname filename], 'c'); - save([pathname filename], 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 -else - if exist('baps4_output.baps','file') - delete('baps4_output.baps') - end -end - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedet��n, ettï¿?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ssï¿?ei vielï¿?ole -% annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja -% nykyistï¿?partitiota vastaava nclusters:in arvo. Muutoin ei tehdï¿?mit��n. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt l�ydetty partitio ei ole vielï¿?kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - -%-------------------------------------------------------------------------- - - - -function [data, popnames] = muokkaaMuuttujat(adjprior, rowsFromInd, ... - pNames, suNames, priorPartition, pData, data) -% Muokkaa kaikki tarvittavat muuttujat mixture result-file -% muotoisiksi. - -global PARTITION; global COUNTS; -global SUMCOUNTS; global ADJPRIOR; -nloci = size(data,2); -npops = size(COUNTS, 3); - -data = [pData(:,1:nloci) ; data]; -PARTITION = [priorPartition; PARTITION]; -priorCounts = ADJPRIOR-repmat(adjprior, [1 1 npops]); -COUNTS = COUNTS+priorCounts; -SUMCOUNTS = (squeeze(sum(COUNTS)))'; - -priorNinds = length(priorPartition); -for k = 1:size(suNames,1) - suNames{k,2} = suNames{k,2} + priorNinds; -end -popnames = [pNames; suNames]; - - -%------------------------------------------------------------------------- - - -function [alleleCodes, noalle, suData, pData] = examineAlleles(suData, pData) -% Poistetaan nollat molemmista datoista. Selvitet��n noalle ja -% alleleCodes ja muutetaan molemmat datat vastaamaan alleleCodes:ia. -% T�ssï¿?vaiheessa datojen viimeinen sarake kertoo yksik�n, jolle -% rivi kuuluu. -data = [pData; suData]; -nrows_prior = size(pData,1); -nloci = size(suData,2)-1; - -dataApu = data(:,1:nloci); %poistetaan nollat -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); %selvitet��n noalle -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end - -alleleCodes = zeros(max(noalle),nloci); %selvitet��n alleleCodes -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci %muutetaan alleelien koodit vastaamaan alleleCodes:ia - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; -pData = data(1:nrows_prior , :); -suData = data(nrows_prior+1:end , :); - - -%---------------------------------------------------------------------- - -function adjprior = computePriors(pData, npops, noalle) -global ADJPRIOR; -global SUMPRIOR; -global PRIORTERM; -nloci = size(pData,2)-1; -max_noalle = max(noalle); -ADJPRIOR = zeros(max_noalle, nloci, npops); -PRIORTERM = zeros(npops, 1); -SUMPRIOR = zeros(npops, nloci); - -adjprior = zeros(max_noalle,nloci); -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; -end - -data = pData(:, 1:nloci); -for i = 1:npops - rivit = find(pData(:,end) == i)'; %Pit�� olla vaakavektori. - if ~isempty(rivit) - diffInCounts = computeDiffInCounts(rivit, max_noalle, nloci, data); - ADJPRIOR(:,:,i) = diffInCounts; - end - ADJPRIOR(:,:,i) = ADJPRIOR(:,:,i) + adjprior; - for j=1:nloci - SUMPRIOR(i,j) = sum(squeeze(ADJPRIOR(1:noalle(j), j , i))); - PRIORTERM(i) = PRIORTERM(i)+gammaln(SUMPRIOR(i,j)); - PRIORTERM(i) = PRIORTERM(i)-sum(gammaln(squeeze(ADJPRIOR(1:noalle(j),j,i)))); - end -end - - -%-------------------------------------------------------------- - -function rows = initializeRows(data) -% Lasketaan rows-muuttuja. T�ssï¿?vaiheessa datan -% viimeisessï¿?sarakkeessa on vielï¿?yksik�n kertova -% indeksi. -nind = max(data(:,end)); -rows = zeros(nind,2); -for i=1:nind - rivit = find(data(:,end)==i)'; - rows(i,1) = min(rivit); - rows(i,2) = max(rivit); -end - - -%---------------------------------------------------------------- - - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; -global ADJPRIOR; ADJPRIOR = []; -global PRIORTERM; PRIORTERM = []; -global SUMPRIOR; SUMPRIOR = []; -global LOGDIFF; LOGDIFF = []; - - -%-------------------------------------------------------------------- - -function [Z,distances] = getPopDistancesByKL(data, rows, noalle, adjprior) -% Laskee populaatioille et�isyydet -% k�ytt�en KL-divergenssiï¿? - -npops = size(rows,1); %Samplin unit:tien lkm -nloci=size(data,2); -maxnoalle = max(noalle); -counts = zeros(maxnoalle,nloci,npops); % Tilap�istï¿?k�ytt�� varten -sumcounts = zeros(npops,nloci); - -for i=1:npops - for j=1:nloci - i_rivit = rows(i,1):rows(i,2); - havainnotLokuksessa = find(data(i_rivit,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(i_rivit,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -distances = zeros(nchoosek(npops,2),1); - -d = zeros(maxnoalle, nloci, npops); -prior = adjprior; -prior(find(prior==1))=0; -nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yhtï¿?alleelia. -prior(1,nollia)=1; - -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(counts(:,:,pop1))+prior) ./ repmat(sum(squeeze(counts(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end -pointer = 1; -for pop1 = 1:npops-1 - for pop2 = pop1+1:npops - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - distances(pointer) = div; - pointer = pointer+1; - end -end -Z=linkage(distances'); - -%-------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------- - - -function logml = initialPopCounts(data, npops, rows, noalle) - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global POP_LOGML; -global ADJPRIOR; -global SUMPRIOR; -nloci=size(data,2); -ninds = size(rows,1); -COUNTS = zeros(max(noalle),nloci,npops); -SUMCOUNTS = zeros(npops,nloci); -PARTITION = zeros(1,ninds); - -inds = 1:ninds; -aputaulu = [inds' rand(ninds,1)]; -aputaulu = sortrows(aputaulu,2); -inds = aputaulu(:,1)'; -%omaPartitio = 1:6; %POIS!!!!!!!! -%omaPartitio = omaPartitio'; -%omaPartitio = omaPartitio(:,ones(30,1)); -%omaPartitio = omaPartitio'; -%omaPartitio = omaPartitio(:); %POIS -%keyboard; -for ind = inds % Sijoitetaan yksil�t yksi kerrallaan. - [muutokset, diffInCounts] = ... - laskePrioriMuutokset(ind, rows, data); - [maxMuutos, i2] = max(muutokset); - %i2 = omaPartitio(ind) %POIS - PARTITION(ind) = i2; - COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - POP_LOGML(i2) = computePopulationLogml(i2); - -end - -logml = laskeLoggis(COUNTS, SUMCOUNTS, ADJPRIOR, SUMPRIOR); - -%keyboard; - -%----------------------------------------------------------------------- - - -function loggis = laskeLoggis(counts, sumcounts, adjprior, sumprior) -npops = size(counts,3); - -logml2 = sum(sum(sum(gammaln(counts+adjprior)))) ... - - sum(sum(sum(gammaln(adjprior)))) ... - - sum(sum(gammaln(sumcounts+sumprior))) ... - + sum(sum(gammaln(sumprior))); -loggis = logml2; - - -%-------------------------------------------------------------------- - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimm�inen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) | isequal(line2,-1) | isequal(line3,-1)) - disp('Incorrect file format'); fclose(fid); - return -end -if (testaaPop(line1)==1 | testaaPop(line2)==1) - disp('Incorrect file format'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi t�ll�in lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin neljï¿?t�ytyy sis�lt�� pilkku. - disp('Incorrect file format'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedet��n, ettï¿?pys�htyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun j�lkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 & ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan j�lkeen - if isequal(line4,-1) - disp('Incorrect file format'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin t�ytyy sis�lt�� pilkku. - disp('Incorrect file format'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedet��n, ettï¿?pys�htyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun j�lkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - -%-------------------------------------------------------------------- - - -function [data, popnames, indnames] = lueGenePopDataPop(tiedostonNimi) -% Data annetaan muodossa, jossa viimeinen sarake kertoo ryhm�n. -% popnames on kuten ennenkin. - -fid = fopen(tiedostonNimi); -line = fgetl(fid); %ensimm�inen rivi -line = fgetl(fid); %toinen rivi -count = rivinSisaltamienMjonojenLkm(line); - -line = fgetl(fid); -lokusRiveja = 1; -while (testaaPop(line)==0) - lokusRiveja = lokusRiveja+1; - line = fgetl(fid); -end - -if lokusRiveja>1 - nloci = lokusRiveja; -else - nloci = count; -end - -popnames = cell(10,2); -indnames = cell(100,1); -data = zeros(100, nloci+1); -nimienLkm=0; -ninds=0; -poimiNimi=1; -digitFormat = -1; -while line ~= -1 - line = fgetl(fid); - - if poimiNimi==1 - %Edellinen rivi oli 'pop' - nimienLkm = nimienLkm+1; - ninds = ninds+1; - if nimienLkm>size(popnames,1); - popnames = [popnames; cell(10,2)]; - end - nimi = lueNimi(line); - if digitFormat == -1 - digitFormat = selvitaDigitFormat(line); - divider = 10^digitFormat; - end - popnames{nimienLkm, 1} = {nimi}; %N�in se on greedyMix:iss�kin?!? - popnames{nimienLkm, 2} = ninds; - poimiNimi=0; - - data = addAlleles(data, ninds, line, divider); - - if ninds>size(indnames,1) - indnames = [indnames; cell(100,1)]; - end - indnames{ninds} = {nimi}; - - elseif testaaPop(line) - poimiNimi = 1; - - elseif line ~= -1 - ninds = ninds+1; - nimi = lueNimi(line); - data = addAlleles(data, ninds, line, divider); - - if ninds>size(indnames,1) - indnames = [indnames; cell(100,1)]; - end - indnames{ninds} = {nimi}; - end -end - -indnames = indnames(1:ninds); -fclose(fid); -data = data(1:ninds*2,:); -popnames = popnames(1:nimienLkm,:); -npops = size(popnames,1); -ind = 1; -for pop = 1:npops - if pop=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - -%------------------------------------------------------------------------ - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, diffInCounts) -% Suorittaa globaalien muuttujien muutokset, kun yksilï¿?ind -% on siirret��n koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2]); - - -%-------------------------------------------------------------------------- -%-- - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2( ... - i1, globalRows, data); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikï¿?olisi -% muutos logml:ssï¿? mik�li korin i1 kaikki yksil�t siirret��n -% koriin i. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1_logml = POP_LOGML(i1); - -inds = find(PARTITION==i1); -ninds = length(inds); - -if ninds==0 - diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); - return; -end - -rows = []; -for ind = inds - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; -end - -diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2( ... - i1, i2, diffInCounts); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksil�t siirret��n koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2); - - -%-------------------------------------------------------------------------- -%---- - -function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... - data, i1) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mikï¿?olisi muutos logml:ssï¿? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirret��n koriin j. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -i1_logml = POP_LOGML(i1); - -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = []; - for ind = inds - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; - end - diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - new_i1_logml = computePopulationLogml(i1); - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - new_i2_logml = computePopulationLogml(i2)'; - COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - - muutokset(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, diffInCounts, i2); -% Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' -% siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2]); - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sis�lt�� yksil�iden inds2 -% v�liset et�isyydet. ninds=kaikkien yksil�iden lukum��rï¿? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - -%-------------------------------------------------------------------------- - - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - - -%----------------------------------------------------------------------------------- - - -function npops = poistaTyhjatPopulaatiot(prioriPopLkm) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista, ADJPRIOR:ista ja SUMPRIOR:ista. -% P�ivitt�� npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global ADJPRIOR; -global SUMPRIOR; -global LOGDIFF; - -notEmpty = union(find(any(SUMCOUNTS,2)) , 1:prioriPopLkm); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); - -ADJPRIOR = ADJPRIOR(:,:,notEmpty); -SUMPRIOR = SUMPRIOR(notEmpty,:); -LOGDIFF = LOGDIFF(:,notEmpty); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile) - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) & ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = 0; -end - - -%------------------------------------------------------------------------- - - -function [popnames2, rowsFromInd] = findOutRowsFromInd(popnames, rows) - -ploidisuus = questdlg('Specify the type of individuals in the data: ',... - 'Individual type?', 'Haploid', 'Diploid', 'Tetraploid', ... - 'Diploid'); - -switch ploidisuus -case 'Haploid' - rowsFromInd = 1; -case 'Diploid' - rowsFromInd = 2; -case 'Tetraploid' - rowsFromInd = 4; -end - -if ~isempty(popnames) - for i = 1:size(rows,1) - popnames2{i,1} = popnames{i,1}; - rivi = rows(i,1):rows(i,2); - popnames2{i,2} = (rivi(rowsFromInd))/rowsFromInd; - end -else - popnames2 = []; -end - -%------------------------------------------------------------------ - -function fiksaaPartitioYksiloTasolle(rows, rowsFromInd) - -global PARTITION; -totalRows = 0; -for ind = 1:size(rows,1) - totalRows = totalRows + (rows(ind,2)-rows(ind,1)+1); -end -partitio2 = zeros(totalRows/rowsFromInd,1); - -for ind = 1:size(rows,1) - kaikkiRivit = rows(ind,1):rows(ind,2); - for riviNumero = rowsFromInd:rowsFromInd:length(kaikkiRivit) - %for riviNumero = rowsFromInd:rowsFromInd:length(rows{ind}) - %rivi = rows{ind}(riviNumero); - rivi = kaikkiRivit(riviNumero); - partitio2(rivi/rowsFromInd) = PARTITION(ind); - end -end -PARTITION = partitio2; - -%--------------------------------------------------------------- - - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n0 - fid = fopen(outPutFile,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF TRAINED MIXTURE ANALYSIS:'); -disp(['Data file: ' inputFile]); -disp(['Number of clustered groups: ' ownNum2Str(ninds)]); -disp(['Number of populations having prior information: ' ownNum2Str(size(pNames,1))]); -disp(['In the optimal partition the samling units were in ' ownNum2Str(n_clust_with_su) ' clusters.']); -disp(['Log(marginal likelihood) of the optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', [' ']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['RESULTS OF TRAINED MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered groups: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of populations having prior information: ' ownNum2Str(size(pNames,1))]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['In the optimal partition the sampling units were in ' ownNum2Str(n_clust_with_su) ' clusters.']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of the optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); - fprintf(fid,'\n'); -end - -%cluster_count = length(unique(PARTITION)); -cluster_count = size(COUNTS,3); -disp(['Best Partition: ']); -if (fid ~= -1) - fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); -end -for m=1:cluster_count - susInM = find(PARTITION==m); %Sampling units in pop m. - text = ['Cluster ' num2str(m) ': {']; - length_of_beginning = 11 + floor(log10(m)); - - if m < size(pNames,1) - % populaatiolle on allokoitu prioriyksil�itï¿? - text = [text '[']; - k = pNames{m,2}; - text = [text pIndNames{k}{1}]; - for k = pNames{m,2}+1:pNames{m+1,2}-1 - text = [text ', ' pIndNames{k}{1}]; - end - text = [text '], ']; - elseif m == size(pNames,1) - text = [text '[']; - k = pNames{m,2}; - text = [text pIndNames{k}{1}]; - for k = pNames{m,2}+1:length(pIndNames) - text = [text ', ' pIndNames{k}{1}]; - end - text = [text '], ']; - end - - cluster_size = length(susInM); - - for k = 1:cluster_size % K�y l�pi m:��n kuuluvat samling unit:it - text = [text '[']; - su = susInM(k); % sampling unit su kuuluu populaatioon m. - ekaNimi = suNames{su,2}; - if su58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',[new_line]); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',[text]); - fprintf(fid,'\n'); - end - end; -end - -if npops > 1 - - disp(' '); - disp(' '); - disp('Changes in log(marginal likelihood) if sampling unit i is moved to cluster j:'); - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if sampling unit i is moved to cluster j:']); fprintf(fid, '\n'); - end - - ekarivi = 'group '; - for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; - end - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); - end - - for ind = 1:ninds - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, data); - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - end - end - - disp(' '); disp(' '); - disp('KL-divergence matrix in PHYLIP format:'); - dist_mat = zeros(npops, npops); - - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); - end - - maxnoalle = size(COUNTS,1); - nloci = size(COUNTS,2); - d = zeros(maxnoalle, nloci, npops); - for pop1 = 1:npops - prior = ADJPRIOR(:,:,pop1); - prior(find(prior==1))=0; - nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yhtï¿?alleelia. - prior(1,nollia)=1; - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); - end -% ekarivi = blanks(7); -% for pop = 1:npops -% ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; -% end - ekarivi = num2str(npops); - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); - end - - for pop1 = 1:npops - rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - dist_mat(pop1,pop2) = div; - end -% disp(rivi); -% if (fid ~= -1) -% fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); -% end - end - -end - -dist_mat = dist_mat + dist_mat'; % make it symmetric -for pop1 = 1:npops - rivi = ['Cluster_' num2str(pop1) ' ']; - for pop2 = 1:npops - rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values:'); - -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values:']); fprintf(fid, '\n'); -end - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end - -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end -end - -disp(' '); -disp(' '); -disp('Probabilities for number of clusters: (#clusters: prob)'); - -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Probabilities for number of clusters: (#clusters: prob)']); fprintf(fid, '\n'); -end - -npopsTaulu = unique(partitionSummary(:,1)); -len = length(npopsTaulu); -probs = zeros(len,1); -partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); -sumtn = sum(exp(partitionSummary(:,2))); -for i=1:len - npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; -end -for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ': ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - -%-------------------------------------------------------------- - -function num2 = omaRound(num) -% Py�rist�� luvun num 1 desimaalin tarkkuuteen -num = num*10; -num = round(num); -num2 = num/10; - -%--------------------------------------------------------- - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:inï¿? -% yks t�ytyy olla kokonaisluku, joka on -% v�hint��n -1:n suuruinen. Pienemmillï¿? -% luvuilla tapahtuu jokin py�ristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - - -%----------------------------------------------- - - -function ninds = testaaOnkoKunnollinenBapsData(data) -%Tarkastaa onko viimeisessï¿?sarakkeessa kaikki -%luvut 1,2,...,n johonkin n:��n asti. -%Tarkastaa lis�ksi, ettï¿?on v�hint��n 2 saraketta. -if size(data,1)<2 - ninds = 0; return; -end -lastCol = data(:,end); -ninds = max(lastCol); -if ~isequal((1:ninds)',unique(lastCol)) - ninds = 0; return; -end - - - -%-------------------------------------------------------------------------- -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). - -% added by Lu Cheng, without modification, 16.02.2010 - -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -% dataApu = []; -% nollat = []; -% isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - %alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(logical(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - % data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - data(logical(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); % Tält?yksilölt?puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%------------------------------------------------------------------------- - -function [Z, dist] = newGetDistances(data, rowsFromInd) - -ninds = max(data(:,end)); -nloci = size(data,2)-1; -riviLkm = nchoosek(double(ninds),2); - -% empties = find(data<0); -% data(empties)=0; -data(logical(data<0)) = 0; -data = uint16(data); - -pariTaulu = zeros(riviLkm,2); -aPointer=1; - -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+double(ninds-1-a),1) = ones(ninds-a,1,'uint16')*a; - pariTaulu(aPointer:aPointer+double(ninds-1-a),2) = uint16((a+1:ninds)'); - aPointer = aPointer+double(ninds-a); -end - -eka = pariTaulu(:,ones(1,rowsFromInd)); -eka = eka * rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -eka = eka - miinus; - -toka = pariTaulu(:,ones(1,rowsFromInd)*2); -toka = toka * rowsFromInd; -toka = toka - miinus; - -eka = uint16(eka); -toka = uint16(toka); - -clear pariTaulu; clear miinus; - -summa = uint16(zeros(riviLkm,1)); -vertailuja = uint16(zeros(riviLkm,1)); - -x = zeros(size(eka)); x = uint16(x); -y = zeros(size(toka)); y = uint16(y); -% fprintf(1,'%%10'); -for j=1:nloci; - - for k=1:rowsFromInd - x(:,k) = data(eka(:,k),j); - y(:,k) = data(toka(:,k),j); - end - - for a=1:rowsFromInd - for b=1:rowsFromInd - vertailutNyt = uint16(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa + uint16(lisays); - end - end - % fprintf(1,'\b\b'); - % fprintf(1,'%d',floor(10+80*j/nloci)); -end - -clear x; clear y; clear vertailutNyt; -clear eka; clear toka; clear data; clear lisays; -dist = zeros(length(vertailuja),1); -% nollat = find(vertailuja==0); -% dist(nollat) = 1; -dist(logical(vertailuja==0)) = 1; -muut = find(vertailuja>0); -dist(muut) = double(summa(muut))./double(vertailuja(muut)); -clear summa; clear vertailuja; clear muut; - -Z = computeLinkage(dist'); \ No newline at end of file diff --git a/matlab/linkage/CSEFlagDialog.m b/matlab/linkage/CSEFlagDialog.m deleted file mode 100644 index c03b0e6..0000000 --- a/matlab/linkage/CSEFlagDialog.m +++ /dev/null @@ -1,404 +0,0 @@ -function out = CSEFlagDialog(items, varargin) -%CSEFLAGDIALOG makes a GUI dialog to select from options of diverse types -% (checkboxes, radiobuttons, text-inputs and popup lists). Some options can -% be linked, i.e. be mutually exclusive or be only enabled according to the -% value of another option. -% It is an extension of CSEOptionDialog. -% -% out = CSEFlagDialog(items) -% out = CSEFlagDialog(items, title) -% out = CSEFlagDialog(items, title, msg) -% out = CSEFlagDialog(items, title, msg, options) -% -% items is an array structure containing the options, in the following fields: -% .name is a string (the name of the option) -% .values is a cell array of possible answer. -% if empty, it is considered to contain "Yes" and "No", i.e. -% the option is a checkbox or a radio-button. -% if it contains only one numeric element, it is considered a -% header, and not an active control. -% if it contains only one text element, it is considered to be a -% text input field, with the .values as default value. -% Otherwise, it is a popup list with the choices given in values. -% .linked [optional field] is an array of index in items indicating which -% options are linked with the option. Linked options -% will be grayed out when the option is set to "No". -% If .linked contains negative elements, those will be grayed -% out when the option is set to "Yes". -% .exclusive [optional field] is an array of index in items indicating which -% options are mutually exclusive with each option. When the option -% is set to "Yes", mutually exclusive options are set to "No". If -% the field does not exist, or is empty, the control is a checkbox -% otherwise it is a radio-button. -% Both .linked and .exclusive behaviour are only implemented for -% "Yes"/"No" fields - but any field can be in "linked" and will -% be grayed out. -% .default [optional field] is an integer indicating the default value for -% each option (0: No, 1: Yes, -1: grayed out). Note that in the -% case of text input field, this field is irrelevant and the default -% is given in the "values" field; In the case of popup lists, the -% default is an index in .values, or -1 (grayed out). -% "Linked" and "Exclusive" are NOT evaluated in the initial layout -% of the dialog, hence the default must be consequent with the rules, -% e.g. linked fields of a "No" field must be grayed out. -% .indent [optional field] allows an indentation (proportional to the value -% of this field) from the position of the control. May be used -% to graphically make "groups" as no "frames" are used. -% .help [optional field] contains tooltips help texts. Can contain -% "\n" to make multi-line help fields. -% title is the window title (string) -% msg is a string appearing on the top of the dialog -% options is an optional structure containing options: -% .center = 0|1 (center text msg) -% .bold = 'light'|'normal'|'demi'|'bold' -% indicates how headers (see .values) must be printed out. -% .fixed = 0|1: FixedWidth font for lists -% [more to come in future versions] -% -% The controls will be display in the order they appear in items. -% -% out contains an array of structure of answers: -% out(i).answer = value of the control. -% values are: -% 1 or 0 for "Yes"/"No" controls, -% the index of the chosen item for list controls. -% the text given for text inputs. -% out is empty if cancel was chosen, negative integer if error. -% -% Written by L.Cavin, 07.12.2003, (c) CSE -% This code is free to use and modify for non-commercial purposes. -% Web address: http://ltcmail.ethz.ch/cavin/CSEDBLib.html#FLAGDIALOG -% -% ===================================================================== -% An example of usage is: (in items, elements not mentionned are empty; -% e.g. items(1).linked is empty). -% items(1).name = 'Contact:'; -% items(1).default = 0; -% items(1).values = {'email@address'}; -% items(1).help = 'Enter your email address.'; -% items(2).name = 'I will be coming!'; -% items(2).default = 1; -% items(2).linked = [3 4 5 6]; -% items(3).name = 'With my family'; -% items(3).default = 1; -% items(3).exclusive = 4; -% items(3).indent = 1; -% items(4).name = 'Alone'; -% items(4).default = 0; -% items(4).exclusive = 3; -% items(4).indent = 1; -% items(5).name = 'Transportation:'; -% items(5).indent = 1; -% items(5).values = {1}; -% items(6).name = 'Coming by'; -% items(6).default = 1; -% items(6).indent = 2; -% items(6).values = {'Train'; 'Bus'; 'Foot'}; -% items(6).help = 'Cars are polluting.\nUse public transportation whenever possible!'; -% items(7).name = 'I''ll sure give a phone call!'; -% items(7).default = 0; -% -% title = 'Birthday party incsription'; -% -% msg = sprintf(['Dear friends,\nAs you all know, I am turning 30 next april.\nThis ' ... -% 'seems a worthy occasion to party a bit!\n\nWill you be able to ' ... -% 'attend?']); -% -% out = CSEFlagDialog(items, title, msg) - - -persistent handles; - -if ischar(items) - % this ain't no cell, so must be a callback - callback_type = items; - out = []; -elseif ~isa(items, 'struct') - % this is an error - warn('CSE:Dialog', 'The first parameter must be a stucture of items.'); - out = -1; - return; -else - % we must generate the dialog - callback_type = 'create_dialog'; -end - -% first we do the callbacks, then we handle the initial call. -switch callback_type - case 'item_click' % this is an action on a control - ctrl_idx = get(gcbo, 'UserData'); - % should we "zero" other controls? - if ~isempty(handles.items(ctrl_idx).exclusive) & get(gcbo, 'Value')==1 - for i = 1:length(handles.items(ctrl_idx).exclusive) - set(handles.items_obj(handles.items(ctrl_idx).exclusive(i)), 'Value', 0); - end - end - % should we disable or enable other elements - if ~isempty(handles.items(ctrl_idx).linked) - if get(gcbo, 'Value')==1 - % enable - en_st = 'on'; - ds_st = 'off'; - else - % disable - en_st = 'off'; - ds_st = 'on'; - end - for i = 1:length(handles.items(ctrl_idx).linked) - tp = handles.items(ctrl_idx).linked(i); - if tp > 0 - set(handles.items_obj(tp), 'Enable', en_st); - if handles.head_obj(tp) > 0 - set(handles.head_obj(tp), 'Enable', en_st); - end - else - tp = -1 * tp; - set(handles.items_obj(tp), 'Enable', ds_st); - if handles.head_obj(tp) > 0 - set(handles.head_obj(tp), 'Enable', ds_st); - end - end - end - end - case 'ok_click' % this is a click on the OK button - for i = 1:length(handles.items_obj) - if strcmp(get(handles.items_obj(i), 'Style'), 'edit') - out(i).answer = cell2mat(get(handles.items_obj(i), 'String')); - else - out(i).answer = get(handles.items_obj(i), 'Value'); - end - end - set(gcbo,'userdata',out); - case 'cancel_click' % click on cancel button - out = []; - if ~isempty(handles) - set(handles.ok_button,'userdata',out); - else handles = get(0,'CurrentFigure'); - delete(handles); - end - case 'create_dialog' % now this is actually the initial call - options = []; - msg = ''; - title = 'CSE Flag Dialog'; - if nargin > 1 - title = varargin{1}; - if nargin > 2 - msg = varargin{2}; - if nargin > 3 - options = varargin{3}; - end - end - end - handles.options = options; - % precompute width and height of dialog: - % width is 10 + the longest chain of characters + 10; - % height is 1 + the number of lines in msg + 2 + number of items*2 - % + 2 + 2 + 2 (sum: ... + 9) - longest_chain = 0; - item_length = 0; - for i = 1:length(items) - if ~isfield(items(i), 'indent') | isempty(items(i).indent) - items(i).indent = 0; - end - if ~isfield(items(i), 'help') - items(i).help = []; - end - if ~isfield(items(i), 'linked') - items(i).linked = []; - end - if ~isfield(items(i), 'default') - items(i).default = []; - end - if ~isfield(items(i), 'exclusive') - items(i).exclusive = []; - end - val_length = 0; - if length(items(i).values) > 0 - for j = 1:length(items(i).values) - if length(items(i).values{j}) > val_length - val_length = length(items(i).values{j}); - end - end - if length(items(i).values) == 1 - val_length = val_length + 10; - end - end - items(i).mxlgt = val_length; - if length(items(i).name)+val_length+items(i).indent*4 > item_length - item_length = length(items(i).name)+val_length+items(i).indent*4; - end - if length(items(i).name)+val_length+items(i).indent*4 > longest_chain-10 - longest_chain = length(items(i).name)+10+val_length+items(i).indent*4; - end - end - handles.items = items; - % a = [regexp(msg, '\n') length(msg)]; % for > R13 - a = [strfind(msg,sprintf('\n')) length(msg)]; % for R12 - for i = 2:length(a) - if a(i)-a(i-1) > longest_chain - longest_chain = a(i)-a(i-1); - end - end - dial_width = max(longest_chain + 20, 50); - if isfield(handles.options, 'fixed') - % much wider on average... - dial_width = dial_width * 1.25; - end - dial_height = 9 + length(a)-1 + length(items)*2; - if length(msg) == 0 - dial_height = dial_height -2; - end - item_length = (dial_width-item_length-10)/2; - - % A) create window: (invisible, for now) - handles.dialog = dialog( 'Visible', 'off', ... - 'Units', 'characters', ... - 'Position', [10 10 dial_width dial_height], ... - 'CloseRequestFcn', 'CSEFlagDialog(''cancel_click'');', ... - 'WindowStyle', 'modal', ... - 'Name', title); - % COMPATIBILITY TOWARDS R12: repeating instructions for safety: - set(handles.dialog, 'Units', 'characters'); - set(handles.dialog, 'Position', [10 10 dial_width dial_height]); - - % B) create buttons: - handles.ok_button = uicontrol( 'Units', 'characters', ... - 'Parent', handles.dialog, ... - 'Position', [dial_width/2-20 2 10 2], ... - 'Callback', 'CSEFlagDialog(''ok_click'');', ... - 'String', 'OK' ); - handles.cancel_button = uicontrol( 'Units', 'characters', ... - 'Parent', handles.dialog, ... - 'Position', [dial_width/2+10 2 10 2], ... - 'Callback', 'CSEFlagDialog(''cancel_click'');', ... - 'String', 'Cancel' ); - - % C) create items: - for i = 1:length(items) - pos = length(items)-i+1; - if isempty(items(i).values) - ftype = 'radiobutton'; - if isempty(items(i).exclusive) - ftype = 'checkbox'; - end - handles.head_obj(i) = -1; - handles.items_obj(i) = uicontrol( 'Style', ftype, ... - 'Parent', handles.dialog, ... - 'Units', 'characters', ... - 'Position', [items(i).indent*4+item_length 4+2*pos length(items(i).name)+10 1], ... - 'Callback', 'CSEFlagDialog(''item_click'');', ... - 'UserData', i, ... - 'String', items(i).name ); - if ~isempty(items(i).default) & items(i).default > 0 - if strcmp(get(handles.items_obj(i), 'Style'), 'popupmenu') - if items(i).default > length(get(handles.items_obj(i), 'String')) - warn('CSE:Dialog', 'Initial value impossible for listbox %s.', items(i).name); - items(i).default = length(get(handles.items_obj(i), 'String')); - end - end - set(handles.items_obj(i), 'Value', items(i).default); - end - else - if length(items(i).values) == 1 - if isnumeric(items(i).values{1}) - ftype = 'none'; - else - ftype = 'edit'; - end - else - ftype = 'popupmenu'; - end - % needs a "header": - handles.head_obj(i) = uicontrol( 'Style', 'text', ... - 'Parent', handles.dialog, ... - 'Units', 'characters', ... - 'Position', [items(i).indent*4+item_length 4+2*pos (length(items(i).name)+2)*1.5 1], ... - 'String', items(i).name, ... - 'HorizontalAlignment', 'left'); - if ~strcmp(ftype, 'none') - handles.items_obj(i) = uicontrol( 'Style', ftype, ... - 'Parent', handles.dialog, ... - 'BackgroundColor', 'white', ... - 'Units', 'characters', ... - 'Position', [items(i).indent*4+item_length+length(items(i).name)+3 4+2*pos-0.2 items(i).mxlgt+10 1.4], ... - 'Callback', 'CSEFlagDialog(''item_click'');', ... - 'UserData', i, ... - 'String', items(i).values, ... - 'HorizontalAlignment', 'left' ); - if ~isempty(items(i).default) & items(i).default > 0 - if strcmp(get(handles.items_obj(i), 'Style'), 'popupmenu') - if items(i).default > length(get(handles.items_obj(i), 'String')) - warn('CSE:Dialog', 'Initial value impossible for listbox %s.', items(i).name); - items(i).default = length(get(handles.items_obj(i), 'String')); - end - end - set(handles.items_obj(i), 'Value', items(i).default); - end - if isfield(handles.options, 'fixed') - set(handles.items_obj(i), 'FontName', 'FixedWidth'); - set(handles.items_obj(i), 'Position', [items(i).indent*4+item_length+length(items(i).name)+3 4+2*pos-0.2 (items(i).mxlgt+10)*1.3 1.4]); - end - else - % was actually the "header" :-) - if isfield(handles.options, 'bold') - bld = handles.options.bold; - else - bld = 'normal'; - end - set(handles.head_obj(i), 'FontWeight', bld); - handles.items_obj(i) = handles.head_obj(i); - end - end - if ~isempty(items(i).default) & items(i).default < 0 - set(handles.head_obj(i), 'Enable', 'off'); % RECENTLY ADDED - set(handles.items_obj(i), 'Enable', 'off'); - end - if ~isempty(items(i).help) - set(handles.items_obj(i), 'Tooltip', sprintf(items(i).help)); - end - end - - % D) create message: - if length(msg) > 0 - if isfield(handles.options, 'center') & handles.options.center == 1 - algn = 'center'; - else - algn = 'left'; - end - handles.text = uicontrol( 'Units', 'characters', ... - 'HorizontalAlignment', algn, ... - 'Style', 'text', ... - 'Parent', handles.dialog, ... - 'Position', [5 dial_height-length(a)-1 longest_chain+10 length(a)], ... - 'String', msg ); - end - - % E) "center" and show dialog: - movegui(handles.dialog, 'center'); - set(handles.dialog, 'Visible', 'on'); - drawnow % NB! for R14 - % F) run dialog: - waitfor(handles.ok_button,'userdata'); - - % G) finish call: - out = get(handles.ok_button,'userdata'); - delete(handles.dialog); - clear handles; - - otherwise - % this is a mistake - warn('CSE:Dialog', 'Unknown Callback. Even if there is only one option, it must be passed as a cell.'); - out = -1; -end - - -%=====HELPER FUNCTION -function warn(tag, msg, varargin) -% patch for the warning function to make R13 calls compatible with R12 - -if str2num(version('-release')) < 13 - warning(sprintf(msg, varargin{:})); -else - warning(tag, msg, varargin{:}); -end - \ No newline at end of file diff --git a/matlab/linkage/CSEFlagDialog1.m b/matlab/linkage/CSEFlagDialog1.m deleted file mode 100644 index f7274c7..0000000 --- a/matlab/linkage/CSEFlagDialog1.m +++ /dev/null @@ -1,404 +0,0 @@ -function out = CSEFlagDialog1(items, varargin) -%CSEFLAGDIALOG makes a GUI dialog to select from options of diverse types -% (checkboxes, radiobuttons, text-inputs and popup lists). Some options can -% be linked, i.e. be mutually exclusive or be only enabled according to the -% value of another option. -% It is an extension of CSEOptionDialog. -% -% out = CSEFlagDialog(items) -% out = CSEFlagDialog(items, title) -% out = CSEFlagDialog(items, title, msg) -% out = CSEFlagDialog(items, title, msg, options) -% -% items is an array structure containing the options, in the following fields: -% .name is a string (the name of the option) -% .values is a cell array of possible answer. -% if empty, it is considered to contain "Yes" and "No", i.e. -% the option is a checkbox or a radio-button. -% if it contains only one numeric element, it is considered a -% header, and not an active control. -% if it contains only one text element, it is considered to be a -% text input field, with the .values as default value. -% Otherwise, it is a popup list with the choices given in values. -% .linked [optional field] is an array of index in items indicating which -% options are linked with the option. Linked options -% will be grayed out when the option is set to "No". -% If .linked contains negative elements, those will be grayed -% out when the option is set to "Yes". -% .exclusive [optional field] is an array of index in items indicating which -% options are mutually exclusive with each option. When the option -% is set to "Yes", mutually exclusive options are set to "No". If -% the field does not exist, or is empty, the control is a checkbox -% otherwise it is a radio-button. -% Both .linked and .exclusive behaviour are only implemented for -% "Yes"/"No" fields - but any field can be in "linked" and will -% be grayed out. -% .default [optional field] is an integer indicating the default value for -% each option (0: No, 1: Yes, -1: grayed out). Note that in the -% case of text input field, this field is irrelevant and the default -% is given in the "values" field; In the case of popup lists, the -% default is an index in .values, or -1 (grayed out). -% "Linked" and "Exclusive" are NOT evaluated in the initial layout -% of the dialog, hence the default must be consequent with the rules, -% e.g. linked fields of a "No" field must be grayed out. -% .indent [optional field] allows an indentation (proportional to the value -% of this field) from the position of the control. May be used -% to graphically make "groups" as no "frames" are used. -% .help [optional field] contains tooltips help texts. Can contain -% "\n" to make multi-line help fields. -% title is the window title (string) -% msg is a string appearing on the top of the dialog -% options is an optional structure containing options: -% .center = 0|1 (center text msg) -% .bold = 'light'|'normal'|'demi'|'bold' -% indicates how headers (see .values) must be printed out. -% .fixed = 0|1: FixedWidth font for lists -% [more to come in future versions] -% -% The controls will be display in the order they appear in items. -% -% out contains an array of structure of answers: -% out(i).answer = value of the control. -% values are: -% 1 or 0 for "Yes"/"No" controls, -% the index of the chosen item for list controls. -% the text given for text inputs. -% out is empty if cancel was chosen, negative integer if error. -% -% Written by L.Cavin, 07.12.2003, (c) CSE -% This code is free to use and modify for non-commercial purposes. -% Web address: http://ltcmail.ethz.ch/cavin/CSEDBLib.html#FLAGDIALOG -% -% ===================================================================== -% An example of usage is: (in items, elements not mentionned are empty; -% e.g. items(1).linked is empty). -% items(1).name = 'Contact:'; -% items(1).default = 0; -% items(1).values = {'email@address'}; -% items(1).help = 'Enter your email address.'; -% items(2).name = 'I will be coming!'; -% items(2).default = 1; -% items(2).linked = [3 4 5 6]; -% items(3).name = 'With my family'; -% items(3).default = 1; -% items(3).exclusive = 4; -% items(3).indent = 1; -% items(4).name = 'Alone'; -% items(4).default = 0; -% items(4).exclusive = 3; -% items(4).indent = 1; -% items(5).name = 'Transportation:'; -% items(5).indent = 1; -% items(5).values = {1}; -% items(6).name = 'Coming by'; -% items(6).default = 1; -% items(6).indent = 2; -% items(6).values = {'Train'; 'Bus'; 'Foot'}; -% items(6).help = 'Cars are polluting.\nUse public transportation whenever possible!'; -% items(7).name = 'I''ll sure give a phone call!'; -% items(7).default = 0; -% -% title = 'Birthday party incsription'; -% -% msg = sprintf(['Dear friends,\nAs you all know, I am turning 30 next april.\nThis ' ... -% 'seems a worthy occasion to party a bit!\n\nWill you be able to ' ... -% 'attend?']); -% -% out = CSEFlagDialog(items, title, msg) - - -persistent handles; - -if ischar(items) - % this ain't no cell, so must be a callback - callback_type = items; - out = []; -elseif ~isa(items, 'struct') - % this is an error - warn('CSE:Dialog', 'The first parameter must be a stucture of items.'); - out = -1; - return; -else - % we must generate the dialog - callback_type = 'create_dialog'; -end - -% first we do the callbacks, then we handle the initial call. -switch callback_type - case 'item_click' % this is an action on a control - ctrl_idx = get(gcbo, 'UserData'); - % should we "zero" other controls? - if ~isempty(handles.items(ctrl_idx).exclusive) & get(gcbo, 'Value')==1 - for i = 1:length(handles.items(ctrl_idx).exclusive) - set(handles.items_obj(handles.items(ctrl_idx).exclusive(i)), 'Value', 0); - end - end - % should we disable or enable other elements - if ~isempty(handles.items(ctrl_idx).linked) - if get(gcbo, 'Value')==1 || get(gcbo, 'Value')==2 - % enable - en_st = 'on'; - ds_st = 'off'; - else - % disable - en_st = 'off'; - ds_st = 'on'; - end - for i = 1:length(handles.items(ctrl_idx).linked) - tp = handles.items(ctrl_idx).linked(i); - if tp > 0 - set(handles.items_obj(tp), 'Enable', en_st); - if handles.head_obj(tp) > 0 - set(handles.head_obj(tp), 'Enable', en_st); - end - else - tp = -1 * tp; - set(handles.items_obj(tp), 'Enable', ds_st); - if handles.head_obj(tp) > 0 - set(handles.head_obj(tp), 'Enable', ds_st); - end - end - end - end - case 'ok_click' % this is a click on the OK button - for i = 1:length(handles.items_obj) - if strcmp(get(handles.items_obj(i), 'Style'), 'edit') - out(i).answer = cell2mat(get(handles.items_obj(i), 'String')); - else - out(i).answer = get(handles.items_obj(i), 'Value'); - end - end - set(gcbo,'userdata',out); - case 'cancel_click' % click on cancel button - out = []; - if ~isempty(handles) - set(handles.ok_button,'userdata',out); - else handles = get(0,'CurrentFigure'); - delete(handles); - end - case 'create_dialog' % now this is actually the initial call - options = []; - msg = ''; - title = 'CSE Flag Dialog'; - if nargin > 1 - title = varargin{1}; - if nargin > 2 - msg = varargin{2}; - if nargin > 3 - options = varargin{3}; - end - end - end - handles.options = options; - % precompute width and height of dialog: - % width is 10 + the longest chain of characters + 10; - % height is 1 + the number of lines in msg + 2 + number of items*2 - % + 2 + 2 + 2 (sum: ... + 9) - longest_chain = 0; - item_length = 0; - for i = 1:length(items) - if ~isfield(items(i), 'indent') | isempty(items(i).indent) - items(i).indent = 0; - end - if ~isfield(items(i), 'help') - items(i).help = []; - end - if ~isfield(items(i), 'linked') - items(i).linked = []; - end - if ~isfield(items(i), 'default') - items(i).default = []; - end - if ~isfield(items(i), 'exclusive') - items(i).exclusive = []; - end - val_length = 0; - if length(items(i).values) > 0 - for j = 1:length(items(i).values) - if length(items(i).values{j}) > val_length - val_length = length(items(i).values{j}); - end - end - if length(items(i).values) == 1 - val_length = val_length + 10; - end - end - items(i).mxlgt = val_length; - if length(items(i).name)+val_length+items(i).indent*4 > item_length - item_length = length(items(i).name)+val_length+items(i).indent*4; - end - if length(items(i).name)+val_length+items(i).indent*4 > longest_chain-10 - longest_chain = length(items(i).name)+10+val_length+items(i).indent*4; - end - end - handles.items = items; - % a = [regexp(msg, '\n') length(msg)]; % for > R13 - a = [strfind(msg,sprintf('\n')) length(msg)]; % for R12 - for i = 2:length(a) - if a(i)-a(i-1) > longest_chain - longest_chain = a(i)-a(i-1); - end - end - dial_width = max(longest_chain + 20, 50); - if isfield(handles.options, 'fixed') - % much wider on average... - dial_width = dial_width * 1.25; - end - dial_height = 9 + length(a)-1 + length(items)*2; - if length(msg) == 0 - dial_height = dial_height -2; - end - item_length = (dial_width-item_length-10)/2; - - % A) create window: (invisible, for now) - handles.dialog = dialog( 'Visible', 'off', ... - 'Units', 'characters', ... - 'Position', [10 10 dial_width dial_height], ... - 'CloseRequestFcn', 'CSEFlagDialog(''cancel_click'');', ... - 'WindowStyle', 'modal', ... - 'Name', title); - % COMPATIBILITY TOWARDS R12: repeating instructions for safety: - set(handles.dialog, 'Units', 'characters'); - set(handles.dialog, 'Position', [10 10 dial_width dial_height]); - - % B) create buttons: - handles.ok_button = uicontrol( 'Units', 'characters', ... - 'Parent', handles.dialog, ... - 'Position', [dial_width/2-20 2 10 2], ... - 'Callback', 'CSEFlagDialog1(''ok_click'');', ... - 'String', 'OK' ); - handles.cancel_button = uicontrol( 'Units', 'characters', ... - 'Parent', handles.dialog, ... - 'Position', [dial_width/2+10 2 10 2], ... - 'Callback', 'CSEFlagDialog1(''cancel_click'');', ... - 'String', 'Cancel' ); - - % C) create items: - for i = 1:length(items) - pos = length(items)-i+1; - if isempty(items(i).values) - ftype = 'radiobutton'; - if isempty(items(i).exclusive) - ftype = 'checkbox'; - end - handles.head_obj(i) = -1; - handles.items_obj(i) = uicontrol( 'Style', ftype, ... - 'Parent', handles.dialog, ... - 'Units', 'characters', ... - 'Position', [items(i).indent*4+item_length 4+2*pos length(items(i).name)+10 1], ... - 'Callback', 'CSEFlagDialog1(''item_click'');', ... - 'UserData', i, ... - 'String', items(i).name ); - if ~isempty(items(i).default) & items(i).default > 0 - if strcmp(get(handles.items_obj(i), 'Style'), 'popupmenu') - if items(i).default > length(get(handles.items_obj(i), 'String')) - warn('CSE:Dialog', 'Initial value impossible for listbox %s.', items(i).name); - items(i).default = length(get(handles.items_obj(i), 'String')); - end - end - set(handles.items_obj(i), 'Value', items(i).default); - end - else - if length(items(i).values) == 1 - if isnumeric(items(i).values{1}) - ftype = 'none'; - else - ftype = 'edit'; - end - else - ftype = 'popupmenu'; - end - % needs a "header": - handles.head_obj(i) = uicontrol( 'Style', 'text', ... - 'Parent', handles.dialog, ... - 'Units', 'characters', ... - 'Position', [items(i).indent*4+item_length 4+2*pos (length(items(i).name)+2)*1.5 1], ... - 'String', items(i).name, ... - 'HorizontalAlignment', 'left'); - if ~strcmp(ftype, 'none') - handles.items_obj(i) = uicontrol( 'Style', ftype, ... - 'Parent', handles.dialog, ... - 'BackgroundColor', 'white', ... - 'Units', 'characters', ... - 'Position', [items(i).indent*4+item_length+length(items(i).name)+3 4+2*pos-0.2 items(i).mxlgt+10 1.4], ... - 'Callback', 'CSEFlagDialog1(''item_click'');', ... - 'UserData', i, ... - 'String', items(i).values, ... - 'HorizontalAlignment', 'left' ); - if ~isempty(items(i).default) & items(i).default > 0 - if strcmp(get(handles.items_obj(i), 'Style'), 'popupmenu') - if items(i).default > length(get(handles.items_obj(i), 'String')) - warn('CSE:Dialog', 'Initial value impossible for listbox %s.', items(i).name); - items(i).default = length(get(handles.items_obj(i), 'String')); - end - end - set(handles.items_obj(i), 'Value', items(i).default); - end - if isfield(handles.options, 'fixed') - set(handles.items_obj(i), 'FontName', 'FixedWidth'); - set(handles.items_obj(i), 'Position', [items(i).indent*4+item_length+length(items(i).name)+3 4+2*pos-0.2 (items(i).mxlgt+10)*1.3 1.4]); - end - else - % was actually the "header" :-) - if isfield(handles.options, 'bold') - bld = handles.options.bold; - else - bld = 'normal'; - end - set(handles.head_obj(i), 'FontWeight', bld); - handles.items_obj(i) = handles.head_obj(i); - end - end - if ~isempty(items(i).default) & items(i).default < 0 - set(handles.head_obj(i), 'Enable', 'off'); % RECENTLY ADDED - set(handles.items_obj(i), 'Enable', 'off'); - end - if ~isempty(items(i).help) - set(handles.items_obj(i), 'Tooltip', sprintf(items(i).help)); - end - end - - % D) create message: - if length(msg) > 0 - if isfield(handles.options, 'center') & handles.options.center == 1 - algn = 'center'; - else - algn = 'left'; - end - handles.text = uicontrol( 'Units', 'characters', ... - 'HorizontalAlignment', algn, ... - 'Style', 'text', ... - 'Parent', handles.dialog, ... - 'Position', [5 dial_height-length(a)-1 longest_chain+10 length(a)], ... - 'String', msg ); - end - - % E) "center" and show dialog: - movegui(handles.dialog, 'center'); - set(handles.dialog, 'Visible', 'on'); - drawnow % NB! for R14 - % F) run dialog: - waitfor(handles.ok_button,'userdata'); - - % G) finish call: - out = get(handles.ok_button,'userdata'); - delete(handles.dialog); - clear handles; - - otherwise - % this is a mistake - warn('CSE:Dialog', 'Unknown Callback. Even if there is only one option, it must be passed as a cell.'); - out = -1; -end - - -%=====HELPER FUNCTION -function warn(tag, msg, varargin) -% patch for the warning function to make R13 calls compatible with R12 - -if str2num(version('-release')) < 13 - warning(sprintf(msg, varargin{:})); -else - warning(tag, msg, varargin{:}); -end - \ No newline at end of file diff --git a/matlab/linkage/allfreqsnew.m b/matlab/linkage/allfreqsnew.m deleted file mode 100644 index 086b52a..0000000 --- a/matlab/linkage/allfreqsnew.m +++ /dev/null @@ -1,63 +0,0 @@ -function [counts,noalle,prior,adjprior,rawalleles,rawdata] = allfreqsnew(rawdata) -% Filename: allfreqsnew.m -% [counts,noalle,prior,adjprior,rawalleles] = allfreqsnew(rawdata) -% -% Description: -% rawdata has n rows (2 x #individuals) and n(l) first -% colums are the loci. The last column is the subpopindex -% prior is a created matrix of positive Dirichlet hyperparameters -% missing data is filtered out -% !!!NEW!!!zeros are accepted as allele codes and any negative numbers as missing data. - -% Modified by: Jing Tang - -SCALE = 1; -dime=size(rawdata); -noalle=zeros(dime(2)-1,1); -rawalleles=cell(1,dime(2)-1); -for i=1:dime(2)-1 - noalle(i)=length(unique(rawdata(:,i))); -end -for i=1:dime(2)-1 - if length(find(rawdata(:,i)<=0))>0 % filtering out the missing data - noalle(i)=noalle(i)-1; - end -end -for i=1:dime(2)-1 - rawalles=unique(rawdata(:,i)); - if rawalles(1)<=0 - rawalles(1)=-999; - end - rawalleles{i} = rawalles; % rawalleles!!! - if rawalles(1)<=0 - for j=2:noalle(i)+1 - rawdata(find(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*(j-1); - end - else - for j=1:noalle(i) - rawdata(find(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*j; - end -end -end - - -counts=zeros(max(noalle),dime(2)-1,max(rawdata(:,dime(2)))); - -for i=1:dime(1) - for j=1:dime(2)-1 - if rawdata(i,j)>0 - counts(rawdata(i,j),j,rawdata(i,dime(2)))=... - counts(rawdata(i,j),j,rawdata(i,dime(2)))+1; - end - end -end - -prior=[]; -for i=1:dime(2)-1 - prior=[prior [SCALE*ones(noalle(i),1)/noalle(i);zeros(max(noalle)-noalle(i),1)]]; -end - -adjprior=prior; -for i=1:dime(2)-1 - adjprior(:,i)=adjprior(:,i)+[zeros(noalle(i),1);ones(max(noalle)-noalle(i),1)]; -end diff --git a/matlab/linkage/allfreqsnew2.m b/matlab/linkage/allfreqsnew2.m deleted file mode 100644 index 6746391..0000000 --- a/matlab/linkage/allfreqsnew2.m +++ /dev/null @@ -1,89 +0,0 @@ -function [counts,noalle_est,prior,adjprior,rawalleles] = allfreqsnew2(rawdata, noalle_est) -% Filename: allfreqsnew2.m -% [counts,noalle,prior,adjprior,rawalleles] = allfreqsnew(rawdata) -% -% Description: -% rawdata has n rows (2 x #individuals) and n(l) first -% colums are the loci. The last column is the subpopindex -% prior is a created matrix of positive Dirichlet hyperparameters -% missing data is filtered out -% !!!NEW!!!zeros are accepted as allele codes and any negative numbers as missing data. - -% Modified by: Jing Tang -SCALE = 1; -dime=size(rawdata); -noalle=zeros(dime(2)-1,1); -rawalleles=cell(1,dime(2)-1); -for i=1:dime(2)-1 - noalle(i)=length(unique(rawdata(:,i))); -end -for i=1:dime(2)-1 - if length(find(rawdata(:,i)<=0))>0 - noalle(i)=noalle(i)-1; - end -end - -% Fomulate the raw data such that the value i in a entry denotes the ith -% alleles. -for i=1:dime(2)-1 - [rawalles ix iy]=unique(rawdata(:,i)); %modified by Lu Cheng - % rawalles = [1:noalle(i)]'; - if rawalles(1)<=0 - rawalles(1)=-999; - end - rawalleles{i} = rawalles; %rawalleles!!! - if rawalles(1)<0 - for j=2:noalle(i)+1 - rawdata(logical(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*(j-1); - end - else - %rawdata(:,i) = iy; - for j=1:noalle(i) - rawdata(logical(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*j; - end - end -end - -% ALLOWED_MEMORY = 50; % in unit of megabyte. -% n1 = max(noalle_est); -% n2 = dime(2)-1; -% n3 = double(max(rawdata(:,dime(2)))); -% ncells = n1*n2*n3; -% memory_used = ncells/(1024*1024); % using uint8 format. -% if memory_used < ALLOWED_MEMORY -% counts=zeros(n1,n2,n3,'uint8'); -% else -% nbatches = ceil(memory_used/ALLOWED_MEMORY); -% n3_in_batch = ceil(n3/nbatches); -% counts = cell(nbatches,1); -% for i=1:nbatches-1 -% % counts = cat(3,counts,uint16(zeros(n1,n2,n3_in_batch))); -% counts{i} = zeros(n1,n2,n3_in_batch,'uint8'); -% end -% % counts = cat(3, counts, uint16(zeros(n1,n2,n3-n3_in_batch*(nbatches-1)))); -% counts{i} = zeros(n1,n2,n3-n3_in_batch*(nbatches-1),'uint8'); -% end - - - -counts = zeros(max(noalle_est),dime(2)-1,max(rawdata(:,dime(2))),'uint8'); -for i=1:dime(1) - for j=1:dime(2)-1 - if rawdata(i,j)>0 - counts(rawdata(i,j),j,rawdata(i,dime(2)))=... - counts(rawdata(i,j),j,rawdata(i,dime(2)))+1; - end - end -end - -maxnoalle = max(noalle_est); -% prior = []; -prior=zeros(maxnoalle,dime(2)-1); -for i=1:dime(2)-1 - prior(:,i) = [SCALE*ones(noalle_est(i),1)/noalle_est(i);zeros(maxnoalle-noalle_est(i),1)]; -end - -adjprior=prior; -for i=1:dime(2)-1 - adjprior(:,i)=adjprior(:,i)+[zeros(noalle_est(i),1);ones(maxnoalle-noalle_est(i),1)]; -end diff --git a/matlab/linkage/allfreqsnew3.m b/matlab/linkage/allfreqsnew3.m deleted file mode 100644 index 086b52a..0000000 --- a/matlab/linkage/allfreqsnew3.m +++ /dev/null @@ -1,63 +0,0 @@ -function [counts,noalle,prior,adjprior,rawalleles,rawdata] = allfreqsnew(rawdata) -% Filename: allfreqsnew.m -% [counts,noalle,prior,adjprior,rawalleles] = allfreqsnew(rawdata) -% -% Description: -% rawdata has n rows (2 x #individuals) and n(l) first -% colums are the loci. The last column is the subpopindex -% prior is a created matrix of positive Dirichlet hyperparameters -% missing data is filtered out -% !!!NEW!!!zeros are accepted as allele codes and any negative numbers as missing data. - -% Modified by: Jing Tang - -SCALE = 1; -dime=size(rawdata); -noalle=zeros(dime(2)-1,1); -rawalleles=cell(1,dime(2)-1); -for i=1:dime(2)-1 - noalle(i)=length(unique(rawdata(:,i))); -end -for i=1:dime(2)-1 - if length(find(rawdata(:,i)<=0))>0 % filtering out the missing data - noalle(i)=noalle(i)-1; - end -end -for i=1:dime(2)-1 - rawalles=unique(rawdata(:,i)); - if rawalles(1)<=0 - rawalles(1)=-999; - end - rawalleles{i} = rawalles; % rawalleles!!! - if rawalles(1)<=0 - for j=2:noalle(i)+1 - rawdata(find(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*(j-1); - end - else - for j=1:noalle(i) - rawdata(find(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*j; - end -end -end - - -counts=zeros(max(noalle),dime(2)-1,max(rawdata(:,dime(2)))); - -for i=1:dime(1) - for j=1:dime(2)-1 - if rawdata(i,j)>0 - counts(rawdata(i,j),j,rawdata(i,dime(2)))=... - counts(rawdata(i,j),j,rawdata(i,dime(2)))+1; - end - end -end - -prior=[]; -for i=1:dime(2)-1 - prior=[prior [SCALE*ones(noalle(i),1)/noalle(i);zeros(max(noalle)-noalle(i),1)]]; -end - -adjprior=prior; -for i=1:dime(2)-1 - adjprior(:,i)=adjprior(:,i)+[zeros(noalle(i),1);ones(max(noalle)-noalle(i),1)]; -end diff --git a/matlab/linkage/allfreqsnew4.m b/matlab/linkage/allfreqsnew4.m deleted file mode 100644 index edf0109..0000000 --- a/matlab/linkage/allfreqsnew4.m +++ /dev/null @@ -1,90 +0,0 @@ -function [rawdata,noalle_est,prior,adjprior] = allfreqsnew4(rawdata, noalle_est) -% Filename: allfreqsnew4.m -% [counts,noalle,prior,adjprior,rawalleles] = allfreqsnew(rawdata) -% -% Description: -% rawdata has n rows (2 x #individuals) and n(l) first -% colums are the loci. The last column is the subpopindex -% prior is a created matrix of positive Dirichlet hyperparameters -% missing data is filtered out -% !!!NEW!!!zeros are accepted as allele codes and any negative numbers as missing data. - -% Modified by: Jing Tang -SCALE = 1; -dime=size(rawdata); -noalle=zeros(dime(2)-1,1); -rawalleles=cell(1,dime(2)-1); -for i=1:dime(2)-1 - noalle(i)=length(unique(rawdata(:,i))); -end -for i=1:dime(2)-1 - if length(find(rawdata(:,i)<=0))>0 - noalle(i)=noalle(i)-1; - end -end - -% Fomulate the raw data such that the value i in a entry denotes the ith -% alleles. -for i=1:dime(2)-1 - rawalles=unique(rawdata(:,i)); - % rawalles = [1:noalle(i)]'; - if rawalles(1)<=0 - rawalles(1)=-999; - end - rawalleles{i} = rawalles; %rawalleles!!! - if rawalles(1)<0 - for j=2:noalle(i)+1 - %rawdata(find(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*(j-1); - rawdata(logical(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*(j-1); - end - else - for j=1:noalle(i) - % rawdata(find(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*j; - rawdata(logical(rawdata(:,i)==rawalles(j)),i)=ones(length(find(rawdata(:,i)==rawalles(j))),1)*j; - end - end -end - -% ALLOWED_MEMORY = 50; % in unit of megabyte. -% n1 = max(noalle_est); -% n2 = dime(2)-1; -% n3 = double(max(rawdata(:,dime(2)))); -% ncells = n1*n2*n3; -% memory_used = ncells/(1024*1024); % using uint8 format. -% if memory_used < ALLOWED_MEMORY -% counts=zeros(n1,n2,n3,'uint8'); -% else -% nbatches = ceil(memory_used/ALLOWED_MEMORY); -% n3_in_batch = ceil(n3/nbatches); -% counts = cell(nbatches,1); -% for i=1:nbatches-1 -% % counts = cat(3,counts,uint16(zeros(n1,n2,n3_in_batch))); -% counts{i} = zeros(n1,n2,n3_in_batch,'uint8'); -% end -% % counts = cat(3, counts, uint16(zeros(n1,n2,n3-n3_in_batch*(nbatches-1)))); -% counts{i} = zeros(n1,n2,n3-n3_in_batch*(nbatches-1),'uint8'); -% end - - - -% counts = zeros(max(noalle_est),dime(2)-1,max(rawdata(:,dime(2))),'uint8'); -% for i=1:dime(1) -% for j=1:dime(2)-1 -% if rawdata(i,j)>0 -% counts(rawdata(i,j),j,rawdata(i,dime(2)))=... -% counts(rawdata(i,j),j,rawdata(i,dime(2)))+1; -% end -% end -% end - -maxnoalle = max(noalle_est); -% prior = []; -prior=zeros(maxnoalle,dime(2)-1); -for i=1:dime(2)-1 - prior(:,i) = [SCALE*ones(noalle_est(i),1)/noalle_est(i);zeros(maxnoalle-noalle_est(i),1)]; -end - -adjprior=prior; -for i=1:dime(2)-1 - adjprior(:,i)=adjprior(:,i)+[zeros(noalle_est(i),1);ones(maxnoalle-noalle_est(i),1)]; -end diff --git a/matlab/linkage/askSeq.m b/matlab/linkage/askSeq.m deleted file mode 100644 index 7b29574..0000000 --- a/matlab/linkage/askSeq.m +++ /dev/null @@ -1,26 +0,0 @@ -function [isOK,returnvalue] = askSeq() - -items(1).name = 'Yes. Continue loading the sequence profile.'; -items(1).default = 1; -items(1).exclusive = [2]; -items(1).values = []; - -items(2).name = 'No. Stop loading data.'; -items(2).default = 0; -items(2).exclusive = [1]; -items(2).values = []; - -title = 'Load Sequence?'; -msg = sprintf(['The allelic profile has been loaded.\nWould you like to continue loading the corresponding gene sequence data?']); -out = CSEFlagDialog(items, title, msg); -if ~(isempty(out)), - if(out(1).answer==1) - returnvalue = 1; - elseif(out(2).answer==1) - returnvalue = 2; - end - isOK = 1; -else - isOK = 0; - returnvalue = 0; -end \ No newline at end of file diff --git a/matlab/linkage/choosebox.m b/matlab/linkage/choosebox.m deleted file mode 100644 index 151315a..0000000 --- a/matlab/linkage/choosebox.m +++ /dev/null @@ -1,353 +0,0 @@ -function [selection,value] = choosebox(varargin) -%CHOOSEBOX Two-listed item selection dialog box. -% [SELECTION,OK] = CHOOSEBOX('ListString',S) creates a modal dialog box -% which allows you to select a string or multiple strings from a list. -% Single or multiple strings can be transferred from a base list to a -% selection list using an arrow-button. Single strings also can be -% transferred by double-clicking; single or multiple strings are -% transferred by pressing . -% SELECTION is a vector of indices of the selected strings (length 1 -% in the single selection mode). The indices will be in the order of -% selection from the base list. If a group of multiple strings is -% selected, its order inside the group will not change, but different -% groups are ordered by their selection. -% OK is 1 if you push the OK button, or 0 if you push the Cancel -% button or close the figure. In that case SELECTION will be [], -% regardless to the actual selection list. -% Important parameter is 'ChooseMode', see list below. -% -% Inputs are in parameter,value pairs: -% -% Parameter Description -% 'ChooseMode' string; can be 'pick' or 'copy'. -% When set to 'pick', transferred string items from the -% base list box are removed. When retransferred, they -% again are listed in their initial positions. -% When set to 'copy', transferred string items remain in -% the base list and can be transferred several times. -% default is 'pick'. -% 'ListString' cell array of strings for the base list box. -% 'SelectionMode' string; can be 'single' or 'multiple'; defaults to -% 'multiple'. -% 'ListSize' [width height] of listbox in pixels; defaults -% to [160 300]. -% 'InitialValue' vector of indices of which items of the list box -% are initially selected; defaults to none []. -% 'Name' String for the figure's title. Defaults to ''. -% 'PromptString' string matrix or cell array of strings which appears -% as text above the base list box. Defaults to {}. -% 'SelectString' string matrix or cell array of strings which appears -% as text above the selection list box. Defaults to {}. -% 'OKString' string for the OK button; defaults to 'OK'. -% 'CancelString' string for the Cancel button; defaults to 'Cancel'. -% 'uh' uicontrol button height, in pixels; default = 18. -% 'fus' frame/uicontrol spacing, in pixels; default = 8. -% 'ffs' frame/figure spacing, in pixels; default = 8. -% -% Example: -% d = dir; -% str = {d.name}; -% [s,v] = choosebox('Name','File deletion',... -% 'PromptString','Files remaining in this directory:',... -% 'SelectString','Files to delete:',... -% 'ListString',str) -% -% inspired by listdlg.m from Mathworks. -% -% programmed by Peter Wasmeier, Technical University of Munich -% p.wasmeier@bv.tum.de -% 11-12-03 - -% Original listdlg file by -% T. Krauss, 12/7/95, P.N. Secakusuma, 6/10/97 -% Copyright 1984-2002 The MathWorks, Inc. -% $Revision: 1.20 $ $Date: 2002/04/09 01:36:06 $ - -% Test: d = dir;[s,v] = choosebox('Name','File deletion','PromptString','Files remaining in this directory:','SelectString','Files to delete:','ListString',{d.name}); - - - -error(nargchk(1,inf,nargin)) - -arrow=[... - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 - 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 - 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 0 - 0 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 0 - 0 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 0 - 0 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 - 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 - 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 - 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 - 0 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 - 0 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 0 - 0 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 0 - 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 0 - 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 - 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]; -for i=1:3 - rarrow(:,:,i)=arrow; - larrow(:,:,i)=fliplr(arrow); -end -clear arrow; - -figname = ''; -smode = 2; % (multiple) -cmode = 1; % remove from left hand side -promptstring = {}; -selectstring={}; -liststring = []; -listsize = [160 300]; -initialvalue = []; -okstring = 'Ok'; -cancelstring = 'Cancel'; -fus = 8; -ffs = 8; -uh = 18; - -if mod(length(varargin),2) ~= 0 - % input args have not com in pairs, woe is me - error('Arguments to LISTDLG must come param/value in pairs.') -end -for i=1:2:length(varargin) - switch lower(varargin{i}) - case 'name' - figname = varargin{i+1}; - case 'promptstring' - promptstring = varargin{i+1}; - case 'selectstring' - selectstring = varargin{i+1}; - case 'selectionmode' - switch lower(varargin{i+1}) - case 'single' - smode = 1; - case 'multiple' - smode = 2; - end - case 'choosemode' - switch lower(varargin{i+1}) - case 'pick' - cmode = 1; - case 'copy' - cmode = 2; - end - case 'listsize' - listsize = varargin{i+1}; - case 'liststring' - liststring = varargin{i+1}; - case 'initialvalue' - initialvalue = varargin{i+1}; - case 'uh' - uh = varargin{i+1}; - case 'fus' - fus = varargin{i+1}; - case 'ffs' - ffs = varargin{i+1}; - case 'okstring' - okstring = varargin{i+1}; - case 'cancelstring' - cancelstring = varargin{i+1}; - otherwise - error(['Unknown parameter name passed to LISTDLG. Name was ' varargin{i}]) - end -end - -if isstr(promptstring) - promptstring = cellstr(promptstring); -end - -if isstr(selectstring) - selectstring = cellstr(selectstring); -end - -if isempty(initialvalue) - initialvalue = 1; -end - -if isempty(liststring) - error('ListString parameter is required.') -end - -ex = get(0,'defaultuicontrolfontsize')*3.0; % height extent per line of uicontrol text (approx) - % Default: 1.7 -fp = get(0,'defaultfigureposition'); -w = 4*fus +2*ffs+2*listsize(1)+50; -h = 2*ffs+7*fus+ex*length(promptstring)+listsize(2)+2*uh; -fp = [fp(1) fp(2)+fp(4)-h w h]; % keep upper left corner fixed - -fig_props = { ... - 'name' figname ... - 'resize' 'off' ... - 'numbertitle' 'off' ... - 'menubar' 'none' ... - 'windowstyle' 'modal' ... - 'visible' 'off' ... - 'createfcn' '' ... - 'position' fp ... - 'closerequestfcn' 'delete(gcbf)' ... - }; - -ad.fromstring=cellstr(liststring); -ad.tostring=''; -ad.pos_left=[1:size(ad.fromstring,2)]'; -ad.pos_right=[]; -ad.value=0; -ad.cmode=cmode; -setappdata(0,'ListDialogAppData',ad) - -fig = figure(fig_props{:}); - -uicontrol('style','frame',... - 'position',[1 1 fp([3 4])]) -uicontrol('style','frame',... - 'position',[ffs ffs 2*fus+listsize(1) 2*fus+uh]) -uicontrol('style','frame',... - 'position',[ffs+2*fus+50+listsize(1) ffs 2*fus+listsize(1) 2*fus+uh]) -uicontrol('style','frame',... - 'position',[ffs ffs+3*fus+uh 2*fus+listsize(1) ... - listsize(2)+3*fus+ex*length(promptstring)+(uh+fus)*(smode==2)]) -uicontrol('style','frame',... - 'position',[ffs+2*fus+50+listsize(1) ffs+3*fus+uh 2*fus+listsize(1) ... - listsize(2)+3*fus+ex*length(promptstring)+(uh+fus)*(smode==2)]) - -if length(promptstring)>0 - prompt_text = uicontrol('style','text','string',promptstring,... - 'horizontalalignment','left','units','pixels',... - 'position',[ffs+fus fp(4)-(ffs+fus+ex*length(promptstring)) ... - listsize(1) ex*length(promptstring)]); -end -if length(selectstring)>0 - select_text = uicontrol('style','text','string',selectstring,... - 'horizontalalignment','left','units','pixels',... - 'position',[ffs+3*fus+listsize(1)+50 fp(4)-(ffs+fus+ex*length(promptstring)) ... - listsize(1) ex*length(selectstring)]); -end - -btn_wid = listsize(1); - -leftbox = uicontrol('style','listbox',... - 'position',[ffs+fus ffs+uh+4*fus listsize(1) listsize(2)+25],... - 'string',ad.fromstring,... - 'backgroundcolor','w',... - 'max',2,... - 'tag','leftbox',... - 'value',initialvalue, ... - 'callback',{@doFromboxClick}); - -rightbox = uicontrol('style','listbox',... - 'position',[ffs+3*fus+listsize(1)+50 ffs+uh+4*fus listsize(1) listsize(2)+25],... - 'string',ad.tostring,... - 'backgroundcolor','w',... - 'max',2,... - 'tag','rightbox',... - 'value',[], ... - 'callback',{@doToboxClick}); - -ok_btn = uicontrol('style','pushbutton',... - 'string',okstring,... - 'position',[ffs+fus ffs+fus btn_wid uh],... - 'callback',{@doOK}); - -cancel_btn = uicontrol('style','pushbutton',... - 'string',cancelstring,... - 'position',[ffs+3*fus+btn_wid+50 ffs+fus btn_wid uh],... - 'callback',{@doCancel}); - -toright_btn = uicontrol('style','pushbutton',... - 'position',[ffs+2*fus+10+listsize(1) ffs+uh+4*fus+(smode==2)*(fus+uh)+listsize(2)/2-25 30 30],... - 'cdata',rarrow,... - 'callback',{@doRight}); - -toleft_btn = uicontrol('style','pushbutton',... - 'position',[ffs+2*fus+10+listsize(1) ffs+uh+4*fus+(smode==2)*(fus+uh)+listsize(2)/2+25 30 30],... - 'cdata',larrow,... - 'callback',{@doLeft}); - - -try - set(fig, 'visible','on'); - uiwait(fig); -catch - if ishandle(fig) - delete(fig) - end -end - -if isappdata(0,'ListDialogAppData') - ad = getappdata(0,'ListDialogAppData'); - selection = ad.pos_right; - value = ad.value; - rmappdata(0,'ListDialogAppData') -else - % figure was deleted - selection = []; - value = 0; -end - -function doOK(varargin) -ad=getappdata(0,'ListDialogAppData'); -ad.value = 1; -setappdata(0,'ListDialogAppData',ad) -delete(gcbf); - -function doCancel(varargin) -ad.value = 0; -ad.pos_right = []; -setappdata(0,'ListDialogAppData',ad) -delete(gcbf); - -function doFromboxClick(varargin) -% if this is a doubleclick, doOK -if strcmp(get(gcbf,'SelectionType'),'open') - doRight; -end - -function doToboxClick(varargin) -% if this is a doubleclick, doOK -if strcmp(get(gcbf,'SelectionType'),'open') - doLeft; -end - -function doRight(varargin) -ad=getappdata(0,'ListDialogAppData'); -leftbox=findobj('Tag','leftbox'); -rightbox=findobj('Tag','rightbox'); -selection=get(leftbox,'Value'); -ad.pos_right=[ad.pos_right;ad.pos_left(selection)]; -ad.tostring=[ad.tostring ad.fromstring(selection)]; -if ad.cmode==1 % remove selected items - ad.pos_left(selection)=[]; - ad.fromstring(selection)=[]; -end -setappdata(0,'ListDialogAppData',ad) -set(leftbox,'String',ad.fromstring,'Value',[]); -set(rightbox,'String',ad.tostring,'Value',[]); - -function doLeft(varargin) -ad=getappdata(0,'ListDialogAppData'); -leftbox=findobj('Tag','leftbox'); -rightbox=findobj('Tag','rightbox'); -selection=get(rightbox,'Value'); -if ad.cmode==1 % if selected items had been removed - % Sort in the items from right hand side again - for i=1:length(selection) - next_item=min(find(ad.pos_left>ad.pos_right(selection(i)))); - if isempty(next_item) % Inserting item is last one - ad.pos_left(end+1)=ad.pos_right(selection(i)); - ad.fromstring(end+1)=ad.tostring(selection(i)); - elseif next_item==ad.pos_left(1) % Inserting item is first one - ad.pos_left=[ad.pos_right(selection(i));ad.pos_left]; - ad.fromstring=[ad.tostring(selection(i)) ad.fromstring]; - else % Inserting item is anywhere in the middle - ad.pos_left=[ad.pos_left(1:next_item-1);ad.pos_right(selection(i));ad.pos_left(next_item:end)]; - ad.fromstring=[ad.fromstring(1:next_item-1) ad.tostring(selection(i)) ad.fromstring(next_item:end)]; - end - end -end -ad.pos_right(selection)=[]; -ad.tostring(selection)=[]; -setappdata(0,'ListDialogAppData',ad) -set(leftbox,'String',ad.fromstring,'Value',[]); -set(rightbox,'String',ad.tostring,'Value',[]); \ No newline at end of file diff --git a/matlab/linkage/dendrogram_alpha.m b/matlab/linkage/dendrogram_alpha.m deleted file mode 100644 index 94af715..0000000 --- a/matlab/linkage/dendrogram_alpha.m +++ /dev/null @@ -1,379 +0,0 @@ -function [T,v] = dendrogram_alpha(Z,varargin) -%ADDED FEATURE -true=logical(1); -false=logical(0); - -%DENDROGRAM Generate dendrogram plot. -% DENDROGRAM(Z) generates a dendrogram plot of the hierarchical -% binary cluster tree Z. Z is an (M-1)-by-3 matrix, generated -% by the LINKAGE function, where M is the number of objects in the -% original dataset. -% -% A dendrogram consists of many U-shaped lines connecting objects -% in a hierarchical tree. Except for the Ward linkage (see LINKAGE), -% the height of each U represents the distance between the two -% objects being connected. -% -% DENDROGRAM(Z,P) generates a dendrogram with only the top P nodes. -% By default, DENDROGRAM uses 30 as the value of P. When there are -% more than 30 initial nodes, a dendrogram may look crowded. To -% display every node, set P = 0. -% -% H = DENDROGRAM(...) returns a vector of line handles. -% -% [H,T] = DENDROGRAM(...) generates a dendrogram and returns T, a vector of -% size M that contains the leaf node number for each object in the original -% dataset. T is useful when P is less than the total number of objects, so -% some leaf nodes in the display correspond to multiple objects. For -% example, to find out which objects are contained in leaf node k of the -% dendrogram, use find(T==k). When there are fewer than P objects in the -% original data, all objects are displayed in the dendrogram. In this case, -% T is the identity map, i.e., T = (1:M)', where each node contains only itself. -% -% [H,T,PERM] = DENDROGRAM(...) generates a dendrogram and returns -% the permutation vector of the node labels of the leaves of the -% dendrogram. PERM is ordered from left to right on a horizontal dendrogram -% and bottom to top for a vertical dendrogram. -% -% H = DENDROGRAM(...,'COLORTHRESHOLD',T) assigns a unique color -% to each group of nodes within the dendrogram whose linkage is less than -% the scalar value T where T is in the range 0 < T < max(Z(:,3)). If T is -% less than or equal to zero or if T is greater than the maximum linkage then -% the dendrogram will be drawn using only one color. T can also be set to -% 'default' in which case the threshold will be set to 70% of the maximum -% linkage i.e. 0.7 * max(Z(:,3)). -% -% H = DENDROGRAM(...,'ORIENTATION',ORIENT) will orient the dendrogram -% within the figure window. Options are: -% -% 'top' --- top to bottom (default) -% 'bottom' --- bottom to top -% 'left' --- left to right -% 'right' --- right to left -% -% Example: -% -% rand('seed',12); -% X = rand(100,2); -% Y = pdist(X,'cityblock'); -% Z = linkage(Y,'average'); -% [H, T] = dendrogram(Z); -% -% See also LINKAGE, PDIST, CLUSTER, CLUSTERDATA, INCONSISTENT. - -% Copyright 1993-2002 The MathWorks, Inc. -% $Revision: 1.15 $ - - -m = size(Z,1)+1; -if nargin < 2 - p = 30; -end - -if nargin == 2 - p = varargin{1}; -end - -orientation = 'd'; -horz = false; -color = false; -threshold = 0.7 * max(Z(:,3)); - -if nargin > 2 - if isnumeric(varargin{1}) - p = varargin{1}; - offset = 1; - else - p = 30; - offset = 0; - end - - if rem(nargin - offset,2)== 0 - error('Incorrect number of arguments to DENDROGRAM.'); - end - okargs = strvcat('orientation','colorthreshold'); - for j=(1 + offset):2:nargin-2 - pname = varargin{j}; - pval = varargin{j+1}; - k = strmatch(lower(pname), okargs); - if isempty(k) - error(sprintf('Unknown parameter name: %s.',pname)); - elseif length(k)>1 - error(sprintf('Ambiguous parameter name: %s.',pname)); - else - switch(k) - case 1 % orientation - if ~isempty(pval) & ischar(pval) - orientation = lower(pval(1)); - end - if ~ismember(orientation,{'t','b','d','r','l'}) - orientation = 'd'; - warning('Unknown orientation specified, using ''top''.'); - end - if ismember(orientation,{'r','l'}) - horz = true; - end - case 2 % colorthreshold - color = true; - if ischar(pval) - if ~strmatch(lower(pval),'default') - warning('Unknown threshold specified, using default'); - end - end - if isnumeric(pval) - threshold = pval; - end - end - end - end -end -Z = transz(Z); % convert from m+k indexing to min(i,j) indexing. -T = (1:m)'; - -% if there are more than p node, dendrogram looks crowded, the following code -% will make the last p link nodes as the leaf node. -if (m > p) & (p ~= 0) - - Y = Z((m-p+1):end,:); - - R = Y(:,1:2); - R = unique(R(:)); - Rlp = R(R<=p); - Rgp = R(R>p); - W(Rlp) = Rlp; - W(Rgp) = setdiff(1:p, Rlp); - W = W'; - T(R) = W(R); - - % computer all the leaf that each node (in the last 30 row) has - for i = 1:p - c = R(i); - T = clusternum(Z,T,W(c),c,m-p+1,0); % assign to it's leaves. - end - - - Y(:,1) = W(Y(:,1)); - Y(:,2) = W(Y(:,2)); - Z = Y; - - m = p; % reset the number of node to be 30 (row number = 29). -end - -A = zeros(4,m-1); -B = A; -n = m; -X = 1:n; -Y = zeros(n,1); -r = Y; - -% arrange Z into W so that there will be no crossing in the dendrogram. -W = zeros(size(Z)); -W(1,:) = Z(1,:); - -nsw = zeros(n,1); rsw = nsw; -nsw(Z(1,1:2)) = 1; rsw(1) = 1; -k = 2; s = 2; - -while (k < n) - i = s; - while rsw(i) | ~any(nsw(Z(i,1:2))) - if rsw(i) & i == s, s = s+1; end - i = i+1; - end - - W(k,:) = Z(i,:); - nsw(Z(i,1:2)) = 1; - rsw(i) = 1; - if s == i, s = s+1; end - k = k+1; -end - -g = 1; -for k = 1:m-1 % initialize X - i = W(k,1); - if ~r(i), - X(i) = g; - g = g+1; - r(i) = 1; - end - i = W(k,2); - if ~r(i), - X(i) = g; - g = g+1; - r(i) = 1; - end -end -[u,v]=sort(X); -% v is the third output value (PERM) -label = num2str(v'); - -% set up the color - -numColors = 1;theGroups = 1; -groups = 0; -cmap = [0 0 1]; - -if color - groups = sum(Z(:,3)< threshold); - if groups > 1 & groups < (m-1) - theGroups = zeros(m-1,1); - numColors = 0; - for count = groups:-1:1 - if (theGroups(count) == 0) - P = zeros(m-1,1); - P(count) = 1; - P = colorcluster(Z,P,Z(count,1),count); - P = colorcluster(Z,P,Z(count,2),count); - numColors = numColors + 1; - theGroups(logical(P)) = numColors; - end - end - cmap = hsv(numColors); - cmap(end+1,:) = [0 0 0]; - else - groups = 1; - end - -end - -% ---------------------------------------------------------- -% if isempty(get(0,'CurrentFigure')) | ishold -% figure; -% set(gcf,'Position', [50, 50, 800, 500]); -% else -% newplot; -% end -% -% col = zeros(m-1,3); -% h = zeros(m-1,1); -% -% for n = 1:(m-1) -% i = Z(n,1); j = Z(n,2); w = Z(n,3); -% A(:,n) = [X(i) X(i) X(j) X(j)]'; -% B(:,n) = [Y(i) w w Y(j)]'; -% X(i) = (X(i)+X(j))/2; Y(i) = w; -% if n <= groups -% col(n,:) = cmap(theGroups(n),:); -% else -% col(n,:) = cmap(end,:); -% end -% end -% -% -% ymin = min(Z(:,3)); -% ymax = max(Z(:,3)); -% margin = (ymax - ymin) * 0.05; -% n = size(label,1); -% -% if(~horz) -% for count = 1:(m-1) -% h(count) = line(A(:,count),B(:,count),'color',col(count,:)); -% end -% lims = [0 m+1 max(0,ymin-margin) (ymax+margin)]; -% set(gca, 'Xlim', [.5 ,(n +.5)], 'XTick', 1:n, 'XTickLabel', label, ... -% 'Box', 'off'); -% mask = logical([0 0 1 1]); -% if strcmp(orientation,'b') -% set(gca,'XAxisLocation','top','Ydir','reverse'); -% end -% else -% for count = 1:(m-1) -% h(count) = line(B(:,count),A(:,count),'color',col(count,:)); -% end -% lims = [max(0,ymin-margin) (ymax+margin) 0 m+1 ]; -% set(gca, 'Ylim', [.5 ,(n +.5)], 'YTick', 1:n, 'YTickLabel', label, ... -% 'Box', 'off'); -% mask = logical([1 1 0 0]); -% if strcmp(orientation, 'l') -% set(gca,'YAxisLocation','right','Xdir','reverse'); -% end -% end -% -% if margin==0 -% if ymax~=0 -% lims(mask) = ymax * [0 1.25]; -% else -% lims(mask) = [0 1]; -% end -% end -% axis(lims); - -% ----------------------------------------------------------- -function T = clusternum(X, T, c, k, m, d) -% assign leaves under cluster c to c. - -d = d+1; -n = m; flag = 0; -while n > 1 - n = n-1; - if X(n,1) == k % node k is not a leave, it has subtrees - T = clusternum(X, T, c, k, n,d); % trace back left subtree - T = clusternum(X, T, c, X(n,2), n,d); - flag = 1; break; - end -end - -n = size(X,1); -if flag == 0 & d ~= 1 % row m is leaf node. - T(X(m,1)) = c; - T(X(m,2)) = c; -end -% --------------------------------------- -function T = colorcluster(X, T, k, m) -% find local clustering - -n = m; -while n > 1 - n = n-1; - if X(n,1) == k % node k is not a leave, it has subtrees - T = colorcluster(X, T, k, n); % trace back left subtree - T = colorcluster(X, T, X(n,2), n); - break; - end -end -T(m) = 1; -% --------------------------------------- -function Z = transz(Z) -%TRANSZ Translate output of LINKAGE into another format. -% This is a helper function used by DENDROGRAM and COPHENET. - -% In LINKAGE, when a new cluster is formed from cluster i & j, it is -% easier for the latter computation to name the newly formed cluster -% min(i,j). However, this definition makes it hard to understand -% the linkage information. We choose to give the newly formed -% cluster a cluster index M+k, where M is the number of original -% observation, and k means that this new cluster is the kth cluster -% to be formmed. This helper function converts the M+k indexing into -% min(i,j) indexing. - -m = size(Z,1)+1; - -for i = 1:(m-1) - if Z(i,1) > m - Z(i,1) = traceback(Z,Z(i,1)); - end - if Z(i,2) > m - Z(i,2) = traceback(Z,Z(i,2)); - end - if Z(i,1) > Z(i,2) - Z(i,1:2) = Z(i,[2 1]); - end -end - - -function a = traceback(Z,b) - -m = size(Z,1)+1; - -if Z(b-m,1) > m - a = traceback(Z,Z(b-m,1)); -else - a = Z(b-m,1); -end -if Z(b-m,2) > m - c = traceback(Z,Z(b-m,2)); -else - c = Z(b-m,2); -end - -a = min(a,c); diff --git a/matlab/linkage/encodealn.m b/matlab/linkage/encodealn.m deleted file mode 100644 index 6d4a1be..0000000 --- a/matlab/linkage/encodealn.m +++ /dev/null @@ -1,31 +0,0 @@ -function [aln2] = encodealn(aln) -%ENCODEALN - Convert nucleotide in alignment to integer. -% -% Syntax: [aln2] = encodealn(aln) -% -% Inputs: -% aln - Alignment structure letter representation -% -% Outputs: -% aln2 - Alignment structure integer representation -% -% -% See also: CODONISESEQ, ENCODESEQ - -% Molecular Biology & Evolution Toolbox, (C) 2005 -% Author: James J. Cai -% Email: jamescai@hkusua.hku.hk -% Website: http://web.hku.hk/~jamescai/ -% Last revision: 5/28/2005 - -if ~(aln.seqtype) error('Do not know the type of sequence! ... BAPS'); end -aln2=aln; - -switch (aln2.seqtype) - case (1) - aln2.seq = i_encode_n(aln2.seq); - case (2) - aln2.seq = i_encode_n(aln2.seq); - case (3) - aln2.seq = i_encode_a(aln2.seq); -end \ No newline at end of file diff --git a/matlab/linkage/i_encode_n.m b/matlab/linkage/i_encode_n.m deleted file mode 100644 index cd3e619..0000000 --- a/matlab/linkage/i_encode_n.m +++ /dev/null @@ -1,61 +0,0 @@ -function S=i_encode_n(Seq) -%I_ENCODE_N - Convert a nucleotide sequence from a letter to an integer representation -%Internal function encodes the nucleotide sequences by digits -% -% Syntax: S=i_encode_n(Seq) -% -% Inputs: -% Seq - Letter representation of sequence -% -% Outputs: -% S - Integer representation of sequence -% -% -% See also: I_ENCODE_A - -% Molecular Biology & Evolution Toolbox, (C) 2005 -% Author: James J. Cai -% Email: jamescai@hkusua.hku.hk -% Website: http://web.hku.hk/~jamescai/ -% Last revision: 5/28/2005 -% seqcode = 'ACGTDI?-' -method=1; -Seq=upper(Seq); % Read lower case letters -[NT,AA] = seqcode; % NT = 'ACGTDI?-' -switch (method) - case (1) - [n,m]=size(Seq); - S = ones(n,m).*8; - Seq(find(Seq=='U'))='T'; %replace U with T - for (k=1:8), - S(find(Seq==NT(k)))=k; - end - S(find(Seq==NT(7))) = -999; % missing data denoted as -999. - S(find(Seq==NT(8))) = -999; % gap - case (2) - [n,m]=size(Seq); - S = zeros(n,m); i = 1; j = 1; - - code = zeros(256,1); - for o = 1:256 - code(o) = nan; - end - - % NT = 'ACGT-'; - - for o = 1:5 - code(abs(NT(o))) = o; - end - - for i=1:n - for j=1:m - if Seq(i,j) == 'U' - S(i,j) = code(abs('T')); - else - S(i,j) = code(abs(Seq(i,j))); - end; - end; - end; -end - -% S=uint8(S); \ No newline at end of file diff --git a/matlab/linkage/linkageMix.m b/matlab/linkage/linkageMix.m deleted file mode 100644 index f552d48..0000000 --- a/matlab/linkage/linkageMix.m +++ /dev/null @@ -1,1277 +0,0 @@ -function [logml, npops, partitionSummary] = linkageMix(c,npopsTable) -% Greedy search algorithm with unknown number of classes for linkage -% clustering. - -global POP_LOGML; global PARTITION; -global CQ_COUNTS; global SP_COUNTS; %These counts are for populations -global CQ_SUMCOUNTS; global SP_SUMCOUNTS; %not for individuals -global LOGDIFF; -clearGlobalVars; - -noalle = c.noalle; -adjprior = c.adjprior; %priorTerm = c.priorTerm; -rowsFromInd = c.rowsFromInd; -counts_cq = c.counts_cq; adjprior_cq = c.adjprior_cq; -counts_sp = c.counts_sp; adjprior_sp = c.adjprior_sp; - -if isfield(c,'dist') - dist = c.dist; Z = c.Z; -end - -clear c; - -ninds = size(counts_cq,3); - -if nargin < 2 - - npopstext = []; - ready = false; - teksti = 'Input upper bound to the number of populations (possibly multiple values): '; - while ready == false - - if ninds>20 - default = 20; - else - default = floor(ninds/2); - end - - npopstextExtra = inputdlg(teksti ,... - 'Input maximum number of populations',1,{num2str(default)}); - % ------------------------------------------------------------------- - drawnow - if isempty(npopstextExtra) % cancel has been pressed - dispCancel - return - end - npopstextExtra = npopstextExtra{1}; - if length(npopstextExtra)>=255 - npopstextExtra = npopstextExtra(1:255); - npopstext = [npopstext ' ' npopstextExtra]; - teksti = 'The input field length limit (255 characters) was reached. Input more values: '; - else - % ----------------------------------------------------- - % Set the limit of the input value. - % Modified by Jing Tang, 30.12.2005 - if str2num(npopstextExtra) > ninds - teksti = 'Values larger than the sample size are not accepted. Input new value: '; - else - npopstext = [npopstext ' ' npopstextExtra]; - ready = true; - end - end - end - clear ready; clear teksti; - if isempty(npopstext) || length(npopstext)==1 - return - else - npopsTable = str2num(npopstext); - % ykkoset = find(npopsTable==1); - npopsTable(logical(npopsTable==1)) = []; - if isempty(npopsTable) - logml = 1; partitionSummary=1; npops=1; - return - end - % clear ykkoset; - end -end - -nruns = length(npopsTable); - -logmlBest = -1e50; -partitionSummary = -1e50*ones(100,2); % 100 best partitions (npops and logml) -partitionSummary(:,1) = zeros(100,1); -worstLogml = -1e50; worstIndex = 1; - -for run = 1:nruns - npops = npopsTable(run); - dispLine; - disp(['Run ' num2str(run) '/' num2str(nruns) ... - ', maximum number of populations ' num2str(npops) '.']); - - initialPartition = admixture_initialization(npops, Z); - PARTITION = initialPartition; - [cq_counts, cq_sumcounts] = initialCounts(counts_cq); - % clear counts_cq; - CQ_COUNTS = cq_counts; clear cq_counts; - CQ_SUMCOUNTS = cq_sumcounts; clear cq_sumcounts; - [sp_counts, sp_sumcounts] = initialCounts(counts_sp); - % clear counts_sp; - SP_COUNTS = sp_counts; clear sp_counts; - SP_SUMCOUNTS = sp_sumcounts; clear sp_sumcounts; - - logml = computeLogml(adjprior_cq, adjprior_sp); - POP_LOGML = computePopulationLogml(1:npops,adjprior_cq, adjprior_sp); - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - - %%%% - % Finding the best partition with the greedy search algorithm - %%%% - nRoundTypes = 7; - tested = zeros(nRoundTypes,1); - roundTypes = [1 1]; - ready = 0; phase = 1; - ninds = length(PARTITION); % number of individuals - LOGDIFF = repmat(-Inf,ninds,npops); - - disp(' '); - disp(['Mixture analysis started with initial ' num2str(npops) ' populations.']); - - while ready ~= 1 - changesMade = 0; - - disp(['Performing steps: ' num2str(roundTypes)]); - - for n = 1:length(roundTypes) - round = roundTypes(n); - % pack; - if tested(round) == 1 - - elseif round==1 % Moving one individual to another population - inds = randperm(ninds); % random order - changesMadeNow = 0; - for ind = inds - i1 = PARTITION(ind); - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - changesInLogml = computeChanges(ind, adjprior_cq, ... - adjprior_sp, indCqCounts, indSpCounts); - - [maxChange, i2] = max(changesInLogml); - - - if (i1~=i2 && maxChange>1e-5) - % Individual is moved - changesMade = 1; - if changesMadeNow == 0 - disp('action 1'); - changesMadeNow = 1; - tested = zeros(nRoundTypes,1); - end - updateGlobalVariables(ind, i2, indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - logml = logml+maxChange; - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - end - end - - if changesMadeNow == 0 - tested(round) = 1; - end - - elseif round==2 % Combining two populationgs - maxChange = 0; - for pop = 1:npops - changesInLogml = computeChanges2(pop, adjprior_cq, adjprior_sp); - [biggest, index] = max(changesInLogml); - if biggest>maxChange - maxChange = biggest; - i1 = pop; - i2 = index; - end - end - - if maxChange>1e-5 - disp('action 2'); - changesMade = 1; - tested = zeros(nRoundTypes,1); - updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp); - logml = logml + maxChange; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - else - tested(round) = 1; - end - - - elseif round==3 || round==4 % Splitting population into smaller groups - maxChange = 0; - - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>5 - % Computing the distance between individuals inds2 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = computeLinkage(dist2'); - - - % Number of groups: - if round==3 - npops2 = min(20, floor(ninds2 / 5)); - elseif round==4 - npops2 = 2; - end - T2 = cluster_own(Z2, npops2); - - changesInLogml = computeChanges3(T2, inds2, pop, ... - counts_cq, counts_sp, adjprior_cq, adjprior_sp); - [biggest, index] = max(changesInLogml(1:end)); - if biggest > maxChange - maxChange = biggest; - movingGroup = rem(index,npops2); % The group, which is moved - if movingGroup==0, movingGroup = npops2; end - movingInds = inds2(logical(T2==movingGroup)); - i2 = ceil(index/npops2); % pop where movingGroup would be moved - end - end - end - if maxChange>1e-5 - changesMade = 1; - tested = zeros(nRoundTypes,1); - if round==3 - disp('action 3'); - else - disp('action 4'); - end - indCqCounts = uint16(sum(counts_cq(:,:,movingInds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,movingInds),3)); - updateGlobalVariables3(movingInds, i2,indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - logml = logml + maxChange; - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - else - tested(round) = 1; - end - - elseif round == 5 || round == 6 - %Moving individuals out of population until positive change - %in logml has occured - pop=0; - changesMadeNow = 0; - %Saving old values - poplogml = POP_LOGML; - partition = PARTITION; - cq_counts = CQ_COUNTS; - sp_counts = SP_COUNTS; - cq_sumcounts = CQ_SUMCOUNTS; - sp_sumcounts = SP_SUMCOUNTS; - logdiff = LOGDIFF; - - while (pop < npops && changesMadeNow == 0) - pop = pop+1; - totalChangeInLogml = 0; - inds = find(PARTITION==pop); - if round == 5 - %Random order - aputaulu = [inds rand(length(inds),1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - elseif round == 6 - inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp); - end - - i=0; - - while (length(inds)>0 && i 1e-5 - i=length(inds); - end - end - end - - if totalChangeInLogml>1e-5 - if round == 5 - disp('action 5'); - elseif round == 6 - disp('action 6'); - end - tested = zeros(nRoundTypes,1); - changesMadeNow=1; - changesMade = 1; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - else - % No better partition was found, restoring the old - % values - PARTITION = partition; - POP_LOGML = poplogml; - CQ_COUNTS = cq_counts; - SP_COUNTS = sp_counts; - CQ_SUMCOUNTS = cq_sumcounts; - SP_SUMCOUNTS = sp_sumcounts; - LOGDIFF = logdiff; - logml = logml - totalChangeInLogml; - end - end - clear partition; clear poplogml; - if changesMadeNow == 0 - tested(round) = 1; - end - - elseif round == 7 - emptyPop = findEmptyPop(npops); - j = 0; - pops = randperm(npops); - % totalChangeInLogml = 0; - if emptyPop == -1 - j = npops; - end - changesMadeNow = 0; - while (j < npops) - j = j +1; - pop = pops(j); - inds2 = find(PARTITION == pop); - ninds2 = length(inds2); - if ninds2 > 5 - partition = PARTITION; - cq_sumcounts = CQ_SUMCOUNTS; - cq_counts = CQ_COUNTS; - sp_sumcounts = SP_SUMCOUNTS; - sp_counts = SP_COUNTS; - poplogml = POP_LOGML; - logdiff = LOGDIFF; - % pack; - - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = computeLinkage(dist2'); - T2 = cluster_own(Z2, 2); - % movingInds = inds2(find(T2 == 1)); - movingInds = inds2(logical(T2 == 1)); - changesInLogml = computeChanges3(T2, inds2, pop, ... - counts_cq, counts_sp, adjprior_cq, adjprior_sp); - totalChangeInLogml = changesInLogml(1, emptyPop); - - indCqCounts = uint16(sum(counts_cq(:,:,movingInds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,movingInds),3)); - updateGlobalVariables3(movingInds, emptyPop,indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - - changed = 1; - - while (changed == 1) - changed = 0; - - changesInLogml = computeChanges5(inds2, pop, emptyPop, ... - counts_cq, counts_sp, adjprior_cq, adjprior_sp); - [maxChange, index] = max(changesInLogml); - moving = inds2(index); - if (PARTITION(moving) == pop) - i2 = emptyPop; - else - i2 = pop; - end - - if maxChange > 1e-5 - indCqCounts = uint16(counts_cq(:,:,moving)); - indSpCounts = uint16(counts_sp(:,:,moving)); - updateGlobalVariables3(moving, i2,indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - changed = 1; - totalChangeInLogml = totalChangeInLogml + maxChange; - end - end - - if totalChangeInLogml > 1e-5 - changesMade = 1; - changesMadeNow = 1; - logml = logml + totalChangeInLogml; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - disp('action 7'); - tested = zeros(nRoundTypes, 1); - j = npops; - else - % No better partition was found, restoring the old - % values - PARTITION = partition; - POP_LOGML = poplogml; - CQ_COUNTS = cq_counts; - SP_COUNTS = sp_counts; - CQ_SUMCOUNTS = cq_sumcounts; - SP_SUMCOUNTS = sp_sumcounts; - LOGDIFF = logdiff; - %logml = logml - totalChangeInLogml; - end - end - end - if changesMadeNow == 0 - tested(round) = 1; - end - end - - end - - - if changesMade == 0 - if phase==1 - phase = 2; - elseif phase==2 - phase = 3; - elseif phase==3 - phase = 4; - elseif phase==4; - phase = 5; - elseif phase==5 - ready = 1; - end - else - changesMade = 0; - end - - if ready==0 - if phase==1 - roundTypes=[1]; - elseif phase==2 - roundTypes=[2]; - elseif phase==3 - roundTypes=[5 5 7]; - elseif phase==4 - roundTypes=[4 3 1 1]; - elseif phase==5 - roundTypes=[6 2 7 3 4 1]; - end - end - - end - % Saving results - - npops = removeEmptyPops; - POP_LOGML = computePopulationLogml(1:npops, adjprior_cq, adjprior_sp); - - disp(['Found partition with ' num2str(npops) ' populations.']); - disp(['Log(ml) = ' num2str(logml)]); - disp(' '); - - if logml>logmlBest - % Updating the best found partition - logmlBest = logml; - npopsBest = npops; - partitionBest = PARTITION; - cq_countsBest = CQ_COUNTS; - sp_countsBest = SP_COUNTS; - cq_sumcountsBest = CQ_SUMCOUNTS; - sp_sumcountsBest = SP_SUMCOUNTS; - pop_logmlBest = POP_LOGML; - logdiffbest = LOGDIFF; - end -end - -logml = logmlBest; -npops = npopsBest; -PARTITION = partitionBest; -CQ_COUNTS = cq_countsBest; -SP_COUNTS = sp_countsBest; -CQ_SUMCOUNTS = cq_sumcountsBest; -SP_SUMCOUNTS = sp_sumcountsBest; -POP_LOGML = pop_logmlBest; -LOGDIFF = logdiffbest; - -%-------------------------------------------------------------------------- -% The next three functions are for computing the initial partition -% according to the distance between the individuals - -function initial_partition=admixture_initialization(nclusters,Z) -T=cluster_own(Z,nclusters); -initial_partition=T; - -%-------------------------------------------------------------------------- -function T = cluster_own(Z,nclust) -% true=logical(1); -% false=logical(0); - -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); -% maximum number of clusters based on inconsistency -if m <= maxclust - T = (1:m)'; -elseif maxclust==1 - T = ones(m,1); -else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end -end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäärä. - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - -%-------------------------------------------------------------------------- - -function Z = computeLinkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 || m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -% monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges(ind, adjprior_cq, adjprior_sp, ... - indCqCounts, indSpCounts) -% Computes changes in log-marginal likelihood if individual ind is -% moved to another population -% -% Input: -% ind - the individual to be moved -% adjprior_cq & _sp - adjpriors for cliques and separators -% indCqCounts, indSpCounts - counts for individual ind -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% ind is move to population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -npops = size(CQ_COUNTS,3); -changes = LOGDIFF(ind,:); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); -changes(i1) = 0; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - -i2 = find(changes==-Inf); -i2 = setdiff(i2,i1); -i2_logml = POP_LOGML(i2); - -ni2 = length(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 ni2]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[ni2 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 ni2]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) + repmat(sumSp,[ni2 1]); - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 ni2]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[ni2 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 ni2]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) - repmat(sumSp,[ni2 1]); -% a = repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; -LOGDIFF(ind,:) = changes; - -%------------------------------------------------------------------------------------ - -function changes = computeChanges2(i1, adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if population i1 is combined -% with another population -% -% Input: -% i1 - the population to be combined -% adjprior_cq & _sp - adjpriors for cliques and separators -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% i1 is combined with population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); - -i1_logml = POP_LOGML(i1); -indCqCounts = CQ_COUNTS(:,:,i1); -indSpCounts = SP_COUNTS(:,:,i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -new_i1_logml = 0; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); -% a = repmat(sumSp,[npops-1 1]); -% if ~any(sumSp) -% a(:,[1:size(a,2)])=[]; -% end -% SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ a ; - - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - - - -%------------------------------------------------------------------------------------ - - -function changes = computeChanges3(T2, inds2, i1, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if subpopulation of i2 is -% moved to another population -% -% Input: -% T2 - partition of inds2 to subpopulations -% inds2 - individuals in population i1 -% i2 -% counts_cq, counts_sp - counts for individuals -% -% Output: -% changes - table of size length(unique(T2))*npops. -% changes(i,j) = difference in logml if subpopulation inds2(find(T2==i)) of -% i2 is moved to population j - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -npops2 = length(unique(T2)); -changes = zeros(npops2,npops); - -%cq_counts = CQ_COUNTS; -%sp_counts = SP_COUNTS; -%cq_sumcounts = CQ_SUMCOUNTS; -%sp_sumcounts = SP_SUMCOUNTS; - - -i1_logml = POP_LOGML(i1); - -for pop2 = 1:npops2 - % inds = inds2(find(T2==pop2)); - inds = inds2(logical(T2==pop2)); - ninds = length(inds); - if ninds>0 - indCqCounts = uint16(sum(counts_cq(:,:,inds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,inds),3)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - - new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); - - new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - - changes(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges5(inds, i1, i2, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes change in logml if individual of inds is moved between -% populations i1 and i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; global PARTITION; - -ninds = length(inds); -changes = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %from - pop2 = i2; %to - else - pop1 = i2; - pop2 = i1; - end - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)-indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)-sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)-indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:) - sumSp; - - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)+indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)+sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)+indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:) + sumSp; - - new_logmls = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - changes(i) = sum(new_logmls); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)+indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)+sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)+indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:)+sumSp; - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)-indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)-sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)-indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:)-sumSp; -end - -changes = changes - i1_logml - i2_logml; - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individual ind is moved to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp) -% Updates global variables when all individuals from population i1 are moved -% to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -% inds = find(PARTITION==i1); -% PARTITION(inds) = i2; -PARTITION(logical(PARTITION==i1)) = i2; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+CQ_COUNTS(:,:,i1); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+CQ_SUMCOUNTS(i1,:); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+SP_COUNTS(:,:,i1); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+SP_SUMCOUNTS(i1,:); - -CQ_COUNTS(:,:,i1) = 0; -CQ_SUMCOUNTS(i1,:) = 0; -SP_COUNTS(:,:,i1) = 0; -SP_SUMCOUNTS(i1,:) = 0; - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individuals muuttuvat are moved to -% population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Returns individuals inds in order according to the change in the logml if -% they are moved out of the population pop - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)-indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)-sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)-indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)-sumSp; - - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)+indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)+sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)+indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)+sumSp; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - - -%-------------------------------------------------------------------------- - -function clearGlobalVars - -global CQ_COUNTS; CQ_COUNTS = []; -global CQ_SUMCOUNTS; CQ_SUMCOUNTS = []; -global SP_COUNTS; SP_COUNTS = []; -global SP_SUMCOUNTS; SP_SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; -global LOGDIFF; LOGDIFF = []; - -%-------------------------------------------------------------------------- - -function npops = removeEmptyPops -% Removes empty pops from all global COUNTS variables. -% Updates PARTITION and npops - -global CQ_COUNTS; -global CQ_SUMCOUNTS; -global SP_COUNTS; -global SP_SUMCOUNTS; -global PARTITION; -global LOGDIFF; - -notEmpty = find(any(CQ_SUMCOUNTS,2)); -CQ_COUNTS = CQ_COUNTS(:,:,notEmpty); -CQ_SUMCOUNTS = CQ_SUMCOUNTS(notEmpty,:); -SP_COUNTS = SP_COUNTS(:,:,notEmpty); -SP_SUMCOUNTS = SP_SUMCOUNTS(notEmpty,:); -LOGDIFF = LOGDIFF(:,notEmpty); - -for n=1:length(notEmpty) -% apu = find(PARTITION==notEmpty(n)); -% PARTITION(apu)=n; -PARTITION(logical(PARTITION==notEmpty(n))) = n; -end -npops = length(notEmpty); - -%-------------------------------------------------------------------------- - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, että annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ssä ei vielä ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyistä partitiota vastaava nclusters:in arvo. Muutoin ei tehdä mitään. -global PARTITION; -apu = isempty(find(abs(partitionSummary(:,2)-logml)<1e-5,1)); -if apu - % Nyt löydetty partitio ei ole vielä kirjattuna summaryyn. - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- - -function [counts, sumcounts] = initialCounts(ind_counts) - -global PARTITION; - -pops = unique(PARTITION); -npops = max(pops); - -counts = zeros(size(ind_counts,1), size(ind_counts,2), npops,'uint16'); -sumcounts = zeros(npops, size(ind_counts,2),'uint16'); - -for i = 1:npops - inds = find(PARTITION == i); - counts(:,:,i) = sum(ind_counts(:,:,inds), 3); - sumcounts(i,:) = sum(counts(:,:,i),1); -end - -%-------------------------------------------------------------------------- - -function logml = computeLogml(adjprior_cq, adjprior_sp) - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -npops = size(CQ_COUNTS, 3); - -cq_logml = sum(sum(sum(gammaln(cq_counts+repmat(adjprior_cq,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_cq))) - ... - sum(sum(gammaln(1+cq_sumcounts))); - -sp_logml = sum(sum(sum(gammaln(sp_counts+repmat(adjprior_sp,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_sp))) - ... - sum(sum(gammaln(1+sp_sumcounts))); - -logml = cq_logml - sp_logml; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%-------------------------------------------------------------------------- - -function popLogml = computePopulationLogml(pops, adjprior_cq, adjprior_sp) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -nall_cq = size(CQ_COUNTS,1); -nall_sp = size(SP_COUNTS,1); -ncliq = size(CQ_COUNTS,2); -nsep = size(SP_COUNTS, 2); - -z = length(pops); - -popLogml_cq = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_cq,[1 1 z]) + cq_counts(:,:,pops)) ... - ,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_cq))); - -popLogml_sp = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_sp,[1 1 z]) + sp_counts(:,:,pops)) ... - ,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_sp))); - -popLogml = popLogml_cq - popLogml_sp; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%------------------------------------------------------------------- - - - - -%-------------------------------------------------------------- -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -% newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n= 1 - suurinYks = suurinYks+1; - end - if suurinYks<10 - mjono(7) = num2str(suurinYks); - mjono(6) = 'e'; - mjono(5) = palautaYks(abs(logml),suurinYks-1); - mjono(4) = '.'; - mjono(3) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(2) = '-'; - end - elseif suurinYks>=10 - mjono(6:7) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(logml),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(1) = '-'; - end - end -end - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:inä -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmillä -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhjiä -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - - diff --git a/matlab/linkage/linkageMix_fixK.m b/matlab/linkage/linkageMix_fixK.m deleted file mode 100644 index 15cff7c..0000000 --- a/matlab/linkage/linkageMix_fixK.m +++ /dev/null @@ -1,1359 +0,0 @@ -function [logml, npops, partitionSummary] = linkageMix_fixK(c,npops,nruns) -% Greedy search algorithm with fixed number of classes for linkage -% clustering. - -global POP_LOGML; global PARTITION; -global CQ_COUNTS; global SP_COUNTS; %These counts are for populations -global CQ_SUMCOUNTS; global SP_SUMCOUNTS; %not for individuals -global LOGDIFF; -clearGlobalVars; - -noalle = c.noalle; -adjprior = c.adjprior; %priorTerm = c.priorTerm; -rowsFromInd = c.rowsFromInd; -counts_cq = c.counts_cq; adjprior_cq = c.adjprior_cq; -counts_sp = c.counts_sp; adjprior_sp = c.adjprior_sp; - -if isfield(c,'dist') - dist = c.dist; Z = c.Z; -end - -clear c; - -ninds = size(counts_cq,3); - -if nargin < 2 - npopstext = []; - - if ninds>20 - default = 20; - else - default = floor(ninds/2); - end - - teksti = {'Number of populations:', ... - 'Number of runs:'}; - def = {num2str(default), '1'}; - - npopstextExtra = inputdlg(teksti ,... - 'Input parameters for the computation algorithm',1,def); - - if isempty(npopstextExtra) % cancel has been pressed - dispCancel - logml = 1; partitionSummary=1; npops=1; - return - end - npopstext = npopstextExtra{1}; - nrunstext = npopstextExtra{2}; - - clear teksti npopstextExtra; - if isempty(npopstext) - return - else - npopsTable = str2num(npopstext); - npops = npopsTable(1); - - if npops==1 - logml = 1; partitionSummary=1; npops=1; - return - end - nrunsTable = str2num(nrunstext); - nruns = nrunsTable(1); - end -end - -logmlBest = -1e50; -partitionSummary = -1e50*ones(100,2); % 100 best partitions (npops and logml) -partitionSummary(:,1) = zeros(100,1); -worstLogml = -1e50; worstIndex = 1; - -for run = 1:nruns - dispLine; - disp(['Run ' num2str(run) '/' num2str(nruns) ... - ', maximum number of populations ' num2str(npops) '.']); - - initialPartition = admixture_initialization(npops, Z); - PARTITION = initialPartition; - [cq_counts, cq_sumcounts] = initialCounts(counts_cq); - % clear counts_cq; - CQ_COUNTS = cq_counts; clear cq_counts; - CQ_SUMCOUNTS = cq_sumcounts; clear cq_sumcounts; - [sp_counts, sp_sumcounts] = initialCounts(counts_sp); - % clear counts_sp; - SP_COUNTS = sp_counts; clear sp_counts; - SP_SUMCOUNTS = sp_sumcounts; clear sp_sumcounts; - - logml = computeLogml(adjprior_cq, adjprior_sp); - POP_LOGML = computePopulationLogml(1:npops,adjprior_cq, adjprior_sp); - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - - %%%% - % Finding the best partition with the greedy search algorithm - %%%% - nRoundTypes = 7; - tested = zeros(nRoundTypes,1); - roundTypes = [1 1]; - ready = 0; phase = 1; - ninds = length(PARTITION); % number of individuals - LOGDIFF = repmat(-Inf,ninds,npops); - - disp(' '); - disp(['Mixture analysis started with initial ' num2str(npops) ' populations.']); - - while ready ~= 1 - changesMade = 0; - - disp(['Performing steps: ' num2str(roundTypes)]); - - for n = 1:length(roundTypes) - round = roundTypes(n); - % pack; - if tested(round) == 1 - - elseif round==1 % Moving one individual to another population - inds = randperm(ninds); % random order - changesMadeNow = 0; - for ind = inds - i1 = PARTITION(ind); - if length(find(PARTITION==i1))>1 - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - changesInLogml = computeChanges(ind, adjprior_cq, ... - adjprior_sp, indCqCounts, indSpCounts); - - [maxChange, i2] = max(changesInLogml); - - - if (i1~=i2 && maxChange>1e-5) - % Individual is moved - changesMade = 1; - if changesMadeNow == 0 - disp('Action 1'); - changesMadeNow = 1; - tested = zeros(nRoundTypes,1); - end - updateGlobalVariables(ind, i2, indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - logml = logml+maxChange; - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - end - end - end - - if changesMadeNow == 0 - tested(round) = 1; - end - - elseif round==2 % Merging two populations and splitting the result - maxChange = -1e50; - poplogml = POP_LOGML; - partition = PARTITION; - cq_counts = CQ_COUNTS; - sp_counts = SP_COUNTS; - cq_sumcounts = CQ_SUMCOUNTS; - sp_sumcounts = SP_SUMCOUNTS; - logdiff = LOGDIFF; - - % Two populations are merged first - - for pop = 1:npops - changesInLogml = computeChanges2(pop, adjprior_cq, adjprior_sp); - changesInLogml(pop)=-1e50; - [biggest, index] = max(changesInLogml); - if biggest>maxChange - maxChange = biggest; - i1 = pop; - i2 = index; - end - end - - totalChange = maxChange; - updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp); - - - % A population is split in two - - emptyPop = i1; - maxChange = -1e50; - - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>1 - % Computing the distance between individuals inds2 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = computeLinkage(dist2'); - npops2 = 2; - T2 = cluster_own(Z2, npops2); - - changesInLogml = computeChanges3(T2, inds2, pop, ... - counts_cq, counts_sp, adjprior_cq, adjprior_sp); - biggest = changesInLogml(1,emptyPop); - if biggest > maxChange - maxChange = biggest; - movingInds = inds2(logical(T2==1)); - end - end - end - - indCqCounts = uint16(sum(counts_cq(:,:,movingInds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,movingInds),3)); - updateGlobalVariables3(movingInds, emptyPop,indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - totalChange = totalChange + maxChange; - - % Individuals are moved between populations - inds = randperm(ninds); % random order - for ind = inds - i1 = PARTITION(ind); - if length(find(PARTITION==i1))>1 - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - changesInLogml = computeChanges(ind, adjprior_cq, ... - adjprior_sp, indCqCounts, indSpCounts); - - [maxChange, i2] = max(changesInLogml); - if (i1~=i2 && maxChange>1e-5) - updateGlobalVariables(ind, i2, indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - totalChange = totalChange + maxChange; - end - end - end - - if totalChange > 1e-5 - disp('Action 2'); - logml = logml + totalChange; - tested = zeros(nRoundTypes,1); - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - - else - PARTITION = partition; - POP_LOGML = poplogml; - CQ_COUNTS = cq_counts; - SP_COUNTS = sp_counts; - CQ_SUMCOUNTS = cq_sumcounts; - SP_SUMCOUNTS = sp_sumcounts; - LOGDIFF = logdiff; - tested(round) = 1; - end - - - elseif round==3 || round==4 % Splitting population into smaller groups - maxChange = 0; - - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>5 - % Computing the distance between individuals inds2 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = computeLinkage(dist2'); - - - % Number of groups: - if round==3 - npops2 = max(min(20, floor(ninds2/2)),2); - elseif round==4 - npops2 = 2; - end - T2 = cluster_own(Z2, npops2); - - changesInLogml = computeChanges3(T2, inds2, pop, ... - counts_cq, counts_sp, adjprior_cq, adjprior_sp); - [biggest, index] = max(changesInLogml(1:end)); - if biggest > maxChange - maxChange = biggest; - movingGroup = rem(index,npops2); % The group, which is moved - if movingGroup==0, movingGroup = npops2; end - movingInds = inds2(logical(T2==movingGroup)); - i2 = ceil(index/npops2); % pop where movingGroup would be moved - end - end - end - if maxChange>1e-5 - changesMade = 1; - tested = zeros(nRoundTypes,1); - if round==3 - disp('Action 3'); - else - disp('Action 4'); - end - indCqCounts = uint16(sum(counts_cq(:,:,movingInds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,movingInds),3)); - updateGlobalVariables3(movingInds, i2,indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - logml = logml + maxChange; - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - else - tested(round) = 1; - end - - elseif round == 5 || round == 6 - %Moving individuals out of population until positive change - %in logml has occured - pop=0; - changesMadeNow = 0; - %Saving old values - poplogml = POP_LOGML; - partition = PARTITION; - cq_counts = CQ_COUNTS; - sp_counts = SP_COUNTS; - cq_sumcounts = CQ_SUMCOUNTS; - sp_sumcounts = SP_SUMCOUNTS; - logdiff = LOGDIFF; - - while (pop < npops && changesMadeNow == 0) - pop = pop+1; - totalChangeInLogml = 0; - inds = find(PARTITION==pop); - if round == 5 - %Random order - aputaulu = [inds rand(length(inds),1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - elseif round == 6 - inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp); - end - - i=0; - - while (length(inds)>0 && i 1e-5 - i=length(inds); - end - end - end - - if totalChangeInLogml>1e-5 - if round == 5 - disp('Action 5'); - elseif round == 6 - disp('Action 6'); - end - tested = zeros(nRoundTypes,1); - changesMadeNow=1; - changesMade = 1; - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - else - % No better partition was found, restoring the old - % values - PARTITION = partition; - POP_LOGML = poplogml; - CQ_COUNTS = cq_counts; - SP_COUNTS = sp_counts; - CQ_SUMCOUNTS = cq_sumcounts; - SP_SUMCOUNTS = sp_sumcounts; - LOGDIFF = logdiff; - logml = logml - totalChangeInLogml; - end - end - clear partition; clear poplogml; - if changesMadeNow == 0 - tested(round) = 1; - end - - elseif round == 7 - emptyPop = npops + 1; - j = 0; - pops = randperm(npops); - changesMadeNow = 0; - while (j < npops) - j = j +1; - pop = pops(j); - inds2 = find(PARTITION == pop); - ninds2 = length(inds2); - if ninds2 > 5 - partition = PARTITION; - cq_sumcounts = CQ_SUMCOUNTS; - cq_counts = CQ_COUNTS; - sp_sumcounts = SP_SUMCOUNTS; - sp_counts = SP_COUNTS; - poplogml = POP_LOGML; - logdiff = LOGDIFF; - - % A new population is created temporarily - npops = npops + 1; - POP_LOGML(npops) = 0; - CQ_COUNTS(:,:,npops) = zeros(size(CQ_COUNTS(:,:,1))); - CQ_SUMCOUNTS(npops,:) = zeros(size(CQ_SUMCOUNTS(1,:))); - SP_COUNTS(:,:,npops) = zeros(size(SP_COUNTS(:,:,1))); - SP_SUMCOUNTS(npops,:) = zeros(size(SP_SUMCOUNTS(1,:))); - - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = computeLinkage(dist2'); - T2 = cluster_own(Z2, 2); - % movingInds = inds2(find(T2 == 1)); - movingInds = inds2(logical(T2 == 1)); - changesInLogml = computeChanges3(T2, inds2, pop, ... - counts_cq, counts_sp, adjprior_cq, adjprior_sp); - totalChangeInLogml = changesInLogml(1, emptyPop); - - indCqCounts = uint16(sum(counts_cq(:,:,movingInds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,movingInds),3)); - updateGlobalVariables3(movingInds, emptyPop,indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - - % Individuals are moved between populations - inds = randperm(ninds); % random order - for ind = inds - i1 = PARTITION(ind); - if length(find(PARTITION==i1))>1 - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - changesInLogml = computeChanges(ind, adjprior_cq, ... - adjprior_sp, indCqCounts, indSpCounts); - - [maxChange, i2] = max(changesInLogml); - if (i1~=i2 && maxChange>1e-5) - updateGlobalVariables(ind, i2, indCqCounts, ... - indSpCounts, adjprior_cq, adjprior_sp); - totalChangeInLogml = totalChangeInLogml + maxChange; - end - end - end - - % Two populations are merged - if length(find(any(CQ_SUMCOUNTS,2))) == npops - maxChange = -1e50; - for pop = 1:npops - changesInLogml = computeChanges2(pop, adjprior_cq, adjprior_sp); - changesInLogml(pop)=-1e50; - [biggest, index] = max(changesInLogml); - if biggest>maxChange - maxChange = biggest; - i1 = pop; - i2 = index; - end - end - - totalChangeInLogml = totalChangeInLogml + maxChange; - updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp); - - end - - if totalChangeInLogml > 1e-5 - changesMade = 1; - - logml = logml + totalChangeInLogml; - - npops = removeEmptyPops; % The temporary population is removed - POP_LOGML = computePopulationLogml(1:npops, adjprior_cq, adjprior_sp); - - if logml>worstLogml - [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex); - if (added==1) - [worstLogml, worstIndex] = min(partitionSummary(:,2)); - end - end - if changesMadeNow == 0 - disp('Action 7'); - changesMadeNow = 1; - end - changesMadeNow = 1; - tested = zeros(nRoundTypes, 1); - j = npops; - else - % No better partition was found, restoring the old - % values - PARTITION = partition; - POP_LOGML = poplogml; - CQ_COUNTS = cq_counts; - SP_COUNTS = sp_counts; - CQ_SUMCOUNTS = cq_sumcounts; - SP_SUMCOUNTS = sp_sumcounts; - LOGDIFF = logdiff; - npops = npops-1; - end - end - end - if changesMadeNow == 0 - tested(round) = 1; - end - end - - end - - - if changesMade == 0 - if phase==1 - phase = 2; - elseif phase==2 - phase = 3; - elseif phase==3 - phase = 4; - elseif phase==4; - phase = 5; - elseif phase==5 - ready = 1; - end - else - changesMade = 0; - end - - if ready==0 - if phase==1 - roundTypes=[1]; - elseif phase==2 - roundTypes=[2]; - elseif phase==3 - roundTypes=[5 5 7]; - elseif phase==4 - roundTypes=[4 3 1 1]; - elseif phase==5 - roundTypes=[6 2 7 3 4 1]; - end - end - - end - % Saving results - - npops = removeEmptyPops; - POP_LOGML = computePopulationLogml(1:npops, adjprior_cq, adjprior_sp); - - disp(['Found partition with ' num2str(npops) ' populations.']); - disp(['Log(ml) = ' num2str(logml)]); - disp(' '); - - if logml>logmlBest - % Updating the best found partition - logmlBest = logml; - npopsBest = npops; - partitionBest = PARTITION; - cq_countsBest = CQ_COUNTS; - sp_countsBest = SP_COUNTS; - cq_sumcountsBest = CQ_SUMCOUNTS; - sp_sumcountsBest = SP_SUMCOUNTS; - pop_logmlBest = POP_LOGML; - logdiffbest = LOGDIFF; - end -end - -logml = logmlBest; -npops = npopsBest; -PARTITION = partitionBest; -CQ_COUNTS = cq_countsBest; -SP_COUNTS = sp_countsBest; -CQ_SUMCOUNTS = cq_sumcountsBest; -SP_SUMCOUNTS = sp_sumcountsBest; -POP_LOGML = pop_logmlBest; -LOGDIFF = logdiffbest; - -%-------------------------------------------------------------------------- -% The next three functions are for computing the initial partition -% according to the distance between the individuals - -function initial_partition=admixture_initialization(nclusters,Z) -T=cluster_own(Z,nclusters); -initial_partition=T; - -%-------------------------------------------------------------------------- -function T = cluster_own(Z,nclust) -% true=logical(1); -% false=logical(0); - -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); -% maximum number of clusters based on inconsistency -if m <= maxclust - T = (1:m)'; -elseif maxclust==1 - T = ones(m,1); -else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end -end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäärä. - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - -%-------------------------------------------------------------------------- - -function Z = computeLinkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 || m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -% monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges(ind, adjprior_cq, adjprior_sp, ... - indCqCounts, indSpCounts) -% Computes changes in log-marginal likelihood if individual ind is -% moved to another population -% -% Input: -% ind - the individual to be moved -% adjprior_cq & _sp - adjpriors for cliques and separators -% indCqCounts, indSpCounts - counts for individual ind -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% ind is move to population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -npops = size(CQ_COUNTS,3); -changes = LOGDIFF(ind,:); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); -changes(i1) = 0; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - -i2 = find(changes==-Inf); -i2 = setdiff(i2,i1); -i2_logml = POP_LOGML(i2); - -ni2 = length(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 ni2]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[ni2 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 ni2]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) + repmat(sumSp,[ni2 1]); - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 ni2]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[ni2 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 ni2]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) - repmat(sumSp,[ni2 1]); -% a = repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; -LOGDIFF(ind,:) = changes; - -%------------------------------------------------------------------------------------ - -function changes = computeChanges2(i1, adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if population i1 is combined -% with another population -% -% Input: -% i1 - the population to be combined -% adjprior_cq & _sp - adjpriors for cliques and separators -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% i1 is combined with population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); - -i1_logml = POP_LOGML(i1); -indCqCounts = CQ_COUNTS(:,:,i1); -indSpCounts = SP_COUNTS(:,:,i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -new_i1_logml = 0; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); -% a = repmat(sumSp,[npops-1 1]); -% if ~any(sumSp) -% a(:,[1:size(a,2)])=[]; -% end -% SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ a ; - - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - - - -%------------------------------------------------------------------------------------ - - -function changes = computeChanges3(T2, inds2, i1, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if subpopulation of i2 is -% moved to another population -% -% Input: -% T2 - partition of inds2 to subpopulations -% inds2 - individuals in population i1 -% i2 -% counts_cq, counts_sp - counts for individuals -% -% Output: -% changes - table of size length(unique(T2))*npops. -% changes(i,j) = difference in logml if subpopulation inds2(find(T2==i)) of -% i2 is moved to population j - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -npops2 = length(unique(T2)); -changes = zeros(npops2,npops); - -%cq_counts = CQ_COUNTS; -%sp_counts = SP_COUNTS; -%cq_sumcounts = CQ_SUMCOUNTS; -%sp_sumcounts = SP_SUMCOUNTS; - - -i1_logml = POP_LOGML(i1); - -for pop2 = 1:npops2 - % inds = inds2(find(T2==pop2)); - inds = inds2(logical(T2==pop2)); - ninds = length(inds); - if ninds>0 - indCqCounts = uint16(sum(counts_cq(:,:,inds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,inds),3)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - - new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); - - new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - - changes(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges5(inds, i1, i2, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes change in logml if individual of inds is moved between -% populations i1 and i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; global PARTITION; - -ninds = length(inds); -changes = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %from - pop2 = i2; %to - else - pop1 = i2; - pop2 = i1; - end - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)-indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)-sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)-indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:) - sumSp; - - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)+indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)+sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)+indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:) + sumSp; - - new_logmls = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - changes(i) = sum(new_logmls); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)+indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)+sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)+indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:)+sumSp; - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)-indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)-sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)-indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:)-sumSp; -end - -changes = changes - i1_logml - i2_logml; - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individual ind is moved to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp) -% Updates global variables when all individuals from population i1 are moved -% to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -% inds = find(PARTITION==i1); -% PARTITION(inds) = i2; -PARTITION(logical(PARTITION==i1)) = i2; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+CQ_COUNTS(:,:,i1); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+CQ_SUMCOUNTS(i1,:); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+SP_COUNTS(:,:,i1); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+SP_SUMCOUNTS(i1,:); - -CQ_COUNTS(:,:,i1) = 0; -CQ_SUMCOUNTS(i1,:) = 0; -SP_COUNTS(:,:,i1) = 0; -SP_SUMCOUNTS(i1,:) = 0; - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individuals muuttuvat are moved to -% population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -global LOGDIFF; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Returns individuals inds in order according to the change in the logml if -% they are moved out of the population pop - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)-indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)-sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)-indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)-sumSp; - - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)+indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)+sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)+indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)+sumSp; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - - -%-------------------------------------------------------------------------- - -function clearGlobalVars - -global CQ_COUNTS; CQ_COUNTS = []; -global CQ_SUMCOUNTS; CQ_SUMCOUNTS = []; -global SP_COUNTS; SP_COUNTS = []; -global SP_SUMCOUNTS; SP_SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; -global LOGDIFF; LOGDIFF = []; - -%-------------------------------------------------------------------------- - -function npops = removeEmptyPops -% Removes empty pops from all global COUNTS variables. -% Updates PARTITION and npops - -global CQ_COUNTS; -global CQ_SUMCOUNTS; -global SP_COUNTS; -global SP_SUMCOUNTS; -global PARTITION; -global LOGDIFF; - -notEmpty = find(any(CQ_SUMCOUNTS,2)); -CQ_COUNTS = CQ_COUNTS(:,:,notEmpty); -CQ_SUMCOUNTS = CQ_SUMCOUNTS(notEmpty,:); -SP_COUNTS = SP_COUNTS(:,:,notEmpty); -SP_SUMCOUNTS = SP_SUMCOUNTS(notEmpty,:); -LOGDIFF = LOGDIFF(:,notEmpty); - -for n=1:length(notEmpty) -% apu = find(PARTITION==notEmpty(n)); -% PARTITION(apu)=n; -PARTITION(logical(PARTITION==notEmpty(n))) = n; -end -npops = length(notEmpty); - -%-------------------------------------------------------------------------- - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, että annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ssä ei vielä ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyistä partitiota vastaava nclusters:in arvo. Muutoin ei tehdä mitään. -global PARTITION; -apu = isempty(find(abs(partitionSummary(:,2)-logml)<1e-5,1)); -if apu - % Nyt löydetty partitio ei ole vielä kirjattuna summaryyn. - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- - -function [counts, sumcounts] = initialCounts(ind_counts) - -global PARTITION; - -pops = unique(PARTITION); -npops = max(pops); - -counts = zeros(size(ind_counts,1), size(ind_counts,2), npops,'uint16'); -sumcounts = zeros(npops, size(ind_counts,2),'uint16'); - -for i = 1:npops - inds = find(PARTITION == i); - counts(:,:,i) = sum(ind_counts(:,:,inds), 3); - sumcounts(i,:) = sum(counts(:,:,i),1); -end - -%-------------------------------------------------------------------------- - -function logml = computeLogml(adjprior_cq, adjprior_sp) - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -npops = size(CQ_COUNTS, 3); - -cq_logml = sum(sum(sum(gammaln(cq_counts+repmat(adjprior_cq,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_cq))) - ... - sum(sum(gammaln(1+cq_sumcounts))); - -sp_logml = sum(sum(sum(gammaln(sp_counts+repmat(adjprior_sp,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_sp))) - ... - sum(sum(gammaln(1+sp_sumcounts))); - -logml = cq_logml - sp_logml; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%-------------------------------------------------------------------------- - -function popLogml = computePopulationLogml(pops, adjprior_cq, adjprior_sp) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -nall_cq = size(CQ_COUNTS,1); -nall_sp = size(SP_COUNTS, 1); -ncliq = size(CQ_COUNTS,2); -nsep = size(SP_COUNTS, 2); - -z = length(pops); - -popLogml_cq = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_cq,[1 1 z]) + cq_counts(:,:,pops)) ... - ,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_cq))); - -popLogml_sp = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_sp,[1 1 z]) + sp_counts(:,:,pops)) ... - ,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_sp))); - -popLogml = popLogml_cq - popLogml_sp; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%------------------------------------------------------------------- - - - - -%-------------------------------------------------------------- -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -% newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n= 1 - suurinYks = suurinYks+1; - end - if suurinYks<10 - mjono(7) = num2str(suurinYks); - mjono(6) = 'e'; - mjono(5) = palautaYks(abs(logml),suurinYks-1); - mjono(4) = '.'; - mjono(3) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(2) = '-'; - end - elseif suurinYks>=10 - mjono(6:7) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(logml),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(1) = '-'; - end - end -end - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:inä -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmillä -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhjiä -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - - diff --git a/matlab/linkage/linkageMixture_speed.m b/matlab/linkage/linkageMixture_speed.m deleted file mode 100644 index e393726..0000000 --- a/matlab/linkage/linkageMixture_speed.m +++ /dev/null @@ -1,2130 +0,0 @@ -function linkageMixture_speed -base = findobj('Tag','base_figure'); - - -% check whether fixed k mode is selected -h0 = findobj('Tag','fixk_menu'); -fixedK = get(h0, 'userdata'); - -if fixedK - if ~(fixKWarning == 1) % call function fixKWarning - return - end -end - -% check whether partition compare mode is selected -h1 = findobj('Tag','partitioncompare_menu'); -partitionCompare = get(h1, 'userdata'); - -% Data handling -input_type = questdlg('Specify the format of your data: ',... - 'Specify Data Format', ... - 'MLST-format', 'BAPS-format','Pre-processed data', 'MLST-format'); - -switch input_type - - case 'MLST-format' - - %waitALittle; - mlst_type = questdlg('Choose data type: ',... - 'Specify MLST format', ... - 'Separate allelic profiles(TXT)', ... - 'Concatenate allelic sequences(EXCEL)','Separate allelic profiles(TXT)'); - switch mlst_type - case 'Concatenate allelic sequences(EXCEL)' - %waitALittle - setWindowOnTop(base,'false') - [filename,pathname] = uigetfile('*.xls','Load new concatenate sequence profile(*.xls)'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - display('---------------------------------------------------'); - display(['Reading sequence profile from: ',[pathname filename],'...']); - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); - [data, component_mat, popnames] = processxls([pathname filename]); - - if isempty(data) - display('*** ERROR: Failed in loading the data'); - return; - end - - case 'Separate allelic profiles(TXT)' - % Ask the allelic profile - setWindowOnTop(base,'false') - [filename,pathname] = uigetfile('*.pl;*.txt','Load new allelic profile(*.pl, *.txt)'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - display('---------------------------------------------------'); - display(['Reading allelic profile from: ',[pathname filename],'...']); - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); - - % Preprocess the profile - output = processprofile([pathname filename]); - headercount = size(output,2); - flag = zeros(1,headercount); - for i = 1:headercount - if strcmpi('ST',output{1,i}) - flag(i) = 1; - else - if strcmpi('Isolate', output{1,i}) || strcmpi('Strain',output{1,i}) - flag(i) = 2; - else - if strcmpi('Species', output{1,i}) - flag(i) = 3; - end - end - end - end - - if ~any(flag) - h = errordlg(['Loading of the specified file was unsuccessful. ' ... - 'Please see the tutorial to find out the correct file ' ... - 'format.'] ,'Error','modal'); - handle = [h,base]; - setWindowOnTop(handle,{'true','true'}) - - uiwait(h); - fprintf(1,'\n*** ERROR: Failed in loading the allelic profile.\n'); - return; - end - - index = (1:size(output,1)-1)'; - - - % Species selection - species_loc = find(flag==3); - if ~isempty(species_loc) - species_col = output((2:end), species_loc); - [species_str, m, n] = unique(species_col); - - [s1,v1] = listdlg('PromptString','Select species:',... - 'SelectionMode','multiple',... - 'Name','Select Species',... - 'ListString',species_str'); - removed = setdiff(n,s1); - - if ~v1 || isempty(s1) - dispCancel;return - end - else - n = index; - removed = 0; - end - - % Isolate/Strain selection - isolate_loc = find(flag==2); - if ~isempty(isolate_loc) - isolate_col = output((2:end), isolate_loc); - isolate_str = isolate_col(logical(~ismember(n,removed))); - index(ismember(n,removed)) = []; - - [s2,v2] = choosebox('Name','Select Isolates','PromptString',... - 'Isolates in the sample(Ctrl+A to select all):','SelectString', ... - 'Isolates you have selected','ListString', isolate_str', ... - 'InitialValue',1); - if isempty(s2) || ~v2 - dispCancel; return - end - else - % ST selection - isolate_loc = find(flag==1); - isolate_col = output((2:end), isolate_loc); - % isolate_str = isolate_col(find(~ismember(n,removed))); - isolate_str = isolate_col(logical(~ismember(n,removed))); - index(ismember(n,removed)) = []; - - [s2,v2] = choosebox('Name','Select STs','PromptString',... - 'STs in the sample (Ctrl+A to select all):','SelectString', ... - 'STs you have selected','ListString', isolate_str'); - if isempty(s2) || ~v2 - dispCancel; return - end - end - - % Read the data - isolate_index = index(s2); - if ~isempty(isolate_loc) - popnames(:,1) = isolate_str(s2); - else - popnames(:,1) = num2cell(index(s2)); - end - popnames(:,2) = num2cell([1:length(isolate_index)]'); - - % remove empty elements - strmat = output([isolate_index+1]',find(flag==0)); - [i,j] = find(cellfun('isempty',strmat)); - ij = [i j]; - for k=1:length(i) - strmat(ij(k,1),ij(k,2))={''}; - end - [i,j] = find(strcmp(strmat,'')); - ij = [i j]; - for k=1:length(i) - strmat(ij(k,1),ij(k,2))={'0'}; - end - - data_allele = zeros(size(strmat)); - for i = 1:size(strmat,1) - for j = 1:size(strmat,2) - data_allele(i,j) = str2num(char(strmat(i,j))); - end - end - - % remove columns containing empty values - realgene = find(all(data_allele)); - data_allele = data_allele(:,realgene); - partition_index = 1:size(strmat,1); - data_allele = [data_allele partition_index']; - genename = output(1,find(flag==0)); - genename = genename(realgene); - - - if isempty(genename) || isempty(data_allele) - msgbox(['Loading of the specified file was unsuccessful. ' ... - 'Please see the tutorial to find out the correct file ' ... - 'format.'] ,'Error', ... - 'error'); - fprintf(1,'\n*** ERROR: Failed in loading the allelic profile.\n'); - return; - else - display('---------------------------------------------------'); - display(['# of strains: ', num2str(size(data_allele,1))]); - display(['# of genes: ', num2str(size(data_allele,2)-1)]); - end - - %waitALittle; - % Ask the individual gene sequence - % [isOK,returnvalue] = askSeq; - % if (isOK & returnvalue~= 1) % allelic profile loaded only - % data = data_allele; - % component_mat = [1:size(data,2)-1]'; % assume independence - % elseif - % isOK & returnvalue == 1, - [s3,v3] = listdlg('PromptString','Select genes:',... - 'SelectionMode','multiple',... - 'Name','Select Genes',... - 'ListString',genename); - - if isempty(s3) || ~v3 - dispCancel; - return - else - m = size(s3,2); % number of genes - % data_seq = cell(m,1); - data = []; - genesize = zeros(1,m); - for i=1:m - %waitALittle; - data_gene = readfasta(genename{s3(i)}); % read fasta - if (isempty(data_gene)) - display('*** ERROR: Failed in loading the sequence data'); - return; - else - data_gene(:,find(sum(data_gene)==0)) = []; % NB! remove all the gaps - % data_seq{i} = data_gene; - selected_data = data_gene(data_allele(:,s3(i))',:); % Store only those selected strains - - emptyloci = find(all(selected_data(:,[1:end])<0)); - if ~isempty(emptyloci) - disp('Removing empty loci...'); - end - selected_data(:,emptyloci) = []; % remove empty loci - data = [data selected_data]; - end - genesize(i) = size(selected_data,2); % NB! could be different than the original gene length - end - data = [data data_allele(:,end)]; % add the index - % determine the component matrix - component_mat = zeros(m,max(genesize)); - cum = cumsum(genesize); - component_mat(1,1:genesize(1)) = 1:cum(1); - for i=2:m - component_mat(i,1:genesize(i)) = (cum(i-1)+1):cum(i); - end - end - % elseif isOK ==0 - % return - % end - otherwise - return - end - - %waitALittle; - display('---------------------------------------------------'); - fprintf(1,'Preprocessing the data ...'); - - % Make the missing data complete - % missing values are denoted as -999 - data = uint16(data); - % data = uint8(data); - data = makecomplete(data); - if isempty(data) - display('*** ERROR: Failed in completing the missing data'); - return; - end - - isRational = isTheLoadedNewDataRational(data); - if isRational == 0 - return; - end - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(data); - - - % Distance between individuals is computed as if the loci are - % independent. - [Z,dist] = newGetDistances(data,rowsFromInd); - fprintf(1,'Finished.\n'); - ninds = max(data(:,end)); - popnames = fixPopnames(popnames, ninds); - - c.data = uint16(data); c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.popnames = popnames; c.component_mat = component_mat; - c.dist = dist; c.Z = Z; - - %waitALittle; - save_preproc = questdlg('Do you wish to save the pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - else - kokonimi = [pathname filename]; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end - end; - - %waitALittle; - linkage_model = questdlg('Specify the linkage model',... - 'Specify the linkage model?',... - 'Linear','Codon', 'Independent', 'Linear'); - if isequal(linkage_model,'Linear') - linkage_model = 'linear'; - display('Linear model was selected.'); - elseif isequal(linkage_model,'Codon') - linkage_model = 'codon'; - display('Codon model was selected.'); - elseif isequal(linkage_model,'Independent') - display('Independent model was selected.'); - c.data = double(c.data); - greedyMix(c); - return; - else - dispCancel; - return; - end; - - % Data transformation - fprintf(1,'Transforming the data ...'); - index = data(:,end); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - % Count the data - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - fprintf(1,'Finished.\n'); - - clear data_clique data_separator - - save_preproc = questdlg('Do you wish to save the fully pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save fully pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - else - kokonimi = [pathname filename]; - c.counts_cq = counts_cq; c.adjprior_cq = adjprior_cq; - c.counts_sp = counts_sp; c.adjprior_sp = adjprior_sp; - c.linkage_model = linkage_model; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end - end; - clear c; - - case 'BAPS-format' - - input_type = questdlg('Specify the format of your data: ',... - 'Specify BAPS Format', ... - 'BAPS sequence data', 'BAPS numeric data', 'BAPS sequence data'); - switch input_type - case 'BAPS numeric data' - %waitALittle; - setWindowOnTop(base,'false') - [filename,pathname] = uigetfile('*.txt', 'Load BAPS numeric data'); - if (sum(filename)==0) || (sum(pathname)==0) - %cancel was pressed; do nothing. - return; - end; - - display('---------------------------------------------------'); - display(['Reading BAPS numeric data from: ',[pathname filename],'...']); - - try - data = load([pathname filename]); - catch - disp('*** ERROR: Incorrect BAPS numerical data.'); - return - end - case 'BAPS sequence data' - waitALittle;%waitALittle; - [data, filename] = readbaps; - if isempty(data) - return - end - otherwise - return; - end - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - waitALittle;%waitALittle; - input_pops = questdlg(['When using data which are in BAPS-format, '... - 'you can specify the sampling populations of the individuals by '... - 'giving two additional files: one containing the names of the '... - 'populations, the other containing the indices of the first '... - 'individuals of the populations. Do you wish to specify the '... - 'sampling populations?'], ... - 'Specify sampling populations?',... - 'Yes', 'No', 'No'); - if isequal(input_pops,'Yes') - %waitALittle; - display('Reading name and index files...'); - setWindowOnTop(base,'false') - [namefile, namepath] = uigetfile('*.txt', 'Load population names'); - if namefile==0 - kysyToinen = 0; - else - kysyToinen = 1; - end - if kysyToinen==1 - %waitALittle; - setWindowOnTop(base,'false') - [indicesfile, indicespath] = uigetfile('*.txt', 'Load population indices'); - if indicesfile==0 - % popnames = []; - dispCancel; - return - else - popnames = initPopNames([namepath namefile],[indicespath indicesfile]); - end - else - % popnames = []; - dispCancel; - return - end - else - % popnames = []; - popnames(:,1) = num2cell(unique(data(:,end))); - popnames(:,2) = popnames(:,1); - ninds = max(data(:,end)); - popnames = fixPopnames(popnames, ninds); - end - - % check that popnames is correct - if isempty(popnames) - display('*** ERROR: error in reading popnames.') - return - end - - data = uint16(data); - % Check that the data is rational: - isRational = isTheLoadedNewDataRational(data); - if isRational == 0 - msgbox(['Loaded file contained incorrect data. The last column of the ' ... - 'data file must contain sampling unit identifiers. Identifier specifies the ' ... - 'unit from which the genetic data on that particular row was collected. ' ... - 'Identifiers must be positive integers. If the biggest unit identifier is i.e. 27, there must ' ... - 'be at least one row for each unit 1-27'] ,'Error', ... - 'error'); - disp('*** ERROR: Failed in loading the BAPS data.'); - return; - else - display(['# of haplotypes: ', num2str(size(data,1))]); - display(['# of loci: ', num2str(size(data,2)-1)]); - % display('Finished.'); - end; - - % Check if the data is discrete or continuous - if any(any(fix(data)~=data)) - disp('Found decimal numbers. Continuous model will be used.'); -% input_type = questdlg('Choose the method for the continuous data: ',... -% 'Specify the method', ... -% 'BEC', 'Gibbs sampling with WINBUGS','BEC'); -% switch input_type -% case 'BEC' -% disp('Using the BEC mixture model...'); -% becMixture(data, popnames); -% case 'Gibbs sampling with WINBUGS' -% disp('Preparing the WINBUGS code...'); -% isok = makeBUGS(data); -% if isok disp('Finished.'); -% -% end -% otherwise -% return -% end - disp('** CANCELLED: continuous model is under construction.'); - return; - end - - display('---------------------------------------------------'); - fprintf(1,'Preprocessing the data ...'); - % Make the missing data complete - data = makecomplete(data); - if isempty(data) - display('*** ERROR: Failed in completing the missing data'); - return; - end - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(data); - - % Distance between individuals is computed as if the loci are - % independent. - [Z,dist] = newGetDistances(data,rowsFromInd); - fprintf(1,'Finished.\n'); - - c.data = uint16(data); c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.popnames = popnames; - c.Z = Z; - - input_linkage = questdlg('Do you wish to load the linkage map?',... - 'Load Linkage Map',... - 'Yes','No','Yes'); - if isequal(input_linkage,'Yes'); - display('---------------------------------------------------'); - %%waitALittle; - setWindowOnTop(base,'false') - [linkage_filename, linkage_pathname] = uigetfile('*.txt', 'Load Linkage Map'); - - if isempty(linkage_filename) && isempty(linkage_pathname) - return; - else - display(['Reading linkage map from: ',[linkage_pathname linkage_filename],'...']); - end; - - try - component_mat = load([linkage_pathname linkage_filename]); - catch - disp('*** ERROR: Incorrect linkage map.'); - return; - end - - % Check if the linkage map matches the data - if (size(data,2)-1) ~= max(component_mat(:)) - msgbox(['Loading of the specified file was unsuccessful. ' ... - 'The linkage map dose not match with the data.'] ,'Error', ... - 'error'); - disp('*** ERROR: Failed in loading the linkage map.'); - return; - else - display(['# of linkage groups: ', num2str(size(component_mat,1))]); - end; - display('---------------------------------------------------'); - h0 = findobj('Tag','filename1_text'); - set(h0,'String',[filename '/' linkage_filename]); clear h0; - c.component_mat = component_mat; - else - display('Independent model was selected.'); - c.data = double(c.data); - greedyMix(c); - return; - end - - - save_preproc = questdlg('Do you wish to save the pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %%waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - kokonimi = [pathname filename]; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end; - - linkage_model = questdlg('Specify the linkage model',... - 'Specify the linkage model?',... - 'Linear','Codon', 'Independent', 'Linear'); - if isequal(linkage_model,'Linear') - linkage_model = 'linear'; - display('Linear model was selected.'); - elseif isequal(linkage_model,'Codon') - linkage_model = 'codon'; - display('Codon model was selected.'); - elseif isequal(linkage_model,'Independent') - display('Independent model was selected.'); - c.data = double(c.data); - greedyMix(c); - return; - else - dispCancel; - return; - end; - - % Data transformation - % display('---------------------------------------------------'); - fprintf(1,'Transforming the data ...'); - index = data(:,end); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - clear data_clique; - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - clear data_separator; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - fprintf(1,'Finished.\n'); - - save_preproc = questdlg('Do you wish to save the fully pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save fully pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - kokonimi = [pathname filename]; - c.counts_cq = counts_cq; c.adjprior_cq = adjprior_cq; - c.counts_sp = counts_sp; c.adjprior_sp = adjprior_sp; - c.linkage_model = linkage_model; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end; - clear c; - - case 'Pre-processed data' - % This is basically the same format as the "Pre-processed data" in - % the basic clustering. The only difference is that the file - % includes also the component_mat - % %waitALittle; - setWindowOnTop(base,'false') - [filename, pathname] = uigetfile('*.mat', 'Load pre-processed data'); - if filename==0 - return; - end - display('---------------------------------------------------'); - display(['Reading preprocessed data from: ',[pathname filename],'...']); - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - - struct_array = load([pathname filename]); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'dist') - display('*** ERROR: Incorrect file format'); - return - end - clear struct_array; - elseif isfield(struct_array,'dist') %Mideva versio - c = struct_array; - clear struct_array; - else - display('*** ERROR: Incorrect file format'); - return; - end - - % The following are the same as in the basic clustering - data = c.data; popnames = c.popnames; Z = c.Z; - noalle = c.noalle; adjprior = c.adjprior; - rowsFromInd = c.rowsFromInd; alleleCodes = c.alleleCodes; - dist = c.dist; priorTerm = c.priorTerm; - - if ~isfield(c,'component_mat') - display('*** ERROR: Incorrect file format'); - return - end - - % This is new - component_mat = c.component_mat; - data = uint16(data); - - display(['# of haplotypes: ', num2str(size(data,1))]); - display(['# of loci: ', num2str(size(data,2)-1)]); - display(['# of linkage groups: ', num2str(size(component_mat,1))]); - - if ~isfield(c, 'linkage_model') - %%%waitALittle; - % Independent is not an option, since it can be computed with the - % basic clustering which is much faster - linkage_model = questdlg('Specify the linkage model',... - 'Specify the linkage model?',... - 'Linear','Codon','Linear'); - if isequal(linkage_model,'Linear') - linkage_model = 'linear'; - display('Linear model was selected.'); - elseif isequal(linkage_model,'Codon') - linkage_model = 'codon'; - display('Codon model was selected.'); - else - dispCancel; - return; - end; - - clear c; % save the memory usage - - % Data transformation - fprintf(1,'Transforming the data ...'); - index = data(:,end); -% [data_clique, data_separator, noalle_clique, noalle_separator] = ... -% transform2(data, component_mat, linkage_model); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - clear data_clique; - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - clear data_separator; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - fprintf(1,'Finished.\n'); - - save_preproc = questdlg('Do you wish to save the fully pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %%%waitALittle; - [filename, pathname] = uiputfile('*.mat','Save fully pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - - kokonimi = [pathname filename]; c.data = data; - c.counts_cq = counts_cq; c.adjprior_cq = adjprior_cq; - c.counts_sp = counts_sp; c.adjprior_sp = adjprior_sp; - c.linkage_model = linkage_model; - c.rowsFromInd = rowsFromInd; - c.alleleCodes = alleleCodes; - c.noalle = noalle; - c.adjprior = adjprior; - c.priorTerm = priorTerm; - c.popnames = popnames; - c.component_mat = component_mat; - c.dist = dist; c.Z = Z; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end; - clear c; - else - %Linkage model is specified in the preprocessed file. - counts_cq = c.counts_cq; adjprior_cq = c.adjprior_cq; - counts_sp = c.counts_sp; adjprior_sp = c.adjprior_sp; - linkage_model = c.linkage_model; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - clear c; - display(['linkage model: ', linkage_model]); - end - - otherwise - return; -end - - -global POP_LOGML; global PARTITION; -global CQ_COUNTS; global SP_COUNTS; %These counts are for populations -global CQ_SUMCOUNTS; global SP_SUMCOUNTS; %not for individuals -clearGlobalVars; - -c.noalle = noalle; -c.adjprior = adjprior; %priorTerm = c.priorTerm; -c.rowsFromInd = rowsFromInd; -c.counts_cq = counts_cq; -c.adjprior_cq = adjprior_cq; -c.counts_sp = counts_sp; -c.adjprior_sp = adjprior_sp; -c.dist = dist; c.Z = Z; - -% partition compare mode -if ~isempty(partitionCompare) - partitions = partitionCompare.partitions; - npartitions = size(partitions,2); - partitionLogml = zeros(1,npartitions); - for i = 1:npartitions - % number of unique partition lables - try - [cq_counts, cq_sumcounts] = ... - initialCounts(counts_cq, partitions(:,i)); - [sp_counts, sp_sumcounts] = initialCounts(counts_sp, partitions(:,i)); - partitionLogml(i) = computeLogml(adjprior_cq, adjprior_sp, ... - cq_counts, cq_sumcounts, ... - sp_counts, sp_sumcounts); - catch - disp('*** ERROR: unmatched data.'); - return - end - end - % return the logml result - partitionCompare.logmls = partitionLogml; - set(h1, 'userdata', partitionCompare); - return -end - -if fixedK - [logml, npops, partitionSummary] = linkageMix_fixK(c); -else - [logml, npops, partitionSummary] = linkageMix(c); -end - -if logml==1 - return; -end - -h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outp = get(h0,'String'); - -clear c; % save the memory -%This is basically the same as in BAPS 3. -changesInLogml = writeMixtureInfo(logml, counts_cq, counts_sp, adjprior_cq, ... - adjprior_sp, outp, inp, partitionSummary, popnames, linkage_model, ... - fixedK); - -viewMixPartition(PARTITION, popnames); - -% --------------------------------------------------------------------- -% Save the result. -% Jing - 26.12.2005 -talle = questdlg(['Do you want to save the mixture populations ' ... - 'so that you can use them later in admixture analysis?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - %%%waitALittle; - [filename, pathname] = uiputfile('*.mat','Save results as'); - - if (sum(filename)==0) || (sum(pathname)==0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename '.txt']) - delete('baps4_output.baps') - end - end - - [sumcounts, counts] = indLociCounts(PARTITION, data, npops, noalle); - % NB! Index column is removed in data matrix. - c.PARTITION = PARTITION; c.CQ_COUNTS = CQ_COUNTS; c.CQ_SUMCOUNTS = CQ_SUMCOUNTS; - c.SP_COUNTS = SP_COUNTS; c.SP_SUMCOUNTS = SP_SUMCOUNTS; - c.alleleCodes = alleleCodes; c.adjprior_cq = adjprior_cq; c.adjprior_sp = adjprior_sp; c.popnames = popnames; - c.rowsFromInd = rowsFromInd; c.data = uint16(data); c.npops = npops; - % c.nalleles_cq = nalleles_cq; c.nalleles_sp = nalleles_sp; - if strcmp(linkage_model,'linear') % Added on 03.11.06 - c.mixtureType = 'linear_mix'; - elseif strcmp(linkage_model,'codon') - c.mixtureType = 'codon_mix'; - end - c.changesInLogml = changesInLogml; % this variable stores the change of likelihoods. - % [ncluster ninds] - % -Added on 02.11.2006 - - % The next ones are for the admixture input - c.COUNTS = counts; c.SUMCOUNTS = sumcounts; - c.adjprior = adjprior; c.rowsFromInd = rowsFromInd; c.noalle = noalle; c.alleleCodes = alleleCodes; - - % The two variables below are for the new linkage admixture model - c.gene_lengths = calcGeneLengths(component_mat); - - % The logml is saved for parallel computing - c.logml = logml; - - fprintf(1,'Saving the result...') - try -% save([pathname filename], 'c'); - save([pathname filename], 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'Finished.\n'); - catch - display('*** ERROR in saving the result.'); - end -else - if exist('baps4_output.baps','file') - delete('baps4_output.baps') - end -end -% ----------------------------------------------------------------------- - - -%-------------------------------------------------------------------------- -% The next three functions are for computing the initial partition -% according to the distance between the individuals - -function initial_partition=admixture_initialization(nclusters,Z) -T=cluster_own(Z,nclusters); -initial_partition=T; - -%-------------------------------------------------------------------------- -function T = cluster_own(Z,nclust) -% true=logical(1); -% false=logical(0); - -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); -% maximum number of clusters based on inconsistency -if m <= maxclust - T = (1:m)'; -elseif maxclust==1 - T = ones(m,1); -else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end -end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - -%-------------------------------------------------------------------------- - -function Z = computeLinkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 || m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -% monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges(ind, adjprior_cq, adjprior_sp, ... - indCqCounts, indSpCounts) -% Computes changes in log-marginal likelihood if individual ind is -% moved to another population -% -% Input: -% ind - the individual to be moved -% adjprior_cq & _sp - adjpriors for cliques and separators -% indCqCounts, indSpCounts - counts for individual ind -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% ind is move to population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) + repmat(sumSp,[npops-1 1]); - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) - repmat(sumSp,[npops-1 1]); -% a = repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - -%------------------------------------------------------------------------------------ - -function changes = computeChanges2(i1, adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if population i1 is combined -% with another population -% -% Input: -% i1 - the population to be combined -% adjprior_cq & _sp - adjpriors for cliques and separators -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% i1 is combined with population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); - -i1_logml = POP_LOGML(i1); -indCqCounts = CQ_COUNTS(:,:,i1); -indSpCounts = SP_COUNTS(:,:,i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -new_i1_logml = 0; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); -% a = repmat(sumSp,[npops-1 1]); -% if ~any(sumSp) -% a(:,[1:size(a,2)])=[]; -% end -% SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ a ; - - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - - - -%------------------------------------------------------------------------------------ - - -function changes = computeChanges3(T2, inds2, i1, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if subpopulation of i2 is -% moved to another population -% -% Input: -% T2 - partition of inds2 to subpopulations -% inds2 - individuals in population i1 -% i2 -% counts_cq, counts_sp - counts for individuals -% -% Output: -% changes - table of size length(unique(T2))*npops. -% changes(i,j) = difference in logml if subpopulation inds2(find(T2==i)) of -% i2 is moved to population j - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -npops2 = length(unique(T2)); -changes = zeros(npops2,npops); - -%cq_counts = CQ_COUNTS; -%sp_counts = SP_COUNTS; -%cq_sumcounts = CQ_SUMCOUNTS; -%sp_sumcounts = SP_SUMCOUNTS; - - -i1_logml = POP_LOGML(i1); - -for pop2 = 1:npops2 - % inds = inds2(find(T2==pop2)); - inds = inds2(logical(T2==pop2)); - ninds = length(inds); - if ninds>0 - indCqCounts = uint16(sum(counts_cq(:,:,inds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,inds),3)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - - new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); - - new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - - changes(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges5(inds, i1, i2, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes change in logml if individual of inds is moved between -% populations i1 and i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; global PARTITION; - -ninds = length(inds); -changes = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %from - pop2 = i2; %to - else - pop1 = i2; - pop2 = i1; - end - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)-indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)-sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)-indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:) - sumSp; - - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)+indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)+sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)+indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:) + sumSp; - - new_logmls = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - changes(i) = sum(new_logmls); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)+indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)+sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)+indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:)+sumSp; - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)-indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)-sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)-indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:)-sumSp; -end - -changes = changes - i1_logml - i2_logml; - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individual ind is moved to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp) -% Updates global variables when all individuals from population i1 are moved -% to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; - -% inds = find(PARTITION==i1); -% PARTITION(inds) = i2; -PARTITION(logical(PARTITION==i1)) = i2; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+CQ_COUNTS(:,:,i1); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+CQ_SUMCOUNTS(i1,:); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+SP_COUNTS(:,:,i1); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+SP_SUMCOUNTS(i1,:); - -CQ_COUNTS(:,:,i1) = 0; -CQ_SUMCOUNTS(i1,:) = 0; -SP_COUNTS(:,:,i1) = 0; -SP_SUMCOUNTS(i1,:) = 0; - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individuals muuttuvat are moved to -% population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Returns individuals inds in order according to the change in the logml if -% they are moved out of the population pop - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)-indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)-sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)-indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)-sumSp; - - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)+indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)+sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)+indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)+sumSp; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - - -%------------------------------------------------------------------------- - -function [Z, dist] = newGetDistances(data, rowsFromInd) - -ninds = max(data(:,end)); -nloci = size(data,2)-1; -riviLkm = nchoosek(double(ninds),2); - -% empties = find(data<0); -% data(empties)=0; -data(logical(data<0)) = 0; -data = uint16(data); - -pariTaulu = zeros(riviLkm,2); -aPointer=1; - -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+double(ninds-1-a),1) = ones(ninds-a,1,'uint16')*a; - pariTaulu(aPointer:aPointer+double(ninds-1-a),2) = uint16((a+1:ninds)'); - aPointer = aPointer+double(ninds-a); -end - -eka = pariTaulu(:,ones(1,rowsFromInd)); -eka = eka * rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -eka = eka - miinus; - -toka = pariTaulu(:,ones(1,rowsFromInd)*2); -toka = toka * rowsFromInd; -toka = toka - miinus; - -eka = uint16(eka); -toka = uint16(toka); - -clear pariTaulu; clear miinus; - -summa = uint16(zeros(riviLkm,1)); -vertailuja = uint16(zeros(riviLkm,1)); - -x = zeros(size(eka)); x = uint16(x); -y = zeros(size(toka)); y = uint16(y); -% fprintf(1,'%%10'); -for j=1:nloci; - - for k=1:rowsFromInd - x(:,k) = data(eka(:,k),j); - y(:,k) = data(toka(:,k),j); - end - - for a=1:rowsFromInd - for b=1:rowsFromInd - vertailutNyt = uint16(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa + uint16(lisays); - end - end - % fprintf(1,'\b\b'); - % fprintf(1,'%d',floor(10+80*j/nloci)); -end - -clear x; clear y; clear vertailutNyt; -clear eka; clear toka; clear data; clear lisays; -dist = zeros(length(vertailuja),1); -% nollat = find(vertailuja==0); -% dist(nollat) = 1; -dist(logical(vertailuja==0)) = 1; -muut = find(vertailuja>0); -dist(muut) = double(summa(muut))./double(vertailuja(muut)); -clear summa; clear vertailuja; clear muut; - -Z = computeLinkage(dist'); -% fprintf(1,'\b\b'); -% fprintf(1,'%d\n',100); -%-------------------------------------------------------------------------- - -function clearGlobalVars - -global CQ_COUNTS; CQ_COUNTS = []; -global CQ_SUMCOUNTS; CQ_SUMCOUNTS = []; -global SP_COUNTS; SP_COUNTS = []; -global SP_SUMCOUNTS; SP_SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - -%-------------------------------------------------------------------------- - -function npops = removeEmptyPops -% Removes empty pops from all global COUNTS variables. -% Updates PARTITION and npops - -global CQ_COUNTS; -global CQ_SUMCOUNTS; -global SP_COUNTS; -global SP_SUMCOUNTS; -global PARTITION; - -notEmpty = find(any(CQ_SUMCOUNTS,2)); -CQ_COUNTS = CQ_COUNTS(:,:,notEmpty); -CQ_SUMCOUNTS = CQ_SUMCOUNTS(notEmpty,:); -SP_COUNTS = SP_COUNTS(:,:,notEmpty); -SP_SUMCOUNTS = SP_SUMCOUNTS(notEmpty,:); - -for n=1:length(notEmpty) -% apu = find(PARTITION==notEmpty(n)); -% PARTITION(apu)=n; -PARTITION(logical(PARTITION==notEmpty(n))) = n; -end -npops = length(notEmpty); - -%-------------------------------------------------------------------------- - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. -global PARTITION; -apu = isempty(find(abs(partitionSummary(:,2)-logml)<1e-5,1)); -if apu - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- -function [counts, sumcounts] = initialCounts(ind_counts, PARTITION) -% This version is for partition compare mode - -pops = unique(PARTITION); -npops = max(pops); - -counts = zeros(size(ind_counts,1), size(ind_counts,2), npops,'uint16'); -sumcounts = zeros(npops, size(ind_counts,2),'uint16'); - -for i = 1:npops - inds = find(PARTITION == i); - counts(:,:,i) = sum(ind_counts(:,:,inds), 3); - sumcounts(i,:) = sum(counts(:,:,i),1); -end -%-------------------------------------------------------------------------- - - -function logml = computeLogml(adjprior_cq, adjprior_sp, ... - CQ_COUNTS, CQ_SUMCOUNTS, ... - SP_COUNTS, SP_SUMCOUNTS) - - % for partition compare mode -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -npops = size(CQ_COUNTS, 3); - -cq_logml = sum(sum(sum(gammaln(cq_counts+repmat(adjprior_cq,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_cq))) - ... - sum(sum(gammaln(1+cq_sumcounts))); - -sp_logml = sum(sum(sum(gammaln(sp_counts+repmat(adjprior_sp,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_sp))) - ... - sum(sum(gammaln(1+sp_sumcounts))); - -logml = cq_logml - sp_logml; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%-------------------------------------------------------------------------- - -function popLogml = computePopulationLogml(pops, adjprior_cq, adjprior_sp) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -nall_cq = size(CQ_COUNTS,1); -nall_sp = size(SP_COUNTS, 1); -ncliq = size(CQ_COUNTS,2); -nsep = size(SP_COUNTS, 2); - -z = length(pops); - -popLogml_cq = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_cq,[1 1 z]) + cq_counts(:,:,pops)) ... - ,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_cq))); - -popLogml_sp = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_sp,[1 1 z]) + sp_counts(:,:,pops)) ... - ,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_sp))); - -popLogml = popLogml_cq - popLogml_sp; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%------------------------------------------------------------------- - - -function changesInLogml = writeMixtureInfo(logml, counts_cq, counts_sp, adjprior_cq, ... - adjprior_sp, outPutFile, inputFile, partitionSummary, popnames, linkage_model,... - fixedK) - -global PARTITION; -global CQ_COUNTS; - -%global CQ_SUMCOUNTS; -%global SP_COUNTS; global SP_SUMCOUNTS; -ninds = length(PARTITION); -npops = size(CQ_COUNTS,3); -names = (size(popnames,1) == ninds); %Tarkistetaan ett?nimet viittaavat yksilöihin - -if length(outPutFile)>0 - fid = fopen(outPutFile,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Data file/ Linkage map: ' inputFile]); -disp(['Model: Codon']); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp('Best Partition: '); -if (fid ~= -1) - fprintf(fid,'%s \n','Best Partition: '); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',new_line); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',text); - fprintf(fid,'\n'); - end - end; -end - -if npops == 1 - changesInLogml = []; -else - disp(' '); - disp(' '); - disp('Changes in log(marginal likelihood) if indvidual i is moved to group j:'); - if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to group j:'); fprintf(fid, '\n'); - end - - if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku ' ind' blanks(6+erotus)]; - else - ekarivi = ' ind '; - end - - for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; - end - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', ekarivi); fprintf(fid, '\n'); - end - - %ninds = size(data,1)/rowsFromInd; - changesInLogml = zeros(npops,ninds); - for ind = 1:ninds - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - changesInLogml(:,ind) = computeChanges(ind, adjprior_cq, ... - adjprior_sp, indCqCounts, indSpCounts); - - % transform the logml change to conditional posterior probabilities - % Added by Lu Cheng, 28.03.2010 - tmp_changelogml = exp(changesInLogml(:,ind)); - changesInLogml(:,ind) = tmp_changelogml ./ sum(tmp_changelogml); - clear tmp_changelogml; - %---------------------- - - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(changesInLogml(j,ind)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', rivi); fprintf(fid, '\n'); - end - end - - - % % KL-divergence has to be calculated otherwise... - % % { - % disp(' '); disp(' '); - % disp('KL-divergence matrix:'); - % - % if (fid ~= -1) - % fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - % fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - % fprintf(fid, '%s \n', ['KL-divergence matrix:']); fprintf(fid, '\n'); - % end - % - % maxnoalle = size(COUNTS,1); - % nloci = size(COUNTS,2); - % d = zeros(maxnoalle, nloci, npops); - % prior = adjprior; - % prior(find(prior==1))=0; - % nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. - % prior(1,nollia)=1; - % for pop1 = 1:npops - % d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - % dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); - % end - % ekarivi = blanks(7); - % for pop = 1:npops - % ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; - % end - % disp(ekarivi); - % if (fid ~= -1) - % fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); - % end - % - % for pop1 = 1:npops - % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - % for pop2 = 1:pop1-1 - % dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - % div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - % div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - % div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - % end - % disp(rivi); - % if (fid ~= -1) - % fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - % end - % end - % % } -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n'); -end - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -% partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', line); fprintf(fid, '\n'); - end -end - -if ~fixedK - - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'Probabilities for number of clusters'); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - % npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - npopstn = sum(exp(partitionSummary(logical(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', line); fprintf(fid, '\n'); - end - end - end - -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - - -%-------------------------------------------------------------- -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -% newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - -%------------------------------------------------------------------------- - -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). - -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -% dataApu = []; -% nollat = []; -% isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - %alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(logical(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - % data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - data(logical(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); % Tält?yksilölt?puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - -function popnames = fixPopnames(popnames, ninds) - -if length(popnames) == ninds - for i=1:ninds - if isnumeric(popnames{i}) - popnames{i} = num2str(popnames{i}); - % popnames(i) = num2str(popnames{i}); - end - popnames{i} = cellstr(popnames{i}); - % popnames(i) = cellstr(popnames{i}); - end -end - -%-------------------------------------------------------------------------- -function isRational = isTheLoadedNewDataRational(data) -% The last column of the data must include numbers 1-npops -% If so, isRational = 1, otherwise isRational = 0. -% The row numbers must be larger than 1. -if size(data,1) == 1 - isRational = 0; - display('*** ERROR: Sample size must be larger than one'); - return; -end -last_column = data(:,end); -last_column = sort(last_column); -current = 1; -if last_column(1) ~= current - isRational = 0; - display('*** ERROR: Wrong Indexes in the data'); - return; -end; -lengthcol = length(last_column); -for n = 2:lengthcol - if ~(last_column(n) == current || last_column(n) == current + 1) - %Some population is missing from the last column - isRational = 0; - display('*** ERROR: Missing indexes in the data'); - return; - end; - current = last_column(n); -end; -isRational = 1; - - -% %------------------------------------------------------------------------- -% function isRational = isTheLoadedNewLinkageRational(linkage_data) -% % Each positive element must be unique. -% % If so, isRational = 1, otherwise isRational = 0; -% nonzero = find(linkage_data~=0); -% dif = diff(linkage_data(nonzero)); -% if ~all(dif) -% isRational = 0; return; -% end; -% isRational = 1; - -%-------------------------------------------------------------------------- - -function [sumcounts, counts] = ... - indLociCounts(partition, data, npops, noalle) - -nloci=size(data,2)-1; -% ninds = size(data,1); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - % havainnotLokuksessa = find(partition==i & data(:,j)>=0); - havainnotLokuksessa = find(ismember(data(:,end),find(partition==i))); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile, indexFile) -%Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto -% eivät olleet yht?pitki? - -popnames = []; -try -indices = load(indexFile); -catch - msgbox('Loading of the index file was unsuccessful', ... - 'Error', 'error'); - return -end -fid = fopen(nameFile); -if fid == -1 - % File does not exist - msgbox('Loading of the name file was unsuccessful', ... - 'Error', 'error'); - return; -end - -line = fgetl(fid); -counter = 1; - -while sum(line~=-1) && ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -if length(names) ~= length(indices) - disp('The number of population names must be equal to the number of '); - disp('entries in the file specifying indices of the first individuals of '); - disp('each population.'); - return; -end - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = indices(i); -end diff --git a/matlab/linkage/makecomplete.m b/matlab/linkage/makecomplete.m deleted file mode 100644 index a5c28b6..0000000 --- a/matlab/linkage/makecomplete.m +++ /dev/null @@ -1,70 +0,0 @@ -function [data, totalmissing] = makecomplete(data) -%MAKECOMPLETE ESTIMATING missing alleles in a loci. -% Input: -% data: a baps data format, e.g. last column is index. Missing values -% are denoted as any non-positive integers, commonly as 0 or -999. - - -[nrows, ncols] = size(data); -id = all(data>0); -if all(id) % no missing values - return -else - fprintf(1,'Estimating the missing values...'); - - % remove the totally missing loci - missingloci = find(id==0); - totalmissing = find(all(data(:,missingloci)<=0)); - if ~isempty(totalmissing) - disp('Totally missing loci were found.'); - data(:,missingloci(totalmissing)) = ones(nrows,length(totalmissing)); - end -% goodloci = setdiff([1:ncols],missingloci(totalmissing)); -% data = data(:,goodloci); -% missingloci = find(all(data>0)==0); - - data_in = data(:,[missingloci end]); - [counts,noalle,prior,adjprior,rawalleles,data_in] = allfreqsnew3(data_in); - data(:,missingloci) = data_in(:,[1:end-1]); - - s_counts = sum(counts,3); -% m_counts = size(counts,3)*ones(1, size(counts,2)) - sum(s_counts); % counts of missing values - m_counts = nrows*ones(1, size(counts,2)) - sum(s_counts); % counts of missing values - m = repmat(m_counts,size(s_counts,1),1); - a = s_counts + prior; - a = a./repmat(sum(a),size(a,1),1); - c = round(m.*a); - - % Be sure that the sum matches - reassign = find(sum(c)~=m_counts); - for i = reassign - dif = sum(c(:,i)) - m_counts(:,i); - if dif > 0 - remove = find(c(:,i)==max(c(:,i))); - c(remove(1),i)=c(remove(1),i) - dif; % decrease by the diff - else - add = find(c(:,i)==min(c(:,i))); - c(add(1),i)=c(add(1),i) - dif; % increase by the diff - end - end - try - % Fill in the missing values - for i = 1:length(missingloci) - missingrows = find(data(:,missingloci(i))<=0); - p = randperm(m_counts(i)); - cum = [0 cumsum(c(:,i))']; - for j = 1:size(c,1) - data(missingrows(p([cum(j)+1:cum(j+1)])),missingloci(i)) = j; - end - end - catch - % disp('*** ERROR: in completing the missing data.'); - data = []; - totalmissing = []; - return - end - fprintf(1,'Finished.\n'); -end - - - diff --git a/matlab/linkage/myCell2mat.m b/matlab/linkage/myCell2mat.m deleted file mode 100644 index a6f8baa..0000000 --- a/matlab/linkage/myCell2mat.m +++ /dev/null @@ -1,120 +0,0 @@ -function m = myCell2mat(c) -%CELL2MAT Convert the contents of a cell array into a single matrix. -% M = CELL2MAT(C) converts a multidimensional cell array with contents of -% the same data type into a single matrix. The contents of C must be able -% to concatenate into a hyperrectangle. Moreover, for each pair of -% neighboring cells, the dimensions of the cell's contents must match, -% excluding the dimension in which the cells are neighbors. This constraint -% must hold true for neighboring cells along all of the cell array's -% dimensions. -% -% The dimensionality of M, i.e. the number of dimensions of M, will match -% the highest dimensionality contained in the cell array. -% -% CELL2MAT is not supported for cell arrays containing cell arrays or -% objects. -% -% Example: -% C = {[1] [2 3 4]; [5; 9] [6 7 8; 10 11 12]}; -% M = cell2mat(C) -% -% See also MAT2CELL, NUM2CELL - -% Copyright 1984-2006 The MathWorks, Inc. -% $Revision: 1.10.4.6 $ $Date: 2006/06/20 20:09:38 $ - -% Error out if there is no input argument -if nargin==0 - error('MATLAB:cell2mat:NoInputs',['No input argument specified. ' ... - 'There should be exactly one input argument.']) -end -% short circuit for simplest case -elements = numel(c); -if elements == 0 - m = []; - return -end -if elements == 1 - if isnumeric(c{1}) || ischar(c{1}) || islogical(c{1}) || isstruct(c{1}) - m = c{1}; - return - end -end -% Error out if cell array contains mixed data types -cellclass = class(c{1}); -ciscellclass = cellfun('isclass',c,cellclass); -if ~all(ciscellclass(:)) - error('MATLAB:cell2mat:MixedDataTypes', ... - 'All contents of the input cell array must be of the same data type.'); -end - -% Error out if cell array contains any cell arrays or objects -ciscell = iscell(c{1}); -cisobj = isobject(c{1}); -if cisobj || ciscell - error('MATLAB:cell2mat:UnsupportedCellContent',['Cannot support cell ' ... - 'arrays containing cell arrays or objects.']); -end - -% If cell array of structures, make sure the field names are all the same -if isstruct(c{1}) - cfields = cell(elements,1); - for n=1:elements - cfields{n} = fieldnames(c{n}); - end - % Perform the actual field name equality test - if ~isequal(cfields{:}) - error('MATLAB:cell2mat:InconsistentFieldNames', ... - ['The field names of each cell array element must be consistent ' ... - 'and in consistent order.']) - end -end - -% If cell array is 2-D, execute 2-D code for speed efficiency -if ndims(c) == 2 - rows = size(c,1); - m = cell(rows,1); - % Concatenate each row first - for n=1:rows - m{n} = cat(3,c{n,:}); - end - % Now concatenate the single column of cells into a matrix - m = cat(3,m{:}); - return -end - -csize = size(c); -% Treat 3+ dimension arrays - -% Construct the matrix by concatenating each dimension of the cell array into -% a temporary cell array, CT -% The exterior loop iterates one time less than the number of dimensions, -% and the final dimension (dimension 1) concatenation occurs after the loops - -% Loop through the cell array dimensions in reverse order to perform the -% sequential concatenations -for cdim=(length(csize)-1):-1:1 - % Pre-calculated outside the next loop for efficiency - ct = cell([csize(1:cdim) 1]); - cts = size(ct); - ctsl = length(cts); - mref = {}; - - % Concatenate the dimension, (CDIM+1), at each element in the temporary cell - % array, CT - for mind=1:prod(cts) - [mref{1:ctsl}] = ind2sub(cts,mind); - % Treat a size [N 1] array as size [N], since this is how the indices - % are found to calculate CT - if ctsl==2 && cts(2)==1 - mref = {mref{1}}; - end - % Perform the concatenation along the (CDIM+1) dimension - ct{mref{:}} = cat(cdim+1,c{mref{:},:}); - end - % Replace M with the new temporarily concatenated cell array, CT - c = ct; -end - -% Finally, concatenate the final rows of cells into a matrix -m = cat(1,c{:}); diff --git a/matlab/linkage/processprofile.m b/matlab/linkage/processprofile.m deleted file mode 100644 index fc34f15..0000000 --- a/matlab/linkage/processprofile.m +++ /dev/null @@ -1,35 +0,0 @@ -function Output = processprofile(root) - -%Open File and count the number of rows in the file -fid=fopen(root); -nRows=0; -while 1 - iString=fgetl(fid); - if ~ischar(iString) - break - end - nRows=nRows+1; -end - -%Return to beginning of file -fseek(fid,0,'bof'); - -%For each row, assign each space delimitted object to a cell in the "Output" matrix -for iRow=1:nRows - iCol=1; - %Temporary storage of the first object - % Note: the space delimitter used here can be replaced by any delimitter - [TempOutput,Rem]=strtok(fgetl(fid),sprintf('\t')); - %If there is now data on this row, then assign the first object to be an underscore - if (length(TempOutput) == 0) - TempOutput='_'; - end - %Build the "Output" matrix this will be the first column of the iRow-th row - Output(iRow,iCol)=cellstr(TempOutput); - %Repeat this only using Rem as the total string and incrementing the iCol counter - while length(Rem) > 0 - iCol=iCol+1; - [TempOutput,Rem]=strtok(Rem,sprintf('\t')); - Output(iRow,iCol)=cellstr(TempOutput); - end -end \ No newline at end of file diff --git a/matlab/linkage/processxls.m b/matlab/linkage/processxls.m deleted file mode 100644 index 580fd4b..0000000 --- a/matlab/linkage/processxls.m +++ /dev/null @@ -1,77 +0,0 @@ -function [data, component_mat, popnames] = processxls(filename) -% -% a bug in line 64-68 was fixed -data = []; -component_mat = []; -popnames = []; -try - [A,B] = xlsread(filename); -catch - display('*** ERROR: Wrong Excel format'); - return -end - -if size(A,2)~=1 % more than one columns containing numeric ST values - display('*** ERROR: multiple columns of numeric values'); - data = []; component_mat = []; popnames = []; - return -end - -if size(A,1)~=size(B,1)-1 - display('*** ERROR: Wrong format'); - data = []; component_mat = []; popnames = []; - return -end - -B = deblank(B); % remove any trailing blanks -nstrains = size(B,1)-1; -nheader = size(B,2); -for i = 1:nheader - if strcmpi('ST',B{1,i}) ix_ST = i; end - if strcmpi('Strain', B{1,i}) || strcmpi('Isolate',B{1,i}) - ix_Strain = i; - end -end -if ~exist('ix_ST') - display('*** ERROR: ST column needed'); - data = []; component_mat = []; popnames = []; - return -end - -if ~exist('ix_Strain') - ix_gene = setdiff([1:nheader],ix_ST); -else - ix_gene = setdiff([1:nheader],[ix_ST ix_Strain]); -end - -ngenes = length(ix_gene); - -C = cell(nstrains,ngenes); -if ~isempty(A) - for i=1:nstrains - B{i+1,ix_ST}=num2str(A(i)); - for j=1:ngenes - C{i,j}=uint16(i_encode_n(B{i+1,ix_gene(j)})); % save the memory. - end - end -end -genesize=cellfun('size',C(1,:),2); -data=cell2mat(C); -data=[data uint16([1:nstrains]')]; -component_mat = zeros(ngenes,max(genesize)); -cum = cumsum(genesize); -component_mat(1,[1:genesize(1)]) = [1:cum(1)]; -for i=2:ngenes - component_mat(i,[1:genesize(i)]) = [(cum(i-1)+1):cum(i)]; -end - -if ~exist('ix_Strain') - popnames = num2cell(B([2:end],ix_ST)); -else % store the strain names only - popnames = num2cell(B([2:end],ix_Strain)); -end -popnames(:,2)=num2cell([1:nstrains]'); - -display('---------------------------------------------------'); -display(['# of strains: ', num2str(nstrains)]); -display(['# of genes: ', num2str(ngenes)]); \ No newline at end of file diff --git a/matlab/linkage/readbaps.m b/matlab/linkage/readbaps.m deleted file mode 100644 index cd54f20..0000000 --- a/matlab/linkage/readbaps.m +++ /dev/null @@ -1,188 +0,0 @@ -function [data, filename] = readbaps(varargin) -MAXNAME = 200; - -if nargin == 0 - [filename, pathname] = uigetfile( ... - {'*.txt', 'BAPS Sequence Files (*.txt)'; - '*.*', 'All Files (*.*)'}, ... - 'Load BAPS sequence data'); - if ~(filename), data=[]; return; end - filename=[pathname,filename]; -end - -if nargin == 1 - [filename,pathname] = uigetfile( ... - {'*.txt', 'BAPS Sequence Files (*.txt)'; - '*.*', 'All Files (*.*)'}, ... - sprintf('Load the BAPS sequence data for gene %s',varargin{1}) ); - if ~(filename), data = []; - return; - end - filename=[pathname,filename]; -end - -if nargin == 2 - [filename,pathname] = uigetfile( ... - {'*.txt', 'BAPS Sequence Files (*.txt)'; - '*.*', 'All Files (*.*)'}, ... - sprintf('Load the BAPS sequence file for gene %s',varargin{1}) ); - if ~(filename),data = []; - return; - end - filename=[pathname,filename]; - chosen_index = varargin{2}; -end - -if nargin < 3 - % [seqtype, geneticcode]=selectSeqTypeAndGeneticCode; - seqtype = 2; - geneticcode = 1; - if (isempty(seqtype)|isempty(geneticcode)), data=[]; return; end -end - - -pause(0.0001); -if ~ischar(filename) - error('BAPS:InvalidInput','Input must be a character array') - data = []; - return; -end - -if ~(exist(filename,'file') | exist(fullfile(cd,filename),'file')), - % is a valid filename ? - error('BAPS:InvalidInput','Input must be a valid file') - data = []; - return; -end - -file = fopen(filename, 'r'); -display('---------------------------------------------------'); -display(['Reading BAPS sequence data from: ',filename,'...']); -display('---------------------------------------------------'); -% Now we are looking for the maximum length of the sequence -n=0; % the number of sequences -m=0; % the maximum length -cm = 0; % current sequence length - -while 1 - [x,nr] = fscanf(file,'%c',1); - if nr == 0 break; end; - if x==' ' % new sequence started - if cm ~=m & m >0 - % fprintf(['*** ERROR: Different sequence length found in allelic ','%d','.\n'],n+1); - disp('***ERROR: Incorrect BAPS sequence data.'); - data = []; - fclose(file); - return; - end - if cm > m m=cm; end; - cm = 0; - fgets(file); - n=n+1; - else - if isletter(x) | x=='-' | x == '?' - cm=cm+1; - end; - end; -end - -if cm > m m=cm; end; - -% go throught the file -if (m==0 | n==0) - % display(['*** ERROR: Unmatched data for gene ' varargin{1}]); - disp('***ERROR: Incorrect BAPS sequence data.'); - data = []; - fclose(file); - return; -end - -Ss = char(m); S = []; -str = zeros(1,MAXNAME); -sizes = zeros(1,n); -frewind(file); -% names=[]; -names={}; -i=1;j=1; -id = 0; -while 1 - [x,nr] = fscanf(file,'%c',1); - if nr == 0 - break; - end; - if x==' ' % new sequence started - if i~= 0 % save the sequence - % str=x; - [x, sizes(i)]=size(Ss); - S=strvcat(S,Ss); - Ss = []; Ss = char(m); - end; - str=fgetl(file); % read the name, we remove the '>' symbol - % names=strvcat(names,str); - % pos=find(str==' '); - % if ~(isempty(pos)) - % str=str(1:pos(1,1)); - % end - names{i}=str2num(str); - i=i+1; - - if nargin == 1 - if isempty(findstr(str, varargin{1})) - display(['*** ERROR: Unmatched data for gene ' varargin{1}]); - data = []; - fclose(file); - return - end - end - % disp(['Processing in: ' str]); - id = id + 1; - j=1; - else - if isletter(x) | x== '-' | x=='?' - % processing the sequence symbol - Ss(j) = upper(x); - j=j+1; - end; - end; -end -% S=strvcat(S,Ss); -[x, sizes(i)]=size(Ss); -if ~isempty(find(S==' ')) - disp('***ERROR: unequal sequence length.'); - data = []; - fclose(file); - return -end - -if exist('chosen_index','var') - S = S(chosen_index,:); - names = names(chosen_index); -end -aln.seqtype = seqtype; -aln.geneticcode = geneticcode; -aln.seqnames = names; -aln.seq = S; -aln = encodealn(aln); -data = aln.seq; -% if nargin == 1 -% if isempty(findstr(names{1},varargin{1})) -% disp(['*** ERROR: The file does not contain the required gene ' varargin{1}]); -% data = []; -% return; -% end -% end -% display(['# of allelic types: ' num2str(size(aln.seq,1))]); -% display(['# of nucleotides: ' num2str(size(aln.seq,2))]); -% ---------------------- -% order_index = [1:size(aln.seq,1)]'; -% data = [aln.seq order_index]; % Append the index column in the end. -% ---------------------- -try, -data = [data cell2mat(names)']; -catch, - disp('*** ERROR: Failed in loading the BAPS data.'); - disp('*** ERROR: Inidividual indices are not numerically sequential.'); - data = []; -end -fclose(file); - diff --git a/matlab/linkage/readfasta.m b/matlab/linkage/readfasta.m deleted file mode 100644 index 5fadcc4..0000000 --- a/matlab/linkage/readfasta.m +++ /dev/null @@ -1,171 +0,0 @@ -function data = readfasta(varargin) -MAXNAME = 200; - -if nargin == 0 - [filename, pathname] = uigetfile( ... - {'*.fasta;*.fas;*.txt', 'FASTA Format Files (*.fasta, *.fas, *.txt)'; - '*.*', 'All Files (*.*)'}, ... - 'Pick a FASTA file'); - if ~(filename), aln=[]; return; end - filename=[pathname,filename]; -end - -if nargin == 1 - [filename,pathname] = uigetfile( ... - {'*.fasta;*.fas;*.txt', 'FASTA Format Files (*.fasta, *.fas, *.txt)'; - '*.*', 'All Files (*.*)'}, ... - sprintf('Pick the FASTA file for gene %s',varargin{1}) ); - if ~(filename), aln=[]; - data = []; - return; - end - filename=[pathname,filename]; -end - -if nargin == 2 - [filename,pathname] = uigetfile( ... - {'*.fasta;*.fas;*.txt', 'FASTA Format Files (*.fasta, *.fas, *.txt)'; - '*.*', 'All Files (*.*)'}, ... - sprintf('Pick the FASTA file for gene %s',varargin{1}) ); - if ~(filename), aln=[]; - data = []; - return; - end - filename=[pathname,filename]; - chosen_index = varargin{2}; -end - -if nargin < 3 - % [seqtype, geneticcode]=selectSeqTypeAndGeneticCode; - seqtype = 2; - geneticcode = 1; - if (isempty(seqtype)|isempty(geneticcode)), aln=[],data=[]; return; end -end - - -pause(0.0001); -if ~ischar(filename) - error('BAPS:InvalidInput','Input must be a character array') - data = []; - return; -end - -if ~(exist(filename,'file') | exist(fullfile(cd,filename),'file')), - % is a valid filename ? - error('BAPS:InvalidInput','Input must be a valid file') - data = []; - return; -end - -file = fopen(filename, 'r'); -display('---------------------------------------------------'); -display(['Reading fasta sequence from: ',filename,'...']); -display('---------------------------------------------------'); -% Now we are looking for the maximum length of the sequence -n=0; % the number of sequences -m=0; % the maximum length -cm = 0; % current sequence length - -while 1 - [x,nr] = fscanf(file,'%c',1); - if nr == 0 break; end; - if x =='>' % new sequence started - if cm ~=m & m >0 - fprintf(['*** ERROR: Different sequence length found in allelic ','%d','.\n'],n+1); - data = []; - return; - end - if cm > m m=cm; end; - cm = 0; - fgets(file); - n=n+1; - else - if isletter(x) | x=='-' | x == '?' - cm=cm+1; - end; - end; -end - -if cm > m m=cm; end; - -% go throught the file -if (m==0 | n==0) - display(['*** ERROR: Unmatched data for gene ' varargin{1}]); - data = []; - return; -end - -Ss = char(m); S = []; -str = zeros(1,MAXNAME); -sizes = zeros(1,n); -frewind(file); -% names=[]; -names={}; -i=0;j=1; -id = 0; -while 1 - [x,nr] = fscanf(file,'%c',1); - if nr == 0 - break; - end; - if x =='>' % new sequence started - if i~= 0 % save the sequence - [x, sizes(i)]=size(Ss); - S=strvcat(S,Ss); - Ss = []; Ss = char(m); - end; - str=fgetl(file); % read the name, we remove the '>' symbol - % names=strvcat(names,str); - % pos=find(str==' '); - % if ~(isempty(pos)) - % str=str(1:pos(1,1)); - % end - i=i+1; - names{i}=str; - - if nargin == 1 - if isempty(findstr(str, varargin{1})) - display(['*** ERROR: Unmatched data for gene ' varargin{1}]); - data = []; - return - end - end - % disp(['Processing in: ' str]); - id = id + 1; - j=1; - else - if isletter(x) | x== '-' | x=='?' - % processing the sequence symbol - Ss(j) = upper(x); - j=j+1; - end; - end; -end -S=strvcat(S,Ss); -[x, sizes(i)]=size(Ss); -if exist('chosen_index','var') - S = S(chosen_index,:); - names = names(chosen_index); -end -aln.seqtype = seqtype; -aln.geneticcode = geneticcode; -aln.seqnames = names; -aln.seq = S; -aln = encodealn(aln); -data = aln.seq; -% if nargin == 1 -% if isempty(findstr(names{1},varargin{1})) -% disp(['*** ERROR: The file does not contain the required gene ' varargin{1}]); -% data = []; -% return; -% end -% end -display(['# of allelic types: ' num2str(size(aln.seq,1))]); -display(['# of nucleotides: ' num2str(size(aln.seq,2))]); -% ---------------------- -% order_index = [1:size(aln.seq,1)]'; -% data = [aln.seq order_index]; % Append the index column in the end. -% ---------------------- - -fclose(file); - diff --git a/matlab/linkage/selectDataType.m b/matlab/linkage/selectDataType.m deleted file mode 100644 index 3fb4139..0000000 --- a/matlab/linkage/selectDataType.m +++ /dev/null @@ -1,37 +0,0 @@ -function [isOK, datatype] = selectDataType() - -items(1).name = 'MLST DATA'; -items(1).default = 1; -items(1).linked = [2 3]; -items(1).exclusive = [4 5 6]; -items(1).values = {1}; - -items(2).name = '1 = Allelic profile'; -items(2).default = 1; -items(2).exclusive = [3 4 5 6]; -items(2).indent = 1; - -items(3).name = '2 = FASTA-format'; -items(3).default = 0; -items(3).exclusive = [2 4 5 6]; -items(3).indent = 1; - - -title = 'STEP 1'; -msg = sprintf(['Please specify the data format:']); -out = CSEFlagDialog(items, title, msg); -if ~(isempty(out)), - if(out(2).answer==1) - datatype = 1; - elseif(out(3).answer==1) - datatype = 2; - elseif(out(5).answer==1) - datatype = 3; - elseif(out(6).answer==1) - datatype = 4; - end - isOK = 1; -else - isOK = 0; - datatype = 0; -end \ No newline at end of file diff --git a/matlab/linkage/selectGene.m b/matlab/linkage/selectGene.m deleted file mode 100644 index 245527d..0000000 --- a/matlab/linkage/selectGene.m +++ /dev/null @@ -1,25 +0,0 @@ -function [isOK, genename] = selectGene(genename) -ngenes = size(genename,1); -for i=1:ngenes - items(i).name = genename{i}; - items(i).default = 1; - items(i).values = []; -end - -title = 'STEP 3'; -nGenestr = sprintf('%d',ngenes); -msg = sprintf(['The allelic profile contains ' nGenestr ' genes named below.\n'... - 'Select the individual sequence data that you want to load.\n'... - 'It is recommended that all the genes are selected.']); -out = CSEFlagDialog(items, title, msg); -if ~(isempty(out)), - for i=1:ngenes - if ~out(i).answer - genename{i}=[]; - end - end - isOK = 1; -else - isOK = 0; - genename = {[]}; -end \ No newline at end of file diff --git a/matlab/linkage/selectSeqType.m b/matlab/linkage/selectSeqType.m deleted file mode 100644 index 3589d99..0000000 --- a/matlab/linkage/selectSeqType.m +++ /dev/null @@ -1,47 +0,0 @@ -function nextstep = selectSeqType() - -items(1).name = 'SEQUENCE TYPE:'; -items(1).default = 1; -items(1).linked = [2 3 4]; -items(1).values = []; - -items(2).name = '1 = Non-coding nucleotide'; -items(2).default = 0; -items(2).exclusive = [3 4]; -items(2).indent = 1; - -items(3).name = '2 = Coding nucleotide'; -items(3).default = 1; -items(3).exclusive = [2 4]; -items(3).indent = 1; - -items(4).name = '3 = Protein'; -items(4).default = 0; -items(4).exclusive = [2 3]; -items(4).indent = 1; - -items(5).name = 'GENETIC CODE: '; -items(5).default = 1; -items(5).indent = 0; -items(5).values = {'1 - Standard';'2 - Vertebrate Mithchondrial'; ... - '3 - Yeast Mithchondrial';'4 - Mold Mithchondrial'; ... - '5 - Invertebrate Mithchondrial';'6 - Protozoan Mithchondrial'; ... - '7 - Coelenterate Mithchondrial';'8 - Mycoplasma'}; -items(5).help = 'Please select a genetic code'; - -title = 'Sequence type and genetic code'; -% msg = sprintf(['Please select sequence type and genetic code']); -out = CSEFlagDialog(items, title); -if ~(isempty(out)), - if(out(2).answer==1) - seqtype=1; - elseif(out(3).answer==1) - seqtype=2; - elseif(out(4).answer==1) - seqtype=3; - end - geneticcode=out(5).answer; -else - seqtype=[]; - geneticcode=[]; -end \ No newline at end of file diff --git a/matlab/linkage/seqcode.m b/matlab/linkage/seqcode.m deleted file mode 100644 index 62a5f92..0000000 --- a/matlab/linkage/seqcode.m +++ /dev/null @@ -1,20 +0,0 @@ -function [NT,AA] = seqcode() -%SEQCODE - Return vector for mapping sequence letters to integers -% -% Syntax: [NT,AA] = seqcode -% -% See also: - -% Molecular Biology & Evolution Toolbox, (C) 2005 -% Author: James J. Cai -% Email: jamescai@hkusua.hku.hk -% Website: http://web.hku.hk/~jamescai/ -% Last revision: 5/28/2005 - -NT = 'ACGTDI?-'; -if (nargout>1) -AA = 'ARNDCQEGHILKMFPSTWYV*-'; -end - -% AANames = {'ala' 'arg' 'asn' 'asp' 'cys' 'gln' 'glu' 'gly' 'his' 'ile' 'leu' 'lys' 'met' ... -% 'phe' 'pro' 'ser' 'thr' 'trp' 'tyr' 'val'}; \ No newline at end of file diff --git a/matlab/linkage/silentReadBaps.m b/matlab/linkage/silentReadBaps.m deleted file mode 100644 index a045aae..0000000 --- a/matlab/linkage/silentReadBaps.m +++ /dev/null @@ -1,193 +0,0 @@ -function [data, filename] = silentReadBaps(varargin) -% This function is modified from readbaps.m to avoid invoke a dialog in Linux -% The input must be a vaild BAPS sequence file -% modified by Lu Cheng, 29.06.2010 - -MAXNAME = 200; - -if nargin == 0 - [filename, pathname] = uigetfile( ... - {'*.txt', 'BAPS Sequence Files (*.txt)'; - '*.*', 'All Files (*.*)'}, ... - 'Load BAPS sequence data'); - if ~(filename), data=[]; return; end - filename=[pathname,filename]; -end - -if nargin == 1 -% [filename,pathname] = uigetfile( ... -% {'*.txt', 'BAPS Sequence Files (*.txt)'; -% '*.*', 'All Files (*.*)'}, ... -% sprintf('Load the BAPS sequence data for gene %s',varargin{1}) ); -% if ~(filename), data = []; -% return; -% end -% filename = [pathname,filename]; - filename=varargin{1}; % added by Lu Cheng, 29.06.2010 -end - -if nargin == 2 - [filename,pathname] = uigetfile( ... - {'*.txt', 'BAPS Sequence Files (*.txt)'; - '*.*', 'All Files (*.*)'}, ... - sprintf('Load the BAPS sequence file for gene %s',varargin{1}) ); - if ~(filename),data = []; - return; - end - filename=[pathname,filename]; - chosen_index = varargin{2}; -end - -if nargin < 3 - % [seqtype, geneticcode]=selectSeqTypeAndGeneticCode; - seqtype = 2; - geneticcode = 1; - if (isempty(seqtype)|isempty(geneticcode)), data=[]; return; end -end - - -pause(0.0001); -if ~ischar(filename) - error('BAPS:InvalidInput','Input must be a character array') - data = []; - return; -end - -if ~(exist(filename,'file') | exist(fullfile(cd,filename),'file')), - % is a valid filename ? - error('BAPS:InvalidInput','Input must be a valid file') - data = []; - return; -end - -file = fopen(filename, 'r'); -display('---------------------------------------------------'); -display(['Reading BAPS sequence data from: ',filename,'...']); -display('---------------------------------------------------'); -% Now we are looking for the maximum length of the sequence -n=0; % the number of sequences -m=0; % the maximum length -cm = 0; % current sequence length - -while 1 - [x,nr] = fscanf(file,'%c',1); - if nr == 0 break; end; - if x==' ' % new sequence started - if cm ~=m & m >0 - % fprintf(['*** ERROR: Different sequence length found in allelic ','%d','.\n'],n+1); - disp('***ERROR: Incorrect BAPS sequence data.'); - data = []; - fclose(file); - return; - end - if cm > m m=cm; end; - cm = 0; - fgets(file); - n=n+1; - else - if isletter(x) | x=='-' | x == '?' - cm=cm+1; - end; - end; -end - -if cm > m m=cm; end; - -% go throught the file -if (m==0 | n==0) - % display(['*** ERROR: Unmatched data for gene ' varargin{1}]); - disp('***ERROR: Incorrect BAPS sequence data.'); - data = []; - fclose(file); - return; -end - -Ss = char(m); S = []; -str = zeros(1,MAXNAME); -sizes = zeros(1,n); -frewind(file); -% names=[]; -names={}; -i=1;j=1; -id = 0; -while 1 - [x,nr] = fscanf(file,'%c',1); - if nr == 0 - break; - end; - if x==' ' % new sequence started - if i~= 0 % save the sequence - % str=x; - [x, sizes(i)]=size(Ss); - S=strvcat(S,Ss); - Ss = []; Ss = char(m); - end; - str=fgetl(file); % read the name, we remove the '>' symbol - % names=strvcat(names,str); - % pos=find(str==' '); - % if ~(isempty(pos)) - % str=str(1:pos(1,1)); - % end - names{i}=str2num(str); - i=i+1; - %if nargin == 1 - if nargin == 1000 % modified by Lu Cheng, 29.06.2010 - if isempty(findstr(str, varargin{1})) - display(['*** ERROR: Unmatched data for gene ' varargin{1}]); - data = []; - fclose(file); - return - end - end - % disp(['Processing in: ' str]); - id = id + 1; - j=1; - else - if isletter(x) | x== '-' | x=='?' - % processing the sequence symbol - Ss(j) = upper(x); - j=j+1; - end; - end; -end -% S=strvcat(S,Ss); -[x, sizes(i)]=size(Ss); -if ~isempty(find(S==' ')) - disp('***ERROR: unequal sequence length.'); - data = []; - fclose(file); - return -end - -if exist('chosen_index','var') - S = S(chosen_index,:); - names = names(chosen_index); -end -aln.seqtype = seqtype; -aln.geneticcode = geneticcode; -aln.seqnames = names; -aln.seq = S; -aln = encodealn(aln); -data = aln.seq; -% if nargin == 1 -% if isempty(findstr(names{1},varargin{1})) -% disp(['*** ERROR: The file does not contain the required gene ' varargin{1}]); -% data = []; -% return; -% end -% end -% display(['# of allelic types: ' num2str(size(aln.seq,1))]); -% display(['# of nucleotides: ' num2str(size(aln.seq,2))]); -% ---------------------- -% order_index = [1:size(aln.seq,1)]'; -% data = [aln.seq order_index]; % Append the index column in the end. -% ---------------------- -try, -data = [data cell2mat(names)']; -catch, - disp('*** ERROR: Failed in loading the BAPS data.'); - disp('*** ERROR: Inidividual indices are not numerically sequential.'); - data = []; -end -fclose(file); - diff --git a/matlab/linkage/sumCell.m b/matlab/linkage/sumCell.m deleted file mode 100644 index f3abd6f..0000000 --- a/matlab/linkage/sumCell.m +++ /dev/null @@ -1,10 +0,0 @@ -function summat = sumCell(counts) -nbatches = size(counts,2); -sumcell = cell(1,nbatches); -[nalleles, nloci, ninds] = size(counts{1}); -summat = zeros(nalleles, nloci,'uint16'); -for i = 1:nbatches - sumcell{i} = uint16(sum(counts{i},3)); % sum as double format. - summat = summat + sumcell{i}; -end -clear counts; diff --git a/matlab/linkage/transform2.m b/matlab/linkage/transform2.m deleted file mode 100644 index b7e334d..0000000 --- a/matlab/linkage/transform2.m +++ /dev/null @@ -1,240 +0,0 @@ -function [data_clique, data_separator, noalle_clique, noalle_separator] = transform2(data, component_mat, linkage_model) -% Filename: transform.m -% [data_clique,data_clique,noalle_clique, noalle_separator] = tranform2(data, component_mat, linkage_model) -% -% Description: -% Function for tranforming the profile data into clique data and separator data. -% For the linear graphical models, in each component, ncliques + nseparators = 2*nnodes - 3 -% holds for each block(component in the graph). - -% Author: Jing Tang -% Modified date: 18/10/2005 -% The function for treating missing data is added. - -% Input: -% data: the allelic profile data. Last col is the index vector. -% component_mat: the component matrix. -% isdependent: -% islinear: - -% Output: -% data_clique: [nindividuals x (ncliques+nsingletons)] -% data_separator: [nindividual x nseparators] - - -if strcmp(linkage_model,'linear') - nindividuals = size(data,1); - nloci = size(data,2)-1; - % if ~all(diag(adj_mat)), adj_mat = adj_mat + diag(ones(nloci,1));, end; - % [p,p,r]=dmperm(adj_mat); - - % UNSOLVED --- Need to find a better algorithm to count the numbers of - % separators ,cliques and singletons. - ncomponents = size(component_mat,1); - cardinality = sum(component_mat>0,2)'; - ncliques = sum(cardinality-1); - nsingletons = sum(cardinality==1); - nseparators = sum(cardinality(find(cardinality>1))-2); - - data_clique = zeros(nindividuals, ncliques+nsingletons); - noalle_clique = zeros(ncliques+nsingletons,1); - data_separator = zeros(nindividuals, nseparators); - noalle_separator = zeros(nseparators,1); - % data_separator = []; % used for vectorization - k = 1; - for i = 1:ncomponents - if (cardinality(i)==1) % singleton - data_clique(:,k) = data(:,component_mat(i,1)); - temp = unique(data(:,component_mat(i,1))); - if temp(1) == 0 % missing data found - noalle_clique(k) = size(temp,1) -1; - else - noalle_clique(k) = size(temp,1); - end - k = k+1; - else - for j = 1:cardinality(i)-1 - - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - % data_clique(nonmissing,k) = n; - % noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*... - % size(unique(data(nonmissing,component_mat(i,j+1))),1); - - [b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - data_clique(:,k) = n; - - noalle_clique(k) = size(unique(data(:,component_mat(i,j))),1)*... - size(unique(data(:,component_mat(i,j+1))),1); - k = k+1; - end - - end - end - - k = 1; - for i = 1:ncomponents - if (cardinality(i)>2) - for j = 2:cardinality(i)-1 - data_separator(:,k) = data(:,component_mat(i,j)); - k = k+1; - end - end - end - if (k-1)~=nseparators - error('ERROR in transform.m'); - return - end - for i = 1:nseparators - % nonmissing = find(all(data_separator(:,i),2)); - % noalle_separator(i) = length(unique(data_separator(nonmissing,i))); - noalle_separator(i) = length(unique(data_separator(:,i))); - end - -elseif strcmp(linkage_model,'codon') - nindividuals = size(data,1); - nloci = size(data,2)-1; - ncomponents = size(component_mat,1); - cardinality = sum(component_mat>0,2)'; - - ncliques = sum(cardinality(find(cardinality>2))-2); - nsingletons = sum(cardinality==1 | cardinality ==2); - nseparators = sum(cardinality(find(cardinality>2))-3); - - data_clique = zeros(nindividuals,ncliques+nsingletons); - noalle_clique = zeros(ncliques+nsingletons,1); - data_seperator = zeros(nindividuals,nseparators); - noalle_separator = zeros(nseparators,1); - k = 1; - for i = 1:ncomponents - if (cardinality(i)==1) % singleton - data_clique(:,k) = data(:,component_mat(i,1)); - temp = unique(data(:,component_mat(i,1))); - if temp(1) == 0 % missing data found - noalle_clique(k) = size(temp,1) -1; - else - noalle_clique(k) = size(temp,1); - end - k = k+1; - elseif (cardinality(i)==2) % transform to linear linkage - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,1) component_mat(i,2)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,1) component_mat(i,2)]),'rows'); - - [b,m,n] = unique(data(:,[component_mat(i,1) component_mat(i,2)]),'rows'); - data_clique(:,k) = n; - % data_clique(nonmissing,k) = n; - - % noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,1))),1)*... - % size(unique(data(nonmissing,component_mat(i,2))),1); - noalle_clique(k) = size(unique(data(:,component_mat(i,1))),1)*... - size(unique(data(:,component_mat(i,2))),1); - k = k+1; - else - for j = 1:cardinality(i)-2 - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),'rows'); - % data_clique(nonmissing,k) = n; - - [b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),'rows'); - data_clique(:,k) = n; - - % noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*... - % size(unique(data(nonmissing,component_mat(i,j+1))),1)*size(unique(data(nonmissing,component_mat(i,j+2))),1); - noalle_clique(k) = size(unique(data(:,component_mat(i,j))),1)*... - size(unique(data(:,component_mat(i,j+1))),1)*size(unique(data(:,component_mat(i,j+2))),1); - k = k+1; - end - end - end - - k = 1; - for i = 1:ncomponents - if (cardinality(i)>2) - for j = 2:cardinality(i)-2 - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - [b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - % data_separator(nonmissing,k) = n; - data_separator(:,k) = n; - % noalle_separator(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*... - % size(unique(data(nonmissing,component_mat(i,j+1))),1); - noalle_separator(k) = size(unique(data(:,component_mat(i,j))),1)*... - size(unique(data(:,component_mat(i,j+1))),1); - k = k+1; - end - end - end - - -elseif strcmp(linkage_model,'independent') - data_clique = data(:,[1:end-1]); - data_separator = []; - noalle_clique = []; - noalle_separator = []; - -elseif strcmp(linkage_model,'random') - adj_mat = mk_adjmat(component_mat); - nloci = size(adj_mat,1); - ns = 4*ones(1,nloci); - porder = []; - stages = {[1:nloci]}; - clusters = {}; - - [jtree,root2,cliques,B,w]=graph_to_jtree(adj_mat,ns,porder,stages,clusters); - C = length(cliques); - [is,js] = find(jtree > 0); - separator = cell(C,C); - for k=1:length(is) - i = is(k); j = js(k); - separator{i,j} = find(B(i,:) & B(j,:)); % intersect(cliques{i}, cliques{j}); - end - - - nindividuals = size(data,1); - if nloci ~= size(data,2)-1 error('Error in transform3.m'); - return - end - ncliques = length(cliques); - [i,j] = find(~cellfun('isempty',triu(separator))); - ij = [i j]; - nseparators = length(i); - - data_clique = zeros(nindividuals, ncliques); - noalle_clique = zeros(ncliques,1); - data_separator = zeros(nindividuals, nseparators); - noalle_separator = zeros(nseparators,1); - - k = 1; - for i = 1:ncliques - [b,m,n] = unique(data(:,cliques{i}),'rows'); - data_clique(:,k) = n; - for j = 1:length(cliques{i}) - sz(j) = size(unique(data(:,cliques{i}(j))),1); - end - noalle_clique(k) = prod(sz); - k = k+1; - clear sz; - end - k = 1; - for i = 1:nseparators - [b,m,n] = unique(data(:,separator{ij(i,1),ij(i,2)}),'rows'); - data_separator(:,k) = n; - - for j = 1:length(separator{ij(i,1),ij(i,2)}) - sz(j) = size(unique(data(:,separator{ij(i,1),ij(i,2)}(j))),1); - end - noalle_separator(k) = prod(sz); - k = k+1; - clear sz; - end -end -clear data; - - - - - diff --git a/matlab/linkage/transform4.m b/matlab/linkage/transform4.m deleted file mode 100644 index e43ca70..0000000 --- a/matlab/linkage/transform4.m +++ /dev/null @@ -1,235 +0,0 @@ -function [data_clique, data_separator, noalle_clique, noalle_separator] = transform4(data, component_mat, linkage_model) -% Filename: transform4.m -% [data_clique,data_clique,noalle_clique, noalle_separator] = tranform4(data, component_mat, linkage_model) - -% DATA SHRINKAGE STEP IN THE END TO SAVE MEMORY USE -% Further shrink the data using uint8 format. -% Make sure that allele numbers smaller than 256. - - -if strcmp(linkage_model,'linear') - nindividuals = size(data,1); - nloci = size(data,2)-1; - % if ~all(diag(adj_mat)), adj_mat = adj_mat + diag(ones(nloci,1));, end; - % [p,p,r]=dmperm(adj_mat); - - % UNSOLVED --- Need to find a better algorithm to count the numbers of - % separators ,cliques and singletons. - ncomponents = size(component_mat,1); - cardinality = sum(component_mat>0,2)'; - ncliques = sum(cardinality-1); - nsingletons = sum(cardinality==1); - nseparators = sum(cardinality(find(cardinality>1))-2); - - data_clique = zeros(nindividuals, ncliques+nsingletons,'uint16'); - noalle_clique = zeros(ncliques+nsingletons,1,'uint16'); - data_separator = zeros(nindividuals, nseparators,'uint16'); - noalle_separator = zeros(nseparators,1,'uint16'); - % data_separator = []; % used for vectorization - k = 1; - for i = 1:ncomponents - if (cardinality(i)==1) % singleton - data_clique(:,k) = data(:,component_mat(i,1)); - temp = unique(data(:,component_mat(i,1))); - if temp(1) == 0 % missing data found - noalle_clique(k) = size(temp,1) -1; - else - noalle_clique(k) = size(temp,1); - end - k = k+1; - else - for j = 1:cardinality(i)-1 - - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - % data_clique(nonmissing,k) = n; - % noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*... - % size(unique(data(nonmissing,component_mat(i,j+1))),1); - - [b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - data_clique(:,k) = n; - - noalle_clique(k) = size(unique(data(:,component_mat(i,j))),1)*... - size(unique(data(:,component_mat(i,j+1))),1); - k = k+1; - end - - end - end - - k = 1; - for i = 1:ncomponents - if (cardinality(i)>2) - for j = 2:cardinality(i)-1 - data_separator(:,k) = data(:,component_mat(i,j)); - k = k+1; - end - end - end - if (k-1)~=nseparators - error('ERROR in transform.m'); - return - end - for i = 1:nseparators - % nonmissing = find(all(data_separator(:,i),2)); - % noalle_separator(i) = length(unique(data_separator(nonmissing,i))); - noalle_separator(i) = length(unique(data_separator(:,i))); - end - -elseif strcmp(linkage_model,'codon') - nindividuals = size(data,1); - nloci = size(data,2)-1; - ncomponents = size(component_mat,1); - cardinality = sum(component_mat>0,2)'; - - ncliques = sum(cardinality(find(cardinality>2))-2); - nsingletons = sum(cardinality==1 | cardinality ==2); - nseparators = sum(cardinality(find(cardinality>2))-3); - - data_clique = zeros(nindividuals,ncliques+nsingletons,'uint16'); - noalle_clique = zeros(ncliques+nsingletons,1,'uint16'); - data_separator = zeros(nindividuals,nseparators,'uint16'); - noalle_separator = zeros(nseparators,1,'uint16'); - k = 1; - for i = 1:ncomponents - if (cardinality(i)==1) % singleton - data_clique(:,k) = data(:,component_mat(i,1)); - temp = unique(data(:,component_mat(i,1))); - if temp(1) == 0 % missing data found - noalle_clique(k) = size(temp,1) -1; - else - noalle_clique(k) = size(temp,1); - end - k = k+1; - elseif (cardinality(i)==2) % transform to linear linkage - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,1) component_mat(i,2)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,1) component_mat(i,2)]),'rows'); - - [b,m,n] = unique(data(:,[component_mat(i,1) component_mat(i,2)]),'rows'); - data_clique(:,k) = uint16(n); - % data_clique(nonmissing,k) = n; - - % noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,1))),1)*... - % size(unique(data(nonmissing,component_mat(i,2))),1); - noalle_clique(k) = size(unique(data(:,component_mat(i,1))),1)*... - size(unique(data(:,component_mat(i,2))),1); - k = k+1; - else - for j = 1:cardinality(i)-2 - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),'rows'); - % data_clique(nonmissing,k) = n; - - [b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),'rows'); - data_clique(:,k) = uint16(n); - - % noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*... - % size(unique(data(nonmissing,component_mat(i,j+1))),1)*size(unique(data(nonmissing,component_mat(i,j+2))),1); - noalle_clique(k) = size(unique(data(:,component_mat(i,j))),1)*... - size(unique(data(:,component_mat(i,j+1))),1)*size(unique(data(:,component_mat(i,j+2))),1); - k = k+1; - end - end - end - - k = 1; - for i = 1:ncomponents - if (cardinality(i)>2) - for j = 2:cardinality(i)-2 - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - [b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - % data_separator(nonmissing,k) = n; - data_separator(:,k) = uint16(n); - % noalle_separator(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*... - % size(unique(data(nonmissing,component_mat(i,j+1))),1); - noalle_separator(k) = size(unique(data(:,component_mat(i,j))),1)*... - size(unique(data(:,component_mat(i,j+1))),1); - k = k+1; - end - end - end - - -elseif strcmpi(linkage_model,'Independent') - data_clique = data(:,[1:end-1]); - data_separator = []; - noalle_clique = max(data_clique); % Be sure the data is normalized. - noalle_separator = []; - -elseif strcmp(linkage_model,'random') - adj_mat = mk_adjmat(component_mat); - nloci = size(adj_mat,1); - ns = 4*ones(1,nloci); - porder = []; - stages = {[1:nloci]}; - clusters = {}; - - [jtree,root2,cliques,B,w]=graph_to_jtree(adj_mat,ns,porder,stages,clusters); - C = length(cliques); - [is,js] = find(jtree > 0); - separator = cell(C,C); - for k=1:length(is) - i = is(k); j = js(k); - separator{i,j} = find(B(i,:) & B(j,:)); % intersect(cliques{i}, cliques{j}); - end - - - nindividuals = size(data,1); - if nloci ~= size(data,2)-1 error('*** ERROR: in transform3.m'); - return - end - ncliques = length(cliques); - [i,j] = find(~cellfun('isempty',triu(separator))); - ij = [i j]; - nseparators = length(i); - - data_clique = zeros(nindividuals, ncliques); - noalle_clique = zeros(ncliques,1); - data_separator = zeros(nindividuals, nseparators); - noalle_separator = zeros(nseparators,1); - - k = 1; - for i = 1:ncliques - [b,m,n] = unique(data(:,cliques{i}),'rows'); - data_clique(:,k) = n; -% for j = 1:length(cliques{i}) -% sz(j) = size(unique(data(:,cliques{i}(j))),1); -% end -% noalle_clique(k) = prod(sz); - noalle_clique(k) = 4^length(cliques{i}); - k = k+1; -% clear sz; - end - k = 1; - for i = 1:nseparators - [b,m,n] = unique(data(:,separator{ij(i,1),ij(i,2)}),'rows'); - data_separator(:,k) = n; - -% for j = 1:length(separator{ij(i,1),ij(i,2)}) -% sz(j) = size(unique(data(:,separator{ij(i,1),ij(i,2)}(j))),1); -% end -% noalle_separator(k) = prod(sz); - noalle_separator(k) = 4^length(separator{ij(i,1),ij(i,2)}); - k = k+1; -% clear sz; - end -end - -% Data shrinkage step -informative_clique = find(noalle_clique~=1); -informative_separator = find(noalle_separator~=1); -data_clique = data_clique(:,informative_clique); -data_separator = data_separator(:,informative_separator); -noalle_clique = noalle_clique(informative_clique); -noalle_separator = noalle_separator(informative_separator); -clear data; - - - - - diff --git a/matlab/linkage/transform5.m b/matlab/linkage/transform5.m deleted file mode 100644 index cf2c119..0000000 --- a/matlab/linkage/transform5.m +++ /dev/null @@ -1,265 +0,0 @@ -function [data_clique, data_separator, noalle_clique, noalle_separator, alphabet_cq, alphabet_sp, informative_clique, informative_separator] = ... - transform5(data, component_mat, varargin) -% Filename: transform4.m (modified, Lu Cheng, 17.02.2010) -% [data_clique,data_clique,noalle_clique, noalle_separator] = tranform4(data, component_mat, linkage_model) - -% DATA SHRINKAGE STEP IN THE END TO SAVE MEMORY USE -% Further shrink the data using uint8 format. -% Make sure that allele numbers smaller than 256. - -% Modified by Lu Cheng from transform4.m, 17.02.2010 -% New: The alphabet at each locus is stored in alphabet_cq and alphabet_sp -% And here we also assume there is no missing data - -linkage_model = 'codon'; - -if strcmp(linkage_model,'linear') - nindividuals = size(data,1); - nloci = size(data,2)-1; - % if ~all(diag(adj_mat)), adj_mat = adj_mat + diag(ones(nloci,1));, end; - % [p,p,r]=dmperm(adj_mat); - - % UNSOLVED --- Need to find a better algorithm to count the numbers of - % separators ,cliques and singletons. - ncomponents = size(component_mat,1); - cardinality = sum(component_mat>0,2)'; - ncliques = sum(cardinality-1); - nsingletons = sum(cardinality==1); - nseparators = sum(cardinality(find(cardinality>1))-2); - - data_clique = zeros(nindividuals, ncliques+nsingletons,'uint16'); - noalle_clique = zeros(ncliques+nsingletons,1,'uint16'); - data_separator = zeros(nindividuals, nseparators,'uint16'); - noalle_separator = zeros(nseparators,1,'uint16'); - % data_separator = []; % used for vectorization - k = 1; - for i = 1:ncomponents - if (cardinality(i)==1) % singleton - data_clique(:,k) = data(:,component_mat(i,1)); - temp = unique(data(:,component_mat(i,1))); - if temp(1) == 0 % missing data found - noalle_clique(k) = size(temp,1) -1; - else - noalle_clique(k) = size(temp,1); - end - k = k+1; - else - for j = 1:cardinality(i)-1 - - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - % data_clique(nonmissing,k) = n; - % noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*... - % size(unique(data(nonmissing,component_mat(i,j+1))),1); - - [b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - data_clique(:,k) = n; - - noalle_clique(k) = size(unique(data(:,component_mat(i,j))),1)*... - size(unique(data(:,component_mat(i,j+1))),1); - k = k+1; - end - - end - end - - k = 1; - for i = 1:ncomponents - if (cardinality(i)>2) - for j = 2:cardinality(i)-1 - data_separator(:,k) = data(:,component_mat(i,j)); - k = k+1; - end - end - end - if (k-1)~=nseparators - error('ERROR in transform.m'); - return - end - for i = 1:nseparators - % nonmissing = find(all(data_separator(:,i),2)); - % noalle_separator(i) = length(unique(data_separator(nonmissing,i))); - noalle_separator(i) = length(unique(data_separator(:,i))); - end - -elseif strcmp(linkage_model,'codon') - nindividuals = size(data,1); - nloci = size(data,2)-1; - ncomponents = size(component_mat,1); - cardinality = sum(component_mat>0,2)'; % length of each gene - - ncliques = sum(cardinality(cardinality>2)-2); - nsingletons = sum(cardinality==1 | cardinality ==2); - nseparators = sum(cardinality(cardinality>2)-3); - - data_clique = zeros(nindividuals,ncliques+nsingletons,'uint16'); - noalle_clique = zeros(ncliques+nsingletons,1,'uint16'); - data_separator = zeros(nindividuals,nseparators,'uint16'); - noalle_separator = zeros(nseparators,1,'uint16'); - - alphabet_cq = cell(1,ncliques+nsingletons); - alphabet_sp = cell(1,nseparators); - - k = 1; - for i = 1:ncomponents - if (cardinality(i)==1) % singleton - [b, m, n] = unique(data(:,component_mat(i,1))); - - alphabet_cq{k} = b; - data_clique(:,k) = unit16(n); - -% temp = unique(data(:,component_mat(i,1))); -% if temp(1) == 0 % missing data found -% noalle_clique(k) = size(temp,1) -1; -% else -% noalle_clique(k) = size(temp,1); -% end - k = k+1; - elseif (cardinality(i)==2) % transform to linear linkage - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,1) component_mat(i,2)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,1) component_mat(i,2)]),'rows'); - - [b,m,n] = unique(data(:,[component_mat(i,1) component_mat(i,2)]),'rows'); - data_clique(:,k) = uint16(n); - alphabet_cq{k} = b; - - % data_clique(nonmissing,k) = n; - - % noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,1))),1)*... - % size(unique(data(nonmissing,component_mat(i,2))),1); - noalle_clique(k) = size(unique(data(:,component_mat(i,1))),1)*... - size(unique(data(:,component_mat(i,2))),1); - k = k+1; - else - for j = 1:cardinality(i)-2 - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),'rows'); - % data_clique(nonmissing,k) = n; - - [b,m,n] = unique(data(:,component_mat(i,j:j+2)),'rows'); - data_clique(:,k) = uint16(n); - alphabet_cq{k} = b; - - % noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*... - % size(unique(data(nonmissing,component_mat(i,j+1))),1)*size(unique(data(nonmissing,component_mat(i,j+2))),1); - noalle_clique(k) = size(unique(data(:,component_mat(i,j))),1)*... - size(unique(data(:,component_mat(i,j+1))),1)*size(unique(data(:,component_mat(i,j+2))),1); - k = k+1; - end - end - end - - k = 1; - for i = 1:ncomponents - if (cardinality(i)>2) - for j = 2:cardinality(i)-2 - % dealing with missing data - % nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1)]),2)); - % [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - [b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1)]),'rows'); - % data_separator(nonmissing,k) = n; - data_separator(:,k) = uint16(n); - alphabet_sp{k} = b; - - % noalle_separator(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*... - % size(unique(data(nonmissing,component_mat(i,j+1))),1); - noalle_separator(k) = size(unique(data(:,component_mat(i,j))),1)*... - size(unique(data(:,component_mat(i,j+1))),1); - k = k+1; - end - end - end - - -elseif strcmpi(linkage_model,'Independent') - data_clique = data(:,[1:end-1]); - data_separator = []; - noalle_clique = max(data_clique); % Be sure the data is normalized. - noalle_separator = []; - -elseif strcmp(linkage_model,'random') - adj_mat = mk_adjmat(component_mat); - nloci = size(adj_mat,1); - ns = 4*ones(1,nloci); - porder = []; - stages = {[1:nloci]}; - clusters = {}; - - [jtree,root2,cliques,B,w]=graph_to_jtree(adj_mat,ns,porder,stages,clusters); - C = length(cliques); - [is,js] = find(jtree > 0); - separator = cell(C,C); - for k=1:length(is) - i = is(k); j = js(k); - separator{i,j} = find(B(i,:) & B(j,:)); % intersect(cliques{i}, cliques{j}); - end - - - nindividuals = size(data,1); - if nloci ~= size(data,2)-1 error('*** ERROR: in transform3.m'); - return - end - ncliques = length(cliques); - [i,j] = find(~cellfun('isempty',triu(separator))); - ij = [i j]; - nseparators = length(i); - - data_clique = zeros(nindividuals, ncliques); - noalle_clique = zeros(ncliques,1); - data_separator = zeros(nindividuals, nseparators); - noalle_separator = zeros(nseparators,1); - - k = 1; - for i = 1:ncliques - [b,m,n] = unique(data(:,cliques{i}),'rows'); - data_clique(:,k) = n; -% for j = 1:length(cliques{i}) -% sz(j) = size(unique(data(:,cliques{i}(j))),1); -% end -% noalle_clique(k) = prod(sz); - noalle_clique(k) = 4^length(cliques{i}); - k = k+1; -% clear sz; - end - k = 1; - for i = 1:nseparators - [b,m,n] = unique(data(:,separator{ij(i,1),ij(i,2)}),'rows'); - data_separator(:,k) = n; - -% for j = 1:length(separator{ij(i,1),ij(i,2)}) -% sz(j) = size(unique(data(:,separator{ij(i,1),ij(i,2)}(j))),1); -% end -% noalle_separator(k) = prod(sz); - noalle_separator(k) = 4^length(separator{ij(i,1),ij(i,2)}); - k = k+1; -% clear sz; - end -end - -% Data shrinkage step - -if ~isempty(varargin) - informative_clique = varargin{1}; - informative_separator = varargin{2}; -else - informative_clique = find(noalle_clique~=1); - informative_separator = find(noalle_separator~=1); -end - -data_clique = data_clique(:,informative_clique); -data_separator = data_separator(:,informative_separator); - -alphabet_cq = alphabet_cq(informative_clique); -alphabet_sp = alphabet_sp(informative_separator); - -noalle_clique = noalle_clique(informative_clique); -noalle_separator = noalle_separator(informative_separator); -clear data; - - - - - diff --git a/matlab/linkage/xls2structure.m b/matlab/linkage/xls2structure.m deleted file mode 100644 index 3e47e20..0000000 --- a/matlab/linkage/xls2structure.m +++ /dev/null @@ -1,19 +0,0 @@ -function xls2structure(source, target) -% XLS2STRUCTURE converts a MLST data in xls format into a Structure data - -[data, component_mat, popnames] = processxls(source); - -data = data(:,[1:end-1]); -[ninds, nloci] = size(data); - -fid = fopen(target,'w'); -if (fid ~= -1) - for i = 1:ninds - fprintf(fid,'%s\t', popnames{i,1}{1}); - for j = 1:nloci - fprintf(fid,'%d\t',data(i,j)); - end - fprintf(fid,'\n'); - end -end -fclose(fid); diff --git a/matlab/parallel/admix_parallel.m b/matlab/parallel/admix_parallel.m deleted file mode 100644 index 210e297..0000000 --- a/matlab/parallel/admix_parallel.m +++ /dev/null @@ -1,776 +0,0 @@ -function admix_parallel(options) -% ADMIX_PARALLEL is the command line version of the baps partition with -% admixture models. -% Input: options is a struct generated by parallel.m - -%-------------------------------------------------------------------------- -%- Syntax check out -%-------------------------------------------------------------------------- -rand('state',0); % used for debugging -outp = [options.outputMat '.txt']; -inp = options.dataFile; -clusters = options.clusters; -fprintf(1,'Parallel computing...\n'); -fprintf(1,'Admixture analysis for cluster(s): %s.\n',num2str(clusters)); - -global COUNTS; global PARTITION; global SUMCOUNTS; -clearGlobalVars; - -struct_array = load(options.dataFile); -if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'PARTITION') | ~isfield(c,'rowsFromInd') - disp('*** ERROR: Incorrect data format'); - return - end -elseif isfield(struct_array,'PARTITION') %Mideva versio - c = struct_array; - if ~isfield(c,'rowsFromInd') - disp('*** ERROR: Incorrect data format'); - return - end -else - disp('*** ERROR: Incorrect data format'); - return; -end - - -if isfield(c, 'gene_lengths') && ... - (strcmp(c.mixtureType,'linear_mix') | ... - strcmp(c.mixtureType,'codon_mix')) % if the mixture is from a linkage model - % Redirect the call to the linkage admixture function. - fprintf(1,'Redirecting to Linkage Model Admixture\n'); - c.data = noIndex(c.data,c.noalle); % call function noindex to remove the index column - linkage_admix_parallel(c,options); - return - end - -% This section is disabled, -Jing 27.10.2009 -% if isfield(c, 'gene_lengths') && ... -% (strcmp(c.mixtureType,'linkage_mix') | ... -% strcmp(c.mixtureType,'codon_mix')) % if the mixture is from a linkage model -% % Redirect the call to the linkage admixture function. -% c.data = noIndex(c.data,c.noalle); % call function noindex to remove the index column -% linkage_admix(c); -% return -% end - -PARTITION = c.PARTITION; COUNTS = c.COUNTS; SUMCOUNTS = c.SUMCOUNTS; -alleleCodes = c.alleleCodes; adjprior = c.adjprior; popnames = c.popnames; -rowsFromInd = c.rowsFromInd; data = c.data; npops = c.npops; noalle = c.noalle; - - -% answers = inputdlg({['Input the minimum size of a population that will'... -% ' be taken into account when admixture is estimated.']},... -% 'Input minimum population size',[1],... -% {'5'}); -% if isempty(answers) return; end -% ------------------------------------------- -% NEW: for parallel computing -% ------------------------------------------- -alaRaja = options.minSize; -[npops, clusterIndex] = poistaLiianPienet(npops, rowsFromInd, alaRaja); -if length(clusterIndex)0 - osuusTaulu(PARTITION(ind)) = 1; - else - % Yksilöt, joita ei ole sijoitettu mihinkään koriin. - arvot = zeros(1,npops); - for q=1:npops - osuusTaulu = zeros(1,npops); - osuusTaulu(q) = 1; - arvot(q) = computeIndLogml(omaFreqs, osuusTaulu); - end - [iso_arvo, isoimman_indeksi] = max(arvot); - osuusTaulu = zeros(1,npops); - osuusTaulu(isoimman_indeksi) = 1; - PARTITION(ind)=isoimman_indeksi; - end - logml = computeIndLogml(omaFreqs, osuusTaulu); - logmlAlku = logml; - for osuus = [0.5 0.25 0.05 0.01] - [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml); - end - logmlLoppu = logml; - likelihood(ind) = logmlLoppu-logmlAlku; - end -end - -% Analyze further only individuals who have log-likelihood ratio larger than 3: -% --------------------------------------- -% PARALLEL COMPUTING IMPLEMENTED HERE -% --------------------------------------- -to_investigate = (find(likelihood>3))'; -admix_populaatiot = unique(PARTITION(to_investigate)); -validCluster = intersect(clusters, admix_populaatiot); % for the chosen clusters - -ix = zeros(length(to_investigate),1); -for i = 1:length(validCluster) - ix = ix | (PARTITION(to_investigate) == validCluster(i)); -end -if ~any(ix) to_investigate = []; -else to_investigate = to_investigate(ix); -end -admix_populaatiot = unique(PARTITION(to_investigate)); - -disp('Possibly admixed individuals in the chosen clusters: '); -if isempty(to_investigate) - disp('none'); - disp('Admixture analysis terminated.'); - return -else - for i = 1:length(to_investigate) - disp(num2str(to_investigate(i))); - end -end - -disp(' '); -disp('clusters for possibly admixed individuals: '); - -for i = 1:length(admix_populaatiot) - disp(num2str(admix_populaatiot(i))); -end - -% THUS, there are two types of individuals, who will not be analyzed with -% simulated allele frequencies: those who belonged to a mini-population -% which was removed, and those who have log-likelihood ratio less than 3. -% The value in the PARTITION for the first kind of individuals is 0. The -% second kind of individuals can be identified, because they do not -% belong to "to_investigate" array. When the results are presented, the -% first kind of individuals are omitted completely, while the second kind -% of individuals are completely put to the population, where they ended up -% in the mixture analysis. These second type of individuals will have a -% unit p-value. - - -% Simulate allele frequencies a given number of times and save the average -% result to "proportionsIt" array. - -proportionsIt = zeros(ninds,npops); -for iterationNum = 1:iterationCount - disp(['Iter: ' num2str(iterationNum)]); - allfreqs = simulateAllFreqs(noalle); % Allele frequencies on this iteration. - - for ind=to_investigate - %disp(num2str(ind)); - omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd); - osuusTaulu = zeros(1,npops); - if PARTITION(ind)==0 - % Yksil?on outlier - elseif PARTITION(ind)~=0 - if PARTITION(ind)>0 - osuusTaulu(PARTITION(ind)) = 1; - else - % Yksilöt, joita ei ole sijoitettu mihinkään koriin. - arvot = zeros(1,npops); - for q=1:npops - osuusTaulu = zeros(1,npops); - osuusTaulu(q) = 1; - arvot(q) = computeIndLogml(omaFreqs, osuusTaulu); - end - [iso_arvo, isoimman_indeksi] = max(arvot); - osuusTaulu = zeros(1,npops); - osuusTaulu(isoimman_indeksi) = 1; - PARTITION(ind)=isoimman_indeksi; - end - logml = computeIndLogml(omaFreqs, osuusTaulu); - - for osuus = [0.5 0.25 0.05 0.01] - [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml); - end - end - proportionsIt(ind,:) = proportionsIt(ind,:).*(iterationNum-1) + osuusTaulu; - proportionsIt(ind,:) = proportionsIt(ind,:)./iterationNum; - end -end - -%disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']); -%disp('each population.'); - -%allfreqs = simulateAllFreqs(noalle); % Simuloidaan alleelifrekvenssisetti -allfreqs = computeAllFreqs2(noalle); % Koitetaan tällaista. - - -% Initialize the data structures, which are required in taking the missing -% data into account: -n_missing_levels = zeros(npops,1); % number of different levels of "missingness" in each pop (max 3). -missing_levels = zeros(npops,3); % the mean values for different levels. -missing_level_partition = zeros(ninds,1); % level of each individual (one of the levels of its population). -for i=1:npops - inds = find(PARTITION==i); - % Proportions of non-missing data for the individuals: - non_missing_data = zeros(length(inds),1); - for j = 1:length(inds) - ind = inds(j); - non_missing_data(j) = length(find(data((ind-1)*rowsFromInd+1:ind*rowsFromInd,:)>0)) ./ (rowsFromInd*nloci); - end - if all(non_missing_data>0.9) - n_missing_levels(i) = 1; - missing_levels(i,1) = mean(non_missing_data); - missing_level_partition(inds) = 1; - else - [ordered, ordering] = sort(non_missing_data); - %part = learn_simple_partition(ordered, 0.05); - part = learn_partition_modified(ordered); - aux = sortrows([part ordering],2); - part = aux(:,1); - missing_level_partition(inds) = part; - n_levels = length(unique(part)); - n_missing_levels(i) = n_levels; - for j=1:n_levels - missing_levels(i,j) = mean(non_missing_data(find(part==j))); - end - end -end - -% Create and analyse reference individuals for populations -% with potentially admixed individuals: -refTaulu = zeros(npops,100,3); -for pop = admix_populaatiot' - - for level = 1:n_missing_levels(pop) - - potential_inds_in_this_pop_and_level = ... - find(PARTITION==pop & missing_level_partition==level &... - likelihood>3); % Potential admix individuals here. - - if ~isempty(potential_inds_in_this_pop_and_level) - - %refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs); - refData = simulateIndividuals(nrefIndsInPop, rowsFromInd, allfreqs, ... - pop, missing_levels(pop,level)); - - disp(['Analysing the reference individuals from pop ' num2str(pop) ' (level ' num2str(level) ').']); - refProportions = zeros(nrefIndsInPop,npops); - for iter = 1:iterationCountRef - %disp(['Iter: ' num2str(iter)]); - allfreqs = simulateAllFreqs(noalle); - - for ind = 1:nrefIndsInPop - omaFreqs = computePersonalAllFreqs(ind, refData, allfreqs, rowsFromInd); - osuusTaulu = zeros(1,npops); - osuusTaulu(pop)=1; - logml = computeIndLogml(omaFreqs, osuusTaulu); - for osuus = [0.5 0.25 0.05 0.01] - [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml); - end - refProportions(ind,:) = refProportions(ind,:).*(iter-1) + osuusTaulu; - refProportions(ind,:) = refProportions(ind,:)./iter; - end - end - for ind = 1:nrefIndsInPop - omanOsuus = refProportions(ind,pop); - if round(omanOsuus*100)==0 - omanOsuus = 0.01; - end - if abs(omanOsuus)<1e-5 - omanOsuus = 0.01; - end - refTaulu(pop, round(omanOsuus*100),level) = refTaulu(pop, round(omanOsuus*100),level)+1; - end - end - end -end - -% Rounding of the results: -proportionsIt = proportionsIt.*100; proportionsIt = round(proportionsIt); -proportionsIt = proportionsIt./100; -for ind = 1:ninds - if ~any(to_investigate==ind) - if PARTITION(ind)>0 - proportionsIt(ind,PARTITION(ind))=1; - end - else - % In case of a rounding error, the sum is made equal to unity by - % fixing the largest value. - if (PARTITION(ind)>0) & (sum(proportionsIt(ind,:)) ~= 1) - [isoin,indeksi] = max(proportionsIt(ind,:)); - erotus = sum(proportionsIt(ind,:))-1; - proportionsIt(ind,indeksi) = isoin-erotus; - end - end -end - -% Calculate p-value for each individual: -uskottavuus = zeros(ninds,1); -for ind = 1:ninds - pop = PARTITION(ind); - if pop==0 % Individual is outlier - uskottavuus(ind)=1; - elseif isempty(find(to_investigate==ind)) - % Individual had log-likelihood ratio<3 - uskottavuus(ind)=1; - else - omanOsuus = proportionsIt(ind,pop); - if abs(omanOsuus)<1e-5 - omanOsuus = 0.01; - end - if round(omanOsuus*100)==0 - omanOsuus = 0.01; - end - level = missing_level_partition(ind); - refPienempia = sum(refTaulu(pop, 1:round(100*omanOsuus), level)); - uskottavuus(ind) = refPienempia / nrefIndsInPop; - end -end - -% tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount); - -% viewPartition(proportionsIt, popnames); - - -c.proportionsIt = proportionsIt; -c.pvalue = uskottavuus; % Added by Jing -c.mixtureType = 'admix'; % Jing -c.admixnpops = npops; - -c.clusters = clusters; % added for parallel computing -c.minsize = alaRaja; -c.iters = iterationCount; -c.refInds = nrefIndsInPop; -c.refIters = iterationCountRef; - - -fprintf(1,'Saving the result...') -try -% save(options.outputMat, 'c'); - save(options.outputMat, 'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'Finished.\n'); -catch - display('*** ERROR in saving the result.'); -end - - -%---------------------------------------------------------------------------- - - -function [npops, clusterIndex] = poistaLiianPienet(npops, rowsFromInd, alaraja) -% Muokkaa tulokset muotoon, jossa outlier yksilöt on -% poistettu. Tarkalleen ottaen poistaa ne populaatiot, -% joissa on vähemmän kuin 'alaraja':n verran yksilöit? - -% NEW: clusterIndex is output for parallel computing. - Jing -global PARTITION; -global COUNTS; -global SUMCOUNTS; - -clusterIndex = [1:npops]'; -popSize=zeros(1,npops); -for i=1:npops - popSize(i)=length(find(PARTITION==i)); -end -miniPops = find(popSize0))); -for n=1:length(korit) - kori = korit(n); - yksilot = find(PARTITION==kori); - PARTITION(yksilot) = n; - clusterIndex(kori) = n; -end -COUNTS(:,:,miniPops) = []; -SUMCOUNTS(miniPops,:) = []; - -npops = npops-length(miniPops); - -%------------------------------------------------------------------------ - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - -%-------------------------------------------------------- - - -function allFreqs = computeAllFreqs2(noalle) -% Lisää a priori jokaista alleelia -% joka populaation joka lokukseen j 1/noalle(j) verran. - -global COUNTS; -global SUMCOUNTS; - -max_noalle = size(COUNTS,1); -nloci = size(COUNTS,2); -npops = size(COUNTS,3); - -sumCounts = SUMCOUNTS+ones(size(SUMCOUNTS)); -sumCounts = reshape(sumCounts', [1, nloci, npops]); -sumCounts = repmat(sumCounts, [max_noalle, 1 1]); - -prioriAlleelit = zeros(max_noalle,nloci); -for j=1:nloci - prioriAlleelit(1:noalle(j),j) = 1/noalle(j); -end -prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]); -counts = COUNTS + prioriAlleelit; -allFreqs = counts./sumCounts; - - -function allfreqs = simulateAllFreqs(noalle) -% Lisää jokaista alleelia joka populaation joka lokukseen j 1/noalle(j) -% verran. Näin saatuja counts:eja vastaavista Dirichlet-jakaumista -% simuloidaan arvot populaatioiden alleelifrekvensseille. - -global COUNTS; - -max_noalle = size(COUNTS,1); -nloci = size(COUNTS,2); -npops = size(COUNTS,3); - -prioriAlleelit = zeros(max_noalle,nloci); -for j=1:nloci - prioriAlleelit(1:noalle(j),j) = 1/noalle(j); -end -prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]); -counts = COUNTS + prioriAlleelit; -allfreqs = zeros(size(counts)); - -for i=1:npops - for j=1:nloci - simuloidut = randdir(counts(1:noalle(j),j,i) , noalle(j)); - allfreqs(1:noalle(j),j,i) = simuloidut; - end -end - -%-------------------------------------------------------------------------- - - -function refData = simulateIndividuals(n,rowsFromInd,allfreqs,pop, missing_level) -% simulate n individuals from population pop, such that approximately -% proportion "missing_level" of the alleles are present. - -nloci = size(allfreqs,2); - -refData = zeros(n*rowsFromInd,nloci); -counter = 1; % which row will be generated next. - -for ind = 1:n - for loc = 1:nloci - for k=0:rowsFromInd-1 - if randarvo); -all = min(isommat); - - -%-------------------------------------------------------------------------- - - -function omaFreqs = computePersonalAllFreqs(ind, data, allFreqs, rowsFromInd) -% Laskee npops*(rowsFromInd*nloci) taulukon, jonka kutakin saraketta -% vastaa yksilön ind alleeli. Eri rivit ovat alleelin alkuperäfrekvenssit -% eri populaatioissa. Jos yksilölt?puuttuu jokin alleeli, niin vastaavaan -% kohtaa tulee sarake ykkösi? - -global COUNTS; -nloci = size(COUNTS,2); -npops = size(COUNTS,3); - -rows = data(computeRows(rowsFromInd, ind, 1),:); - -omaFreqs = zeros(npops, (rowsFromInd*nloci)); -pointer = 1; -for loc=1:size(rows,2) - for all=1:size(rows,1) - if rows(all,loc)>=0 - try, - omaFreqs(:,pointer) = ... - reshape(allFreqs(rows(all,loc),loc,:), [npops,1]); - catch - a=0; - end - else - omaFreqs(:,pointer) = ones(npops,1); - end - pointer = pointer+1; - end -end - - -%--------------------------------------------------------------------------- - - -function loggis = computeIndLogml(omaFreqs, osuusTaulu) -% Palauttaa yksilön logml:n, kun oletetaan yksilön alkuperät -% määritellyiksi kuten osuusTaulu:ssa. - -apu = repmat(osuusTaulu', [1 size(omaFreqs,2)]); -apu = apu .* omaFreqs; -apu = sum(apu); - -apu = log(apu); - -loggis = sum(apu); - - -%-------------------------------------------------------------------------- - - -function osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi) -% Päivittää osuusTaulun muutoksen jälkeen. - -global COUNTS; -npops = size(COUNTS,3); - -i1 = rem(indeksi,npops); -if i1==0, i1 = npops; end; -i2 = ceil(indeksi / npops); - -osuusTaulu(i1) = osuusTaulu(i1)-osuus; -osuusTaulu(i2) = osuusTaulu(i2)+osuus; - - -%------------------------------------------------------------------------- - - -function [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml) - -ready = 0; -while ready ~= 1 - muutokset = laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml); - [maxMuutos, indeksi] = max(muutokset(1:end)); - if maxMuutos>0 - osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi); - logml = logml + maxMuutos; - else - ready = 1; - end -end - - - -%--------------------------------------------------------------------------- - - -function muutokset = laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml) -% Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on -% muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -% todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole -% mitään siirrettävää, on vastaavassa kohdassa rivi nollia. - -global COUNTS; -npops = size(COUNTS,3); - -notEmpty = find(osuusTaulu>0.005); -muutokset = zeros(npops); -empties = ~notEmpty; - -for i1=notEmpty - - osuusTaulu(i1) = osuusTaulu(i1)-osuus; - - for i2 = [1:i1-1 i1+1:npops] - osuusTaulu(i2) = osuusTaulu(i2)+osuus; - loggis = computeIndLogml(omaFreqs, osuusTaulu); - muutokset(i1,i2) = loggis-logml; - osuusTaulu(i2) = osuusTaulu(i2)-osuus; - end - - osuusTaulu(i1) = osuusTaulu(i1)+osuus; -end - - - - -function g=randga(a,b) -flag = 0; -if a>1 -c1 = a-1; c2 = (a-(1/(6*a)))/c1; c3 = 2/c1; c4 = c3+2; c5 = 1/sqrt(a); -U1=-1; -while flag == 0, -if a<=2.5, -U1=rand;U2=rand; -else -while ~(U1>0 & U1<1), -U1=rand;U2=rand; -U1 = U2 + c5*(1-1.86*U1); -end %while -end %if -W = c2*U2/U1; -if c3*U1+W+(1/W)<=c4, -flag = 1; -g = c1*W/b; -elseif c3*log(U1)-log(W)+W<1, -flag = 1; -g = c1*W/b; -else -U1=-1; -end %if -end %while flag -elseif a==1 -g=sum(-(1/b)*log(rand(a,1))); -else -while flag == 0, -U = rand(2,1); -if U(1)>exp(1)/(a+exp(1)), -g = -log(((a+exp(1))*(1-U(1)))/(a*exp(1))); -if U(2)<=g^(a-1), -flag = 1; -end %if -else -g = ((a+exp(1))*U(1)/((exp(1))^(1/a))); -if U(2)<=exp(-g), -flag = 1; -end %if -end %if -end %while flag -g=g/b; -end %if; - - -%------------------------------------------------- - -function svar=randdir(counts,nc) -% Käyttöesim randdir([10;30;60],3) - -svar=zeros(nc,1); -for i=1:nc - svar(i,1)=randga(counts(i,1),1); -end -svar=svar/sum(svar); - -%------------------------------------------------------------------------------------- - - -function rows = computeRows(rowsFromInd, inds, ninds) -% Individuals inds have been given. The function returns a vector, -% containing the indices of the rows, which contain data from the -% individuals. - -rows = inds(:, ones(1,rowsFromInd)); -rows = rows*rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]); -rows = rows - miinus; -rows = reshape(rows', [1,rowsFromInd*ninds]); - -%-------------------------------------------------------------------------- -%----- - -function str = ownNum2Str(number) - -absolute = abs(number); - -if absolute < 1000 - str = num2str(number); -elseif absolute < 10000000 - first_three = rem(number,1000); - next_four = (number - first_three) /1000; - first_three = abs(first_three); - if first_three<10 - first_three = ['00' num2str(first_three)]; - elseif first_three<100 - first_three = ['0' num2str(first_three)]; - else - first_three = num2str(first_three); - end; - str = [num2str(next_four) first_three]; -elseif absolute < 100000000 - first_four = rem(number,10000); - next_four = (number - first_four) /10000; - first_four = abs(first_four); - if first_four<10 - first_four = ['000' num2str(first_four)]; - elseif first_four<100 - first_four = ['00' num2str(first_four)]; - elseif first_four<1000 - first_four = ['0' num2str(first_four)]; - else - first_four = num2str(first_four); - end; - str = [num2str(next_four) first_four]; -else - str = num2str(number); -end; - -%------------------------------------------------ - - -function part = learn_partition_modified(ordered) -% This function is called only if some individual has less than 90 per cent -% non-missing data. The function uses fuzzy clustering for the "non-missingness" -% values, finding maximum three clusters. If two of the found clusters are such -% that all the values are >0.9, then those two are further combined. - -part = learn_simple_partition(ordered,0.05); -nclust = length(unique(part)); -if nclust==3 - mini_1 = min(ordered(find(part==1))); - mini_2 = min(ordered(find(part==2))); - mini_3 = min(ordered(find(part==3))); - - if mini_1>0.9 & mini_2>0.9 - part(find(part==2)) = 1; - part(find(part==3)) = 2; - - elseif mini_1>0.9 & mini_3>0.9 - part(find(part==3)) = 1; - - elseif mini_2>0.9 & mini_3>0.9 - % This is the one happening in practice, since the values are - % ordered, leading to mini_1 <= mini_2 <= mini_3 - part(find(part==3)) = 2; - end -end \ No newline at end of file diff --git a/matlab/parallel/compare.m b/matlab/parallel/compare.m deleted file mode 100644 index 65958ca..0000000 --- a/matlab/parallel/compare.m +++ /dev/null @@ -1,81 +0,0 @@ -function compare(varargin) -% COMPARE compares the results from multiple runs. -% input: is a group of result .mat files on the same data. - -% Example: compare('e:\data\result1.mat','e:\data\result2.mat',...) -% or call it from the BAPS menu. - -if nargin == 1 - error('number of input arguments must be >=2'); -end - -if nargin == 0 - out = uipickfiles('FilterSpec','*.mat',... - 'Prompt','Select mixture results: be sure that the underlying data and models are consistent.'); - if isnumeric(out) - return - end - nfiles = length(out); - filesin = out; - -else - nfiles = nargin; - filesin = varargin; -end - -display('---------------------------------------------------'); -fprintf(1,'Comparing results ...\n'); -if nfiles == 1 - disp('*** ERROR: Too few files.'); - return -end -for i = 1:nfiles - struct_array = load(filesin{i}); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - clear struct_array; - if ~isfield(c,'PARTITION') || ~isfield(c,'rowsFromInd') || strcmp(c.mixtureType,'admix') - fprintf(1,'*** ERROR: Incorrect mixture result in file %d\n',i ); - return - end - elseif isfield(struct_array,'PARTITION') %Mideva versio - c = struct_array; - if ~isfield(c,'rowsFromInd') - fprintf(1,'*** ERROR: Incorrect mixture result in file %d\n',i ); - return - end - else - fprintf(1,'*** ERROR: Incorrect mixture result in file %d\n',i ); - return; - end - - try - partitionMat(i,:) = sort_partition(c.PARTITION); - catch - error('*** ERROR: inconsistent results.'); - end - mixtureType{i} = c.mixtureType; - logml(i) = c.logml; - clear c; -end - -len_mixture = length(mixtureType{1}); -for i = 2:nfiles - if len_mixture ~= length(mixtureType{i}); - error('*** ERROR: inconsistent mixture types.'); - end -end - - -% Find the best partition -best = logical(logml == max(logml)); -[uniquepartition, ind1, ind2] = unique(partitionMat(best,:), 'rows'); -fprintf(1,'Best partition was found at ''%s''\n',filesin{best}); - - - - - - - - diff --git a/matlab/parallel/compare_admix.m b/matlab/parallel/compare_admix.m deleted file mode 100644 index defd0bc..0000000 --- a/matlab/parallel/compare_admix.m +++ /dev/null @@ -1,142 +0,0 @@ -function compare_admix(varargin) -% COMPARE compares the results from multiple runs for admixture results -% input: is a group of result .mat files on the same data. - -% Example: compare('e:\data\result1.mat','e:\data\result2.mat',...) -% or call it from the BAPS menu. - -if nargin == 1 - error('number of input arguments must be >=2'); -end - -if nargin == 0 - out = uipickfiles('FilterSpec','*.mat',... - 'Prompt','Select admixture results: be sure that the underlying data and parameters are consistent.'); - if isnumeric(out) - return - end - nfiles = length(out); - filesin = out; - -else - nfiles = nargin; - filesin = varargin; -end - -display('---------------------------------------------------'); -fprintf(1,'Comparing results ...\n'); -minsize = zeros(nfiles,1); -iters = zeros(nfiles,1); -refInds = zeros(nfiles,1); -refIters = zeros(nfiles,1); -prop = cell(nfiles,1); - -clusters = cell(nfiles,1); -adjprior = []; - -if nfiles == 1 - disp('*** ERROR: Too few files.'); - return -end - -% read admixture files -for i = 1:nfiles - struct_array = load(filesin{i}); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - clear struct_array; - if ~isfield(c,'PARTITION') || ~isfield(c,'rowsFromInd') ... - || ~isfield(c,'proportionsIt') - fprintf(1,'*** ERROR: Incorrect admixture result in file %d\n',i ); - return - end - elseif isfield(struct_array,'PARTITION') %Mideva versio - c = struct_array; - if ~isfield(c,'rowsFromInd') - fprintf(1,'*** ERROR: Incorrect admixture result in file %d\n',i ); - return - end - else - fprintf(1,'*** ERROR: Incorrect admixture result in file %d\n',i ); - return; - end - - prop{i} = c.proportionsIt; - pvalue(:,i) = c.pvalue; - clusters{i} = c.clusters; - popnames = c.popnames; - - % parameters - minsize(i) = c.minsize; - iters(i) = c.iters; - refInds(i) = c.refInds; - refIters(i) = c.refIters; - - if i==1 - adjprior = c.adjprior; - else - if ~isequal(adjprior,c.adjprior) - disp('*** ERROR: incosistent admixture results.'); - return - end - end - - clear c; -end - -if length(unique(minsize))~=1 || length(unique(iters))~=1 ... - || length(unique(refInds))~=1 || length(unique(refIters))~=1 - disp('*** ERROR: inconsistent admixture parameters.'); - return -end - -% now combine the results -prop_combine = prop{1}; -[ninds npops] = size(prop_combine); -[pvalue_combine,index] = min(pvalue,[],2); -for i = 1:ninds -prop_combine(i,:) = prop{index(i)}(i,:); -end - -% display the results -tulostaAdmixtureTiedot(prop_combine, pvalue_combine, minsize(1), iters(1)); - -viewPartition(prop_combine, popnames); - -% save the results -talle = questdlg(['Do you want to save the combined admixture results?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save results as'); - - if (filename == 0) & (pathname == 0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename '.txt']) - delete('baps4_output.baps') - end - end - - struct_array = load(filesin{1}); - c = struct_array.c; - clear struct_array; - c.proportionsIt = prop_combine; - c.pvalue = pvalue_combine; % Added by Jing - - fprintf(1, 'Saving the results...'); -% save([pathname filename], 'c'); - save([pathname filename], 'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'finished.\'); - -end - - - - - - - - diff --git a/matlab/parallel/dispLine.m b/matlab/parallel/dispLine.m deleted file mode 100644 index c7679e5..0000000 --- a/matlab/parallel/dispLine.m +++ /dev/null @@ -1,3 +0,0 @@ -%------------------------------------------------------ -function dispLine; -disp('---------------------------------------------------'); \ No newline at end of file diff --git a/matlab/parallel/greedyPopMix_parallel.m b/matlab/parallel/greedyPopMix_parallel.m deleted file mode 100644 index 5f8d8d2..0000000 --- a/matlab/parallel/greedyPopMix_parallel.m +++ /dev/null @@ -1,1622 +0,0 @@ -function greedyPopMix_parallel(options) - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -clearGlobalVars; - -% LASKENNAN ALKUARVOJEN MÄÄRITTÄMINEN -outp = [options.outputMat '.txt']; -inp = options.dataFile; - -if isequal(options.dataType,'numeric') %Raakadata - data = load(options.dataFile); - ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS - if (ninds==0) - disp('*** ERROR: Incorrect Data-file.'); - return; - end - [data, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(data); - rowsFromInd = 0; %Ei tiedet? - - if ~isempty(options.groupname) - popnames = initPopNames(options.groupname); - if (size(popnames,1)~=ninds) - disp('*** ERROR: Incorrect name-file.'); - popnames = []; - end - else - popnames = []; - end - -elseif isequal(options.dataType,'genepop') - kunnossa = testaaGenePopData(options.dataFile); - if kunnossa==0 - return - end - - [data, popnames]=lueGenePopDataPop(options.dataType); - [data, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(data); - rowsFromInd = 2; %Tiedetään GenePop:in tapauksessa. - -end - -if ~isequal(options.dataType, 'matlab') - a_data = data(:,1:end-1); - - npops = size(rows,1); - PARTITION = 1:npops'; %Jokainen "yksil? eli populaatio on oma ryhmäns? - [sumcounts, counts, logml] = ... - initialPopCounts(a_data, npops, rows, noalle, adjprior); - COUNTS = counts; SUMCOUNTS = sumcounts; - POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - - clear('counts', 'sumcounts','pathname','filename','vast2',... - 'vast3','vast4'); - [Z,dist] = getPopDistancesByKL(adjprior); %Saadaan COUNTS:in avulla. - -% save_preproc = questdlg('Do you wish to save pre-processed data?',... -% 'Save pre-processed data?',... -% 'Yes','No','Yes'); -% if isequal(save_preproc,'Yes'); -% waitALittle; -% [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); -% kokonimi = [pathname filename]; -% c.data = data; c.rows = rows; c.alleleCodes = alleleCodes; -% c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -% c.dist = dist; c.Z = Z; c.popnames = popnames; c.rowsFromInd = rowsFromInd; -% c.npops = npops; c.logml = logml; -% save(kokonimi,'c'); -% clear c; -% end; -end - -if isequal(options.dataType, 'matlab') - struct_array = load(options.dataFile); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'rows') - disp('Incorrect file format'); - return - end - elseif isfield(struct_array,'rows') %Mideva versio - c = struct_array; - else - disp('*** ERROR: Incorrect file format'); - return; - end - data = double(c.data); rows = c.rows; alleleCodes = c.alleleCodes; - noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; - dist = c.dist; Z = c.Z; popnames = c.popnames; rowsFromInd = c.rowsFromInd; - clear c; -end - -if strcmp(options.fixedK, 'yes') - fixedK = 1; -else - fixedK = 0; -end - -npopstext = []; -npopstextExtra = options.initialK; -if length(npopstextExtra)>=255 - npopstextExtra = npopstextExtra(1:255); - npopstext = [npopstext ' ' npopstextExtra]; - teksti = 'The input field length limit (255 characters) was reached. Input more values: '; -else - % ----------------------------------------------------- - % Set the limit of the input value. - % Modified by Jing Tang, 30.12.2005 - if max(npopstextExtra) > size(data,1) - error('Initial K larger than the sample size are not accepted. '); - else - npopstext = [npopstext ' ' num2str(npopstextExtra)]; - end -end - -clear teksti; -if isempty(npopstext) || length(npopstext)==1 - return -else - npopsTaulu = str2num(npopstext); - ykkoset = find(npopsTaulu==1); - npopsTaulu(ykkoset) = []; % Mikäli ykkösi?annettu ylärajaksi, ne poistetaan. - if isempty(npopsTaulu) - return - end - clear ykkoset; -end - - -c.data=data; c.rows = rows; c.alleleCodes = alleleCodes; -c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -c.dist=dist; c.Z=Z; c.rowsFromInd = rowsFromInd; - -if fixedK - % Only the first value of npopsTaulu is used - npops = npopsTaulu(1); - nruns = length(npopsTaulu); - [logml, npops, partitionSummary]=indMix_fixK(c,npops,nruns,1); -else - [logml, npops, partitionSummary]=indMix(c,npopsTaulu,1); -end - -if logml==1 - return -end - -data = data(:,1:end-1); - -viewPopMixPartition(PARTITION, rows, popnames); -%npops = poistaTyhjatPopulaatiot(npops); -%POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - -% h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); -% h0 = findobj('Tag','filename2_text'); -% outp = get(h0,'String'); -changesInLogml = writeMixtureInfoPop(logml, rows, data, adjprior, priorTerm, ... - outp,inp,partitionSummary, popnames, fixedK); - -if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',outp) - delete('baps4_output.baps') -end - -if rowsFromInd==0 - %Käytettiin BAPS-formaattia, eik?rowsFromInd ole tunnettu. - [popnames, rowsFromInd] = findOutRowsFromInd(popnames, rows); -end - -groupPartition = PARTITION; - -fiksaaPartitioYksiloTasolle(rows, rowsFromInd); -% The logml is saved for parallel computing -c.logml = logml; -c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; -c.alleleCodes = alleleCodes; c.adjprior = adjprior; -c.rowsFromInd = rowsFromInd; c.popnames = popnames; -c.data = data; c.npops = npops; c.noalle = noalle; -c.mixtureType = 'popMix'; c.groupPartition = groupPartition; -c.rows = rows; c.changesInLogml = changesInLogml; -fprintf(1,'Saving the result...') -try -% save(options.outputMat, 'c'); - save(options.outputMat, 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'Finished.\n'); -catch - display('*** ERROR in saving the result.'); -end - - -% ------------------------------------------------------------------------- -% - Subfunctions -% ------------------------------------------------------------------------- - -function [newData, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit -% siten, ett?yhden lokuksen j koodit saavat arvoja -% välill?1,...,noalle(j). Ennen tät?muutosta alleeli, jonka -% koodi on nolla muutetaan. - - -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -%rows = cell(nind,1); -rows = zeros(nind,2); -for i=1:nind - rivit = find(data(:,end)==i)'; - rows(i,1) = min(rivit); - rows(i,2) = max(rivit); -end -newData = data; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%---------------------------------------------------------------- - - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - - -%-------------------------------------------------------------------- - -function [Z,distances] = getPopDistancesByKL(adjprior) -% Laskee populaatioille etäisyydet -% käyttäen KL-divergenssi? -global COUNTS; -maxnoalle = size(COUNTS,1); -nloci = size(COUNTS,2); -npops = size(COUNTS,3); -distances = zeros(nchoosek(npops,2),1); - -d = zeros(maxnoalle, nloci, npops); -prior = adjprior; -prior(find(prior==1))=0; -nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. -prior(1,nollia)=1; -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end -pointer = 1; -for pop1 = 1:npops-1 - for pop2 = pop1+1:npops - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - distances(pointer) = div; - pointer = pointer+1; - end -end -Z=linkage(distances'); - -%-------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------- - - -function [sumcounts, counts, logml] = ... - initialPopCounts(data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); - -for i=1:npops - for j=1:nloci - i_rivit = rows(i,1):rows(i,2); - havainnotLokuksessa = find(data(i_rivit,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(i_rivit,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -logml = laskeLoggis(counts, sumcounts, adjprior); - - -%----------------------------------------------------------------------- - - -function loggis = laskeLoggis(counts, sumcounts, adjprior) -npops = size(counts,3); - -logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior))) - ... - sum(sum(gammaln(1+sumcounts))); -loggis = logml2; - - -%-------------------------------------------------------------------- - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimmäinen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) | isequal(line2,-1) | isequal(line3,-1)) - disp('Incorrect file format'); fclose(fid); - return -end -if (testaaPop(line1)==1 | testaaPop(line2)==1) - disp('Incorrect file format'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi tällöin lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin nelj?täytyy sisältää pilkku. - disp('Incorrect file format'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 & ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan jälkeen - if isequal(line4,-1) - disp('Incorrect file format'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin täytyy sisältää pilkku. - disp('Incorrect file format'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - -%-------------------------------------------------------------------- - - -function [data, popnames] = lueGenePopDataPop(tiedostonNimi) -% Data annetaan muodossa, jossa viimeinen sarake kertoo ryhmän. -% popnames on kuten ennenkin. - -fid = fopen(tiedostonNimi); -line = fgetl(fid); %ensimmäinen rivi -line = fgetl(fid); %toinen rivi -count = rivinSisaltamienMjonojenLkm(line); - -line = fgetl(fid); -lokusRiveja = 1; -while (testaaPop(line)==0) - lokusRiveja = lokusRiveja+1; - line = fgetl(fid); -end - -if lokusRiveja>1 - nloci = lokusRiveja; -else - nloci = count; -end - -popnames = cell(10,2); -data = zeros(100, nloci+1); -nimienLkm=0; -ninds=0; -poimiNimi=1; -digitFormat = -1; -while line ~= -1 - line = fgetl(fid); - - if poimiNimi==1 - %Edellinen rivi oli 'pop' - nimienLkm = nimienLkm+1; - ninds = ninds+1; - if nimienLkm>size(popnames,1); - popnames = [popnames; cell(10,2)]; - end - nimi = lueNimi(line); - if digitFormat == -1 - digitFormat = selvitaDigitFormat(line); - divider = 10^digitFormat; - end - popnames{nimienLkm, 1} = {nimi}; %Näin se on greedyMix:issäkin?!? - popnames{nimienLkm, 2} = ninds; - poimiNimi=0; - - data = addAlleles(data, ninds, line, divider); - - elseif testaaPop(line) - poimiNimi = 1; - - elseif line ~= -1 - ninds = ninds+1; - data = addAlleles(data, ninds, line, divider); - end -end - -fclose(fid); -data = data(1:ninds*2,:); -popnames = popnames(1:nimienLkm,:); -npops = size(popnames,1); -ind = 1; -for pop = 1:npops - if pop=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - -%------------------------------------------------------------------------ - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, diffInCounts, ... - adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksil?ind -% on siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%-------------------------------------------------------------------------- -%-- - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2( ... - i1, globalRows, data, adjprior, priorTerm); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli korin i1 kaikki yksilöt siirretään -% koriin i. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1_logml = POP_LOGML(i1); - -inds = find(PARTITION==i1); -ninds = length(inds); - -if ninds==0 - diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); - return; -end - -rows = []; -for i = 1:ninds - ind = inds(i); - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; -end - -diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2( ... - i1, i2, diffInCounts, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); - - -%-------------------------------------------------------------------------- -%---- - -function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... - data, adjprior, priorTerm, i1) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -i1_logml = POP_LOGML(i1); -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = []; - for i = 1:ninds - ind = inds(i); - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; - end - diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; - COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - - muutokset(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset5(inds, globalRows, data, adjprior, ... - priorTerm, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?i vaihtaisi koria i1:n ja i2:n välill? - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %mist? - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - rows = globalRows(ind,1):globalRows(ind,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); - muutokset(i) = sum(new_logmls); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; -end - -muutokset = muutokset - i1_logml - i2_logml; - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - - -%----------------------------------------------------------------------------------- - - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile) - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) & ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = 0; -end - - -%------------------------------------------------------------------------- - - -function [popnames2, rowsFromInd] = findOutRowsFromInd(popnames, rows) - -ploidisuus = questdlg('Specify the type of individuals in the data: ',... - 'Individual type?', 'Haploid', 'Diploid', 'Tetraploid', ... - 'Diploid'); - -switch ploidisuus -case 'Haploid' - rowsFromInd = 1; -case 'Diploid' - rowsFromInd = 2; -case 'Tetraploid' - rowsFromInd = 4; -end - -if ~isempty(popnames) - for i = 1:size(rows,1) - popnames2{i,1} = popnames{i,1}; - rivi = rows(i,1):rows(i,2); - popnames2{i,2} = (rivi(rowsFromInd))/rowsFromInd; - end -else - popnames2 = []; -end - -%------------------------------------------------------------------ - -function fiksaaPartitioYksiloTasolle(rows, rowsFromInd) - -global PARTITION; -totalRows = 0; -for ind = 1:size(rows,1) - totalRows = totalRows + (rows(ind,2)-rows(ind,1)+1); -end -partitio2 = zeros(totalRows/rowsFromInd,1); - -for ind = 1:size(rows,1) - kaikkiRivit = rows(ind,1):rows(ind,2); - for riviNumero = rowsFromInd:rowsFromInd:length(kaikkiRivit) - %for riviNumero = rowsFromInd:rowsFromInd:length(rows{ind}) - %rivi = rows{ind}(riviNumero); - rivi = kaikkiRivit(riviNumero); - partitio2(rivi/rowsFromInd) = PARTITION(ind); - end -end -PARTITION = partitio2; - -%--------------------------------------------------------------- - - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n0 - fid = fopen(outPutFile,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:'); -disp(['Data file: ' inputFile]); -disp(['Number of clustered groups: ' ownNum2Str(ninds)]); -disp(['Number of clusters in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered groups: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clusters in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); - fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp(['Best Partition: ']); -if (fid ~= -1) - fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',[new_line]); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',[text]); - fprintf(fid,'\n'); - end - end; -end - -if npops > 1 - disp(' '); - disp(' '); - disp('Changes in log(marginal likelihood) if group i is moved to cluster j:'); - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if group i is moved to cluster j:']); %fprintf(fid, '\n'); - end - - if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku 'group' blanks(6+erotus)]; - else - ekarivi = 'group '; - end - for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; - end - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); - end - - changesInLogml = LOGDIFF'; - for ind = 1:ninds - %[muutokset, diffInCounts] = laskeMuutokset(ind, rows, data, ... - % adjprior, priorTerm); - muutokset = changesInLogml(:,ind); - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - end - end - - disp(' '); disp(' '); - disp('KL-divergence matrix in PHYLIP format:'); - dist_mat = zeros(npops, npops); - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); - end - - maxnoalle = size(COUNTS,1); - nloci = size(COUNTS,2); - d = zeros(maxnoalle, nloci, npops); - prior = adjprior; - prior(find(prior==1))=0; - nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. - prior(1,nollia)=1; - for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); - end -% ekarivi = blanks(7); -% for pop = 1:npops -% ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; -% end - ekarivi = num2str(npops); - disp(ekarivi); - - if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); - end - - for pop1 = 1:npops - rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - dist_mat(pop1,pop2) = div; - end -% disp(rivi); -% if (fid ~= -1) -% fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); -% end - end - -else - changesInLogml = []; -end - -dist_mat = dist_mat + dist_mat'; % make it symmetric -for pop1 = 1:npops - rivi = ['Cluster_' num2str(pop1) ' ']; - for pop2 = 1:npops - rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n'); -end - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end -end - -if ~fixedK - - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end - end - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - -%-------------------------------------------------------------- - -function num2 = omaRound(num) -% Pyöristää luvun num 1 desimaalin tarkkuuteen -num = num*10; -num = round(num); -num2 = num/10; - -%--------------------------------------------------------- - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:in? -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmill? -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - - -%----------------------------------------------- - - -function ninds = testaaOnkoKunnollinenBapsData(data) -%Tarkastaa onko viimeisess?sarakkeessa kaikki -%luvut 1,2,...,n johonkin n:ään asti. -%Tarkastaa lisäksi, ett?on vähintään 2 saraketta. -if size(data,1)<2 - ninds = 0; return; -end -lastCol = data(:,end); -ninds = max(lastCol); -if ~isequal((1:ninds)',unique(lastCol)) - ninds = 0; return; -end - -%-------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters, Z) - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function [sumcounts, counts, logml] = ... - initialCounts(partition, data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -ninds = size(rows, 1); - -%koot = rows(:,1) - rows(:,2) + 1; -%maxSize = max(koot); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%initializeGammaln(ninds, maxSize, max(noalle)); - -logml = laskeLoggis(counts, sumcounts, adjprior); - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- - -function inds = returnInOrder(inds, pop, globalRows, data, ... - adjprior, priorTerm) -% Palauttaa yksilöt järjestyksess?siten, ett?ensimmäisen?on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind =inds(i); - rows = globalRows(i,1):globalRows(i,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end diff --git a/matlab/parallel/independent_parallel.m b/matlab/parallel/independent_parallel.m deleted file mode 100644 index 21fc9f7..0000000 --- a/matlab/parallel/independent_parallel.m +++ /dev/null @@ -1,1674 +0,0 @@ -function independent_parallel(options) -% INDEPENDENT_PARALLEL is the command line version of the baps partition with -% independent models. -% Input: options is a struct generated by parallel.m - -%-------------------------------------------------------------------------- -%- Syntax check out -%-------------------------------------------------------------------------- -outp = [options.outputMat '.txt']; -inp = options.dataFile; - -if strcmp(options.fixedK, 'yes') - fixedK = 1; -else - fixedK = 0; -end - -%-------------------------------------------------------------------------- -%- Get data file location -%-------------------------------------------------------------------------- -switch options.dataType - case 'numeric' - %------------------------------------------------------------------ - %- Get name and index file location - %------------------------------------------------------------------ - try - data = load(options.dataFile); - catch - disp('*** ERROR: Incorrect numerical text data.'); - return - end - ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS - if (ninds==0) - disp('*** ERROR: Incorrect numerical text data.'); - return; - end - - if ~isempty(options.nameFile) && ~isempty(options.indexFile) - popnames = initPopNames(options.nameFile{1}, options.indexFile{1}); - else - popnames = []; - end - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rowsFromInd); - - case 'genepop' - kunnossa = testaaGenePopData(options.dataFile); - if kunnossa == 0 - return - end - [data,popnames] = lueGenePopData(options.dataFile); - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rowsFromInd); - case 'matlab' - struct_array = load(options.dataFile); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'dist') - disp('*** ERROR: Incorrect matlab format'); - return - end - elseif isfield(struct_array,'dist') %Mideva versio - c = struct_array; - else - disp('*** ERROR: Incorrect matlab format'); - return; - end - data = double(c.data); rowsFromInd = c.rowsFromInd; alleleCodes = c.alleleCodes; - noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; - dist = c.dist; popnames = c.popnames; Z = c.Z; - clear c; - otherwise - error('*** ERROR: data type is not specified or unknown.'); -end - -% --------------------------------------------------------- -% - Stochastic search algorithm -% --------------------------------------------------------- -global PARTITION; global COUNTS; -global SUMCOUNTS; global POP_LOGML; -clearGlobalVars; - -c.data=data; -c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -c.dist=dist; c.Z=Z; c.rowsFromInd = rowsFromInd; - -ninds = length(unique(data(:,end))); -ekat = (1:rowsFromInd:ninds*rowsFromInd)'; -c.rows = [ekat ekat+rowsFromInd-1]; - -npopstext = []; -npopstextExtra = options.initialK; -if length(npopstextExtra)>=255 - npopstextExtra = npopstextExtra(1:255); - npopstext = [npopstext ' ' npopstextExtra]; - teksti = 'The input field length limit (255 characters) was reached. Input more values: '; -else - % ----------------------------------------------------- - % Set the limit of the input value. - % Modified by Jing Tang, 30.12.2005 - if max(npopstextExtra) > size(data,1) - error('Initial K larger than the sample size are not accepted. '); - else - npopstext = [npopstext ' ' num2str(npopstextExtra)]; - end -end - -clear teksti; -if isempty(npopstext) || length(npopstext)==1 - return -else - npopsTaulu = str2num(npopstext); - ykkoset = find(npopsTaulu==1); - npopsTaulu(ykkoset) = []; % Mikäli ykkösi?annettu ylärajaksi, ne poistetaan. - if isempty(npopsTaulu) - return - end - clear ykkoset; -end - -if fixedK - % Only the first value of npopsTaulu is used - npops = npopsTaulu(1); - nruns = length(npopsTaulu); - [logml, npops, partitionSummary]=indMix_fixK(c,npops,nruns,1); -else - [logml, npops, partitionSummary]=indMix(c,npopsTaulu,1); -end - -if logml==1 - return -end - -data = noIndex(data,noalle); - -changesInLogml = writeMixtureInfo(logml, rowsFromInd, data, adjprior, priorTerm, ... - outp,inp,partitionSummary, popnames, fixedK); - -viewMixPartition(PARTITION, popnames); - -if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',outp) - delete('baps4_output.baps') -end - -% The logml is saved for parallel computing -c.logml = logml; -c.changesInLogml = changesInLogml; % this variable stores the change of likelihoods. - -c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; -c.alleleCodes = alleleCodes; c.adjprior = adjprior; c.popnames = popnames; -c.rowsFromInd = rowsFromInd; c.data = data; c.npops = npops; -c.noalle = noalle; c.mixtureType = 'mix'; -fprintf(1,'Saving the result...') -try -% save(options.outputMat, 'c'); - save(options.outputMat, 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'Finished.\n'); -catch - display('*** ERROR in saving the result.'); -end - - -% --------------------------------------------------------- -% - Subfunctions -% --------------------------------------------------------- - -%------------------------------------------------------------------------------------- - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - - -%------------------------------------------------------------------------------------- - - -function rows = computeRows(rowsFromInd, inds, ninds) -% On annettu yksilöt inds. Funktio palauttaa vektorin, joka -% sisältää niiden rivien numerot, jotka sisältävät yksilöiden -% dataa. - -rows = inds(:, ones(1,rowsFromInd)); -rows = rows*rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]); -rows = rows - miinus; -rows = reshape(rows', [1,rowsFromInd*ninds]); - - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. -global PARTITION; -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - -%-------------------------------------------------------------------------- - - -function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -% Suorittaa yksilön seuraavan tilan arvonnan - -y = logml + muutokset; % siirron jälkeiset logml:t -y = y - max(y); -y = exp(y); -summa = sum(y); -y = y/summa; -y = cumsum(y); - -i2 = rand_disc(y); % uusi kori -suurin = muutokset(i2); - - -%-------------------------------------------------------------------------------------- - - -function svar=rand_disc(CDF) -%returns an index of a value from a discrete distribution using inversion method -slump=rand; -har=find(CDF>slump); -svar=har(1); - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ... - adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksil?ind -% on siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2( ... - i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ... - adjprior, priorTerm, i2) -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm) -% Palauttaa yksilöt järjestyksess?siten, ett?ensimmäisen?on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = ... - laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?ind siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); - -rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2( ... - i1, rowsFromInd, data, adjprior, priorTerm) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli korin i1 kaikki yksilöt siirretään -% koriin i. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1_logml = POP_LOGML(i1); - -inds = find(PARTITION==i1); -ninds = length(inds); - -if ninds==0 - diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); - return; -end - -rows = computeRows(rowsFromInd, inds, ninds); - -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - - -%------------------------------------------------------------------------------------ - - -function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ... - data, adjprior, priorTerm, i1) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -i1_logml = POP_LOGML(i1); - -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = computeRows(rowsFromInd, inds, ninds); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; - COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - - muutokset(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ... - priorTerm, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?i vaihtaisi koria i1:n ja i2:n välill? - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %mist? - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - PARTITION(ind) = pop2; - - new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); - - muutokset(i) = sum(new_logmls); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - - PARTITION(ind) = pop1; -end - -muutokset = muutokset - i1_logml - i2_logml; - -%-------------------------------------------------------------------------- - - - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - - -%----------------------------------------------------------------------------------- - - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%---------------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - - -%--------------------------------------------------------------------------------------- - - -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); % Tält?yksilölt?puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%---------------------------------------------------------------------------------------- - - -function [Z, dist] = newGetDistances(data, rowsFromInd) - -ninds = max(data(:,end)); -nloci = size(data,2)-1; -riviLkm = nchoosek(ninds,2); - -empties = find(data<0); -data(empties)=0; -data = uint8(data); % max(noalle) oltava <256 - -pariTaulu = zeros(riviLkm,2); -aPointer=1; -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a; - pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)'; - aPointer = aPointer+ninds-a; -end - -eka = pariTaulu(:,ones(1,rowsFromInd)); -eka = eka * rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -eka = eka - miinus; - -toka = pariTaulu(:,ones(1,rowsFromInd)*2); -toka = toka * rowsFromInd; -toka = toka - miinus; - -%eka = uint16(eka); -%toka = uint16(toka); - -summa = zeros(riviLkm,1); -vertailuja = zeros(riviLkm,1); - -clear pariTaulu; clear miinus; - -x = zeros(size(eka)); x = uint8(x); -y = zeros(size(toka)); y = uint8(y); - -for j=1:nloci; - - for k=1:rowsFromInd - x(:,k) = data(eka(:,k),j); - y(:,k) = data(toka(:,k),j); - end - - for a=1:rowsFromInd - for b=1:rowsFromInd - vertailutNyt = double(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa+double(lisays); - end - end -end - -clear x; clear y; clear vertailutNyt; -nollat = find(vertailuja==0); -dist = zeros(length(vertailuja),1); -dist(nollat) = 1; -muut = find(vertailuja>0); -dist(muut) = summa(muut)./vertailuja(muut); -clear summa; clear vertailuja; - -Z = linkage(dist'); - - -%---------------------------------------------------------------------------------------- - - -function [Z, distances]= getDistances(data_matrix,nclusters) - -%finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance -%gives partition in 8-bit format -%allocates all alleles of a single individual into the same basket -%data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row, -%i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row -%missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -distances=zeros(nchoosek(n,2),1); -pointer=1; -for i=1:n-1 - i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci); - for j=i+1:n - d_ij=0; - j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci); - vertailuja = 0; - for k=1:size(i_data,1) - for l=1:size(j_data,1) - here_i=find(i_data(k,:)>=0); - here_j=find(j_data(l,:)>=0); - here_joint=intersect(here_i,here_j); - vertailuja = vertailuja + length(here_joint); - d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint))); - end - end - d_ij = d_ij / vertailuja; - distances(pointer)=d_ij; - pointer=pointer+1; - end -end - -Z=linkage(distances'); - - - -%---------------------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 || m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile, indexFile) -%Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto -% eivät olleet yht?pitki? - -popnames = []; -indices = load(indexFile); - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) && ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -if length(names) ~= length(indices) - disp('The number of population names must be equal to the number of '); - disp('entries in the file specifying indices of the first individuals of '); - disp('each population.'); - return; -end - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = indices(i); -end - - -%----------------------------------------------------------------------------------- - - -function [sumcounts, counts, logml] = ... - initialCounts(partition, data, npops, rowsFromInd, noalle) - -nloci=size(data,2); -ninds = size(data,1)/rowsFromInd; - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -initializeGammaln(ninds, rowsFromInd, max(noalle)); - -logml = computeLogml(counts, sumcounts, noalle, data, rowsFromInd); - - -%----------------------------------------------------------------------- - - -function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd) -nloci = size(counts,2); -npops = size(counts,3); -adjnoalle = zeros(max(noalle),nloci); -for j=1:nloci - adjnoalle(1:noalle(j),j)=noalle(j); - if noalle(j)0 - fid = fopen(outPutFile,'w'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Data file: ' inputFile]); -disp(['Model: independent']); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp(['Best Partition: ']); -if (fid ~= -1) - fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',[new_line]); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',[text]); - fprintf(fid,'\n'); - end - end; -end - -if npops > 1 - disp(' '); - disp(' '); - disp('Changes in log(marginal likelihood) if indvidual i is moved to group j:'); - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if indvidual i is moved to group j:']); fprintf(fid, '\n'); - end - - if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku ' ind' blanks(6+erotus)]; - else - ekarivi = ' ind '; - end - for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; - end - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); - end - - %ninds = size(data,1)/rowsFromInd; - changesInLogml = LOGDIFF'; - for ind = 1:ninds - %[muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, data, ... - % adjprior, priorTerm); - %changesInLogml(:,ind) = muutokset; - muutokset = changesInLogml(:,ind); - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - end - end - - disp(' '); disp(' '); - disp('KL-divergence matrix in PHYLIP format:'); - - dist_mat = zeros(npops, npops); - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); - end - - maxnoalle = size(COUNTS,1); - nloci = size(COUNTS,2); - d = zeros(maxnoalle, nloci, npops); - prior = adjprior; - prior(find(prior==1))=0; - nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. - prior(1,nollia)=1; - for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); - end -% ekarivi = blanks(7); -% for pop = 1:npops -% ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; -% end - ekarivi = num2str(npops); - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); - end - - for pop1 = 1:npops - % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - dist_mat(pop1,pop2) = div; - end - % disp(rivi); - % if (fid ~= -1) - % fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - % end - end - -end -dist_mat = dist_mat + dist_mat'; % make it symmetric -for pop1 = 1:npops - rivi = ['Cluster_' num2str(pop1) ' ']; - for pop2 = 1:npops - rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n'); -end - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end -end - -if ~fixedK - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end - end - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - - -%--------------------------------------------------------------- - - -function dispLine -disp('---------------------------------------------------'); - -%-------------------------------------------------------------- - -function num2 = omaRound(num) -% Pyöristää luvun num 1 desimaalin tarkkuuteen -num = num*10; -num = round(num); -num2 = num/10; - -%--------------------------------------------------------- - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:in? -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmill? -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n 1)); -end - -%-------------------------------------------------------------------- - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimmäinen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) || isequal(line2,-1) || isequal(line3,-1)) - disp('Incorrect file format 1168'); fclose(fid); - return -end -if (testaaPop(line1)==1 || testaaPop(line2)==1) - disp('Incorrect file format 1172'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi tällöin lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format 1180'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin nelj?täytyy sisältää pilkku. - disp('Incorrect file format 1185'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1195'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 && ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format 1206'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan jälkeen - if isequal(line4,-1) - disp('Incorrect file format 1212'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin täytyy sisältää pilkku. - disp('Incorrect file format 1217'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1228'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - -%------------------------------------------------------ - - -function [data, popnames] = lueGenePopData(tiedostonNimi) - -fid = fopen(tiedostonNimi); -line = fgetl(fid); %ensimmäinen rivi -line = fgetl(fid); %toinen rivi -count = rivinSisaltamienMjonojenLkm(line); - -line = fgetl(fid); -lokusRiveja = 1; -while (testaaPop(line)==0) - lokusRiveja = lokusRiveja+1; - line = fgetl(fid); -end - -if lokusRiveja>1 - nloci = lokusRiveja; -else - nloci = count; -end - -popnames = cell(10,2); -data = zeros(100, nloci+1); -nimienLkm=0; -ninds=0; -poimiNimi=1; -digitFormat = -1; -while line ~= -1 - line = fgetl(fid); - - if poimiNimi==1 - %Edellinen rivi oli 'pop' - nimienLkm = nimienLkm+1; - ninds = ninds+1; - if nimienLkm>size(popnames,1); - popnames = [popnames; cell(10,2)]; - end - nimi = lueNimi(line); - if digitFormat == -1 - digitFormat = selvitaDigitFormat(line); - divider = 10^digitFormat; - end - popnames{nimienLkm, 1} = {nimi}; %Näin se on greedyMix:issäkin?!? - popnames{nimienLkm, 2} = ninds; - poimiNimi=0; - - data = addAlleles(data, ninds, line, divider); - - elseif testaaPop(line) - poimiNimi = 1; - - elseif line ~= -1 - ninds = ninds+1; - data = addAlleles(data, ninds, line, divider); - end -end - -data = data(1:ninds*2,:); -popnames = popnames(1:nimienLkm,:); -fclose(fid); - -%-------------------------------------------------------- - - -function data = addAlleles(data, ind, line, divider) -% Lisaa BAPS-formaatissa olevaan datataulukkoon -% yksilöä ind vastaavat rivit. Yksilön alleelit -% luetaan genepop-formaatissa olevasta rivist? -% line. Jos data on 3 digit formaatissa on divider=1000. -% Jos data on 2 digit formaatissa on divider=100. - -nloci = size(data,2)-1; -if size(data,1) < 2*ind - data = [data; zeros(100,nloci+1)]; -end - -k=1; -merkki=line(k); -while ~isequal(merkki,',') - k=k+1; - merkki=line(k); -end -line = line(k+1:end); -clear k; clear merkki; - -alleeliTaulu = sscanf(line,'%d'); - -if length(alleeliTaulu)~=nloci - disp('Incorrect data format.'); -end - -for j=1:nloci - ekaAlleeli = floor(alleeliTaulu(j)/divider); - if ekaAlleeli==0 - ekaAlleeli=-999; - end - tokaAlleeli = rem(alleeliTaulu(j),divider); - if tokaAlleeli==0 - tokaAlleeli=-999; - end - - data(2*ind-1,j) = ekaAlleeli; - data(2*ind,j) = tokaAlleeli; -end - -data(2*ind-1,end) = ind; -data(2*ind,end) = ind; - -%------------------------------------------------------ - - -function count = rivinSisaltamienMjonojenLkm(line) -% Palauttaa line:n sisältämien mjonojen lukumäärän. -% Mjonojen väliss?täytyy olla välilyönti. -count = 0; -pit = length(line); -tila = 0; %0, jos odotetaan välilyöntej? 1 jos odotetaan muita merkkej? -for i=1:pit - merkki = line(i); - if (isspace(merkki) && tila==0) - %Ei tehd?mitään. - elseif (isspace(merkki) && tila==1) - tila = 0; - elseif (~isspace(merkki) && tila==0) - tila = 1; - count = count+1; - elseif (~isspace(merkki) && tila==1) - %Ei tehd?mitään - end -end - - -%------------------------------------------------------- - -function nimi = lueNimi(line) -%Palauttaa line:n alusta sen osan, joka on ennen pilkkua. -n = 1; -merkki = line(n); -nimi = ''; -while ~isequal(merkki,',') - nimi = [nimi merkki]; - n = n+1; - merkki = line(n); -end - -%------------------------------------------------------- - -function df = selvitaDigitFormat(line) -% line on ensimmäinen pop-sanan jälkeinen rivi -% Genepop-formaatissa olevasta datasta. funktio selvittää -% rivin muodon perusteella, ovatko datan alleelit annettu -% 2 vai 3 numeron avulla. - -n = 1; -merkki = line(n); -while ~isequal(merkki,',') - n = n+1; - merkki = line(n); -end - -while ~any(merkki == '0123456789'); - n = n+1; - merkki = line(n); -end -numeroja = 0; -while any(merkki == '0123456789'); - numeroja = numeroja+1; - n = n+1; - merkki = line(n); -end - -df = numeroja/2; \ No newline at end of file diff --git a/matlab/parallel/initPopNames.m b/matlab/parallel/initPopNames.m deleted file mode 100644 index 6b6adb1..0000000 --- a/matlab/parallel/initPopNames.m +++ /dev/null @@ -1,35 +0,0 @@ -function popnames = initPopNames(nameFile, indexFile) -%Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto -% eivät olleet yhtä pitkiä. - -popnames = []; -indices = load(indexFile); - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) && ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -if length(names) ~= length(indices) - disp('The number of population names must be equal to the number of '); - disp('entries in the file specifying indices of the first individuals of '); - disp('each population.'); - return; -end - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = indices(i); -end diff --git a/matlab/parallel/linkageMixture_speed.m b/matlab/parallel/linkageMixture_speed.m deleted file mode 100644 index 9201aaf..0000000 --- a/matlab/parallel/linkageMixture_speed.m +++ /dev/null @@ -1,2138 +0,0 @@ -function linkageMixture_speed -base = findobj('Tag','base_figure'); - - -% check whether fixed k mode is selected -h0 = findobj('Tag','fixk_menu'); -fixedK = get(h0, 'userdata'); - -if fixedK - if ~(fixKWarning == 1) % call function fixKWarning - return - end -end - -% Data handling -input_type = questdlg('Specify the format of your data: ',... - 'Specify Data Format', ... - 'MLST-format', 'BAPS-format','Pre-processed data', 'MLST-format'); - -switch input_type - - case 'MLST-format' - - %waitALittle; - mlst_type = questdlg('Choose data type: ',... - 'Specify MLST format', ... - 'Separate allelic profiles(TXT)', ... - 'Concatenate allelic sequences(EXCEL)','Separate allelic profiles(TXT)'); - switch mlst_type - case 'Concatenate allelic sequences(EXCEL)' - %waitALittle - setWindowOnTop(base,'false') - [filename,pathname] = uigetfile('*.xls','Load new concatenate sequence profile(*.xls)'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - display('---------------------------------------------------'); - display(['Reading sequence profile from: ',[pathname filename],'...']); - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); - [data, component_mat, popnames] = processxls([pathname filename]); - - if isempty(data) - display('*** ERROR: Failed in loading the data'); - return; - end - - case 'Separate allelic profiles(TXT)' - % Ask the allelic profile - setWindowOnTop(base,'false') - [filename,pathname] = uigetfile('*.pl;*.txt','Load new allelic profile(*.pl, *.txt)'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - display('---------------------------------------------------'); - display(['Reading allelic profile from: ',[pathname filename],'...']); - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); - - % Preprocess the profile - output = processprofile([pathname filename]); - headercount = size(output,2); - flag = zeros(1,headercount); - for i = 1:headercount - if strcmpi('ST',output{1,i}) - flag(i) = 1; - else - if strcmpi('Isolate', output{1,i}) || strcmpi('Strain',output{1,i}) - flag(i) = 2; - else - if strcmpi('Species', output{1,i}) - flag(i) = 3; - end - end - end - end - - if ~any(flag) - h = errordlg(['Loading of the specified file was unsuccessful. ' ... - 'Please see the tutorial to find out the correct file ' ... - 'format.'] ,'Error','modal'); - handle = [h,base]; - setWindowOnTop(handle,{'true','true'}) - - uiwait(h); - fprintf(1,'\n*** ERROR: Failed in loading the allelic profile.\n'); - return; - end - - index = (1:size(output,1)-1)'; - - - % Species selection - species_loc = find(flag==3); - if ~isempty(species_loc) - species_col = output((2:end), species_loc); - [species_str, m, n] = unique(species_col); - - [s1,v1] = listdlg('PromptString','Select species:',... - 'SelectionMode','multiple',... - 'Name','Select Species',... - 'ListString',species_str'); - removed = setdiff(n,s1); - - if ~v1 || isempty(s1) - dispCancel;return - end - else - n = index; - removed = 0; - end - - % Isolate/Strain selection - isolate_loc = find(flag==2); - if ~isempty(isolate_loc) - isolate_col = output((2:end), isolate_loc); - isolate_str = isolate_col(logical(~ismember(n,removed))); - index(ismember(n,removed)) = []; - - [s2,v2] = choosebox('Name','Select Isolates','PromptString',... - 'Isolates in the sample(Ctrl+A to select all):','SelectString', ... - 'Isolates you have selected','ListString', isolate_str', ... - 'InitialValue',1); - if isempty(s2) || ~v2 - dispCancel; return - end - else - % ST selection - isolate_loc = find(flag==1); - isolate_col = output((2:end), isolate_loc); - % isolate_str = isolate_col(find(~ismember(n,removed))); - isolate_str = isolate_col(logical(~ismember(n,removed))); - index(ismember(n,removed)) = []; - - [s2,v2] = choosebox('Name','Select STs','PromptString',... - 'STs in the sample (Ctrl+A to select all):','SelectString', ... - 'STs you have selected','ListString', isolate_str'); - if isempty(s2) || ~v2 - dispCancel; return - end - end - - % Read the data - isolate_index = index(s2); - if ~isempty(isolate_loc) - popnames(:,1) = isolate_str(s2); - else - popnames(:,1) = num2cell(index(s2)); - end - popnames(:,2) = num2cell([1:length(isolate_index)]'); - - % remove empty elements - strmat = output([isolate_index+1]',find(flag==0)); - [i,j] = find(cellfun('isempty',strmat)); - ij = [i j]; - for k=1:length(i) - strmat(ij(k,1),ij(k,2))={''}; - end - [i,j] = find(strcmp(strmat,'')); - ij = [i j]; - for k=1:length(i) - strmat(ij(k,1),ij(k,2))={'0'}; - end - - data_allele = zeros(size(strmat)); - for i = 1:size(strmat,1) - for j = 1:size(strmat,2) - data_allele(i,j) = str2num(char(strmat(i,j))); - end - end - - % remove columns containing empty values - realgene = find(all(data_allele)); - data_allele = data_allele(:,realgene); - partition_index = 1:size(strmat,1); - data_allele = [data_allele partition_index']; - genename = output(1,find(flag==0)); - genename = genename(realgene); - - - if isempty(genename) || isempty(data_allele) - msgbox(['Loading of the specified file was unsuccessful. ' ... - 'Please see the tutorial to find out the correct file ' ... - 'format.'] ,'Error', ... - 'error'); - fprintf(1,'\n*** ERROR: Failed in loading the allelic profile.\n'); - return; - else - display('---------------------------------------------------'); - display(['# of strains: ', num2str(size(data_allele,1))]); - display(['# of genes: ', num2str(size(data_allele,2)-1)]); - end - - %waitALittle; - % Ask the individual gene sequence - % [isOK,returnvalue] = askSeq; - % if (isOK & returnvalue~= 1) % allelic profile loaded only - % data = data_allele; - % component_mat = [1:size(data,2)-1]'; % assume independence - % elseif - % isOK & returnvalue == 1, - [s3,v3] = listdlg('PromptString','Select genes:',... - 'SelectionMode','multiple',... - 'Name','Select Genes',... - 'ListString',genename); - - if isempty(s3) || ~v3 - dispCancel; - return - else - m = size(s3,2); % number of genes - % data_seq = cell(m,1); - data = []; - genesize = zeros(1,m); - for i=1:m - %waitALittle; - data_gene = readfasta(genename{s3(i)}); % read fasta - if (isempty(data_gene)) - display('*** ERROR: Failed in loading the sequence data'); - return; - else - data_gene(:,find(sum(data_gene)==0)) = []; % NB! remove all the gaps - % data_seq{i} = data_gene; - selected_data = data_gene(data_allele(:,s3(i))',:); % Store only those selected strains - - emptyloci = find(all(selected_data(:,[1:end])<0)); - if ~isempty(emptyloci) - disp('Removing empty loci...'); - end - selected_data(:,emptyloci) = []; % remove empty loci - data = [data selected_data]; - end - genesize(i) = size(selected_data,2); % NB! could be different than the original gene length - end - data = [data data_allele(:,end)]; % add the index - % determine the component matrix - component_mat = zeros(m,max(genesize)); - cum = cumsum(genesize); - component_mat(1,1:genesize(1)) = 1:cum(1); - for i=2:m - component_mat(i,1:genesize(i)) = (cum(i-1)+1):cum(i); - end - end - % elseif isOK ==0 - % return - % end - otherwise - return - end - - %waitALittle; - display('---------------------------------------------------'); - fprintf(1,'Preprocessing the data ...'); - - % Make the missing data complete - % missing values are denoted as -999 - data = uint16(data); - % data = uint8(data); - data = makecomplete(data); - if isempty(data) - display('*** ERROR: Failed in completing the missing data'); - return; - end - - isRational = isTheLoadedNewDataRational(data); - if isRational == 0 - return; - end - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(data); - - - % Distance between individuals is computed as if the loci are - % independent. - [Z,dist] = newGetDistances(data,rowsFromInd); - fprintf(1,'Finished.\n'); - ninds = max(data(:,end)); - popnames = fixPopnames(popnames, ninds); - - c.data = uint16(data); c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.popnames = popnames; c.component_mat = component_mat; - c.dist = dist; c.Z = Z; - - %waitALittle; - save_preproc = questdlg('Do you wish to save the pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - else - kokonimi = [pathname filename]; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end - - end; - - %waitALittle; - linkage_model = questdlg('Specify the linkage model',... - 'Specify the linkage model?',... - 'Linear','Codon', 'Independent', 'Linear'); - if isequal(linkage_model,'Linear') - linkage_model = 'linear'; - display('Linear model was selected.'); - elseif isequal(linkage_model,'Codon') - linkage_model = 'codon'; - display('Codon model was selected.'); - elseif isequal(linkage_model,'Independent') - display('Independent model was selected.'); - c.data = double(c.data); - greedyMix(c); - return; - else - dispCancel; - return; - end; - - % Data transformation - fprintf(1,'Transforming the data ...'); - index = data(:,end); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - % Count the data - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - fprintf(1,'Finished.\n'); - - clear data_clique data_separator - - save_preproc = questdlg('Do you wish to save the fully pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save fully pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - else - kokonimi = [pathname filename]; - c.counts_cq = counts_cq; c.adjprior_cq = adjprior_cq; - c.counts_sp = counts_sp; c.adjprior_sp = adjprior_sp; - c.linkage_model = linkage_model; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end - end; - clear c; - - case 'BAPS-format' - - input_type = questdlg('Specify the format of your data: ',... - 'Specify BAPS Format', ... - 'BAPS sequence data', 'BAPS numeric data', 'BAPS sequence data'); - switch input_type - case 'BAPS numeric data' - %waitALittle; - setWindowOnTop(base,'false') - [filename,pathname] = uigetfile('*.txt', 'Load BAPS numeric data'); - if (sum(filename)==0) || (sum(pathname)==0) - %cancel was pressed; do nothing. - return; - end; - - display('---------------------------------------------------'); - display(['Reading BAPS numeric data from: ',[pathname filename],'...']); - - try - data = load([pathname filename]); - catch - disp('*** ERROR: Incorrect BAPS numerical data.'); - return - end - case 'BAPS sequence data' - waitALittle;%waitALittle; - [data, filename] = readbaps; - if isempty(data) - return - end - otherwise - return; - end - - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - waitALittle;%waitALittle; - input_pops = questdlg(['When using data which are in BAPS-format, '... - 'you can specify the sampling populations of the individuals by '... - 'giving two additional files: one containing the names of the '... - 'populations, the other containing the indices of the first '... - 'individuals of the populations. Do you wish to specify the '... - 'sampling populations?'], ... - 'Specify sampling populations?',... - 'Yes', 'No', 'No'); - if isequal(input_pops,'Yes') - %waitALittle; - display('Reading name and index files...'); - setWindowOnTop(base,'false') - [namefile, namepath] = uigetfile('*.txt', 'Load population names'); - if namefile==0 - kysyToinen = 0; - else - kysyToinen = 1; - end - if kysyToinen==1 - %waitALittle; - setWindowOnTop(base,'false') - [indicesfile, indicespath] = uigetfile('*.txt', 'Load population indices'); - if indicesfile==0 - % popnames = []; - dispCancel; - return - else - popnames = initPopNames([namepath namefile],[indicespath indicesfile]); - end - else - % popnames = []; - dispCancel; - return - end - else - % popnames = []; - popnames(:,1) = num2cell(unique(data(:,end))); - popnames(:,2) = popnames(:,1); - ninds = max(data(:,end)); - popnames = fixPopnames(popnames, ninds); - end - - % check that popnames is correct - if isempty(popnames) - display('*** ERROR: error in reading popnames.') - return - end - - data = uint16(data); - % Check that the data is rational: - isRational = isTheLoadedNewDataRational(data); - if isRational == 0 - msgbox(['Loaded file contained incorrect data. The last column of the ' ... - 'data file must contain sampling unit identifiers. Identifier specifies the ' ... - 'unit from which the genetic data on that particular row was collected. ' ... - 'Identifiers must be positive integers. If the biggest unit identifier is i.e. 27, there must ' ... - 'be at least one row for each unit 1-27'] ,'Error', ... - 'error'); - disp('*** ERROR: Failed in loading the BAPS data.'); - return; - else - display(['# of haplotypes: ', num2str(size(data,1))]); - display(['# of loci: ', num2str(size(data,2)-1)]); - % display('Finished.'); - end; - - % Check if the data is discrete or continuous - if any(any(fix(data)~=data)) - disp('Found decimal numbers. Continuous model will be used.'); -% input_type = questdlg('Choose the method for the continuous data: ',... -% 'Specify the method', ... -% 'BEC', 'Gibbs sampling with WINBUGS','BEC'); -% switch input_type -% case 'BEC' -% disp('Using the BEC mixture model...'); -% becMixture(data, popnames); -% case 'Gibbs sampling with WINBUGS' -% disp('Preparing the WINBUGS code...'); -% isok = makeBUGS(data); -% if isok disp('Finished.'); -% -% end -% otherwise -% return -% end - disp('** CANCELLED: continuous model is under construction.'); - return; - end - - display('---------------------------------------------------'); - fprintf(1,'Preprocessing the data ...'); - % Make the missing data complete - data = makecomplete(data); - if isempty(data) - display('*** ERROR: Failed in completing the missing data'); - return; - end - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(data); - - % Distance between individuals is computed as if the loci are - % independent. - [Z,dist] = newGetDistances(data,rowsFromInd); - fprintf(1,'Finished.\n'); - - c.data = uint16(data); c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.popnames = popnames; - c.Z = Z; - - input_linkage = questdlg('Do you wish to load the linkage map?',... - 'Load Linkage Map',... - 'Yes','No','Yes'); - if isequal(input_linkage,'Yes'); - display('---------------------------------------------------'); - %%waitALittle; - setWindowOnTop(base,'false') - [linkage_filename, linkage_pathname] = uigetfile('*.txt', 'Load Linkage Map'); - - if isempty(linkage_filename) && isempty(linkage_pathname) - return; - else - display(['Reading linkage map from: ',[linkage_pathname linkage_filename],'...']); - end; - - try - component_mat = load([linkage_pathname linkage_filename]); - catch - disp('*** ERROR: Incorrect linkage map.'); - return; - end - - % Check if the linkage map matches the data - if (size(data,2)-1) ~= max(component_mat(:)) - msgbox(['Loading of the specified file was unsuccessful. ' ... - 'The linkage map dose not match with the data.'] ,'Error', ... - 'error'); - disp('*** ERROR: Failed in loading the linkage map.'); - return; - else - display(['# of linkage groups: ', num2str(size(component_mat,1))]); - end; - display('---------------------------------------------------'); - h0 = findobj('Tag','filename1_text'); - set(h0,'String',[filename '/' linkage_filename]); clear h0; - c.component_mat = component_mat; - else - display('Independent model was selected.'); - c.data = double(c.data); - greedyMix(c); - return; - end - - - save_preproc = questdlg('Do you wish to save the pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %%waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - kokonimi = [pathname filename]; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end; - - linkage_model = questdlg('Specify the linkage model',... - 'Specify the linkage model?',... - 'Linear','Codon', 'Independent', 'Linear'); - if isequal(linkage_model,'Linear') - linkage_model = 'linear'; - display('Linear model was selected.'); - elseif isequal(linkage_model,'Codon') - linkage_model = 'codon'; - display('Codon model was selected.'); - elseif isequal(linkage_model,'Independent') - display('Independent model was selected.'); - c.data = double(c.data); - greedyMix(c); - return; - else - dispCancel; - return; - end; - - % Data transformation - % display('---------------------------------------------------'); - fprintf(1,'Transforming the data ...'); - index = data(:,end); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - clear data_clique; - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - clear data_separator; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - fprintf(1,'Finished.\n'); - - save_preproc = questdlg('Do you wish to save the fully pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save fully pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - kokonimi = [pathname filename]; - c.counts_cq = counts_cq; c.adjprior_cq = adjprior_cq; - c.counts_sp = counts_sp; c.adjprior_sp = adjprior_sp; - c.linkage_model = linkage_model; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end; - clear c; - - case 'Pre-processed data' - % This is basically the same format as the "Pre-processed data" in - % the basic clustering. The only difference is that the file - % includes also the component_mat - % %waitALittle; - setWindowOnTop(base,'false') - [filename, pathname] = uigetfile('*.mat', 'Load pre-processed data'); - if filename==0 - return; - end - display('---------------------------------------------------'); - display(['Reading preprocessed data from: ',[pathname filename],'...']); - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - - struct_array = load([pathname filename]); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'dist') - display('*** ERROR: Incorrect file format'); - return - end - clear struct_array; - elseif isfield(struct_array,'dist') %Mideva versio - c = struct_array; - clear struct_array; - else - display('*** ERROR: Incorrect file format'); - return; - end - - % The following are the same as in the basic clustering - data = c.data; popnames = c.popnames; Z = c.Z; - noalle = c.noalle; adjprior = c.adjprior; - rowsFromInd = c.rowsFromInd; alleleCodes = c.alleleCodes; - dist = c.dist; priorTerm = c.priorTerm; - - if ~isfield(c,'component_mat') - display('*** ERROR: Incorrect file format'); - return - end - - % This is new - component_mat = c.component_mat; - data = uint16(data); - - display(['# of haplotypes: ', num2str(size(data,1))]); - display(['# of loci: ', num2str(size(data,2)-1)]); - display(['# of linkage groups: ', num2str(size(component_mat,1))]); - - if ~isfield(c, 'linkage_model') - %%%waitALittle; - % Independent is not an option, since it can be computed with the - % basic clustering which is much faster - linkage_model = questdlg('Specify the linkage model',... - 'Specify the linkage model?',... - 'Linear','Codon','Linear'); - if isequal(linkage_model,'Linear') - linkage_model = 'linear'; - display('Linear model was selected.'); - elseif isequal(linkage_model,'Codon') - linkage_model = 'codon'; - display('Codon model was selected.'); - else - dispCancel; - return; - end; - - clear c; % save the memory usage - - % Data transformation - fprintf(1,'Transforming the data ...'); - index = data(:,end); -% [data_clique, data_separator, noalle_clique, noalle_separator] = ... -% transform2(data, component_mat, linkage_model); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - clear data_clique; - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - clear data_separator; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - fprintf(1,'Finished.\n'); - - save_preproc = questdlg('Do you wish to save the fully pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %%%waitALittle; - [filename, pathname] = uiputfile('*.mat','Save fully pre-processed data as'); - if (sum(filename)==0) || (sum(pathname)==0) - return; - end - - kokonimi = [pathname filename]; c.data = data; - c.counts_cq = counts_cq; c.adjprior_cq = adjprior_cq; - c.counts_sp = counts_sp; c.adjprior_sp = adjprior_sp; - c.linkage_model = linkage_model; - c.rowsFromInd = rowsFromInd; - c.alleleCodes = alleleCodes; - c.noalle = noalle; - c.adjprior = adjprior; - c.priorTerm = priorTerm; - c.popnames = popnames; - c.component_mat = component_mat; - c.dist = dist; c.Z = Z; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - end; - clear c; - else - %Linkage model is specified in the preprocessed file. - counts_cq = c.counts_cq; adjprior_cq = c.adjprior_cq; - counts_sp = c.counts_sp; adjprior_sp = c.adjprior_sp; - linkage_model = c.linkage_model; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - clear c; - display(['linkage model: ', linkage_model]); - end - - otherwise - return; -end - - -global POP_LOGML; global PARTITION; -global CQ_COUNTS; global SP_COUNTS; %These counts are for populations -global CQ_SUMCOUNTS; global SP_SUMCOUNTS; %not for individuals -clearGlobalVars; - -c.noalle = noalle; -c.adjprior = adjprior; %priorTerm = c.priorTerm; -c.rowsFromInd = rowsFromInd; -c.counts_cq = counts_cq; c.adjprior_cq = adjprior_cq; -c.counts_sp = counts_sp; c.adjprior_sp = adjprior_sp; -c.dist = dist; c.Z = Z; - - -if fixedK - [logml, npops, partitionSummary] = linkageMix_fixK(c); -else - [logml, npops, partitionSummary] = linkageMix(c); -end - -if logml==1 - return; -end - -h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outp = get(h0,'String'); - -%This is basically the same as in BAPS 3. -changesInLogml = writeMixtureInfo(logml, counts_cq, counts_sp, adjprior_cq, ... - adjprior_sp, outp, inp, partitionSummary, popnames, linkage_model, ... - fixedK); - -viewMixPartition(PARTITION, popnames); - -% --------------------------------------------------------------------- -% Save the result. -% Jing - 26.12.2005 -talle = questdlg(['Do you want to save the mixture populations ' ... - 'so that you can use them later in admixture analysis?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - %%%waitALittle; - [filename, pathname] = uiputfile('*.mat','Save results as'); - - if (sum(filename)==0) || (sum(pathname)==0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename '.txt']) - delete('baps4_output.baps') - end - end - - [sumcounts, counts] = indLociCounts(PARTITION, data, npops, noalle); - % NB! Index column is removed in data matrix. - c.PARTITION = PARTITION; c.CQ_COUNTS = CQ_COUNTS; c.CQ_SUMCOUNTS = CQ_SUMCOUNTS; - c.SP_COUNTS = SP_COUNTS; c.SP_SUMCOUNTS = SP_SUMCOUNTS; - c.alleleCodes = alleleCodes; c.adjprior_cq = adjprior_cq; c.adjprior_sp = adjprior_sp; c.popnames = popnames; - c.rowsFromInd = rowsFromInd; c.data = uint16(data); c.npops = npops; - % c.nalleles_cq = nalleles_cq; c.nalleles_sp = nalleles_sp; - if strcmp(linkage_model,'linear') % Added on 03.11.06 - c.mixtureType = 'linear_mix'; - elseif strcmp(linkage_model,'codon') - c.mixtureType = 'codon_mix'; - end - c.changesInLogml = changesInLogml; % this variable stores the change of likelihoods. - % [ncluster ninds] - % -Added on 02.11.2006 - - % The next ones are for the admixture input - c.COUNTS = counts; c.SUMCOUNTS = sumcounts; - c.adjprior = adjprior; c.rowsFromInd = rowsFromInd; c.noalle = noalle; c.alleleCodes = alleleCodes; - - % The two variables below are for the new linkage admixture model - c.gene_lengths = calcGeneLengths(component_mat); - - % The logml is saved for parallel computing - c.logml = logml; - - fprintf(1,'Saving the result...') - try -% save([pathname filename], 'c'); - save([pathname filename], 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'Finished.\n'); - catch - display('*** ERROR in saving the result.'); - end -else - if exist('baps4_output.baps','file') - delete('baps4_output.baps') - end -end -% ----------------------------------------------------------------------- - - -%-------------------------------------------------------------------------- -% The next three functions are for computing the initial partition -% according to the distance between the individuals - -function initial_partition=admixture_initialization(nclusters,Z) -T=cluster_own(Z,nclusters); -initial_partition=T; - -%-------------------------------------------------------------------------- -function T = cluster_own(Z,nclust) -% true=logical(1); -% false=logical(0); - -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); -% maximum number of clusters based on inconsistency -if m <= maxclust - T = (1:m)'; -elseif maxclust==1 - T = ones(m,1); -else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end -end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - -%-------------------------------------------------------------------------- - -function Z = computeLinkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 || m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -% monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges(ind, adjprior_cq, adjprior_sp, ... - indCqCounts, indSpCounts) -% Computes changes in log-marginal likelihood if individual ind is -% moved to another population -% -% Input: -% ind - the individual to be moved -% adjprior_cq & _sp - adjpriors for cliques and separators -% indCqCounts, indSpCounts - counts for individual ind -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% ind is move to population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) + repmat(sumSp,[npops-1 1]); - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) - repmat(sumSp,[npops-1 1]); -% a = repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - -%------------------------------------------------------------------------------------ - -function changes = computeChanges2(i1, adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if population i1 is combined -% with another population -% -% Input: -% i1 - the population to be combined -% adjprior_cq & _sp - adjpriors for cliques and separators -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% i1 is combined with population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); - -i1_logml = POP_LOGML(i1); -indCqCounts = CQ_COUNTS(:,:,i1); -indSpCounts = SP_COUNTS(:,:,i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -new_i1_logml = 0; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); -% a = repmat(sumSp,[npops-1 1]); -% if ~any(sumSp) -% a(:,[1:size(a,2)])=[]; -% end -% SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ a ; - - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - - - -%------------------------------------------------------------------------------------ - - -function changes = computeChanges3(T2, inds2, i1, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if subpopulation of i2 is -% moved to another population -% -% Input: -% T2 - partition of inds2 to subpopulations -% inds2 - individuals in population i1 -% i2 -% counts_cq, counts_sp - counts for individuals -% -% Output: -% changes - table of size length(unique(T2))*npops. -% changes(i,j) = difference in logml if subpopulation inds2(find(T2==i)) of -% i2 is moved to population j - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -npops2 = length(unique(T2)); -changes = zeros(npops2,npops); - -%cq_counts = CQ_COUNTS; -%sp_counts = SP_COUNTS; -%cq_sumcounts = CQ_SUMCOUNTS; -%sp_sumcounts = SP_SUMCOUNTS; - - -i1_logml = POP_LOGML(i1); - -for pop2 = 1:npops2 - % inds = inds2(find(T2==pop2)); - inds = inds2(logical(T2==pop2)); - ninds = length(inds); - if ninds>0 - indCqCounts = uint16(sum(counts_cq(:,:,inds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,inds),3)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - - new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); - - new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - - changes(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges5(inds, i1, i2, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes change in logml if individual of inds is moved between -% populations i1 and i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; global PARTITION; - -ninds = length(inds); -changes = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %from - pop2 = i2; %to - else - pop1 = i2; - pop2 = i1; - end - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)-indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)-sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)-indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:) - sumSp; - - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)+indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)+sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)+indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:) + sumSp; - - new_logmls = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - changes(i) = sum(new_logmls); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)+indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)+sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)+indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:)+sumSp; - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)-indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)-sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)-indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:)-sumSp; -end - -changes = changes - i1_logml - i2_logml; - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individual ind is moved to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp) -% Updates global variables when all individuals from population i1 are moved -% to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; - -% inds = find(PARTITION==i1); -% PARTITION(inds) = i2; -PARTITION(logical(PARTITION==i1)) = i2; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+CQ_COUNTS(:,:,i1); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+CQ_SUMCOUNTS(i1,:); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+SP_COUNTS(:,:,i1); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+SP_SUMCOUNTS(i1,:); - -CQ_COUNTS(:,:,i1) = 0; -CQ_SUMCOUNTS(i1,:) = 0; -SP_COUNTS(:,:,i1) = 0; -SP_SUMCOUNTS(i1,:) = 0; - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individuals muuttuvat are moved to -% population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Returns individuals inds in order according to the change in the logml if -% they are moved out of the population pop - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)-indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)-sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)-indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)-sumSp; - - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)+indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)+sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)+indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)+sumSp; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - - -%------------------------------------------------------------------------- - -function [Z, dist] = newGetDistances(data, rowsFromInd) - -ninds = max(data(:,end)); -nloci = size(data,2)-1; -riviLkm = nchoosek(double(ninds),2); - -% empties = find(data<0); -% data(empties)=0; -data(logical(data<0)) = 0; -data = uint16(data); - -pariTaulu = zeros(riviLkm,2); -aPointer=1; - -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+double(ninds-1-a),1) = ones(ninds-a,1,'uint16')*a; - pariTaulu(aPointer:aPointer+double(ninds-1-a),2) = uint16((a+1:ninds)'); - aPointer = aPointer+double(ninds-a); -end - -eka = pariTaulu(:,ones(1,rowsFromInd)); -eka = eka * rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -eka = eka - miinus; - -toka = pariTaulu(:,ones(1,rowsFromInd)*2); -toka = toka * rowsFromInd; -toka = toka - miinus; - -eka = uint16(eka); -toka = uint16(toka); - -clear pariTaulu; clear miinus; - -summa = uint16(zeros(riviLkm,1)); -vertailuja = uint16(zeros(riviLkm,1)); - -x = zeros(size(eka)); x = uint16(x); -y = zeros(size(toka)); y = uint16(y); -% fprintf(1,'%%10'); -for j=1:nloci; - - for k=1:rowsFromInd - x(:,k) = data(eka(:,k),j); - y(:,k) = data(toka(:,k),j); - end - - for a=1:rowsFromInd - for b=1:rowsFromInd - vertailutNyt = uint16(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa + uint16(lisays); - end - end - % fprintf(1,'\b\b'); - % fprintf(1,'%d',floor(10+80*j/nloci)); -end - -clear x; clear y; clear vertailutNyt; -clear eka; clear toka; clear data; clear lisays; -dist = zeros(length(vertailuja),1); -% nollat = find(vertailuja==0); -% dist(nollat) = 1; -dist(logical(vertailuja==0)) = 1; -muut = find(vertailuja>0); -dist(muut) = double(summa(muut))./double(vertailuja(muut)); -clear summa; clear vertailuja; clear muut; - -Z = computeLinkage(dist'); -% fprintf(1,'\b\b'); -% fprintf(1,'%d\n',100); -%-------------------------------------------------------------------------- - -function clearGlobalVars - -global CQ_COUNTS; CQ_COUNTS = []; -global CQ_SUMCOUNTS; CQ_SUMCOUNTS = []; -global SP_COUNTS; SP_COUNTS = []; -global SP_SUMCOUNTS; SP_SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - -%-------------------------------------------------------------------------- - -function npops = removeEmptyPops -% Removes empty pops from all global COUNTS variables. -% Updates PARTITION and npops - -global CQ_COUNTS; -global CQ_SUMCOUNTS; -global SP_COUNTS; -global SP_SUMCOUNTS; -global PARTITION; - -notEmpty = find(any(CQ_SUMCOUNTS,2)); -CQ_COUNTS = CQ_COUNTS(:,:,notEmpty); -CQ_SUMCOUNTS = CQ_SUMCOUNTS(notEmpty,:); -SP_COUNTS = SP_COUNTS(:,:,notEmpty); -SP_SUMCOUNTS = SP_SUMCOUNTS(notEmpty,:); - -for n=1:length(notEmpty) -% apu = find(PARTITION==notEmpty(n)); -% PARTITION(apu)=n; -PARTITION(logical(PARTITION==notEmpty(n))) = n; -end -npops = length(notEmpty); - -%-------------------------------------------------------------------------- - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. -global PARTITION; -apu = isempty(find(abs(partitionSummary(:,2)-logml)<1e-5,1)); -if apu - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- - -function [counts, sumcounts] = initialCounts(ind_counts) - -global PARTITION; - -pops = unique(PARTITION); -npops = max(pops); - -counts = zeros(size(ind_counts,1), size(ind_counts,2), npops,'uint16'); -sumcounts = zeros(npops, size(ind_counts,2),'uint16'); - -for i = 1:npops - inds = find(PARTITION == i); - counts(:,:,i) = sum(ind_counts(:,:,inds), 3); - sumcounts(i,:) = sum(counts(:,:,i),1); -end - -%-------------------------------------------------------------------------- - -function logml = computeLogml(adjprior_cq, adjprior_sp) - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -npops = size(CQ_COUNTS, 3); - -cq_logml = sum(sum(sum(gammaln(cq_counts+repmat(adjprior_cq,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_cq))) - ... - sum(sum(gammaln(1+cq_sumcounts))); - -sp_logml = sum(sum(sum(gammaln(sp_counts+repmat(adjprior_sp,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_sp))) - ... - sum(sum(gammaln(1+sp_sumcounts))); - -logml = cq_logml - sp_logml; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%-------------------------------------------------------------------------- - -function popLogml = computePopulationLogml(pops, adjprior_cq, adjprior_sp) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -nall_cq = size(CQ_COUNTS,1); -nall_sp = size(SP_COUNTS, 1); -ncliq = size(CQ_COUNTS,2); -nsep = size(SP_COUNTS, 2); - -z = length(pops); - -popLogml_cq = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_cq,[1 1 z]) + cq_counts(:,:,pops)) ... - ,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_cq))); - -popLogml_sp = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_sp,[1 1 z]) + sp_counts(:,:,pops)) ... - ,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_sp))); - -popLogml = popLogml_cq - popLogml_sp; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%------------------------------------------------------------------- - - -function changesInLogml = writeMixtureInfo(logml, counts_cq, counts_sp, adjprior_cq, ... - adjprior_sp, outPutFile, inputFile, partitionSummary, popnames, linkage_model,... - fixedK) - -global PARTITION; -global CQ_COUNTS; -global LOGDIFF; - -%global CQ_SUMCOUNTS; -%global SP_COUNTS; global SP_SUMCOUNTS; -ninds = length(PARTITION); -npops = size(CQ_COUNTS,3); -names = (size(popnames,1) == ninds); %Tarkistetaan ett?nimet viittaavat yksilöihin - -if length(outPutFile)>0 - fid = fopen(outPutFile,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Data file/ Linkage map: ' inputFile]); -disp(['Model: ' linkage_model]); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp('Best Partition: '); -if (fid ~= -1) - fprintf(fid,'%s \n','Best Partition: '); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',new_line); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',text); - fprintf(fid,'\n'); - end - end; -end - -if npops == 1 - changesInLogml = []; -else - disp(' '); - disp(' '); - disp('Changes in log(marginal likelihood) if indvidual i is moved to group j:'); - if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to group j:'); fprintf(fid, '\n'); - end - - if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku ' ind' blanks(6+erotus)]; - else - ekarivi = ' ind '; - end - - for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; - end - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', ekarivi); fprintf(fid, '\n'); - end - - %ninds = size(data,1)/rowsFromInd; - changesInLogml = LOGDIFF'; - for ind = 1:ninds - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - %changesInLogml(:,ind) = computeChanges(ind, adjprior_cq, ... - % adjprior_sp, indCqCounts, indSpCounts); - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(changesInLogml(j,ind)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', rivi); fprintf(fid, '\n'); - end - end - - - % % KL-divergence has to be calculated otherwise... - % % { - % disp(' '); disp(' '); - % disp('KL-divergence matrix:'); - % - % if (fid ~= -1) - % fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - % fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - % fprintf(fid, '%s \n', ['KL-divergence matrix:']); fprintf(fid, '\n'); - % end - % - % maxnoalle = size(COUNTS,1); - % nloci = size(COUNTS,2); - % d = zeros(maxnoalle, nloci, npops); - % prior = adjprior; - % prior(find(prior==1))=0; - % nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. - % prior(1,nollia)=1; - % for pop1 = 1:npops - % d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - % dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); - % end - % ekarivi = blanks(7); - % for pop = 1:npops - % ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; - % end - % disp(ekarivi); - % if (fid ~= -1) - % fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); - % end - % - % for pop1 = 1:npops - % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - % for pop2 = 1:pop1-1 - % dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - % div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - % div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - % div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - % end - % disp(rivi); - % if (fid ~= -1) - % fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - % end - % end - % % } -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n'); -end - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -% partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', line); fprintf(fid, '\n'); - end -end - -if ~fixedK - - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'Probabilities for number of clusters'); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - % npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - npopstn = sum(exp(partitionSummary(logical(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', line); fprintf(fid, '\n'); - end - end - end - -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - - -%-------------------------------------------------------------- -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -% newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n= 1 - suurinYks = suurinYks+1; - end - if suurinYks<10 - mjono(7) = num2str(suurinYks); - mjono(6) = 'e'; - mjono(5) = palautaYks(abs(logml),suurinYks-1); - mjono(4) = '.'; - mjono(3) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(2) = '-'; - end - elseif suurinYks>=10 - mjono(6:7) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(logml),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(1) = '-'; - end - end -end - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:in? -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmill? -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - -%------------------------------------------------------------------------- - -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). - -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -% dataApu = []; -% nollat = []; -% isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - %alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(logical(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - % data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - data(logical(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); % Tält?yksilölt?puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - -function popnames = fixPopnames(popnames, ninds) - -if length(popnames) == ninds - for i=1:ninds - if isnumeric(popnames{i}) - popnames{i} = num2str(popnames{i}); - % popnames(i) = num2str(popnames{i}); - end - popnames{i} = cellstr(popnames{i}); - % popnames(i) = cellstr(popnames{i}); - end -end - -%-------------------------------------------------------------------------- -function isRational = isTheLoadedNewDataRational(data) -% The last column of the data must include numbers 1-npops -% If so, isRational = 1, otherwise isRational = 0. -% The row numbers must be larger than 1. -if size(data,1) == 1 - isRational = 0; - display('*** ERROR: Sample size must be larger than one'); - return; -end -last_column = data(:,end); -last_column = sort(last_column); -current = 1; -if last_column(1) ~= current - isRational = 0; - display('*** ERROR: Wrong Indexes in the data'); - return; -end; -lengthcol = length(last_column); -for n = 2:lengthcol - if ~(last_column(n) == current || last_column(n) == current + 1) - %Some population is missing from the last column - isRational = 0; - display('*** ERROR: Missing indexes in the data'); - return; - end; - current = last_column(n); -end; -isRational = 1; - - -% %------------------------------------------------------------------------- -% function isRational = isTheLoadedNewLinkageRational(linkage_data) -% % Each positive element must be unique. -% % If so, isRational = 1, otherwise isRational = 0; -% nonzero = find(linkage_data~=0); -% dif = diff(linkage_data(nonzero)); -% if ~all(dif) -% isRational = 0; return; -% end; -% isRational = 1; - -%-------------------------------------------------------------------------- - -function [sumcounts, counts] = ... - indLociCounts(partition, data, npops, noalle) - -nloci=size(data,2)-1; -% ninds = size(data,1); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - % havainnotLokuksessa = find(partition==i & data(:,j)>=0); - havainnotLokuksessa = find(ismember(data(:,end),find(partition==i))); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile, indexFile) -%Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto -% eivät olleet yht?pitki? - -popnames = []; -try -indices = load(indexFile); -catch - msgbox('Loading of the index file was unsuccessful', ... - 'Error', 'error'); - return -end -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the name file was unsuccessful', ... - 'Error', 'error'); - return; -end - -line = fgetl(fid); -counter = 1; -while sum(line~=-1) && ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -if length(names) ~= length(indices) - disp('The number of population names must be equal to the number of '); - disp('entries in the file specifying indices of the first individuals of '); - disp('each population.'); - return; -end - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = indices(i); -end diff --git a/matlab/parallel/linkage_admix_parallel.m b/matlab/parallel/linkage_admix_parallel.m deleted file mode 100644 index 1d7a061..0000000 --- a/matlab/parallel/linkage_admix_parallel.m +++ /dev/null @@ -1,1030 +0,0 @@ -function linkage_admix_parallel(tietue, options) - -global COUNTS; global PARTITION; global SUMCOUNTS; -clearGlobalVars; - -PARTITION = tietue.PARTITION; -COUNTS = tietue.COUNTS; -SUMCOUNTS = tietue.SUMCOUNTS; -rowsFromInd = tietue.rowsFromInd; -data = double(tietue.data); -npops = tietue.npops; -noalle = tietue.noalle; -switch tietue.mixtureType - case 'linear_mix' - linkage_model = 'linear'; - case 'codon_mix' - linkage_model = 'codon'; -end -if isfield(tietue,'gene_lengths') - gene_lengths = tietue.gene_lengths; -else - [filename, pathname] = uigetfile('*.txt', 'Load file with lengths of the genes (same order as in data).'); - gene_lengths = load([pathname filename]); -end - -if length(unique(PARTITION(find(PARTITION>0))))==1 - disp('Only one population in the input file'); - disp('No admixture detected'); - return -end - -%answers = inputdlg({['Input the minimum size of a population that will'... -% ' be taken into account when admixture is estimated.']},... -% 'Input minimum population size',1,{'5'}); -%if isempty(answers), return; end -%alaRaja = str2double(answers{1,1}); -alaRaja = options.minSize; -[npops] = poistaLiianPienet(npops, rowsFromInd, alaRaja); - -nloci = size(COUNTS,2); -ninds = size(data,1)/rowsFromInd; - -iterationCount = options.iters; -nrefIndsInPop = options.refInds; -iterationCountRef = options.refIters; - -[cliq_data, sep_data, cliq_counts, component_mat] = createCliqData(data, gene_lengths, noalle, ... - linkage_model, rowsFromInd); - -% Repeat: simulate clique frequencies and estimate proportions. Save the -% average proportions in "proportionsIt". - -proportionsIt = zeros(ninds,npops); -for iterationNum = 1:iterationCount - disp(['Iter: ' num2str(iterationNum)]); - %allfreqs = simulateAllFreqs(noalle); - [cliq_freqs, sep_freqs] = simulateCliqFreqs(cliq_counts, noalle, component_mat, gene_lengths, linkage_model); - for ind=1:ninds - - %omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd); - [ownCliqFreqs, ownSepFreqs] = computePersonalCliqueFreqs(ind, ... - cliq_data, cliq_freqs, sep_data, sep_freqs, rowsFromInd, gene_lengths, linkage_model); - osuusTaulu = zeros(1,npops); - if PARTITION(ind)==0 - % Outlier individual - elseif PARTITION(ind)~=0 - if PARTITION(ind)>0 - osuusTaulu(PARTITION(ind)) = 1; - else - % Individuals who are not assigned to any cluster. - arvot = zeros(1,npops); - for q=1:npops - osuusTaulu = zeros(1,npops); - osuusTaulu(q) = 1; - arvot(q) = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, osuusTaulu); - end - [iso_arvo, isoimman_indeksi] = max(arvot); - osuusTaulu = zeros(1,npops); - osuusTaulu(isoimman_indeksi) = 1; - PARTITION(ind)=isoimman_indeksi; - end - logml = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, osuusTaulu); - - for osuus = [0.5 0.25 0.05 0.01] - [osuusTaulu, logml] = searchBest(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml); - end - end - proportionsIt(ind,:) = proportionsIt(ind,:).*(iterationNum-1) + osuusTaulu; - proportionsIt(ind,:) = proportionsIt(ind,:)./iterationNum; - end -end - -disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']); -disp('each population.'); - -%allfreqs = simulateAllFreqs(noalle); % Simuloidaan alleelifrekvenssisetti -%allfreqs = computeAllFreqs2(noalle); % Koitetaan tällaista. -%refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs); - -exp_cliq_freqs = ... - computeExpectedFreqs(cliq_counts, noalle, component_mat, gene_lengths, linkage_model); -[ref_cliq_data, ref_sep_data] = ... - simulateLinkageIndividuals(nrefIndsInPop, rowsFromInd, exp_cliq_freqs, ... - gene_lengths, noalle, component_mat, linkage_model); -nrefInds = npops*nrefIndsInPop; - -disp(['Analysing the reference individuals in ' num2str(iterationCountRef) ' iterations.']); -refProportions = zeros(nrefInds,npops); -for iter = 1:iterationCountRef - disp(['Iter: ' num2str(iter)]); - %allfreqs = simulateAllFreqs(noalle); - [cliq_freqs, sep_freqs] = simulateCliqFreqs(cliq_counts, noalle, component_mat, gene_lengths, linkage_model); - for ind = 1:nrefInds - %omaFreqs = computePersonalAllFreqs(ind, refData, allfreqs, rowsFromInd); - [ownCliqFreqs, ownSepFreqs] = computePersonalCliqueFreqs(ind, ... - ref_cliq_data, cliq_freqs, ref_sep_data, sep_freqs, rowsFromInd, gene_lengths, linkage_model); - osuusTaulu = zeros(1,npops); - pop = ceil(ind/nrefIndsInPop); - osuusTaulu(pop)=1; - %logml = computeIndLogml(omaFreqs, osuusTaulu); - logml = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, osuusTaulu); - for osuus = [0.5 0.25 0.05 0.01] - %[osuusTaulu, logml] = searchBest(osuus, osuusTaulu, omaFreqs, logml); - [osuusTaulu, logml] = searchBest(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml); - end - refProportions(ind,:) = refProportions(ind,:).*(iter-1) + osuusTaulu; - refProportions(ind,:) = refProportions(ind,:)./iter; - end -end -refTaulu = zeros(npops,100); -for ind = 1:nrefInds - pop = ceil(ind/nrefIndsInPop); - omanOsuus = refProportions(ind,pop); - if round(omanOsuus*100)==0 - omanOsuus = 0.01; - end - if abs(omanOsuus)<1e-5 - omanOsuus = 0.01; - end - refTaulu(pop, round(omanOsuus*100)) = refTaulu(pop, round(omanOsuus*100))+1; -end - -% Rounding: -proportionsIt = proportionsIt.*100; proportionsIt = round(proportionsIt); -proportionsIt = proportionsIt./100; -for ind = 1:ninds - % if sum not equal to one, fix the largest part. - if (PARTITION(ind)>0) && (sum(proportionsIt(ind,:)) ~= 1) - [isoin,indeksi] = max(proportionsIt(ind,:)); - erotus = sum(proportionsIt(ind,:))-1; - proportionsIt(ind,indeksi) = isoin-erotus; - end -end - -% "p-value" for admixture -uskottavuus = zeros(ninds,1); -for ind = 1:ninds - pop = PARTITION(ind); - if pop==0 % an outlier - uskottavuus(ind)=1; - else - omanOsuus = proportionsIt(ind,pop); - if abs(omanOsuus)<1e-5 - omanOsuus = 0.01; - end - if round(omanOsuus*100)==0 - omanOsuus = 0.01; - end - refPienempia = sum(refTaulu(pop, 1:round(100*omanOsuus))); - uskottavuus(ind) = refPienempia / nrefIndsInPop; - end -end - -tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount); - -%viewPartition(proportionsIt, popnames); - -fprintf(1,'Saving the result...') -try -% save(options.outputMat, 'c'); - save(options.outputMat, 'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'Finished.\n'); -catch - display('*** ERROR in saving the result.'); -end - - -% copy 'baps4_output.baps' into the text file with the same name. -if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[options.outputMat '.txt']) - delete('baps4_output.baps') -end - -tietue.proportionsIt = proportionsIt; -tietue.pvalue = uskottavuus; % Added by Jing -tietue.admixnpops = npops; -tietue.mixtureType = 'admix'; % added by jing on 09.09.2008 -% save(options.outputMat, 'tietue'); -save(options.outputMat, 'tietue','-v7.3'); % added by Lu Cheng, 08.06.2012 - - -%---------------------------------------------------------------------------- - - -function [npops] = poistaLiianPienet(npops, rowsFromInd, alaraja) -% Muokkaa tulokset muotoon, jossa outlier yksilät on -% poistettu. Tarkalleen ottaen poistaa ne populaatiot, -% joissa on vähemmän kuin 'alaraja':n verran yksiläit? - -global PARTITION; -global COUNTS; -global SUMCOUNTS; - -popSize=zeros(1,npops); -for i=1:npops - popSize(i)=length(find(PARTITION==i)); -end -miniPops = find(popSize0))); -for n=1:length(korit) - kori = korit(n); - yksilot = find(PARTITION==kori); - PARTITION(yksilot) = n; -end -COUNTS(:,:,miniPops) = []; -SUMCOUNTS(miniPops,:) = []; - -npops = npops-length(miniPops); - -%------------------------------------------------------------------------ - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - -%-------------------------------------------------------- - - -function allFreqs = computeAllFreqs2(noalle) -% Lisää a priori jokaista alleelia -% joka populaation joka lokukseen j 1/noalle(j) verran. - -global COUNTS; -global SUMCOUNTS; - -max_noalle = size(COUNTS,1); -nloci = size(COUNTS,2); -npops = size(COUNTS,3); - -sumCounts = SUMCOUNTS+ones(size(SUMCOUNTS)); -sumCounts = reshape(sumCounts', [1, nloci, npops]); -sumCounts = repmat(sumCounts, [max_noalle, 1 1]); - -prioriAlleelit = zeros(max_noalle,nloci); -for j=1:nloci - prioriAlleelit(1:noalle(j),j) = 1/noalle(j); -end -prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]); -counts = COUNTS + prioriAlleelit; -allFreqs = counts./sumCounts; - -%-------------------------------------------------------------------------- - - -function refData = simulateIndividuals(n,rowsFromInd,allfreqs) -% simuloidaan n yksilää jokaisesta populaatiosta. ( - -npops = size(allfreqs,3); -nloci = size(allfreqs,2); -ninds = n*npops; - -refData = zeros(ninds*rowsFromInd,nloci); -counter = 1; % Pitää kirjaa mille riville seuraavaksi simuloidaan. - -for ind = 1:ninds - pop = ceil(ind/n); - for loc = 1:nloci - for k=0:rowsFromInd-1 - refData(counter+k,loc) = simuloiAlleeli(allfreqs,pop,loc); - end - end - counter = counter+rowsFromInd; -end - -function all = simuloiAlleeli(allfreqs,pop,loc) -% Simuloi populaation pop lokukseen loc alleelin. -freqs = allfreqs(:,loc,pop); -cumsumma = cumsum(freqs); -arvo = rand; -isommat = find(cumsumma>arvo); -all = min(isommat); - -%--------------------------------------------------------------------------- - - -function loggis = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, proportions) -% Calculates the likelihood when the origins are defined by "proportions". - -aux = proportions * ownCliqFreqs; -aux = log(aux); -loggis = sum(aux); - -clear aux; - -aux2 = proportions * ownSepFreqs; -aux2 = log(aux2); -loggis = loggis - sum(aux2); - - -%-------------------------------------------------------------------------- - - -function osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi) -% Päivittää osuusTaulun muutoksen jälkeen. - -global COUNTS; -npops = size(COUNTS,3); - -i1 = rem(indeksi,npops); -if i1==0, i1 = npops; end; -i2 = ceil(indeksi / npops); - -osuusTaulu(i1) = osuusTaulu(i1)-osuus; -osuusTaulu(i2) = osuusTaulu(i2)+osuus; - - -%------------------------------------------------------------------------- - - -function [osuusTaulu, logml] = searchBest(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml) - -ready = 0; -while ready ~= 1 - muutokset = calcChanges(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml); - [maxMuutos, indeksi] = max(muutokset(1:end)); - if maxMuutos>0 - osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi); - logml = logml + maxMuutos; - else - ready = 1; - end -end - - - -%--------------------------------------------------------------------------- - - -function muutokset = calcChanges(osuus, osuusTaulu, ownCliqFreqs, ownSepFreqs, logml) -% Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on -% muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -% todennäkäisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole -% mitään siirrettävää, on vastaavassa kohdassa rivi nollia. - -global COUNTS; -npops = size(COUNTS,3); - -notEmpty = find(osuusTaulu>0.005); -muutokset = zeros(npops); -empties = ~notEmpty; - -for i1=notEmpty - - osuusTaulu(i1) = osuusTaulu(i1)-osuus; - - for i2 = [1:i1-1 i1+1:npops] - osuusTaulu(i2) = osuusTaulu(i2)+osuus; - loggis = computeIndLikelihood(ownCliqFreqs, ownSepFreqs, osuusTaulu); - muutokset(i1,i2) = loggis-logml; - osuusTaulu(i2) = osuusTaulu(i2)-osuus; - end - - osuusTaulu(i1) = osuusTaulu(i1)+osuus; -end - - -%--------------------------------------------------------------- - - -function dispLine -disp('---------------------------------------------------'); - - -%-------------------------------------------------------------------------- - - -function tulostaAdmixtureTiedot(proportions, uskottavuus, alaRaja, niter) -h0 = findobj('Tag','filename1_text'); -inputf = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outf = get(h0,'String'); clear h0; - -if length(outf)>0 - fid = fopen(outf,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -ninds = length(uskottavuus); -npops = size(proportions,2); -disp(' '); -dispLine; -disp('RESULTS OF ADMIXTURE ANALYSIS BASED'); -disp('ON MIXTURE CLUSTERING OF INDIVIDUALS'); -disp(['Data file: ' inputf]); -disp(['Number of individuals: ' num2str(ninds)]); -disp(['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); -disp(' '); -if fid ~= -1 - fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['RESULTS OF ADMIXTURE ANALYSIS BASED']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['ON MIXTURE CLUSTERING OF INDIVIDUALS']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Data file: ' inputf]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Number of individuals: ' num2str(ninds)]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); fprintf(fid, '\n'); - fprintf(fid, '\n'); -end - -ekaRivi = blanks(6); -for pop = 1:npops - ekaRivi = [ekaRivi blanks(3-floor(log10(pop))) num2str(pop) blanks(2)]; -end -ekaRivi = [ekaRivi blanks(1) 'p']; % Added on 29.08.06 -disp(ekaRivi); -for ind = 1:ninds - rivi = [num2str(ind) ':' blanks(4-floor(log10(ind)))]; - if any(proportions(ind,:)>0) - for pop = 1:npops-1 - rivi = [rivi proportion2str(proportions(ind,pop)) blanks(2)]; - end - rivi = [rivi proportion2str(proportions(ind,npops)) ': ']; - rivi = [rivi ownNum2Str(uskottavuus(ind))]; - end - disp(rivi); - if fid ~= -1 - fprintf(fid,'%s \n',[rivi]); fprintf(fid,'\n'); - end -end -if fid ~= -1 - fclose(fid); -else - diary off -end - -%------------------------------------------------------ - -function str = proportion2str(prob) -%prob belongs to [0.00, 0.01, ... ,1]. -%str is a 4-mark presentation of proportion. - -if abs(prob)<1e-3 - str = '0.00'; -elseif abs(prob-1) < 1e-3; - str = '1.00'; -else - prob = round(100*prob); - if prob<10 - str = ['0.0' num2str(prob)]; - else - str = ['0.' num2str(prob)]; - end; -end; - -%------------------------------------------------------- - -function g=randga(a,b) -flag = 0; -if a>1 -c1 = a-1; c2 = (a-(1/(6*a)))/c1; c3 = 2/c1; c4 = c3+2; c5 = 1/sqrt(a); -U1=-1; -while flag == 0, -if a<=2.5, -U1=rand;U2=rand; -else -while ~(U1>0 & U1<1), -U1=rand;U2=rand; -U1 = U2 + c5*(1-1.86*U1); -end %while -end %if -W = c2*U2/U1; -if c3*U1+W+(1/W)<=c4, -flag = 1; -g = c1*W/b; -elseif c3*log(U1)-log(W)+W<1, -flag = 1; -g = c1*W/b; -else -U1=-1; -end %if -end %while flag -elseif a==1 -g=sum(-(1/b)*log(rand(a,1))); -else -while flag == 0, -U = rand(2,1); -if U(1)>exp(1)/(a+exp(1)), -g = -log(((a+exp(1))*(1-U(1)))/(a*exp(1))); -if U(2)<=g^(a-1), -flag = 1; -end %if -else -g = ((a+exp(1))*U(1)/((exp(1))^(1/a))); -if U(2)<=exp(-g), -flag = 1; -end %if -end %if -end %while flag -g=g/b; -end %if; - - -%------------------------------------------------- - -function svar=randdir(counts,nc) -% Käyttäesim randdir([10;30;60],3) - -svar=zeros(nc,1); -for i=1:nc - svar(i,1)=randga(counts(i,1),1); -end -svar=svar/sum(svar); - -%-------------------------------------------------- - -function waitALittle -A = rand(500); -gammaln(A); - - -%-------------------------------------------------- - -function [cliq_data, sep_data, cliq_counts, component_mat] = ... - createCliqData(data, gene_lengths, noalle, linkage_model, ... - rowsFromInd) -% cliq_data: cell array, each cell corresponds to one gene. Element (i,j) -% in cell k is the code of the allele combination in the j:th clique in -% gene k, for individual i. - -% sep_data: like cliq_data. i:th separator separates cliques i and i+1. - -% cliq_counts: cell array, each cell corresponds to one gene. Each cell is -% a 3-dimensional array, where the element (i,j,k) is the observed count of -% allele combination i, in clique j, in population k. - -% The coding of the allele combinations: If a clique of 3 sites has -% noalle values 3,2,4, then the allele combinations are given numbers in -% lexicographic order: 111, 112, 113, 114, 121, 122, ..., 324. - -%------------------------------------------------------------------------ - -% cliq_data on cell-array, jossa kukin solu vastaa yht?geeni? Alkio (i,j) -% solussa k merkitsee sen alleelikombinaation koodia, joka yksiläll?i -% havaitaan geenin k klikiss?numero j. - -% cliq_counts on cell-array, jossa myäs kukin solu vastaa yht?geeni? -% Kukin solu on kolmiulotteinen taulukko, jonka alkio (i,j,k) on -% populaatiossa k, ko geenin j:nness?klikiss?havaitun alleelikombinaation -% i lukumäär? - -% Alleelikombinaatioiden koodaus: Jos kolmen position klikiss?on (koko -% datassa) noalle:t 3,2,4, (eli ekassa positiossa alleelit 1-3, tokassa 1-2 -% ja kolmannessa alleelit 1-4), niin alleelikombinaatiot numeroidaan -% leksikograafisessa järjestyksess? 111, 112, 113, 114, 121, 122, ..., -% 324. - -%----------------------------------------------------------------------- - -global PARTITION; - -if sum(gene_lengths) ~= size(data,2) - disp('Error 155'); -end -if ~isa(data,'double') - data = double(data); % Required in matlab 6 -end - -ninds = size(data,1); -n_genes = length(gene_lengths); -if strcmp(linkage_model,'codon') - n_cliques = gene_lengths-2; % Number of cliques in each gene -else - n_cliques = gene_lengths-1; % Use linear model -end -max_noalle = zeros(n_genes,1); % Maximum "clique noalle" in each gene. - -component_mat = zeros(n_genes, max(gene_lengths)); -cum_length = cumsum(gene_lengths); -component_mat(1,1:gene_lengths(1))=1:gene_lengths(1); -for i = 2:n_genes - component_mat(i,1:gene_lengths(i)) = cum_length(i-1)+1:cum_length(i); -end - -for i = 1:n_genes - % What is the largest number of different values that are observed for - % some clique in this gene: - number = 0; - if n_cliques(i)<1 - % the gene is shorter than a normal clique. - if gene_lengths(i)==2 - % linkage_model must be 'codon' to end up here.. - positions = component_mat(i, [1 2]); - number = prod(noalle(positions)); - else - % gene_lengths(i) == 1 - positions = component_mat(i,1); - number = noalle(positions); - end - else - for j = 1:n_cliques(i) - if strcmp(linkage_model,'codon'), positions = component_mat(i , j:j+2); - else positions = component_mat(i, j:j+1); - end - - cand = prod(noalle(positions)); - if cand>number - number=cand; - end - end - end - max_noalle(i) = number; -end - -cliq_data = cell(n_genes,1); % An array for each gene. -% (i,j) is the combination which individual i has in clique j (in haploid case..). - -for i = 1:n_genes - if n_cliques(i)<1 - cliq_data{i} = zeros(ninds, 1); - if gene_lengths(i)==2 - % linkage_model must be 'codon' to end up here.. - positions = component_mat(i, [1 2]); - rows = data(:,positions); - observations = (rows(:,1)-1) * noalle(positions(2)) + rows(:,2); - else - positions = component_mat(i,1); - rows = data(:,positions); - observations = rows; - end - cliq_data{i}(:,1) = observations; - else - cliq_data{i} = zeros(ninds, n_cliques(i)); - for j = 1:n_cliques(i) - if strcmp(linkage_model,'codon') - positions = component_mat(i,j:j+2); - rows = data(:, positions); - observations = (rows(:,1)-1) * prod(noalle(positions(2:3))) + ... - (rows(:,2)-1) * noalle(positions(3)) + rows(:,3); - else - positions = component_mat(i,j:j+1); - rows = data(:,positions); - observations = (rows(:,1)-1) * noalle(positions(2)) + rows(:,2); - end - cliq_data{i}(:,j) = observations; - end - end -end - -cliq_counts = cell(n_genes,1); -% (i,j,k) is the count of combination i, in clique j, in population k. - -npops = length(unique(PARTITION)); -for i = 1:n_genes - cliq_counts{i} = zeros(max_noalle(i), max(1,n_cliques(i)), npops); - for j = 1:npops - partition = repmat(PARTITION', [rowsFromInd 1]); - partition = partition(:); % Partition for rows in the data (instead of individuals). - inds_now = find(partition==j); - ninds_now = length(inds_now); - data_now = cliq_data{i}(inds_now,:); - - for k = 1:max(n_cliques(i),1) - apu = zeros(ninds_now, max_noalle(i)); - apu(sub2ind([ninds_now max_noalle(i)],... - (1:ninds_now)', data_now(:,k)))=1; - cliq_counts{i}(:, k, j) = (sum(apu,1))'; - end - end -end - -sep_data = cell(n_genes,1); -n_separators = n_cliques-1; -for i = 1:n_genes - sep_data{i} = zeros(ninds, n_separators(i)); - for j = 1:n_separators(i) - if strcmp(linkage_model, 'codon') - positions = component_mat(i,j+1:j+2); - rows = data(:, positions); - observations = (rows(:,1)-1) * noalle(positions(2)) + rows(:,2); - else - positions = component_mat(i,j+1); - rows = data(:,positions); - observations = rows; - end - sep_data{i}(:,j) = observations; - end -end - - -%------------------------------------------------------ - - -function [cliq_freqs, sep_freqs] = simulateCliqFreqs(cliq_counts, noalle, component_mat, ... - gene_lengths, linkage_model) - -% cliq_freqs: cell-array. Element (i,j,k) in cell m is the frequence of -% combination i, in clique j of the m:th gene, in population k. - -% sep_freqs: like cliq_freqs, but for the separators. - -%------------------------------------------------------------------------ - -% cliq_freqs: cell-array, jossa on vastaavat dimensiot kuin cliq_counts:issa. -% solun m alkio (i,j,k) on geenin m, klikin j, kombinaation i, frekvenssi -% populaatiossa k. - -% sep_freqs: cell-array, kuten cl_freqs, mutta separaattoreille. - -%------------------------------------------------------------------------- - -global PARTITION; - -n_genes = length(cliq_counts); -if strcmp(linkage_model,'codon') - n_cliques = gene_lengths-2; % Number of cliques in each gene -else - n_cliques = gene_lengths-1; % Use linear model -end -npops = length(unique(PARTITION)); - -cliq_freqs = cell(n_genes,1); -sep_freqs = cell(n_genes,1); - -for i=1:n_genes - - cliq_freqs{i} = zeros(size(cliq_counts{i})); - - positions = component_mat(i,1:gene_lengths(i)); - - if n_cliques(i)<1 - % the gene is shorter than a normal clique. - if gene_lengths(i)==2 - % linkage_model must be 'codon' to end up here.. - cliq_noalle = noalle(positions(1)) .* noalle(positions(2)); - sep_noalle = []; - else - % gene_lengths(i) == 1 - cliq_noalle = noalle(positions(1)); - sep_noalle = []; - end - sep_freqs{i} = []; - else - if strcmp(linkage_model, 'codon') - cliq_noalle = noalle(positions(1:end-2)) .* noalle(positions(2:end-1)) .* ... - noalle(positions(3:end)); - sep_noalle = noalle(positions(2:end-2)) .* noalle(positions(3:end-1)); - else - cliq_noalle = noalle(positions(1:end-1)) .* noalle(positions(2:end)); - sep_noalle = noalle(positions(2:end-1)); - end - sep_freqs{i} = zeros(max(sep_noalle), n_cliques(i)-1, npops); - end - - % First clique: - prior = (1 / cliq_noalle(1)) * ones(cliq_noalle(1),1); - counts_now = repmat(prior, [1 1 npops]) + cliq_counts{i}(1:cliq_noalle(1),1,:); - for k = 1:npops - simul = randdir(counts_now(:,1,k), cliq_noalle(1)); - cliq_freqs{i}(1:cliq_noalle(1),1,k) = simul; - end - - for j=2:n_cliques(i) - % Obtain freqs for j-1:th separator by marginalization from j-1:th - % clique, and draw values for the frequencies of the j:th clique: - - aux = cliq_freqs{i}(1:cliq_noalle(j-1), j-1, :); % Freqs of the previous clique - aux = reshape(aux, [sep_noalle(j-1), noalle(positions(j-1)), npops]); - - % Freqs for separator by marginalization from the previous clique: - sep_freqs{i}(1:sep_noalle(j-1),j-1,:) = sum(aux,2); - - prior = (1 / cliq_noalle(j)) * ones(cliq_noalle(j),1); - counts_now = repmat(prior, [1 1 npops]) + cliq_counts{i}(1:cliq_noalle(j),j,:); - for k = 1:npops - % Simulate conditional frequencies: - for m = 1:sep_noalle(j-1) - if strcmp(linkage_model, 'codon') - values = (m-1)*noalle(positions(j+2))+1:m*noalle(positions(j+2)); - else - values = (m-1)*noalle(positions(j+1))+1:m*noalle(positions(j+1)); - end - simul = randdir(counts_now(values,1,k), length(values)); - cliq_freqs{i}(values,j,k) = simul * sep_freqs{i}(m,j-1,k); % MIETI TARKKAAN! - end - end - end -end - - -%-------------------------------------------------------------------- - - -function [ownCliqFreqs, ownSepFreqs] = computePersonalCliqueFreqs(... - ind, cl_data, cl_freqs, sep_data, sep_freqs, rowsFromInd, ... - gene_lengths, linkage_model) - -% ownCliqFreqs is (npops * (n_cliques*rowsFromInd)) table, where each column -% contains the frequencies of the corresponding clique_combination, in -% different populations. - -% ownSepFreqs is (npops * (n_seps*rowsFromInd)) table, like ownCliqFreqs. - -n_genes = length(gene_lengths); -if strcmp(linkage_model,'codon') - n_cliques = gene_lengths-2; - n_cliques = max([n_cliques ones(n_genes,1)], [], 2); % for genes shorter than clique -else - n_cliques = gene_lengths-1; % Use linear model. - n_cliques = max([n_cliques ones(n_genes,1)], [], 2); -end - -total_n_cliques = sum(n_cliques); -npops = size(cl_freqs{1},3); - -ownCliqFreqsXX = zeros(1, total_n_cliques*rowsFromInd, npops); - -pointer = 1; -for i = 1:n_genes - ind_data = cl_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd , :); - for j = 1:n_cliques(i) % MUUTA! - for k = 1:rowsFromInd - code = ind_data(k,j); - ownCliqFreqsXX(1,pointer,:) = cl_freqs{i}(code,j,:); - pointer = pointer+1; - end - end -end - -ownCliqFreqs = (squeeze(ownCliqFreqsXX))'; - -n_separators = n_cliques-1; -total_n_separators = sum(n_separators); - -ownSepFreqsXX = zeros(1, total_n_separators*rowsFromInd, npops); - -pointer = 1; -for i = 1:n_genes - ind_data = sep_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd , :); - for j = 1:n_separators(i) - for k = 1:rowsFromInd - code = ind_data(k,j); - ownSepFreqsXX(1,pointer,:) = sep_freqs{i}(code,j,:); - pointer = pointer+1; - end - end -end - -if (total_n_separators*rowsFromInd)==1 - ownSepFreqs = (squeeze(ownSepFreqsXX)); -else - ownSepFreqs = (squeeze(ownSepFreqsXX))'; -end - - -%------------------------------------------------------------------------- - - -function exp_cliq_freqs = computeExpectedFreqs(cliq_counts, ... - noalle, component_mat, gene_lengths, linkage_model) - -% Returns the expected values for the clique and separator frequencies in -% different populations. - -% exp_cliq_freqs: cell-array. Element (i,j,k) in cell m is the expected -% frequence of combination i, in clique j of the m:th gene, in -% population k. - -n_genes = length(gene_lengths); - -if strcmp(linkage_model, 'codon') - n_cliques = gene_lengths-2; -else - n_cliques = gene_lengths-1; % Linear model -end - -npops = size(cliq_counts{1},3); -exp_cliq_freqs = cell(n_genes,1); - -for i = 1:n_genes - - exp_cliq_freqs{i} = zeros(size(cliq_counts{i})); - positions = component_mat(i,1:gene_lengths(i)); - - if n_cliques(i)<1 - % the gene is shorter than a normal clique. - if gene_lengths(i)==2 - % linkage_model must be 'codon' to end up here.. - cliq_noalle = noalle(positions(1)) .* noalle(positions(2)); - else - % gene_lengths(i) == 1 - cliq_noalle = noalle(positions(1)); - end - else - if strcmp(linkage_model, 'codon') - cliq_noalle = noalle(positions(1:end-2)) .* noalle(positions(2:end-1)) .* ... - noalle(positions(3:end)); - else - cliq_noalle = noalle(positions(1:end-1)) .* noalle(positions(2:end)); - end - end - - for j = 1:max(1, n_cliques(i)) - prior = (1 / cliq_noalle(j)) * ones(cliq_noalle(j),1); - counts_now = repmat(prior, [1 1 npops]) + cliq_counts{i}(1:cliq_noalle(j),j,:); - exp_cliq_freqs{i}(1:cliq_noalle(j),j,:) = ... - counts_now ./ repmat(sum(counts_now,1), [cliq_noalle(j) 1 1]); - end -end - - -%---------------------------------------------------------- - - -function [ref_cliq_data, ref_sep_data] = ... - simulateLinkageIndividuals(n, rowsFromInd, exp_cliq_freqs, gene_lengths, ... - noalle, component_mat, linkage_model) - -% Simulates n individuals from each population using expected frequencies -% for cliques and separators. - -% ref_cliq_data: cell array, each cell corresponds to one gene. Elements -% ((i-1)*rowsFromInd+1:i*rowsFromInd, j) in cell k are the codes of the allele -% combinations in the j:th clique in gene k, for individual i. - -% ref_sep_data: like ref_cliq_data. i:th separator separates cliques i and i+1. - -n_genes = length(gene_lengths); -if strcmp(linkage_model,'codon') - n_cliques = gene_lengths-2; -else - n_cliques = gene_lengths-1; % Linear model -end -npops = size(exp_cliq_freqs{1},3); -ninds = n*npops; - -ref_cliq_data = cell(n_genes,1); -ref_sep_data = cell(n_genes,1); - -for i = 1:n_genes - ref_cliq_data{i} = zeros(ninds*rowsFromInd, max(n_cliques(i),1)); % Added: rowsFromInd - - positions = component_mat(i,1:gene_lengths(i)); - - if strcmp(linkage_model,'codon') - sep_noalle = noalle(positions(2:end-2)) .* noalle(positions(3:end-1)); - else - sep_noalle = noalle(positions(2:end-1)); - end - ref_sep_data{i} = zeros(ninds*rowsFromInd, n_cliques(i)-1); % Added: rowsFromInd - - for ind = 1:ninds - pop = ceil(ind/n); - - % First clique: - freqs = exp_cliq_freqs{i}(:,1,pop); - freqs = repmat(freqs, [1 rowsFromInd]); - codes = simulateCodes(freqs); - ref_cliq_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd, 1) = codes; - - for j = 2:n_cliques(i) - previous_cliq = ref_cliq_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd, j-1); - - % Value for j-1:th separator: - sep_codes = rem(previous_cliq, sep_noalle(j-1)); - sep_codes(find(sep_codes==0)) = sep_noalle(j-1); - ref_sep_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd, j-1) = sep_codes; - - % Value for j:th clique: - if strcmp(linkage_model,'codon') - freqs = zeros(noalle(positions(j+2)),rowsFromInd); - values = zeros(noalle(positions(j+2)),rowsFromInd); - for k = 1:rowsFromInd - values(:,k) = ((sep_codes(k)-1)*noalle(positions(j+2))+1 : sep_codes(k)*noalle(positions(j+2)))'; - freqs(:,k) = exp_cliq_freqs{i}(values(:,k), j, pop); - end - freqs = freqs ./ repmat(sum(freqs,1), [noalle(positions(j+2)) 1]); - else - freqs = zeros(noalle(positions(j+1)),rowsFromInd); - values = zeros(noalle(positions(j+1)),rowsFromInd); - for k = 1:rowsFromInd - values(:,k) = ((sep_codes(k)-1)*noalle(positions(j+1))+1 : sep_codes(k)*noalle(positions(j+1)))'; - freqs(:,k) = exp_cliq_freqs{i}(values(:,k), j, pop); - end - freqs = freqs ./ repmat(sum(freqs,1), [noalle(positions(j+1)) 1]); - end - codes = simulateCodes(freqs); - codes = values(sub2ind(size(values),codes,(1:rowsFromInd)')); - ref_cliq_data{i}((ind-1)*rowsFromInd+1:ind*rowsFromInd, j) = codes; - end - end -end - -function codes = simulateCodes(freqs) -% Freqs is a table where each column is a distribution. The -% number of columns in freqs must be equal to rowsFromInd. -% A value is drawn from each distribution in different columns. The values -% are saved in codes, which is (rowsFromInd*1) table. - -[nrows, rowsFromInd] = size(freqs); -codes = nrows+1-sum(cumsum(freqs)>repmat(rand(1,rowsFromInd),[nrows 1]),1); -codes = codes'; \ No newline at end of file diff --git a/matlab/parallel/linkage_parallel.m b/matlab/parallel/linkage_parallel.m deleted file mode 100644 index f8d196d..0000000 --- a/matlab/parallel/linkage_parallel.m +++ /dev/null @@ -1,1633 +0,0 @@ -function linkage_parallel(options) -% LINKAGE_PARALLEL is the command line version of the baps partition with -% linkage models. -% Input: options is a struct generated by parallel.m - -%-------------------------------------------------------------------------- -%- Syntax check out -%-------------------------------------------------------------------------- -outp = [options.outputMat '.txt']; -inp = options.dataFile; - -if strcmp(options.fixedK, 'yes') - fixedK = 1; -else - fixedK = 0; -end - -switch options.mixtureType - case 'linear_mix' - linkage_model = 'linear'; - case 'codon_mix' - linkage_model = 'codon'; - case '' - error('*** ERROR: ''modeltype'' not specified.'); - otherwise - error('*** ERROR: unknown modeltype.'); -end - -%-------------------------------------------------------------------------- -%- Get data file location -%-------------------------------------------------------------------------- -switch options.dataType - case {'numeric', 'sequence'} - if isempty(options.linkageMap) - error('*** ERROR: ''linkagemap'' not specified.'); - end - if strcmp(options.dataType, 'numeric') - try - data = load(options.dataFile); - catch - disp('*** ERROR: Incorrect BAPS numerical data.'); - return - end - else - %[data, filename] = readbaps(inp); - [data, filename] = silentReadBaps(inp); % modified by Lu Cheng, 29.06.2010 - if isempty(data) - return - end - end - - %------------------------------------------------------------------ - %- Get name and index file location - %------------------------------------------------------------------ - if ~isempty(options.nameFile) & ~isempty(options.indexFile) - popnames = initPopNames(options.nameFile{1}, options.indexFile{1}); - else - popnames(:,1) = num2cell(unique(data(:,end))); - popnames(:,2) = popnames(:,1); - ninds = max(data(:,end)); - popnames = fixPopnames(popnames, ninds); - end - - if isempty(data) - error('*** ERROR: Failed in loading the data'); - end - data = uint16(data); - % Check that the data is rational: - isRational = isTheLoadedNewDataRational(data); - if isRational == 0 - error('*** ERROR: failed in loading the data.'); - else - display(['# of haplotypes: ', num2str(size(data,1))]); - display(['# of loci: ', num2str(size(data,2)-1)]); - % display('Finished.'); - end; - - % Check if the data is discrete or continuous - if any(any(fix(data)~=data)) - disp('Found decimal numbers. Continuous model will be used.'); - error('** ERROR: continuous model is under construction.'); - end - - display('---------------------------------------------------'); - % Load the linkage map - display(['Reading linkage map from: ',options.linkageMap,'...']); - try - component_mat = load(options.linkageMap); - catch - disp('*** ERROR: Incorrect linkage map.'); - return; - end - % Check if the linkage map matches the data - if (size(data,2)-1) ~= max(component_mat(:)) - disp('*** ERROR: Incosistent linkage map.'); - return; - else - display(['# of linkage groups: ', num2str(size(component_mat,1))]); - end; - - display('---------------------------------------------------'); - fprintf(1,'Preprocessing the data ...\n'); - % Make the missing data complete - data = makecomplete(data); - if isempty(data) - display('*** ERROR: Failed in completing the missing data'); - return; - end - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(data); - - % Distance between individuals is computed as if the loci are - % independent. - fprintf(1, 'Caculating the distances ...'); - [Z,dist] = newGetDistances(data,rowsFromInd); - fprintf(1,'Finished.\n'); - - c.data = uint16(data); c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.popnames = popnames; - c.Z = Z; - - - c.component_mat = component_mat; - - % Data transformation - % display('---------------------------------------------------'); - fprintf(1,'Transforming the data ...'); - index = data(:,end); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - clear data_clique; - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - clear data_separator; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - fprintf(1,'Finished.\n'); - clear c; - case 'matlab' - struct_array = load(options.dataFile); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'dist') - display('*** ERROR: Incorrect file format'); - return - end - clear struct_array; - elseif isfield(struct_array,'dist') %Mideva versio - c = struct_array; - clear struct_array; - else - display('*** ERROR: Incorrect file format'); - return; - end - % The following are the same as in the basic clustering - data = c.data; popnames = c.popnames; Z = c.Z; - noalle = c.noalle; adjprior = c.adjprior; - rowsFromInd = c.rowsFromInd; alleleCodes = c.alleleCodes; - dist = c.dist; priorTerm = c.priorTerm; - - if ~isfield(c,'component_mat') - display('*** ERROR: Incorrect linkage data'); - return - end - - % This is new - component_mat = c.component_mat; - data = uint16(data); - - display(['# of haplotypes: ', num2str(size(data,1))]); - display(['# of loci: ', num2str(size(data,2)-1)]); - display(['# of linkage groups: ', num2str(size(component_mat,1))]); - - if strcmpi(options.mixtureType, 'codon_mix') - linkage_model = 'codon'; - else - linkage_model = 'linear'; - end - if ~isfield(c, 'mixtureType') - clear c; % save the memory usage - % Data transformation - fprintf(1,'Transforming the data ...'); - index = data(:,end); - % [data_clique, data_separator, noalle_clique, noalle_separator] = ... - % transform2(data, component_mat, linkage_model); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - clear data_clique; - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - clear data_separator; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - fprintf(1,'Finished.\n'); - else - if ~strcmpi(c.mixtureType, options.mixtureType) - error('*** ERROR: incorrect mixture type'); - end - %Linkage model is specified in the preprocessed file. - counts_cq = c.counts_cq; adjprior_cq = c.adjprior_cq; - counts_sp = c.counts_sp; adjprior_sp = c.adjprior_sp; - linkage_model = c.linkage_model; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - clear c; - display(['linkage model: ', linkage_model]); - end - - - case 'excel' - display('---------------------------------------------------'); - display(['Reading sequence profile from: ', options.dataFile,'...']); - [data, component_mat, popnames] = processxls(options.dataFile); - if isempty(data) - display('*** ERROR: Failed in loading the data'); - return; - end - - display('---------------------------------------------------'); - fprintf(1,'Preprocessing the data ...\n'); - - % Make the missing data complete - % missing values are denoted as -999 - data = uint16(data); - % data = uint8(data); - data = makecomplete(data); - if isempty(data) - display('*** ERROR: Failed in completing the missing data'); - return; - end - - isRational = isTheLoadedNewDataRational(data); - if isRational == 0 - return; - else - display(['# of haplotypes: ', num2str(size(data,1))]); - display(['# of loci: ', num2str(size(data,2)-1)]); - end - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(data); - - - % Distance between individuals is computed as if the loci are - % independent. - [Z,dist] = newGetDistances(data,rowsFromInd); - fprintf(1,'Finished.\n'); - ninds = max(data(:,end)); - popnames = fixPopnames(popnames, ninds); - - c.data = uint16(data); c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.popnames = popnames; c.component_mat = component_mat; - c.dist = dist; c.Z = Z; - - % Data transformation - display('---------------------------------------------------'); - fprintf(1,'Transforming the data ...'); - index = data(:,end); - [data_clique, data_separator, noalle_clique, noalle_separator] = ... - transform4(data, component_mat, linkage_model); - data_clique = [data_clique index]; - data_separator = [data_separator index]; - - [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... - = allfreqsnew2(data_clique, double(noalle_clique)); - clear data_clique; - [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... - = allfreqsnew2(data_separator, double(noalle_separator)); - clear data_separator; - counts_cq = uint8(counts_cq); - counts_sp = uint8(counts_sp); - fprintf(1,'Finished.\n'); - clear c; - otherwise - error('*** ERROR: data type is not specified or unknown.'); -end - - -global POP_LOGML; global PARTITION; -global CQ_COUNTS; global SP_COUNTS; %These counts are for populations -global CQ_SUMCOUNTS; global SP_SUMCOUNTS; %not for individuals -clearGlobalVars; -npopstext = []; -npopstextExtra = options.initialK; -if length(npopstextExtra)>=255 - npopstextExtra = npopstextExtra(1:255); - npopstext = [npopstext ' ' npopstextExtra]; - teksti = 'The input field length limit (255 characters) was reached. Input more values: '; -else - % ----------------------------------------------------- - % Set the limit of the input value. - % Modified by Jing Tang, 30.12.2005 - if max(npopstextExtra) > size(data,1) - error('Values larger than the sample size are not accepted. '); - else - npopstext = [npopstext ' ' num2str(npopstextExtra)]; - end -end - -clear ready; clear teksti; -if isempty(npopstext) || length(npopstext)==1 - return -else - npopsTable = str2num(npopstext); - % ykkoset = find(npopsTable==1); - npopsTable(logical(npopsTable==1)) = []; - if isempty(npopsTable) - return - end - % clear ykkoset; -end - -c.noalle = noalle; -c.adjprior = adjprior; %priorTerm = c.priorTerm; -c.rowsFromInd = rowsFromInd; -c.counts_cq = counts_cq; c.adjprior_cq = adjprior_cq; -c.counts_sp = counts_sp; c.adjprior_sp = adjprior_sp; -c.dist = dist; c.Z = Z; - -if fixedK - % Only the first value of npopsTaulu is used - npops = npopsTable(1); - nruns = length(npopsTable); - - % [logml, npops, partitionSummary]=linkageMix_fixK(c,npops,nruns,1); - display('*** ERROR: fixed K for linkage module is not available.'); - return -else - [logml, npops, partitionSummary]=linkageMix(c,npopsTable); -end - -if logml==1 - return -end - -data = noIndex(data,noalle); - -%This is basically the same as in BAPS 3. -changesInLogml = writeMixtureInfo(logml, counts_cq, counts_sp, adjprior_cq, ... - adjprior_sp, outp, inp, partitionSummary, popnames, linkage_model, ... - fixedK); - -if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',outp) - delete('baps4_output.baps') -end - -[sumcounts, counts] = indLociCounts(PARTITION, data, npops, noalle); -% NB! Index column is removed in data matrix. -c.PARTITION = PARTITION; c.CQ_COUNTS = CQ_COUNTS; c.CQ_SUMCOUNTS = CQ_SUMCOUNTS; -c.SP_COUNTS = SP_COUNTS; c.SP_SUMCOUNTS = SP_SUMCOUNTS; -c.alleleCodes = alleleCodes; c.adjprior_cq = adjprior_cq; c.adjprior_sp = adjprior_sp; c.popnames = popnames; -c.rowsFromInd = rowsFromInd; c.data = uint16(data); c.npops = npops; -% c.nalleles_cq = nalleles_cq; c.nalleles_sp = nalleles_sp; -if strcmp(linkage_model,'linear') % Added on 03.11.06 - c.mixtureType = 'linear_mix'; -elseif strcmp(linkage_model,'codon') - c.mixtureType = 'codon_mix'; -end -c.changesInLogml = changesInLogml; % this variable stores the change of likelihoods. -% [ncluster ninds] -% -Added on 02.11.2006 - -% The next ones are for the admixture input -c.COUNTS = counts; c.SUMCOUNTS = sumcounts; -c.adjprior = adjprior; c.rowsFromInd = rowsFromInd; c.noalle = noalle; c.alleleCodes = alleleCodes; - -% The two variables below are for the new linkage admixture model -c.linkage_model = linkage_model; -c.gene_lengths = calcGeneLengths(component_mat); - -% The logml is saved for parallel computing -c.logml = logml; - -fprintf(1,'Saving the result...') -try -% save(options.outputMat, 'c'); - save(options.outputMat, 'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'Finished.\n'); -catch - display('*** ERROR in saving the result.'); -end - - - - -%-------------------------------------------------------------------------- -% The next three functions are for computing the initial partition -% according to the distance between the individuals - -function initial_partition=admixture_initialization(nclusters,Z) -T=cluster_own(Z,nclusters); -initial_partition=T; - -%-------------------------------------------------------------------------- -function T = cluster_own(Z,nclust) -% true=logical(1); -% false=logical(0); - -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); -% maximum number of clusters based on inconsistency -if m <= maxclust - T = (1:m)'; -elseif maxclust==1 - T = ones(m,1); -else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end -end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - -%-------------------------------------------------------------------------- - -function Z = computeLinkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 || m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -% monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges(ind, adjprior_cq, adjprior_sp, ... - indCqCounts, indSpCounts) -% Computes changes in log-marginal likelihood if individual ind is -% moved to another population -% -% Input: -% ind - the individual to be moved -% adjprior_cq & _sp - adjpriors for cliques and separators -% indCqCounts, indSpCounts - counts for individual ind -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% ind is move to population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) + repmat(sumSp,[npops-1 1]); - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) - repmat(sumSp,[npops-1 1]); -% a = repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - -%------------------------------------------------------------------------------------ - -function changes = computeChanges2(i1, adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if population i1 is combined -% with another population -% -% Input: -% i1 - the population to be combined -% adjprior_cq & _sp - adjpriors for cliques and separators -% -% Output: -% changes - table of size 1*npops. changes(i) = difference in logml if -% i1 is combined with population i. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); - -i1_logml = POP_LOGML(i1); -indCqCounts = CQ_COUNTS(:,:,i1); -indSpCounts = SP_COUNTS(:,:,i1); -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -new_i1_logml = 0; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); -% a = repmat(sumSp,[npops-1 1]); -% if ~any(sumSp) -% a(:,[1:size(a,2)])=[]; -% end -% SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ a ; - - -new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - -changes(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - - - -%------------------------------------------------------------------------------------ - - -function changes = computeChanges3(T2, inds2, i1, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes changes in log marginal likelihood if subpopulation of i2 is -% moved to another population -% -% Input: -% T2 - partition of inds2 to subpopulations -% inds2 - individuals in population i1 -% i2 -% counts_cq, counts_sp - counts for individuals -% -% Output: -% changes - table of size length(unique(T2))*npops. -% changes(i,j) = difference in logml if subpopulation inds2(find(T2==i)) of -% i2 is moved to population j - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; -npops = size(CQ_COUNTS,3); -npops2 = length(unique(T2)); -changes = zeros(npops2,npops); - -%cq_counts = CQ_COUNTS; -%sp_counts = SP_COUNTS; -%cq_sumcounts = CQ_SUMCOUNTS; -%sp_sumcounts = SP_SUMCOUNTS; - - -i1_logml = POP_LOGML(i1); - -for pop2 = 1:npops2 - % inds = inds2(find(T2==pop2)); - inds = inds2(logical(T2==pop2)); - ninds = length(inds); - if ninds>0 - indCqCounts = uint16(sum(counts_cq(:,:,inds),3)); - indSpCounts = uint16(sum(counts_sp(:,:,inds),3)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - - new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; - CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; - SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); - - new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp)'; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); - CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); - SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); - - changes(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - -%-------------------------------------------------------------------------- - -function changes = computeChanges5(inds, i1, i2, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Computes change in logml if individual of inds is moved between -% populations i1 and i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global POP_LOGML; global PARTITION; - -ninds = length(inds); -changes = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %from - pop2 = i2; %to - else - pop1 = i2; - pop2 = i1; - end - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)-indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)-sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)-indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:) - sumSp; - - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)+indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)+sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)+indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:) + sumSp; - - new_logmls = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - changes(i) = sum(new_logmls); - - CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)+indCqCounts; - CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)+sumCq; - SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)+indSpCounts; - SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:)+sumSp; - CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)-indCqCounts; - CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)-sumCq; - SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)-indSpCounts; - SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:)-sumSp; -end - -changes = changes - i1_logml - i2_logml; - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individual ind is moved to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp) -% Updates global variables when all individuals from population i1 are moved -% to population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; - -% inds = find(PARTITION==i1); -% PARTITION(inds) = i2; -PARTITION(logical(PARTITION==i1)) = i2; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+CQ_COUNTS(:,:,i1); -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+CQ_SUMCOUNTS(i1,:); -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+SP_COUNTS(:,:,i1); -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+SP_SUMCOUNTS(i1,:); - -CQ_COUNTS(:,:,i1) = 0; -CQ_SUMCOUNTS(i1,:) = 0; -SP_COUNTS(:,:,i1) = 0; -SP_SUMCOUNTS(i1,:) = 0; - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior_cq, adjprior_sp); - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, i2, indCqCounts, indSpCounts, ... - adjprior_cq, adjprior_sp) -% Updates global variables when individuals muuttuvat are moved to -% population i2 - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; -global PARTITION; global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -sumCq = uint16(sum(indCqCounts,1)); -sumSp = uint16(sum(indSpCounts,1)); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; -CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; -SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; -CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; -SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; - - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... - adjprior_cq, adjprior_sp) -% Returns individuals inds in order according to the change in the logml if -% they are moved out of the population pop - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - sumCq = uint16(sum(indCqCounts,1)); - sumSp = uint16(sum(indSpCounts,1)); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)-indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)-sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)-indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)-sumSp; - - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior_cq, adjprior_sp); - - CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)+indCqCounts; - CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)+sumCq; - SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)+indSpCounts; - SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)+sumSp; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - - -%------------------------------------------------------------------------- - -function [Z, dist] = newGetDistances(data, rowsFromInd) - -ninds = max(data(:,end)); -nloci = size(data,2)-1; -riviLkm = nchoosek(double(ninds),2); - -% empties = find(data<0); -% data(empties)=0; -data(logical(data<0)) = 0; -data = uint16(data); - -pariTaulu = zeros(riviLkm,2); -aPointer=1; - -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+double(ninds-1-a),1) = ones(ninds-a,1,'uint16')*a; - pariTaulu(aPointer:aPointer+double(ninds-1-a),2) = uint16((a+1:ninds)'); - aPointer = aPointer+double(ninds-a); -end - -eka = pariTaulu(:,ones(1,rowsFromInd)); -eka = eka * rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -eka = eka - miinus; - -toka = pariTaulu(:,ones(1,rowsFromInd)*2); -toka = toka * rowsFromInd; -toka = toka - miinus; - -eka = uint16(eka); -toka = uint16(toka); - -clear pariTaulu; clear miinus; - -summa = uint16(zeros(riviLkm,1)); -vertailuja = uint16(zeros(riviLkm,1)); - -x = zeros(size(eka)); x = uint16(x); -y = zeros(size(toka)); y = uint16(y); -% fprintf(1,'%%10'); -for j=1:nloci; - - for k=1:rowsFromInd - x(:,k) = data(eka(:,k),j); - y(:,k) = data(toka(:,k),j); - end - - for a=1:rowsFromInd - for b=1:rowsFromInd - vertailutNyt = uint16(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa + uint16(lisays); - end - end - % fprintf(1,'\b\b'); - % fprintf(1,'%d',floor(10+80*j/nloci)); -end - -clear x; clear y; clear vertailutNyt; -dist = zeros(length(vertailuja),1); -% nollat = find(vertailuja==0); -% dist(nollat) = 1; -dist(logical(vertailuja==0)) = 1; -muut = find(vertailuja>0); -dist(muut) = double(summa(muut))./double(vertailuja(muut)); -clear summa; clear vertailuja; - -Z = computeLinkage(dist'); -% fprintf(1,'\b\b'); -% fprintf(1,'%d\n',100); -%-------------------------------------------------------------------------- - -function clearGlobalVars - -global CQ_COUNTS; CQ_COUNTS = []; -global CQ_SUMCOUNTS; CQ_SUMCOUNTS = []; -global SP_COUNTS; SP_COUNTS = []; -global SP_SUMCOUNTS; SP_SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - -%-------------------------------------------------------------------------- - -function npops = removeEmptyPops -% Removes empty pops from all global COUNTS variables. -% Updates PARTITION and npops - -global CQ_COUNTS; -global CQ_SUMCOUNTS; -global SP_COUNTS; -global SP_SUMCOUNTS; -global PARTITION; - -notEmpty = find(any(CQ_SUMCOUNTS,2)); -CQ_COUNTS = CQ_COUNTS(:,:,notEmpty); -CQ_SUMCOUNTS = CQ_SUMCOUNTS(notEmpty,:); -SP_COUNTS = SP_COUNTS(:,:,notEmpty); -SP_SUMCOUNTS = SP_SUMCOUNTS(notEmpty,:); - -for n=1:length(notEmpty) -% apu = find(PARTITION==notEmpty(n)); -% PARTITION(apu)=n; -PARTITION(logical(PARTITION==notEmpty(n))) = n; -end -npops = length(notEmpty); - -%-------------------------------------------------------------------------- - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. -global PARTITION; -apu = isempty(find(abs(partitionSummary(:,2)-logml)<1e-5,1)); -if apu - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- - -function [counts, sumcounts] = initialCounts(ind_counts) - -global PARTITION; - -pops = unique(PARTITION); -npops = max(pops); - -counts = zeros(size(ind_counts,1), size(ind_counts,2), npops,'uint16'); -sumcounts = zeros(npops, size(ind_counts,2),'uint16'); - -for i = 1:npops - inds = find(PARTITION == i); - counts(:,:,i) = sum(ind_counts(:,:,inds), 3); - sumcounts(i,:) = sum(counts(:,:,i),1); -end - -%-------------------------------------------------------------------------- - -function logml = computeLogml(adjprior_cq, adjprior_sp) - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -npops = size(CQ_COUNTS, 3); - -cq_logml = sum(sum(sum(gammaln(cq_counts+repmat(adjprior_cq,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_cq))) - ... - sum(sum(gammaln(1+cq_sumcounts))); - -sp_logml = sum(sum(sum(gammaln(sp_counts+repmat(adjprior_sp,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior_sp))) - ... - sum(sum(gammaln(1+sp_sumcounts))); - -logml = cq_logml - sp_logml; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%-------------------------------------------------------------------------- - -function popLogml = computePopulationLogml(pops, adjprior_cq, adjprior_sp) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global CQ_COUNTS; global CQ_SUMCOUNTS; -global SP_COUNTS; global SP_SUMCOUNTS; - -cq_counts = double(CQ_COUNTS); -cq_sumcounts = double(CQ_SUMCOUNTS); -sp_counts = double(SP_COUNTS); -sp_sumcounts = double(SP_SUMCOUNTS); - -nall_cq = size(CQ_COUNTS,1); -nall_sp = size(SP_COUNTS, 1); -ncliq = size(CQ_COUNTS,2); -nsep = size(SP_COUNTS, 2); - -z = length(pops); - -popLogml_cq = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_cq,[1 1 z]) + cq_counts(:,:,pops)) ... - ,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_cq))); - -popLogml_sp = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior_sp,[1 1 z]) + sp_counts(:,:,pops)) ... - ,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ... - sum(sum(gammaln(adjprior_sp))); - -popLogml = popLogml_cq - popLogml_sp; -clear cq_counts cq_sumcounts sp_counts sp_sumcounts; - -%------------------------------------------------------------------- - - -function changesInLogml = writeMixtureInfo(logml, counts_cq, counts_sp, adjprior_cq, ... - adjprior_sp, outPutFile, inputFile, partitionSummary, popnames, linkage_model, ... - fixedK) - -global PARTITION; -global CQ_COUNTS; -global LOGDIFF; - -%global CQ_SUMCOUNTS; -%global SP_COUNTS; global SP_SUMCOUNTS; -ninds = length(PARTITION); -npops = size(CQ_COUNTS,3); -names = (size(popnames,1) == ninds); %Tarkistetaan ett?nimet viittaavat yksilöihin - -if length(outPutFile)>0 - fid = fopen(outPutFile,'w'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Data file/ Linkage map: ' inputFile]); -disp(['Model: ' linkage_model]); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp('Best Partition: '); -if (fid ~= -1) - fprintf(fid,'%s \n','Best Partition: '); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',new_line); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',text); - fprintf(fid,'\n'); - end - end; -end - -if npops == 1 - changesInLogml = []; -else - disp(' '); - disp(' '); - disp('Changes in log(marginal likelihood) if indvidual i is moved to group j:'); - if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to group j:'); fprintf(fid, '\n'); - end - - if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku ' ind' blanks(6+erotus)]; - else - ekarivi = ' ind '; - end - - for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; - end - disp(ekarivi); - if (fid ~= -1) - fprintf(fid, '%s \n', ekarivi); fprintf(fid, '\n'); - end - - %ninds = size(data,1)/rowsFromInd; - changesInLogml = LOGDIFF'; - for ind = 1:ninds - indCqCounts = uint16(counts_cq(:,:,ind)); - indSpCounts = uint16(counts_sp(:,:,ind)); - %changesInLogml(:,ind) = computeChanges(ind, adjprior_cq, ... - % adjprior_sp, indCqCounts, indSpCounts); - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(changesInLogml(j,ind)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', rivi); fprintf(fid, '\n'); - end - end - - - % % KL-divergence has to be calculated otherwise... - % % { - % disp(' '); disp(' '); - % disp('KL-divergence matrix:'); - % - % if (fid ~= -1) - % fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - % fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - % fprintf(fid, '%s \n', ['KL-divergence matrix:']); fprintf(fid, '\n'); - % end - % - % maxnoalle = size(COUNTS,1); - % nloci = size(COUNTS,2); - % d = zeros(maxnoalle, nloci, npops); - % prior = adjprior; - % prior(find(prior==1))=0; - % nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. - % prior(1,nollia)=1; - % for pop1 = 1:npops - % d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - % dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); - % end - % ekarivi = blanks(7); - % for pop = 1:npops - % ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; - % end - % disp(ekarivi); - % if (fid ~= -1) - % fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); - % end - % - % for pop1 = 1:npops - % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - % for pop2 = 1:pop1-1 - % dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - % div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - % div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - % div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - % end - % disp(rivi); - % if (fid ~= -1) - % fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - % end - % end - % % } -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n'); -end - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -% partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', line); fprintf(fid, '\n'); - end -end - -if ~fixedK - - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'Probabilities for number of clusters'); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - % npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - npopstn = sum(exp(partitionSummary(logical(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', line); fprintf(fid, '\n'); - end - end - end -end - - -if (fid ~= -1) - fclose(fid); -else - diary off -end - -%-------------------------------------------------------------- -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -% newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - -%------------------------------------------------------------------------- - -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). - -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -% dataApu = []; -% nollat = []; -% isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - %alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(logical(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - % data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - data(logical(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); % Tält?yksilölt?puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - -function popnames = fixPopnames(popnames, ninds) - -if length(popnames) == ninds - for i=1:ninds - if isnumeric(popnames{i}) - popnames{i} = num2str(popnames{i}); - % popnames(i) = num2str(popnames{i}); - end - popnames{i} = cellstr(popnames{i}); - % popnames(i) = cellstr(popnames{i}); - end -end - -%-------------------------------------------------------------------------- -function isRational = isTheLoadedNewDataRational(data) -% The last column of the data must include numbers 1-npops -% If so, isRational = 1, otherwise isRational = 0. -% The row numbers must be larger than 1. -if size(data,1) == 1 - isRational = 0; - display('*** ERROR: Sample size must be larger than one'); - return; -end -last_column = data(:,end); -last_column = sort(last_column); -current = 1; -if last_column(1) ~= current - isRational = 0; - display('*** ERROR: Wrong Indexes in the data'); - return; -end; -lengthcol = length(last_column); -for n = 2:lengthcol - if ~(last_column(n) == current || last_column(n) == current + 1) - %Some population is missing from the last column - isRational = 0; - display('*** ERROR: Missing indexes in the data'); - return; - end; - current = last_column(n); -end; -isRational = 1; - - -% %------------------------------------------------------------------------- -% function isRational = isTheLoadedNewLinkageRational(linkage_data) -% % Each positive element must be unique. -% % If so, isRational = 1, otherwise isRational = 0; -% nonzero = find(linkage_data~=0); -% dif = diff(linkage_data(nonzero)); -% if ~all(dif) -% isRational = 0; return; -% end; -% isRational = 1; - -%-------------------------------------------------------------------------- - -function [sumcounts, counts] = ... - indLociCounts(partition, data, npops, noalle) - -nloci=size(data,2)-1; -% ninds = size(data,1); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - % havainnotLokuksessa = find(partition==i & data(:,j)>=0); - havainnotLokuksessa = find(ismember(data(:,end),find(partition==i))); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile, indexFile) -%Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto -% eivät olleet yht?pitki? - -popnames = []; -indices = load(indexFile); - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) && ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -if length(names) ~= length(indices) - disp('The number of population names must be equal to the number of '); - disp('entries in the file specifying indices of the first individuals of '); - disp('each population.'); - return; -end - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = indices(i); -end - - diff --git a/matlab/parallel/parallel.m b/matlab/parallel/parallel.m deleted file mode 100644 index a3379db..0000000 --- a/matlab/parallel/parallel.m +++ /dev/null @@ -1,297 +0,0 @@ -function parallel(varargin) -% PARALLEL the main function of doing parallel classification/admixture. -% input: order of input is arbitrary. The first option is the default. -% 'datafile' - the full path of the data. -% 'mixturetype', ['mix';'codon_mix';'linear_mix';'spatial';'ad_mix']; -% - the classification/admixture model. -% 'initialk', a row vector of positive integers; -% - the initial number of clusters. -% 'fixedk',['no';'yes']; -% - whether the number of clusters is fixed during the computation. -% 'outputmat' - the full path of the output .mat file. -% 'datatype', ['numeric';'sequence';'matlab';'excel';'genepop'] -% - the data format; -% 'namefile' - the full path of the population name file. -% 'indexfile' - the full path of the index file. -% 'linkagemap' - the full path of the linkage map, needed only for the -% unpreprocessed data under the linkage model. -% 'coordinatefile' - needed with the spatial model. -% 'groups', ['no';'yes'] - clustering of groups instead of individuals. -% 'groupname' - the full path of the group name file. - -% Examples: -% - Linkage model: -% parallel('datafile','e:\baps4\baps_source\data\bpseudomallei.xls',... -% 'mixturetype','codon_mix','initialk','[10 15]','fixedk','no',... -% 'outputmat','E:\test_link.mat','datatype','excel') -% - Independent model: -% parallel('datafile','e:\baps4\baps_source\data\baps_data.txt',... -% 'mixturetype','mix','initialk','[10:15]','fixedk','no',... -% 'outputmat','e:\test_ind.mat','datatype','numeric'); -% - Spatial model: -% parallel('datafile','e:\baps4\baps_source\data\wolverines_spatial_preprocessed.mat',... -% 'mixturetype','spatial','initialk', '[10 11]', 'fixedk','no',... -% 'outputmat','e:\test_spatial.mat','datatype','matlab'); -% -% -% - Admixture model: -% parallel('datafile','e:\baps5\data\data1_mixture.mat', ... -% 'mixturetype','ad_mix', ... -% 'clusters','[1 3 5]',... -% 'iters','2',... -% 'refinds','3',... -% 'refiters','4',... -% 'outputmat','e:\baps5\data\data1_admixture_parallel.mat'); - -% A group of result files can be later compared by using compare.m -% function. -%------------------------------------------------------------------------------- -%- Set up options and default parameters -%------------------------------------------------------------------------------- -msgInvalidPair = '***ERROR: Bad value for argument: ''%s'''; - -% default options -options = struct('dataFile', '',... - 'dataType', 'numeric',... - 'mixtureType', 'mix',... - 'initialK', 1, ... - 'fixedK', 'no', ... - 'nameFile', '', ... - 'indexFile', '', ... - 'outputMat', '', ... - 'linkageMap','', ... - 'coordinateFile', '', ... - 'groups','no', ... - 'groupname', '', ... - 'clusters', '', ... - 'minSize', '', ... - 'iters', '', ... - 'refInds', '', ... - 'refIters', '' ... - ); - -if nargin == 1 && isstruct(varargin{1}) - paramlist = [ fieldnames(varargin{1}) ... - struct2cell(varargin{1}) ]'; - paramlist = { paramlist{:} }; -else - if mod(nargin,2) - error('Invalid parameter/value pair arguments.'); - end - paramlist = varargin; -end - -optionsnames = lower(fieldnames(options)); -for i=1:2:length(paramlist) - pname = paramlist{i}; - pvalue = paramlist{i+1}; - ind = strmatch(lower(pname),optionsnames); - if isempty(ind) - error(['Invalid parameter: ''' pname '''.']); - elseif length(ind) > 1 - error(['Ambiguous parameter: ''' pname '''.']); - end - switch(optionsnames{ind}) - case 'datafile' - if ischar(pvalue) - options.dataFile = pvalue; - else - error(sprintf(msgInvalidPair,pname)); - end -% if ~isempty(findstr(pvalue , '.txt')) -% options.dataType = 'text'; -% elseif ~isempty(findstr(pvalue, '.mat')) -% options.dataType = 'matlab'; -% elseif ~isempty(findstr(pvalue, '.xls')) -% options.dataType = 'excel'; -% else -% error('*** ERROR: unrecognized data format'); -% end - case 'mixturetype' - if ischar(pvalue) - if ~strmatch(pvalue, strvcat('mix','ad_mix','linear_mix','codon_mix','spatical'),'exact') - error('*** ERROR: unrecoganized model type'); - end - if isempty(pvalue), - options.mixtureType = '.'; - else - options.mixtureType = pvalue; - end - else - error(sprintf(msgInvalidPair,pname)); - end - case 'initialk' - pvalue = str2num(pvalue); - if isnumeric(pvalue) - if isempty(pvalue), - options.initialK = 0; - else - options.initialK = pvalue; - end - else - error(sprintf(msgInvalidPair,pname)); - end - case 'fixedk' - if ischar(pvalue) - if isempty(pvalue), - options.fixedK = 'no'; - else - options.fixedK = pvalue; - end - else - error(sprintf(msgInvalidPair,pname)); - end - case 'namefile' - if ischar(pvalue) - options.nameFile = pvalue; - else - error(sprintf(msgInvalidPair,pname)); - end - case 'indexfile' - if ischar(pvalue) - options.indexFile = pvalue; - else - error(sprintf(msgInvalidPair,pname)); - end - case 'outputmat' - if ischar(pvalue) - options.outputMat = pvalue; - directoryName = fileparts(pvalue); - if ~exist(directoryName) - fprintf(1,'*** ERROR: Output directory ''%s'' does not exist.\n', directoryName); - return - end - else - error(sprintf(msgInvalidPair,pname)); - end - case 'datatype' - if ischar(pvalue) - options.dataType = pvalue; - else - error(sprintf(msgInvalidPair,pname)); - end - case 'linkagemap' - if ischar(pvalue) - options.linkageMap = pvalue; - else - error(sprintf(msgInvalidPair,pname)); - end - case 'coordinatefile' - if ischar(pvalue) - options.coordinateFile = pvalue; - else - error(sprintf(msgInvalidPair,pname)); - end - case 'groups' - if ischar(pvalue) - options.groups = pvalue; - else - error(sprintf(msgInvalidPair,pname)); - end - case 'groupname' - if ischar(pvalue) - options.groupname = pvalue; - else - error(sprintf(msgInvalidPair,pname)); - end - - % the options below are for admixture analysis - case 'clusters' - if ischar(pvalue) - options.clusters = str2num(pvalue); - else - error(sprintf(msgInvalidPair,pname)); - end - case 'minsize' - if ischar(pvalue) - options.minSize = str2num(pvalue); - else - error(sprintf(msgInvalidPair,pname)); - end - case 'iters' - if ischar(pvalue) - options.iters = str2num(pvalue); - else - error(sprintf(msgInvalidPair,pname)); - end - case 'refinds' - if ischar(pvalue) - options.refInds = str2num(pvalue); - else - error(sprintf(msgInvalidPair,pname)); - end - case 'refiters' - if ischar(pvalue) - options.refIters = str2num(pvalue); - else - error(sprintf(msgInvalidPair,pname)); - end - otherwise - error(['Invalid parameter: ''' pname '''.']); - end -end - -% The subfunction to check syntax -if ~checkSyntax(options) - return -end - -switch options.mixtureType - case 'mix' - if isequal(options.groups,'yes') - greedyPopMix_parallel(options); - else - independent_parallel(options); - end - case 'linear_mix' - linkage_parallel(options); - case 'codon_mix' - linkage_parallel(options); - case 'spatial' - if isequal(options.groups, 'yes') - spatialPopMixture_parallel(options); - else - spatial_parallel(options); - end - case 'ad_mix' - admix_parallel(options); -end - -% ------------------------------------------------------------------------- -% Subfunctions -% ------------------------------------------------------------------------- -function isOK = checkSyntax(options) -isOK = 1; -if strcmp(options.fixedK, 'yes') && length(options.initialK)>1 - display('*** ERROR: conflicting in options fixedk and initialk.'); - isOK = 0; -end - -if strcmp(options.mixtureType, 'mix') - if strcmp(options.dataType, 'excel') || strcmp(options.dataType,'sequence') - display('*** ERROR: unknown datatype for the independence module.'); - isOK = 0; - end -end - -% check the admixture parameters -admix_str = {options.clusters, options.minSize, options.iters, ... - options.refInds, options.refIters}; -pt = cellfun('isempty', admix_str); - -if all(pt) - if ~strcmp(options.mixtureType, 'ad_mix') - isOK = 1; - else - display('*** ERROR: problematic mixture type.'); - isOK = 0; - end -end - -if any(pt) && strcmp(options.mixtureType, 'ad_mix') - display('*** ERROR: incomplete admixture parameters.'); - isOK = 0; -else - isOK = 1; -end - diff --git a/matlab/parallel/proportion2str.m b/matlab/parallel/proportion2str.m deleted file mode 100644 index 9ff5fbf..0000000 --- a/matlab/parallel/proportion2str.m +++ /dev/null @@ -1,18 +0,0 @@ -function str = proportion2str(prob) -%prob belongs to [0.00, 0.01, ... ,1]. -%str is a 4-mark presentation of proportion. - -if abs(prob)<1e-3 - str = '0.00'; -elseif abs(prob-1) < 1e-3; - str = '1.00'; -else - prob = round(100*prob); - if prob<10 - str = ['0.0' num2str(prob)]; - else - str = ['0.' num2str(prob)]; - end; -end; - -%------------------------------------------------------- \ No newline at end of file diff --git a/matlab/parallel/readScript.m b/matlab/parallel/readScript.m deleted file mode 100644 index ab56333..0000000 --- a/matlab/parallel/readScript.m +++ /dev/null @@ -1,66 +0,0 @@ -function readScript(filename) -% READSCRIPT read the script file and output the parameters -% this function does not perform syntax checking. -% Example: -% readScript('script.txt') - -% read the script -ind = readfile(filename); -if isempty(ind) - return -end -nLines = size(ind,1); - -% extract command information -optionStr = []; -for k = 1:nLines - [cmdName, paraStr] = extract(ind(k,:)); - optionStr = [optionStr cmdName ',' paraStr ',']; -end -optionStr = optionStr(1:end-1); % remove the last coma - -% call function parallel -eval(['parallel(' optionStr ')']) - -% ------------------------------------------------------------------------- -% Subfunctions -% ------------------------------------------------------------------------- -function [cmdName, paraStr] = extract(commandline) -% function to extract the command name and the parameter string - -[cmdName, remainStr] = strtok(commandline,'('); -boundary = regexp(remainStr,''''); - -if isempty(boundary) % if paraStr does not contain quotation marks - % use parenthesis as boundaries - startPt = regexp(remainStr,'(') + 1; - endPt = regexp(remainStr,')') - 1; -else - startPt = boundary(1) + 1; - endPt = boundary(2) - 1; -end -paraStr = remainStr(startPt: endPt); - -cmdName = strcat('''',cmdName,''''); -paraStr = strcat('''',paraStr,''''); - -% ------------------------------------------------------------------------- -function T = readfile(filename); -f = fopen(filename,'r'); -if f == -1 - % error(filename); - display('*** ERROR: invalid script name.'); - T = []; - return -end -i = 1; -while 1 - clear line; - line = fgetl(f); - if ~isstr(line), break, end - n = length(line); - T(i,1:n) = line(1:n); - i = i+1; -end -fclose(f); - diff --git a/matlab/parallel/sc.txt b/matlab/parallel/sc.txt deleted file mode 100644 index 02886a1..0000000 --- a/matlab/parallel/sc.txt +++ /dev/null @@ -1,6 +0,0 @@ -datafile('r:\baps5\data\simple_data.txt'); -mixturetype('mix') -outputmat('r:\baps5\data\simple_mix.mat') -initialk(2 3) -fixedk('no') -datatype('numeric') diff --git a/matlab/parallel/sc2.txt b/matlab/parallel/sc2.txt deleted file mode 100644 index af3cf7e..0000000 --- a/matlab/parallel/sc2.txt +++ /dev/null @@ -1,8 +0,0 @@ -datafile('e:\baps5\data\ssuis_mixture_results_codon.mat') -mixturetype('ad_mix') -outputmat('e:\baps5\data\ssuis_small_script_2.mat') -clusters(3:4) -minsize(5) -iters(2) -refinds(3) -refiters(4) \ No newline at end of file diff --git a/matlab/parallel/sort_partition.m b/matlab/parallel/sort_partition.m deleted file mode 100644 index 03d7019..0000000 --- a/matlab/parallel/sort_partition.m +++ /dev/null @@ -1,17 +0,0 @@ -function srt_partition = sort_partition(partition) -% SORT_PARTITION sorts a given partition (row vector) into the canonical order, where every -% new class has the smallest possible index. - -% input: -% partition is a column vector. -% output: -% srt_partition is a row vector. - -n_classes=max(partition); -srt_partition=zeros(1,n_classes); -for i=1:n_classes - nonz=find(partition); - here=find(partition==partition(nonz(1))); - srt_partition(here)=i; - partition(here)=0; -end \ No newline at end of file diff --git a/matlab/parallel/spatialPopMixture_parallel.m b/matlab/parallel/spatialPopMixture_parallel.m deleted file mode 100644 index 1876e21..0000000 --- a/matlab/parallel/spatialPopMixture_parallel.m +++ /dev/null @@ -1,2275 +0,0 @@ -function spatialPopMixture_parallel(options) -% SPATIALPOPMIXTURE_PARALLEL is the command line version of the group partition with -% spaticial models. -% Input: options is a struct generated by parallel.m - - -%-------------------------------------------------------------------------- -%- Syntax check out -%-------------------------------------------------------------------------- -outp = [options.outputMat '.txt']; -inp = [options.dataFile ' & ' options.coordinateFile]; - -if strcmp(options.fixedK, 'yes') - fixedK = 1; -else - fixedK = 0; -end - -switch options.dataType - -case 'numeric' - data = load(options.dataFile); - ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS - if (ninds==0) - disp('*** ERROR: Incorrect Data-file.'); - return; - end - - coordinates = load(options.coordinateFile); - viallinen = testaaKoordinaatit(ninds, coordinates); - if viallinen - disp('*** ERROR: Incorrect coordinates.'); - return - end - - if ~isempty(options.groupname) - popnames = initPopNames(options.groupname); - if (size(popnames,1)~=ninds) - disp('*** ERROR: Incorrect name-file.'); - popnames = []; - end - else - popnames = []; - end - - disp('Pre-processing the data. This may take several minutes.'); - - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - [data, rows, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rows); - - rowsFromInd = 0; % Ei tiedet? - -% save_preproc = questdlg('Do you wish to save pre-processed data?',... -% 'Save pre-processed data?',... -% 'Yes','No','Yes'); -% if isequal(save_preproc,'Yes'); -% waitALittle; -% [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); -% kokonimi = [pathname filename]; -% c.data = data; c.rows = rows; c.alleleCodes = alleleCodes; -% c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -% c.dist = dist; c.popnames = popnames; c.Z = Z; -% c.cliques = cliques; c.separators = separators; -% c.vorPoints = vorPoints; c.rowsFromInd = rowsFromInd; -% c.vorCells = vorCells; c.pointers = pointers; -% c.coordinates = coordinates; -% save(kokonimi,'c'); -% clear c; -% end; - -case 'genepop' - kunnossa = testaaGenePopData(options.dataFile); - if kunnossa==0 - return - end - [data,popnames]=lueGenePopData(options.dataFile); - - ninds = max(data(:,end)); - coordinates = load(options.coordinateFile); - viallinen = testaaKoordinaatit(ninds, coordinates); - - if viallinen - disp('*** ERROR: Incorrect coordinates'); - return - end - - disp('Pre-processing the data. This may take several minutes.'); - - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - [data, rows, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rows); - - rowsFromInd = 2; %Tiedetään - -% save_preproc = questdlg('Do you wish to save pre-processed data?',... -% 'Save pre-processed data?',... -% 'Yes','No','Yes'); -% if isequal(save_preproc,'Yes'); -% waitALittle; -% [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); -% kokonimi = [pathname filename]; -% c.data = data; c.rows = rows; c.alleleCodes = alleleCodes; -% c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -% c.dist = dist; c.popnames = popnames; c.Z = Z; -% c.cliques = cliques; c.separators = separators; -% c.vorPoints = vorPoints; c.rowsFromInd = rowsFromInd; -% c.vorCells = vorCells; c.pointers = pointers; -% c.coordinates = coordinates; -% save(kokonimi,'c'); -% clear c; -% end; - -case 'matlab' - struct_array = load(options.dataFile); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'dist') - disp('*** ERROR: Incorrect file format'); - return - end - elseif isfield(struct_array,'dist') %Mideva versio - c = struct_array; - else - disp('*** ERROR: Incorrect file format'); - return; - end - data = double(c.data); rows = c.rows; alleleCodes = c.alleleCodes; - noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; - dist = c.dist; popnames = c.popnames; Z = c.Z; rowsFromInd = c.rowsFromInd; - - if isfield(c, 'cliques') - cliques = c.cliques; separators = c.separators; - vorPoints = c.vorPoints; vorCells = c.vorCells; - pointers = c.pointers; coordinates = c.coordinates; - clear c; - else - ninds = max(data(:,end)); - coordinates = load(options.coordinateFile); - viallinen = testaaKoordinaatit(ninds, coordinates); - - if viallinen - disp('*** ERROR: Incorrect coordinates'); - return - end - - disp('Pre-processing the data. This may take several minutes.'); - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); -% save_preproc = questdlg('Do you wish to save pre-processed data?',... -% 'Save pre-processed data?',... -% 'Yes','No','Yes'); -% if isequal(save_preproc,'Yes'); -% waitALittle; -% [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); -% kokonimi = [pathname filename]; -% c.cliques = cliques; c.separators = separators; -% c.vorPoints = vorPoints; c.vorCells = vorCells; -% c.pointers = pointers; c.coordinates = coordinates; -% save(kokonimi,'c'); -% clear c; -% end; - end -end - -global PARTITION; global COUNTS; -global SUMCOUNTS; %global POP_LOGML; -global SEPCOUNTS; global CLIQCOUNTS; -clearGlobalVars; -npopstext = []; -npopstextExtra = options.initialK; -if length(npopstextExtra)>=255 - npopstextExtra = npopstextExtra(1:255); - npopstext = [npopstext ' ' npopstextExtra]; - teksti = 'The input field length limit (255 characters) was reached. Input more values: '; -else - if max(npopstextExtra) > size(data,1) - error('Initial K larger than the sample size are not accepted. '); - else - npopstext = [npopstext ' ' num2str(npopstextExtra)]; - end -end -clear teksti; - -if isempty(npopstext) || length(npopstext)==1 - return -else - npopsTaulu = str2num(npopstext); - ykkoset = find(npopsTaulu==1); - npopsTaulu(ykkoset) = []; % Mikäli ykkösi?annettu ylärajaksi, ne poistetaan. - if isempty(npopsTaulu) - return - end - clear ykkoset; -end - -if fixedK - % Only the first value of npopsTaulu is used - npops = npopsTaulu(1); - nruns = length(npopsTaulu); - [logml, npops, partitionSummary]=spatialMix_fixK(c,npops,nruns); -else - [logml, npops, partitionSummary]=spatialMix(c,npopsTaulu); -end - -if logml==1 - return -end - -data = noIndex(data,noalle); - -h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outp = get(h0,'String'); -[varmuus,changesInLogml] = writeMixtureInfo(logml, rows, data, adjprior, priorTerm, ... - outp,inp,partitionSummary, popnames, cliques, separators, fixedK); - -%checkLogml(priorTerm, adjprior, cliques, separators); - - -viewPopMixPartition(PARTITION, rows, popnames); - -if isequal(popnames, []) - names = pointers; -else - %Etsitään voronoi-soluja vastaavat nimet. - names = cell(size(pointers)); - indices = 1:length(popnames); - for i = 1:length(pointers) - inds = pointers{i}; - namesInCell = []; - for j = 1:length(inds) - ind = inds(j); - I = find(indices > ind); - if isempty(I) - nameIndex = indices(end); - else - nameIndex = min(I) -1; - end - name = popnames{nameIndex}; - namesInCell = [namesInCell name]; - end - names{i} = namesInCell; - end -end -vorPlot(vorPoints, vorCells, PARTITION, pointers, coordinates, names); - - -if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',[pathname filename '.txt']) - delete('baps4_output.baps') -end - - -if rowsFromInd==0 - %Käytettiin BAPS-formaattia, eik?rowsFromInd ole tunnettu. - [popnames, rowsFromInd] = findOutRowsFromInd(popnames, rows); -end - -groupPartition = PARTITION; - -fiksaaPartitioYksiloTasolle(rows, rowsFromInd); - -c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; -c.alleleCodes = alleleCodes; c.adjprior = adjprior; c.popnames = popnames; -c.rowsFromInd = rowsFromInd; c.data = data; c.npops = npops; -c.noalle = noalle; c.groupPartition = groupPartition; -c.pointers = pointers; c.vorPoints = vorPoints; c.vorCells = vorCells; -c.coordinates = coordinates; c.names = names; c.varmuus = varmuus; -c.rows = rows; c.mixtureType = 'spatialPop'; c.changesInLogml = changesInLogml; -fprintf(1,'Saving the result...') -try -% save(options.outputMat, 'c'); - save(options.outputMat, 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'Finished.\n'); -catch - display('*** ERROR in saving the result.'); -end - - -% ------------------------------------------------------------------------- -% - Subfunctions -% ------------------------------------------------------------------------- - - -%-------------------------------------------------------------------------- -%-------------------------------------------------------------------------- - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -%global POP_LOGML; POP_LOGML = []; -global SEPCOUNTS; SEPCOUNTS = []; -global CLIQCOUNTS; CLIQCOUNTS = []; - -%------------------------------------------------------------------------------------- - - -function rows = computeRows(rowsFromInd, inds, ninds) -% On annettu yksilöt inds. Funktio palauttaa vektorin, joka -% sisältää niiden rivien numerot, jotka sisältävät yksilöiden -% dataa. - -rows = inds(:, ones(1,rowsFromInd)); -rows = rows*rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]); -rows = rows - miinus; -rows = reshape(rows', [1,rowsFromInd*ninds]); - - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - -%-------------------------------------------------------------------------- - - -function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -% Suorittaa yksilön seuraavan tilan arvonnan - -y = logml + muutokset; % siirron jälkeiset logml:t -y = y - max(y); -y = exp(y); -summa = sum(y); -y = y/summa; -y = cumsum(y); - -i2 = rand_disc(y); % uusi kori -suurin = muutokset(i2); - - -%-------------------------------------------------------------------------------------- - - -function svar=rand_disc(CDF) -%returns an index of a value from a discrete distribution using inversion method -slump=rand; -har=find(CDF>slump); -svar=har(1); - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksil?ind -% siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global CLIQCOUNTS; -global SEPCOUNTS; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -%global POP_LOGML; -global CLIQCOUNTS; -global SEPCOUNTS; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -diffInCliqCounts = CLIQCOUNTS(:,i1); -diffInSepCounts = SEPCOUNTS(:,i1); - - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = 0; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2, cliques, separators); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; global CLIQCOUNTS; -global SUMCOUNTS; global SEPCOUNTS; -%global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, muuttuvat); -diffInSepCounts = computeDiffInCliqCounts(separators, muuttuvat); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, globalRows, data, ... - adjprior, priorTerm) -% Palauttaa yksilöt järjestyksess?siten, ett?ensimmäisen?on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind =inds(i); - rows = globalRows(i,1):globalRows(i,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset(ind, globalRows, ... - data, adjprior, priorTerm, logml, cliques, separators) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksilöt inds siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. -% Huom! Laskee muutoksen vain yhdelle tyhjälle populaatiolle, muiille -% tyhjille tulee muutokseksi 0. - -global COUNTS; global SUMCOUNTS; -global PARTITION; %global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -counts = COUNTS; -sumcounts = SUMCOUNTS; - -[emptyPop, pops] = findEmptyPop(npops); - -i1 = PARTITION(ind); - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -i2 = sort(i2); - -rows = globalRows(ind,1):globalRows(ind,2); -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - -% Asetetaan muillekin tyhjille populaatioille sama muutos, kuin -% emptyPop:lle - -if emptyPop > 0 - empties = mysetdiff((1:npops), [i2 i1]); - muutokset(empties) = muutokset(emptyPop); -end - -COUNTS = counts; -SUMCOUNTS = sumcounts; - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2(i1, globalRows, ... - data, adjprior, priorTerm, logml, cliques, separators); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli korin i1 kaikki yksilöt siirretään -% koriin i. -% Laskee muutokset vain yhdelle tyhjälle populaatiolle, muille tulee -% muutokseksi 0. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -[emptyPop, pops] = findEmptyPop(npops); - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -inds = find(PARTITION == i1); -ninds = length(inds); - -rows = []; -for i = 1:ninds - rows = [rows globalRows(inds(i),1):globalRows(inds(i),2)]; -end -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); -diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); -diffInSepCounts = computeDiffInCliqCounts(separators, inds); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i1) = 0; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = diffInCliqCounts; -SEPCOUNTS(:,i1) = diffInSepCounts; - - - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... - data, adjprior, priorTerm, i1, logml, cliques, separators) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. -% Laskee vain yhden tyhjän populaation, muita kohden muutokseksi jää 0. - - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = []; - for i = 1:ninds - ind = inds(i); - rows = [rows; (globalRows(ind,1):globalRows(ind,2))']; - end - diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); - diffInSepCounts = computeDiffInCliqCounts(separators, inds); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - - [emptyPop, pops] = findEmptyPop(npops); - i2 = [pops(find(pops~=i1))]; - if emptyPop > 0 - i2 =[i2 emptyPop]; - end - - for i = i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(pop2,i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; - end - - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - end -end - -%-------------------------------------------------------------------------- -function muutokset = laskeMuutokset5(inds, globalRows, data, ... - adjprior, priorTerm, logml, cliques, separators, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?i vaihtaisi koria i1:n ja i2:n välill? - -global COUNTS; global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; global SEPCOUNTS; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -for i = 1:ninds - ind = inds(i); - - rows = globalRows(ind,1):globalRows(ind,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - if PARTITION(ind)==i1 - pop1 = i1; %mist? - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - - diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); - diffInSepCounts = computeDiffInCliqCounts(separators, ind); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) - diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) + diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) - diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) + diffInSepCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) + diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) - diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) + diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) - diffInSepCounts; -end - -%-------------------------------------------------------------------------- - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - -%------------------------------------------------------------------------------------ - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; -global SEPCOUNTS; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); -CLIQCOUNTS = CLIQCOUNTS(:,notEmpty); -SEPCOUNTS = SEPCOUNTS(:,notEmpty); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%---------------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - - -%--------------------------------------------------------------------------------------- - - -function [newData, rows, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). -% -% Muutettu vastaamaan greedyPopMixin handlePopDataa. - -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -%rows = cell(nind,1); -rows = zeros(nind,2); -for i=1:nind - rivit = find(data(:,end)==i)'; - rows(i,1) = min(rivit); - rows(i,2) = max(rivit); -end -newData = data; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%---------------------------------------------------------------------------------------- - -function [Z, dist] = newGetDistances(data, initRows) - -ninds = size(initRows,1); -nloci = size(data,2)-1; -riviLkm = nchoosek(ninds,2); - -empties = find(data<0); -data(empties)=0; -data = uint8(data); % max(noalle) oltava <256 - -pariTaulu = zeros(riviLkm,2); -aPointer=1; -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a; - pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)'; - aPointer = aPointer+ninds-a; -end - -%eka = pariTaulu(:,ones(1,rowsFromInd)); -%eka = eka * rowsFromInd; -%miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -%eka = eka - miinus; - -koot = initRows(:,2) - initRows(:,1); -maxSize = max(koot) + 1; - -rows = zeros(ninds, maxSize); - -for i=1:ninds - apu = initRows(i,1):initRows(i,2); - rows(i, 1:length(apu)) = apu; -end -eka = zeros(riviLkm, maxSize); -toka = zeros(riviLkm, maxSize); - -for i = 1:riviLkm - eka(i, :) = rows(pariTaulu(i, 1), :); - toka(i, :) = rows(pariTaulu(i,2), :); -end - -%eka = uint16(eka); -%toka = uint16(toka); - -summa = zeros(riviLkm,1); -vertailuja = zeros(riviLkm,1); - -clear pariTaulu; clear miinus; - -x = zeros(size(eka)); x = uint8(x); -y = zeros(size(toka)); y = uint8(y); - -for j=1:nloci; - - for k=1:maxSize - I = find(eka(:,k)>0); - x(I,k) = data(eka(I,k),j); - I = find(toka(:,k)>0); - y(I,k) = data(toka(I,k),j); - end - - for a=1:maxSize - for b=1:maxSize - vertailutNyt = double(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa+double(lisays); - end - end -end - -clear x; clear y; clear vertailutNyt; -nollat = find(vertailuja==0); -dist = zeros(length(vertailuja),1); -dist(nollat) = 1; -muut = find(vertailuja>0); -dist(muut) = summa(muut)./vertailuja(muut); -clear summa; clear vertailuja; - -Z = linkage(dist'); - -%---------------------------------------------------------------------------------------- - - -function [Z, distances]=getDistances(data_matrix,nclusters) - -%finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance -%gives partition in 8-bit format -%allocates all alleles of a single individual into the same basket -%data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row, -%i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row -%missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -distances=zeros(nchoosek(n,2),1); -pointer=1; -for i=1:n-1 - i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci); - for j=i+1:n - d_ij=0; - j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci); - vertailuja = 0; - for k=1:size(i_data,1) - for l=1:size(j_data,1) - here_i=find(i_data(k,:)>=0); - here_j=find(j_data(l,:)>=0); - here_joint=intersect(here_i,here_j); - vertailuja = vertailuja + length(here_joint); - d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint))); - end - end - d_ij = d_ij / vertailuja; - distances(pointer)=d_ij; - pointer=pointer+1; - end -end - -Z=linkage(distances'); - - - -%---------------------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile) - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) & ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = 0; -end - - -%----------------------------------------------------------------------------------- -% Laskee arvot cliqcounts:lle ja sepcounts:lle - -function [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops) - -global PARTITION; -ncliq = size(cliques,1); -nsep = size(separators,1); - -cliqPartition = zeros(ncliq, size(cliques,2)); -sepPartition = zeros(nsep, size(separators, 2)); - -apuCliq = find(cliques > 0); -apuSep = find(separators > 0); - -cliqPartition(apuCliq) = PARTITION(cliques(apuCliq)); -sepPartition(apuSep) = PARTITION(separators(apuSep)); - - -cliqcounts = zeros(ncliq, npops); -for i = 1:npops - cliqcounts(:,i) = sum(cliqPartition == i, 2); -end - - -sepcounts = zeros(nsep, npops); -for i = 1:npops - sepcounts(:,i) = sum(sepPartition == i, 2); -end - -%------------------------------------------------------------------------- - -function diffInCliqCounts = computeDiffInCliqCounts(cliques, inds) -% Laskee muutoksen CLIQCOUNTS:ssa (tai SEPCOUNTS:ssa, jos syötteen? -% separators) kun yksilöt inds siirretään. -% diffInCliqcounts on ncliq*1 taulu, joka on CLIQCOUNTS:n sarakkeesta josta -% yksilöt inds siirretään ja lisättäv?sarakkeeseen, johon yksilöt -% siirretään. - -ncliq = size(cliques,1); -diffInCliqCounts = zeros(ncliq,1); -ninds = length(inds); -for i = 1:ninds - ind = inds(i); - rivit = sum((cliques == ind),2); - diffInCliqCounts = diffInCliqCounts + rivit; -end - -%----------------------------------------------------------------------- - -function [logml, spatialPrior] = computeLogml(adjprior,priorTerm) - -%global GAMMA_LN; -global CLIQCOUNTS; -global SEPCOUNTS; -global PARTITION; - -notEmpty = any(CLIQCOUNTS); -npops = length(find(notEmpty == 1)); -sumcliq=sum(CLIQCOUNTS, 2); -sumsep=sum(SEPCOUNTS, 2); -ncliq = size(CLIQCOUNTS, 1); -nsep = size(SEPCOUNTS, 1); - -cliqsizes = sum(CLIQCOUNTS, 2)'; -sepsizes = sum(SEPCOUNTS, 2)'; -cliqsizes = min([cliqsizes; npops*ones(1,ncliq)])'; -sepsizes = min([sepsizes; npops*ones(1,nsep)])'; - -klikkitn = sum(sum(gammaln(CLIQCOUNTS(:,notEmpty) + repmat(1./cliqsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./cliqsizes))) ... - - sum(gammaln(sumcliq + 1)); - -septn = sum(sum(gammaln(SEPCOUNTS(:,notEmpty) + repmat(1./sepsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./sepsizes))) ... - - sum(gammaln(sumsep + 1)); - - -%klikkitn = sum(sum(gammaln(CLIQCOUNTS + 1/npops))) ... -% - ncliq*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumcliq + 1)); -%septn = sum(sum(gammaln(SEPCOUNTS + 1/npops))) ... -% - nsep*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumsep + 1)); - -spatialPrior = (klikkitn - septn); - -%if spatialPrior > 0 -% keyboard -%end - - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = size(COUNTS,3); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 z]) + COUNTS) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS),2) - priorTerm; - -logml = sum(popLogml) + spatialPrior; - -%-------------------------------------------------------------------------- - - -function initializeGammaln(ninds, rowsFromInd, maxSize) -%Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j) -global GAMMA_LN; -GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxSize); -for i=1:(ninds+1)*rowsFromInd - for j=1:maxSize - GAMMA_LN(i,j)=gammaln((i-1) + 1/j); - end -end - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - - -%---------------------------------------------------------------------------- - - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimmäinen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) | isequal(line2,-1) | isequal(line3,-1)) - disp('Incorrect file format 1168'); fclose(fid); - return -end -if (testaaPop(line1)==1 | testaaPop(line2)==1) - disp('Incorrect file format 1172'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi tällöin lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format 1180'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin nelj?täytyy sisältää pilkku. - disp('Incorrect file format 1185'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1195'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 & ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format 1206'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan jälkeen - if isequal(line4,-1) - disp('Incorrect file format 1212'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin täytyy sisältää pilkku. - disp('Incorrect file format 1217'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1228'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - -%------------------------------------------------------ - - -function [data, popnames] = lueGenePopData(tiedostonNimi) - -fid = fopen(tiedostonNimi); -line = fgetl(fid); %ensimmäinen rivi -line = fgetl(fid); %toinen rivi -count = rivinSisaltamienMjonojenLkm(line); - -line = fgetl(fid); -lokusRiveja = 1; -while (testaaPop(line)==0) - lokusRiveja = lokusRiveja+1; - line = fgetl(fid); -end - -if lokusRiveja>1 - nloci = lokusRiveja; -else - nloci = count; -end - -popnames = cell(10,2); -data = zeros(100, nloci+1); -nimienLkm=0; -ninds=0; -poimiNimi=1; -digitFormat = -1; -while line ~= -1 - line = fgetl(fid); - - if poimiNimi==1 - %Edellinen rivi oli 'pop' - nimienLkm = nimienLkm+1; - ninds = ninds+1; - if nimienLkm>size(popnames,1); - popnames = [popnames; cell(10,2)]; - end - nimi = lueNimi(line); - if digitFormat == -1 - digitFormat = selvitaDigitFormat(line); - divider = 10^digitFormat; - end - popnames{nimienLkm, 1} = {nimi}; %Näin se on greedyMix:issäkin?!? - popnames{nimienLkm, 2} = ninds; - poimiNimi=0; - - data = addAlleles(data, ninds, line, divider); - - elseif testaaPop(line) - poimiNimi = 1; - - elseif line ~= -1 - ninds = ninds+1; - data = addAlleles(data, ninds, line, divider); - end -end - -data = data(1:ninds*2,:); -popnames = popnames(1:nimienLkm,:); -fclose(fid); - -npops = size(popnames,1); -ind = 1; -for pop = 1:npops - if pop0 - fid = fopen(outPutFile,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:'); -disp(['Data file: ' inputFile]); -disp(['Number of clustered groups: ' ownNum2Str(ninds)]); -disp(['Number of clusters in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered groups: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clusters in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp(['Best Partition: ']); -if (fid ~= -1) - fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',[new_line]); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',[text]); - fprintf(fid,'\n'); - end - end -end - -disp(' '); -disp(' '); -disp('Changes in log(marginal likelihood) if group i is moved to cluster j:'); -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if group i is moved to cluster j:']); fprintf(fid, '\n'); -end - -if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku 'group' blanks(6+erotus)]; -else - ekarivi = 'group '; -end - -for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; -end -disp(ekarivi); -if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); -end - -varmuus = zeros(ninds,1); -changesInLogml = LOGDIFF'; -for ind = 1:ninds - %[muutokset, diffInCounts] = laskeMuutokset(ind, globalRows, data, ... - % adjprior, priorTerm, logml, cliques, separators); - muutokset = changesInLogml(:,ind); - if sum(exp(muutokset))>0 - varmuus(ind) = 1 - 1/sum(exp(muutokset)); - else - varmuus(ind) = 0; - end - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - end -end - -disp(' '); disp(' '); -disp('KL-divergence matrix:'); -dist_mat = zeros(npops, npops); -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); -end - -maxnoalle = size(COUNTS,1); -nloci = size(COUNTS,2); -d = zeros(maxnoalle, nloci, npops); -prior = adjprior; -prior(find(prior==1))=0; -nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. -prior(1,nollia)=1; -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end -% ekarivi = blanks(7); -% for pop = 1:npops -% ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; -% end -ekarivi = num2str(npops); -disp(ekarivi); -if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); -end - -for pop1 = 1:npops - rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - dist_mat(pop1,pop2) = div; - end -% disp(rivi); -% if (fid ~= -1) -% fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); -% end -end - -dist_mat = dist_mat + dist_mat'; % make it symmetric -for pop1 = 1:npops - rivi = ['Cluster_' num2str(pop1) ' ']; - for pop2 = 1:npops - rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n'); -end - -partitionSummaryKaikki = partitionSummary; -partitionSummary =[]; -for i=1:size(partitionSummaryKaikki,3) - partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)]; -end -[I,J] = find(partitionSummaryKaikki(:,2,:)>-1e49); -partitionSummaryKaikki = partitionSummaryKaikki(I,:,:); -%keyboard - - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end -end - -if ~fixedK - - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end - end - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - - -%-------------------------------------------------------------------------- - - -function ninds = testaaOnkoKunnollinenBapsData(data) -%Tarkastaa onko viimeisess?sarakkeessa kaikki -%luvut 1,2,...,n johonkin n:ään asti. -%Tarkastaa lisäksi, ett?on vähintään 2 saraketta. -if size(data,1)<2 - ninds = 0; return; -end -lastCol = data(:,end); -ninds = max(lastCol); -if ~isequal((1:ninds)',unique(lastCol)) - ninds = 0; return; -end - - -%-------------------------------------------------------------------------- - -function [cliques, separators, vorPoints, vorCells, pointers] ... - = handleCoords(coordinates); -%Laskee yksilöiden luonnolliset naapurit koordinaateista. -%Naapurit lasketaan lisäämäll?koordinaatteihin pisteit? -%jotta kutakin yksilöä vastaisi rajoitettu voronoi-solu -%Puuttuvat koordinaatit (negatiiviset) tulevat erakkopisteiksi -% -%Määrittää lisäksi yksilöit?vastaavat voronoi tesselaation solut. -%vorPoints:ssa on solujen kulmapisteet ja vorCells:ss?kunkin solun -%kulmapisteiden indeksit. Pointers{i} sisältää solussa i olevien yksilöiden -%indeksit. - - - -ninds = length(coordinates); -[I,J] = find(coordinates>0 | coordinates <0); %Käsitellään vain yksilöit? joilta koordinaatit -I = unique(I); %olemassa -ncoords = length(I); -puuttuvat = setdiff(1:ninds, I); -new_coordinates = addPoints(coordinates(I,:)); %Ympäröidään yksilöt apupisteill? - - -apuData = [new_coordinates(1:ncoords,:) (1:ncoords)']; -apuData = sortrows(apuData,[1 2]); -erot = [diff(apuData(:,1)) diff(apuData(:,2))]; -empties = find(erot(:,1)==0 & erot(:,2)==0); -samat = cell(length(empties),1); -pointer = 0; - -for i = 1:length(empties) - if i == 1 | empties(i) - empties(i-1) > 1 %Tutkitaan onko eri pisteess?kuin edellinen - pointer = pointer+1; - samat{pointer} = [apuData(empties(i),3) apuData(empties(i)+1,3)]; - else - samat{pointer} = [samat{pointer} apuData(empties(i)+1,3)]; - end -end - -samat = samat(1:pointer); - -erot = []; apuData = []; empties = []; - -tri = delaunay(new_coordinates(:,1), new_coordinates(:,2), {'Qt','Qbb','Qc','Qz'}); %Apupisteiden takia ok. -%[rivi,sarake] = find(tri>ncoords); %Jätetään huomiotta apupisteet -%tri(rivi,:) = []; -pituus = tri(:,1); -pituus = length(pituus); -parit = zeros(6*pituus,2); -for i = 1:pituus %Muodostetaan kolmikoista parit - j = 6*(i-1)+1; - parit(j,:) = tri(i,1:2); - parit(j+1,:) = tri(i,1:2:3); - parit(j+2,:) = tri(i,2:3); - parit(j+3:j+5,:) = [parit(j:j+2,2) parit(j:j+2,1)]; -end -parit = unique(parit,'rows'); -[rivi,sarake] = find(parit>ncoords); %Jätetään huomiotta apupisteet -parit(rivi,:) = []; -parit = I(parit); %Otetaan poistetut takaisin mukaan -graph = sparse(parit(:,1),parit(:,2),1, ninds, ninds); - - -%Kopioidaan samassa pisteess?olevien yksilöiden naapurustot -%silt? jolle ne laitettu. - - for i = 1:length(samat); - taulu = I(samat{i}); - [rivi,sarake] = find(graph(taulu,:)>0); - if length(rivi) > 0 - kopioitava = graph(taulu(rivi(1)),:); - for j = 1:length(taulu); - graph(taulu(j),:) = kopioitava; - graph(:,taulu(j)) = kopioitava'; - end - end - end - - %Asetetaan samassa pisteess?olevat yksilöt toistensa naapureiksi - - for i = 1:length(samat) - for j = I(samat{i}) - for k = I(samat{i}) - if k ~= j - graph(j,k) = 1; - end - end - end - end - -%Laskee maksimin klikkien ja separaattorien koolle -%Määritetään myös klikit ja separaattorit - -[ncliq, nsep, cliq, sep] = laskeKlikit(graph, ninds, ninds); - -sumcliq = sum(ncliq); -sumsep = sum(nsep); -maxCliqSize = max(find(sumcliq > 0)); -maxSepSize = max(find(sumsep > 0)); - -cliques = zeros(length(cliq), maxCliqSize); -separators = zeros(length(sep), maxSepSize); - -nollia = zeros(1, length(cliq)); -for i = 1:length(cliq); - klikki = cliq{i}; - if length(klikki)>1 - cliques(i, 1:length(klikki)) = klikki; - else - nollia(i)=1; - end -end -cliques(find(nollia==1), :) = []; - -for i = 1:length(sep); - klikki = sep{i}; - separators(i, 1:length(klikki)) = klikki; -end - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%Määritetään yksilöit?vastaavat voronoi tesselaation solut - -[vorPoints, vorCells] = voronoin(new_coordinates, {'Qbb', 'Qz'}); - -bounded = ones(length(vorCells),1); -for i=1:length(vorCells) - if isempty(vorCells{i}) || length(find(vorCells{i}==1))>0 - bounded(i)=0; - end -end - - - -vorCells = vorCells(find(bounded == 1)); -pointers = cell(length(vorCells),1); -empties = zeros(1,length(vorCells)); -X = coordinates(:,1); -Y = coordinates(:,2); - -for i=1:length(pointers) - vx = vorPoints(vorCells{i},1); - vy = vorPoints(vorCells{i},2); - IN = inpolygon(X,Y,vx,vy); - if any(IN)==0 - empties(i) = 1; - else - pointers{i} = find(IN ==1)'; - end -end - -vorCells = vorCells(find(empties == 0)); -pointers = pointers(find(empties == 0)); - -%-------------------------------------------------------------------------- - -function [ncliques, nseparators, cliques, separators] = ... - laskeKlikit(M, maxCliqSize,maxSepSize) -%Laskee samankokoisten klikkien määrän verkosta M -%ncliques(i)=kokoa i olevien klikkien määr? -%nseparators vastaavasti - -ncliques=zeros(1,maxCliqSize); -nseparators=zeros(1,maxSepSize); - -if isequal(M,[]) - return; -end - -[cliques,separators]=findCliques(M); - -for i=1:length(cliques) - ncliques(length(cliques{i}))=ncliques(length(cliques{i}))+1; -end - -%cliqmax=max(find(ncliques~=0)); -%ncliques=ncliques(1:cliqmax); - -for i=1:length(separators) - nseparators(length(separators{i}))=nseparators(length(separators{i}))+1; -end - -%sepmax=max(find(nseparators~=0)); -%nseparators=nseparators(1:sepmax); - -%-------------------------------------------------------------------------- - -function C = mysetdiff(A,B) -% MYSETDIFF Set difference of two sets of positive integers (much faster than built-in setdiff) -% C = mysetdiff(A,B) -% C = A \ B = { things in A that are not in B } -% -% Original by Kevin Murphy, modified by Leon Peshkin - -if isempty(A) - C = []; - return; -elseif isempty(B) - C = A; - return; -else % both non-empty - bits = zeros(1, max(max(A), max(B))); - bits(A) = 1; - bits(B) = 0; - C = A(logical(bits(A))); -end - - -%-------------------------------------------------------------------------- - -function logml = checkLogml(priorTerm, adjprior, cliques, separators) -% tarkistaa logml:n - -global CLIQCOUNTS; -global SEPCOUNTS; -global PARTITION; - -npops = length(unique(PARTITION)); -[cliqcounts, sepcounts] = computeCounts(cliques, separators, npops); - -CLIQCOUNTS = cliqcounts; -SEPCOUNTS = sepcounts; - - -[logml, spatialPrior] = computeLogml(adjprior, priorTerm); - -disp(['logml: ' logml2String(logml) ', spatial prior: ' logml2String(spatialPrior)]); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; - -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - -function viallinen = testaaKoordinaatit(ninds, coordinates) -% Testaa onko koordinaatit kunnollisia. - -viallinen = 1; -if ~isnumeric(coordinates) - return -end - -oikeanKokoinen = (size(coordinates,1) == ninds) & (size(coordinates,2) == 2); -if oikeanKokoinen - viallinen = 0; -end - - -%-------------------------------------------------------------------------- - -function [sumcounts, counts, logml] = ... - initialCounts(partition, data, npops, rowsFromInd, noalle) - -nloci=size(data,2); -ninds = size(data,1)/rowsFromInd; - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%-------------------------------------------------------------------------- - -function [popnames2, rowsFromInd] = findOutRowsFromInd(popnames, rows) - -ploidisuus = questdlg('Specify the type of individuals in the data: ',... - 'Individual type?', 'Haploid', 'Diploid', 'Tetraploid', ... - 'Diploid'); - -switch ploidisuus -case 'Haploid' - rowsFromInd = 1; -case 'Diploid' - rowsFromInd = 2; -case 'Tetraploid' - rowsFromInd = 4; -end - -if ~isempty(popnames) - for i = 1:size(rows,1) - popnames2{i,1} = popnames{i,1}; - rivi = rows(i,1):rows(i,2); - popnames2{i,2} = (rivi(rowsFromInd))/rowsFromInd; - end -else - popnames2 = []; -end - -%-------------------------------------------------------------------------- - -function fiksaaPartitioYksiloTasolle(rows, rowsFromInd) - -global PARTITION; -totalRows = 0; -for ind = 1:size(rows,1) - totalRows = totalRows + (rows(ind,2)-rows(ind,1)+1); -end -partitio2 = zeros(totalRows/rowsFromInd,1); - -for ind = 1:size(rows,1) - kaikkiRivit = rows(ind,1):rows(ind,2); - for riviNumero = rowsFromInd:rowsFromInd:length(kaikkiRivit) - %for riviNumero = rowsFromInd:rowsFromInd:length(rows{ind}) - %rivi = rows{ind}(riviNumero); - rivi = kaikkiRivit(riviNumero); - partitio2(rivi/rowsFromInd) = PARTITION(ind); - end -end -PARTITION = partitio2; diff --git a/matlab/parallel/spatial_parallel.m b/matlab/parallel/spatial_parallel.m deleted file mode 100644 index d44b9de..0000000 --- a/matlab/parallel/spatial_parallel.m +++ /dev/null @@ -1,2175 +0,0 @@ -function spatial_parallel(options) -% SPATICIAL_PARALLEL is the command line version of the baps partition with -% spaticial models. -% Input: options is a struct generated by parallel.m - -%-------------------------------------------------------------------------- -%- Syntax check out -%-------------------------------------------------------------------------- -outp = [options.outputMat '.txt']; -inp = [options.dataFile ' & ' options.coordinateFile]; - -if strcmp(options.fixedK, 'yes') - fixedK = 1; -else - fixedK = 0; -end - -%-------------------------------------------------------------------------- -%- Get data file location -%-------------------------------------------------------------------------- -switch options.dataType - case 'numeric' - %------------------------------------------------------------------ - %- Get name and index file location - %------------------------------------------------------------------ - try - data = load(options.dataFile); - catch - disp('*** ERROR: Incorrect BAPS numerical data.'); - return - end - ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS - if (ninds==0) - disp('*** ERROR: Incorrect BAPS numerical data.'); - return; - end - - try - coordinates = load(options.coordinateFile); - catch - disp('*** ERROR: Incorrect BAPS coordinate data.'); - return - end - viallinen = testaaKoordinaatit(ninds, coordinates); - if viallinen - disp('*** ERROR: Incorrect coordinates.'); - return - end - - if ~isempty(options.nameFile) && ~isempty(options.indexFile) - popnames = initPopNames(options.nameFile{1}, options.indexFile{1}); - else - popnames = []; - end - - disp('Pre-processing the data. This may take several minutes.'); - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rowsFromInd); - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - - case 'genepop' - kunnossa = testaaGenePopData(options.dataFile); - if kunnossa == 0 - return - end - [data,popnames] = lueGenePopData(options.dataFile); - ninds = max(data(:,end)); - coordinates = load(options.coordinateFile); - viallinen = testaaKoordinaatit(ninds, coordinates); - - if viallinen - disp('*** ERROR: Incorrect coordinates'); - return - end - - disp('Pre-processing the data. This may take several minutes.'); - - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rowsFromInd); - - case 'matlab' - struct_array = load(options.dataFile); - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'dist') - disp('*** ERROR: Incorrect matlab format.'); - return - end - elseif isfield(struct_array,'dist') %Mideva versio - c = struct_array; - else - disp('*** ERROR: Incorrect matlab format.'); - return; - end - - data = double(c.data); rowsFromInd = c.rowsFromInd; alleleCodes = c.alleleCodes; - noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; - dist = c.dist; popnames = c.popnames; Z = c.Z; - if isfield(c, 'cliques') - cliques = c.cliques; separators = c.separators; - vorPoints = c.vorPoints; vorCells = c.vorCells; - pointers = c.pointers; coordinates = c.coordinates; - clear c; - else - % loading separate coordinate file - ninds = max(data(:,end)); - coordinates = load(options.coordinateFile); - viallinen = testaaKoordinaatit(ninds, coordinates); - - if viallinen - disp('*** ERROR: Incorrect coordinates.'); - return - end - - disp('Pre-processing the data. This may take several minutes.'); - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - end - otherwise - error('*** ERROR: data type is not specified or unknown.'); -end - -% --------------------------------------------------------- -% - Stochastic search algorithm -% --------------------------------------------------------- -global PARTITION; global COUNTS; -global SUMCOUNTS; -global SEPCOUNTS; global CLIQCOUNTS; -clearGlobalVars; -npopstext = []; - -npopstextExtra = options.initialK; -if length(npopstextExtra)>=255 - npopstextExtra = npopstextExtra(1:255); - npopstext = [npopstext ' ' npopstextExtra]; - teksti = 'The input field length limit (255 characters) was reached. Input more values: '; -else - % ----------------------------------------------------- - % Set the limit of the input value. - % Modified by Jing Tang, 30.12.2005 - if max(npopstextExtra) > size(data,1) - error('Values larger than the sample size are not accepted. '); - else - npopstext = [npopstext ' ' num2str(npopstextExtra)]; - end -end - -clear ready; clear teksti; -if isempty(npopstext) || length(npopstext)==1 - return -else - npopsTaulu = str2num(npopstext); - ykkoset = find(npopsTaulu==1); - npopsTaulu(ykkoset) = []; % Mikäli ykkösi?annettu ylärajaksi, ne poistetaan. - if isempty(npopsTaulu) - return - end - clear ykkoset; -end - -c.data=data; -c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -c.dist=dist; c.Z=Z; c.rowsFromInd = rowsFromInd; -c.cliques = cliques; c.separators = separators; - -ninds = length(unique(data(:,end))); -ekat = (1:rowsFromInd:ninds*rowsFromInd)'; -c.rows = [ekat ekat+rowsFromInd-1]; - -if fixedK - % Only the first value of npopsTaulu is used - npops = npopsTaulu(1); - nruns = length(npopsTaulu); - [logml, npops, partitionSummary]=spatialMix_fixK(c,npops,nruns); -else - [logml, npops, partitionSummary]=spatialMix(c,npopsTaulu); -end - -if logml==1 - return -end - -data = noIndex(data,noalle); - -[varmuus,changesInLogml] = writeMixtureInfo(logml, rowsFromInd, data, adjprior, priorTerm, ... - outp,inp,partitionSummary, popnames, cliques, separators, fixedK); - - -viewMixPartition(PARTITION, popnames); -if isequal(popnames, []) - names = pointers; -else - names = cell(size(pointers)); - indices = zeros(size(popnames(:,2))); - for i=1:length(popnames(:,2)); - indices(i) = popnames{i,2}; - end - for i = 1:length(pointers) - inds = pointers{i}; - namesInCell = []; - for j = 1:length(inds) - ind = inds(j); - I = find(indices > ind); - if isempty(I) - nameIndex = length(indices); - else - nameIndex = min(I) -1; - end - name = popnames{nameIndex}; - namesInCell = [namesInCell name]; - end - names{i} = namesInCell; - end -end -vorPlot(vorPoints, vorCells, PARTITION, pointers, coordinates, names); - -if exist('baps4_output.baps','file') - copyfile('baps4_output.baps',outp) - delete('baps4_output.baps') -end - -c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; -c.alleleCodes = alleleCodes; c.adjprior = adjprior; c.popnames = popnames; -c.rowsFromInd = rowsFromInd; c.data = data; c.npops = npops; -c.noalle = noalle; c.mixtureType = 'spatial'; -c.pointers = pointers; c.vorPoints = vorPoints; c.vorCells = vorCells; -c.coordinates = coordinates; c.varmuus = varmuus; c.names = names; -c.changesInLogml = changesInLogml; % added by jing - 22.11.2006 -c.logml = logml; % added for result comparing. - -fprintf(1,'Saving the result...') -try -% save(options.outputMat, 'c'); - save(options.outputMat, 'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - fprintf(1,'Finished.\n'); -catch - display('*** ERROR in saving the result.'); -end - -% --------------------------------------------------------- -% - Subfunctions -% --------------------------------------------------------- - - -%---------------------------------------------------------- -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; - -global SEPCOUNTS; SEPCOUNTS = []; -global CLIQCOUNTS; CLIQCOUNTS = []; - -%------------------------------------------------------------------------------------- - - -function rows = computeRows(rowsFromInd, inds, ninds) -% On annettu yksilöt inds. Funktio palauttaa vektorin, joka -% sisältää niiden rivien numerot, jotka sisältävät yksilöiden -% dataa. - -rows = inds(:, ones(1,rowsFromInd)); -rows = rows*rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]); -rows = rows - miinus; -rows = reshape(rows', [1,rowsFromInd*ninds]); - - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - -%-------------------------------------------------------------------------- - - -function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -% Suorittaa yksilön seuraavan tilan arvonnan - -y = logml + muutokset; % siirron jälkeiset logml:t -y = y - max(y); -y = exp(y); -summa = sum(y); -y = y/summa; -y = cumsum(y); - -i2 = rand_disc(y); % uusi kori -suurin = muutokset(i2); - - -%-------------------------------------------------------------------------------------- - - -function svar=rand_disc(CDF) -%returns an index of a value from a discrete distribution using inversion method -slump=rand; -har=find(CDF>slump); -svar=har(1); - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ... - cliques, separators, adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksil?ind -% siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global CLIQCOUNTS; -global SEPCOUNTS; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, rowsFromInd, diffInCounts, ... - cliques, separators, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -%global POP_LOGML; -global CLIQCOUNTS; -global SEPCOUNTS; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -diffInCliqCounts = CLIQCOUNTS(:,i1); -diffInSepCounts = SEPCOUNTS(:,i1); - - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = 0; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ... - adjprior, priorTerm, i2, cliques, separators); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; global CLIQCOUNTS; -global SUMCOUNTS; global SEPCOUNTS; -%global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, muuttuvat); -diffInSepCounts = computeDiffInCliqCounts(separators, muuttuvat); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm) -% Palauttaa yksilöt järjestyksess?siten, ett?ensimmäisen?on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, ... - data, adjprior, priorTerm, logml, cliques, separators) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?ind siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - - -global COUNTS; global SUMCOUNTS; -global PARTITION; %global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -[emptyPop, pops] = findEmptyPop(npops); - -i1 = PARTITION(ind); -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - -% Asetetaan muillekin tyhjille populaatioille sama muutos, kuin -% emptyPop:lle - -if emptyPop > 0 - empties = mysetdiff((1:npops), [i2 i1]); - muutokset(empties) = muutokset(emptyPop); -end - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2(i1, rowsFromInd, ... - data, adjprior, priorTerm, logml, cliques, separators); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli korin i1 kaikki yksilöt siirretään -% koriin i. -% Laskee muutokset vain yhdelle tyhjälle populaatiolle, muille tulee -% muutokseksi 0. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -[emptyPop, pops] = findEmptyPop(npops); - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -inds = find(PARTITION == i1); -rows = computeRows(rowsFromInd, inds, length(inds)); - -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); -diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); -diffInSepCounts = computeDiffInCliqCounts(separators, inds); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i1) = 0; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = diffInCliqCounts; -SEPCOUNTS(:,i1) = diffInSepCounts; - - - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ... - data, adjprior, priorTerm, i1, logml, cliques, separators) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. -% Laskee vain yhden tyhjän populaation, muita kohden muutokseksi jää 0. - - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = computeRows(rowsFromInd, inds, ninds); - - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); - diffInSepCounts = computeDiffInCliqCounts(separators, inds); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - - [emptyPop, pops] = findEmptyPop(npops); - i2 = [pops(find(pops~=i1))]; - if emptyPop > 0 - i2 =[i2 emptyPop]; - end - - for i = i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(pop2,i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; - end - - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - end -end - -%-------------------------------------------------------------------------- - -function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ... - priorTerm, logml, cliques, separators, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?i vaihtaisi koria i1:n ja i2:n välill? - -global COUNTS; global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; global SEPCOUNTS; - -ninds = length(inds); -muutokset = zeros(ninds,1); -cliqsize = size(CLIQCOUNTS,2); -sepsize = size(SEPCOUNTS, 2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %mist? - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; - - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); - diffInSepCounts = computeDiffInCliqCounts(separators, ind); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) - diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) + diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) - diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) + diffInSepCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) + diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) - diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) + diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) - diffInSepCounts; - -end - -%-------------------------------------------------------------------------- - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - -%------------------------------------------------------------------------------------ - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; -global SEPCOUNTS; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); -CLIQCOUNTS = CLIQCOUNTS(:,notEmpty); -SEPCOUNTS = SEPCOUNTS(:,notEmpty); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%---------------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - - -%--------------------------------------------------------------------------------------- - - -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); % Tält?yksilölt?puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%---------------------------------------------------------------------------------------- - - -function [Z, dist] = newGetDistances(data, rowsFromInd) - -ninds = max(data(:,end)); -nloci = size(data,2)-1; -riviLkm = nchoosek(ninds,2); - -empties = find(data<0); -data(empties)=0; -data = uint8(data); % max(noalle) oltava <256 - -pariTaulu = zeros(riviLkm,2); -aPointer=1; -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a; - pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)'; - aPointer = aPointer+ninds-a; -end - -eka = pariTaulu(:,ones(1,rowsFromInd)); -eka = eka * rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -eka = eka - miinus; - -toka = pariTaulu(:,ones(1,rowsFromInd)*2); -toka = toka * rowsFromInd; -toka = toka - miinus; - -%eka = uint16(eka); -%toka = uint16(toka); - -summa = zeros(riviLkm,1); -vertailuja = zeros(riviLkm,1); - -clear pariTaulu; clear miinus; - -x = zeros(size(eka)); x = uint8(x); -y = zeros(size(toka)); y = uint8(y); - -for j=1:nloci; - - for k=1:rowsFromInd - x(:,k) = data(eka(:,k),j); - y(:,k) = data(toka(:,k),j); - end - - for a=1:rowsFromInd - for b=1:rowsFromInd - vertailutNyt = double(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa+double(lisays); - end - end -end - -clear x; clear y; clear vertailutNyt; -nollat = find(vertailuja==0); -dist = zeros(length(vertailuja),1); -dist(nollat) = 1; -muut = find(vertailuja>0); -dist(muut) = summa(muut)./vertailuja(muut); -clear summa; clear vertailuja; - -Z = linkage(dist'); - - -%---------------------------------------------------------------------------------------- - - -function [Z, distances]=getDistances(data_matrix,nclusters) - -%finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance -%gives partition in 8-bit format -%allocates all alleles of a single individual into the same basket -%data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row, -%i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row -%missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -distances=zeros(nchoosek(n,2),1); -pointer=1; -for i=1:n-1 - i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci); - for j=i+1:n - d_ij=0; - j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci); - vertailuja = 0; - for k=1:size(i_data,1) - for l=1:size(j_data,1) - here_i=find(i_data(k,:)>=0); - here_j=find(j_data(l,:)>=0); - here_joint=intersect(here_i,here_j); - vertailuja = vertailuja + length(here_joint); - d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint))); - end - end - d_ij = d_ij / vertailuja; - distances(pointer)=d_ij; - pointer=pointer+1; - end -end - -Z=linkage(distances'); - - - -%---------------------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile, indexFile) -%Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto -% eivät olleet yht?pitki? - -popnames = []; -indices = load(indexFile); - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) & ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -if length(names) ~= length(indices) - disp('The number of population names must be equal to the number of '); - disp('entries in the file specifying indices of the first individuals of '); - disp('each population.'); - return; -end - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = indices(i); -end - - -%----------------------------------------------------------------------------------- -% Laskee arvot cliqcounts:lle ja sepcounts:lle - -function [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops) - -global PARTITION; -ncliq = size(cliques,1); -nsep = size(separators,1); - -cliqPartition = zeros(ncliq, size(cliques,2)); -sepPartition = zeros(nsep, size(separators, 2)); - -apuCliq = find(cliques > 0); -apuSep = find(separators > 0); - -cliqPartition(apuCliq) = PARTITION(cliques(apuCliq)); -sepPartition(apuSep) = PARTITION(separators(apuSep)); - - -cliqcounts = zeros(ncliq, npops); -for i = 1:npops - cliqcounts(:,i) = sum(cliqPartition == i, 2); -end - - -sepcounts = zeros(nsep, npops); -for i = 1:npops - sepcounts(:,i) = sum(sepPartition == i, 2); -end - -%------------------------------------------------------------------------- - -function diffInCliqCounts = computeDiffInCliqCounts(cliques, inds) -% Laskee muutoksen CLIQCOUNTS:ssa (tai SEPCOUNTS:ssa, jos syötteen? -% separators) kun yksilöt inds siirretään. -% diffInCliqcounts on ncliq*1 taulu, joka on CLIQCOUNTS:n sarakkeesta josta -% yksilöt inds siirretään ja lisättäv?sarakkeeseen, johon yksilöt -% siirretään. - -ncliq = size(cliques,1); -diffInCliqCounts = zeros(ncliq,1); -ninds = length(inds); -for i = 1:ninds - ind = inds(i); - rivit = sum((cliques == ind),2); - diffInCliqCounts = diffInCliqCounts + rivit; -end - - -%----------------------------------------------------------------------- - -function [logml, spatialPrior] = computeLogml(adjprior,priorTerm) - -%global GAMMA_LN; -global CLIQCOUNTS; -global SEPCOUNTS; -%global PARTITION; - -notEmpty = any(CLIQCOUNTS); -npops = length(find(notEmpty == 1)); -sumcliq=sum(CLIQCOUNTS, 2); -sumsep=sum(SEPCOUNTS, 2); -ncliq = size(CLIQCOUNTS, 1); -nsep = size(SEPCOUNTS, 1); - -cliqsizes = sum(CLIQCOUNTS, 2)'; -sepsizes = sum(SEPCOUNTS, 2)'; -cliqsizes = min([cliqsizes; npops*ones(1,ncliq)])'; -sepsizes = min([sepsizes; npops*ones(1,nsep)])'; - -klikkitn = sum(sum(gammaln(CLIQCOUNTS(:,notEmpty) + repmat(1./cliqsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./cliqsizes))) ... - - sum(gammaln(sumcliq + 1)); - -septn = sum(sum(gammaln(SEPCOUNTS(:,notEmpty) + repmat(1./sepsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./sepsizes))) ... - - sum(gammaln(sumsep + 1)); - -spatialPrior = (klikkitn - septn); - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = size(COUNTS,3); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 z]) + COUNTS) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS),2) - priorTerm; - -logml = sum(popLogml) + spatialPrior; - -%-------------------------------------------------------------------------- - - -function initializeGammaln(ninds, rowsFromInd, maxSize) -%Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j) -global GAMMA_LN; -GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxSize); -for i=1:(ninds+1)*rowsFromInd - for j=1:maxSize - GAMMA_LN(i,j)=gammaln((i-1) + 1/j); - end -end - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - - -%---------------------------------------------------------------------------- - - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimmäinen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) | isequal(line2,-1) | isequal(line3,-1)) - disp('Incorrect file format 1168'); fclose(fid); - return -end -if (testaaPop(line1)==1 | testaaPop(line2)==1) - disp('Incorrect file format 1172'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi tällöin lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format 1180'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin nelj?täytyy sisältää pilkku. - disp('Incorrect file format 1185'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1195'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 & ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format 1206'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan jälkeen - if isequal(line4,-1) - disp('Incorrect file format 1212'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin täytyy sisältää pilkku. - disp('Incorrect file format 1217'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1228'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - -%------------------------------------------------------ - - -function [data, popnames] = lueGenePopData(tiedostonNimi) - -fid = fopen(tiedostonNimi); -line = fgetl(fid); %ensimmäinen rivi -line = fgetl(fid); %toinen rivi -count = rivinSisaltamienMjonojenLkm(line); - -line = fgetl(fid); -lokusRiveja = 1; -while (testaaPop(line)==0) - lokusRiveja = lokusRiveja+1; - line = fgetl(fid); -end - -if lokusRiveja>1 - nloci = lokusRiveja; -else - nloci = count; -end - -popnames = cell(10,2); -data = zeros(100, nloci+1); -nimienLkm=0; -ninds=0; -poimiNimi=1; -digitFormat = -1; -while line ~= -1 - line = fgetl(fid); - - if poimiNimi==1 - %Edellinen rivi oli 'pop' - nimienLkm = nimienLkm+1; - ninds = ninds+1; - if nimienLkm>size(popnames,1); - popnames = [popnames; cell(10,2)]; - end - nimi = lueNimi(line); - if digitFormat == -1 - digitFormat = selvitaDigitFormat(line); - divider = 10^digitFormat; - end - popnames{nimienLkm, 1} = {nimi}; %Näin se on greedyMix:issäkin?!? - popnames{nimienLkm, 2} = ninds; - poimiNimi=0; - - data = addAlleles(data, ninds, line, divider); - - elseif testaaPop(line) - poimiNimi = 1; - - elseif line ~= -1 - ninds = ninds+1; - data = addAlleles(data, ninds, line, divider); - end -end - -data = data(1:ninds*2,:); -popnames = popnames(1:nimienLkm,:); -fclose(fid); - - -%------------------------------------------------------- - -function nimi = lueNimi(line) -%Palauttaa line:n alusta sen osan, joka on ennen pilkkua. -n = 1; -merkki = line(n); -nimi = ''; -while ~isequal(merkki,',') - nimi = [nimi merkki]; - n = n+1; - merkki = line(n); -end - -%------------------------------------------------------- - -function df = selvitaDigitFormat(line) -% line on ensimmäinen pop-sanan jälkeinen rivi -% Genepop-formaatissa olevasta datasta. funktio selvittää -% rivin muodon perusteella, ovatko datan alleelit annettu -% 2 vai 3 numeron avulla. - -n = 1; -merkki = line(n); -while ~isequal(merkki,',') - n = n+1; - merkki = line(n); -end - -while ~any(merkki == '0123456789'); - n = n+1; - merkki = line(n); -end -numeroja = 0; -while any(merkki == '0123456789'); - numeroja = numeroja+1; - n = n+1; - merkki = line(n); -end - -df = numeroja/2; - - -%------------------------------------------------------ - - -function count = rivinSisaltamienMjonojenLkm(line) -% Palauttaa line:n sisältämien mjonojen lukumäärän. -% Mjonojen väliss?täytyy olla välilyönti. -count = 0; -pit = length(line); -tila = 0; %0, jos odotetaan välilyöntej? 1 jos odotetaan muita merkkej? -for i=1:pit - merkki = line(i); - if (isspace(merkki) & tila==0) - %Ei tehd?mitään. - elseif (isspace(merkki) & tila==1) - tila = 0; - elseif (~isspace(merkki) & tila==0) - tila = 1; - count = count+1; - elseif (~isspace(merkki) & tila==1) - %Ei tehd?mitään - end -end - -%------------------------------------------------------- - -function pal = testaaPop(rivi) -% pal=1, mikäli rivi alkaa jollain seuraavista -% kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa -% tapauksissa pal=0. - -if length(rivi)<3 - pal = 0; - return -end -if (all(rivi(1:3)=='Pop') | ... - all(rivi(1:3)=='pop') | ... - all(rivi(1:3)=='POP')) - pal = 1; - return -else - pal = 0; - return -end - - -%-------------------------------------------------------- - - -function data = addAlleles(data, ind, line, divider) -% Lisaa BAPS-formaatissa olevaan datataulukkoon -% yksilöä ind vastaavat rivit. Yksilön alleelit -% luetaan genepop-formaatissa olevasta rivist? -% line. Jos data on 3 digit formaatissa on divider=1000. -% Jos data on 2 digit formaatissa on divider=100. - -nloci = size(data,2)-1; -if size(data,1) < 2*ind - data = [data; zeros(100,nloci+1)]; -end - -k=1; -merkki=line(k); -while ~isequal(merkki,',') - k=k+1; - merkki=line(k); -end -line = line(k+1:end); -clear k; clear merkki; - -alleeliTaulu = sscanf(line,'%d'); - -if length(alleeliTaulu)~=nloci - disp('Incorrect data format.'); -end - -for j=1:nloci - ekaAlleeli = floor(alleeliTaulu(j)/divider); - if ekaAlleeli==0 ekaAlleeli=-999; end; - tokaAlleeli = rem(alleeliTaulu(j),divider); - if tokaAlleeli==0 tokaAlleeli=-999; end - - data(2*ind-1,j) = ekaAlleeli; - data(2*ind,j) = tokaAlleeli; -end - -data(2*ind-1,end) = ind; -data(2*ind,end) = ind; - -%------------------------------------------------------------------- - - -function [varmuus,changesInLogml] = writeMixtureInfo(logml, rowsFromInd, data, adjprior, ... - priorTerm, outPutFile, inputFile, partitionSummary, popnames, ... - cliques, separators, fixedK) - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global LOGDIFF; -ninds = size(data,1)/rowsFromInd; -npops = size(COUNTS,3); -names = (size(popnames,1) == ninds); %Tarkistetaan ett?nimet viittaavat yksilöihin - -if length(outPutFile)>0 - fid = fopen(outPutFile,'w'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Data file: ' inputFile]); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp(['Best Partition: ']); -if (fid ~= -1) - fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',[new_line]); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',[text]); - fprintf(fid,'\n'); - end - end -end - -disp(' '); -disp(' '); -disp('Changes in log(marginal likelihood) if indvidual i is moved to group j:'); -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if indvidual i is moved to group j:']); fprintf(fid, '\n'); -end - -if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku ' ind' blanks(6+erotus)]; -else - ekarivi = ' ind '; -end - -for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; -end -disp(ekarivi); -if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); -end - -ninds = size(data,1)/rowsFromInd; -varmuus = zeros(ninds,1); - -changesInLogml = LOGDIFF'; -for ind = 1:ninds - %[muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, data, ... - % adjprior, priorTerm, logml, cliques, separators); - %changesInLogml(:,ind) = muutokset; - muutokset = changesInLogml(:,ind); - if sum(exp(muutokset))>0 - varmuus(ind) = 1 - 1/sum(exp(muutokset)); - else - varmuus(ind) = 0; - end - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - end -end - -disp(' '); disp(' '); -disp('KL-divergence matrix in PHYLIP format:'); -dist_mat = zeros(npops, npops); -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); -end - -maxnoalle = size(COUNTS,1); -nloci = size(COUNTS,2); -d = zeros(maxnoalle, nloci, npops); -prior = adjprior; -prior(find(prior==1))=0; -nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. -prior(1,nollia)=1; -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end -% ekarivi = blanks(7); -% for pop = 1:npops -% ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; -% end -ekarivi = num2str(npops); -disp(ekarivi); - -if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); -end - -for pop1 = 1:npops - rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - dist_mat(pop1,pop2) = div; - end -% disp(rivi); -% if (fid ~= -1) -% fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); -% end -end - -dist_mat = dist_mat + dist_mat'; % make it symmetric -for pop1 = 1:npops - rivi = ['Cluster_' num2str(pop1) ' ']; - for pop2 = 1:npops - rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n'); -end - -partitionSummaryKaikki = partitionSummary; -partitionSummary =[]; -for i=1:size(partitionSummaryKaikki,3) - partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)]; -end -[I,J] = find(partitionSummaryKaikki(:,2,:)>-1e49); -partitionSummaryKaikki = partitionSummaryKaikki(I,:,:); - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end -end - -if ~fixedK - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end - end - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - - -%-------------------------------------------------------------------------- - - -function ninds = testaaOnkoKunnollinenBapsData(data) -%Tarkastaa onko viimeisess?sarakkeessa kaikki -%luvut 1,2,...,n johonkin n:ään asti. -%Tarkastaa lisäksi, ett?on vähintään 2 saraketta. -if size(data,1)<2 - ninds = 0; return; -end -lastCol = data(:,end); -ninds = max(lastCol); -if ~isequal((1:ninds)',unique(lastCol)) - ninds = 0; return; -end - - -%-------------------------------------------------------------------------- - -function [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates) -%Laskee yksilöiden luonnolliset naapurit koordinaateista. -%Naapurit lasketaan lisäämäll?koordinaatteihin pisteit? -%jotta kutakin yksilöä vastaisi rajoitettu voronoi-solu -%Puuttuvat koordinaatit (0,0) tulevat erakkopisteiksi -% -%Määrittää lisäksi yksilöit?vastaavat voronoi tesselaation solut. -%vorPoints:ssa on solujen kulmapisteet ja vorCells:ss?kunkin solun -%kulmapisteiden indeksit. Pointers{i} sisältää solussa i olevien yksilöiden -%indeksit. - - - -ninds = length(coordinates); -[I,J] = find(coordinates>0 | coordinates <0); %Käsitellään vain yksilöit? joilta koordinaatit -I = unique(I); %olemassa -ncoords = length(I); -new_coordinates = addPoints(coordinates(I,:)); %Ympäröidään yksilöt apupisteill? - -apuData = [new_coordinates(1:ncoords,:) (1:ncoords)']; -apuData = sortrows(apuData,[1 2]); -erot = [diff(apuData(:,1)) diff(apuData(:,2))]; -empties = find(erot(:,1)==0 & erot(:,2)==0); -samat = cell(length(empties),1); -pointer = 0; - -for i = 1:length(empties) - if i == 1 | empties(i) - empties(i-1) > 1 %Tutkitaan onko eri pisteess?kuin edellinen - pointer = pointer+1; - samat{pointer} = [apuData(empties(i),3) apuData(empties(i)+1,3)]; - else - samat{pointer} = [samat{pointer} apuData(empties(i)+1,3)]; - end -end - -samat = samat(1:pointer); -erot = []; apuData = []; empties = []; - -tri = delaunay(new_coordinates(:,1), new_coordinates(:,2), {'Qt','Qbb','Qc','Qz'}); %Apupisteiden takia ok. -%[rivi,sarake] = find(tri>ncoords); %Jätetään huomiotta apupisteet -%tri(rivi,:) = []; -pituus = tri(:,1); -pituus = length(pituus); -parit = zeros(6*pituus,2); -for i = 1:pituus %Muodostetaan kolmikoista parit - j = 6*(i-1)+1; - parit(j,:) = tri(i,1:2); - parit(j+1,:) = tri(i,1:2:3); - parit(j+2,:) = tri(i,2:3); - parit(j+3:j+5,:) = [parit(j:j+2,2) parit(j:j+2,1)]; -end -parit = unique(parit,'rows'); -[rivi,sarake] = find(parit>ncoords); %Jätetään huomiotta apupisteet -parit(rivi,:) = []; -parit = I(parit); %Otetaan poistetut takaisin mukaan -graph = sparse(parit(:,1),parit(:,2),1, ninds, ninds); - - -%Kopioidaan samassa pisteess?olevien yksilöiden naapurustot -%silt? jolle ne laitettu. - -for i = 1:length(samat); - taulu = I(samat{i}); - [rivi,sarake] = find(graph(taulu,:)>0); - if length(rivi) > 0 - kopioitava = graph(taulu(rivi(1)),:); - for j = 1:length(taulu); - graph(taulu(j),:) = kopioitava; - graph(:,taulu(j)) = kopioitava'; - end - end -end - -%Asetetaan samassa pisteess?olevat yksilöt toistensa naapureiksi - -for i = 1:length(samat) - for j = I(samat{i}) - for k = I(samat{i}) - if k ~= j - graph(j,k) = 1; - end - end - end -end - -%Laskee maksimin klikkien ja separaattorien koolle -%Määritetään myös klikit ja separaattorit - -[ncliq, nsep, cliq, sep] = laskeKlikit(graph, ninds, ninds); - -sumcliq = sum(ncliq); -sumsep = sum(nsep); -maxCliqSize = max(find(sumcliq > 0)); -maxSepSize = max(find(sumsep > 0)); - -cliques = zeros(length(cliq), maxCliqSize); -separators = zeros(length(sep), maxSepSize); - -nollia = zeros(1, length(cliq)); -for i = 1:length(cliq); - klikki = cliq{i}; - if length(klikki)>1 - cliques(i, 1:length(klikki)) = klikki; - else - nollia(i)=1; - end -end -cliques(find(nollia==1), :) = []; - - -for i = 1:length(sep); - klikki = sep{i}; - separators(i, 1:length(klikki)) = klikki; -end - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%Määritetään yksilöit?vastaavat voronoi tesselaation solut - - - -[vorPoints, vorCells] = voronoin(new_coordinates, {'Qbb', 'Qz'}); - - -bounded = ones(length(vorCells),1); -for i=1:length(vorCells) - if (isempty(vorCells{i})) | (length(find(vorCells{i}==1))>0) - bounded(i)=0; - end -end - - - -vorCells = vorCells(bounded == 1); - -pointers = cell(length(vorCells),1); -empties = zeros(1,length(vorCells)); -X = coordinates(:,1); -Y = coordinates(:,2); - -for i=1:length(pointers) - vx = vorPoints(vorCells{i},1); - vy = vorPoints(vorCells{i},2); - IN = inpolygon(X,Y,vx,vy); - if any(IN)==0 - empties(i) = 1; - else - pointers{i} = find(IN ==1); - end -end - - -%figure -%hold on -% -%for i = 1:length(vorCells) -% vx = vorPoints(vorCells{i},1); -% vy = vorPoints(vorCells{i},2); -% k = convhull(vx,vy); -% if any(pointers{i}) -% patch(vx(k), vy(k),'y'); -% else -% plot(vx(k), vy(k)); -% end -%end - -%plot(coordinates(:,1), coordinates(:,2), 'r*'); -%plot(new_coordinates(ninds+1:end,1), new_coordinates(ninds+1:end,2), 'b+'); -%axis([-2 7 -2 8]); - -vorCells = vorCells(find(empties == 0)); -pointers = pointers(find(empties == 0)); - - - - -%-------------------------------------------------------------------------- - -function [ncliques, nseparators, cliques, separators] = ... - laskeKlikit(M, maxCliqSize,maxSepSize) -%Laskee samankokoisten klikkien määrän verkosta M -%ncliques(i)=kokoa i olevien klikkien määr? -%nseparators vastaavasti - -ncliques=zeros(1,maxCliqSize); -nseparators=zeros(1,maxSepSize); - -if isequal(M,[]) - return; -end - -[cliques,separators]=findCliques(M); - -for i=1:length(cliques) - ncliques(length(cliques{i}))=ncliques(length(cliques{i}))+1; -end - -%cliqmax=max(find(ncliques~=0)); -%ncliques=ncliques(1:cliqmax); - -for i=1:length(separators) - nseparators(length(separators{i}))=nseparators(length(separators{i}))+1; -end - -%sepmax=max(find(nseparators~=0)); -%nseparators=nseparators(1:sepmax); - -%-------------------------------------------------------------------------- - -function C = mysetdiff(A,B) -% MYSETDIFF Set difference of two sets of positive integers (much faster than built-in setdiff) -% C = mysetdiff(A,B) -% C = A \ B = { things in A that are not in B } -% -% Original by Kevin Murphy, modified by Leon Peshkin - -if isempty(A) - C = []; - return; -elseif isempty(B) - C = A; - return; -else % both non-empty - bits = zeros(1, max(max(A), max(B))); - bits(A) = 1; - bits(B) = 0; - C = A(logical(bits(A))); -end - - -%-------------------------------------------------------------------------- - -function logml = checkLogml(priorTerm, adjprior, cliques, separators) -% tarkistaa logml:n - -global CLIQCOUNTS; -global SEPCOUNTS; -global PARTITION; - -npops = length(unique(PARTITION)); -[cliqcounts, sepcounts] = computeCounts(cliques, separators, npops); - -CLIQCOUNTS = cliqcounts; -SEPCOUNTS = sepcounts; - - -[logml, spatialPrior] = computeLogml(adjprior, priorTerm); - -disp(['logml: ' logml2String(logml) ', spatial prior: ' logml2String(spatialPrior)]); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - -function viallinen = testaaKoordinaatit(ninds, coordinates) -% Testaa onko koordinaatit kunnollisia. - -viallinen = 1; -if ~isnumeric(coordinates) - return -end - -oikeanKokoinen = (size(coordinates,1) == ninds) & (size(coordinates,2) == 2); -if oikeanKokoinen - viallinen = 0; -end - -%-------------------------------------------------------------------------- - -function varmuus = laskeVarmuus(rowsFromInd, data, adjprior, priorTerm, ... - logml, cliques, separators, ninds); - -varmuus = zeros(ninds,1); - -for ind=1:ninds - muutokset = laskeMuutokset(ind, rowsFromInd, data, adjprior, ... - priorTerm, logml, cliques, separators); - varmuus(ind) = 1/sum(exp(muutokset)); - -end - - - diff --git a/matlab/parallel/tulostaAdmixtureTiedot.m b/matlab/parallel/tulostaAdmixtureTiedot.m deleted file mode 100644 index cf84ff9..0000000 --- a/matlab/parallel/tulostaAdmixtureTiedot.m +++ /dev/null @@ -1,63 +0,0 @@ -%-------------------------------------------------------------------------- - - -function tulostaAdmixtureTiedot(proportions, uskottavuus, alaRaja, niter) -h0 = findobj('Tag','filename1_text'); -inputf = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outf = get(h0,'String'); clear h0; - -if length(outf)>0 - fid = fopen(outf,'a'); -else - fid = -1; - diary('baps4_output.baps'); % save in text anyway. -end - -ninds = length(uskottavuus); -npops = size(proportions,2); -disp(' '); -dispLine; -disp('RESULTS OF ADMIXTURE ANALYSIS BASED'); -disp('ON MIXTURE CLUSTERING OF INDIVIDUALS'); -disp(['Data file: ' inputf]); -disp(['Number of individuals: ' num2str(ninds)]); -disp(['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); -disp(' '); -if fid ~= -1 - fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['RESULTS OF ADMIXTURE ANALYSIS BASED']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['ON MIXTURE CLUSTERING OF INDIVIDUALS']); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Data file: ' inputf]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Number of individuals: ' num2str(ninds)]); fprintf(fid, '\n'); - fprintf(fid,'%s \n', ['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); fprintf(fid, '\n'); - fprintf(fid, '\n'); -end - -ekaRivi = blanks(6); -for pop = 1:npops - ekaRivi = [ekaRivi blanks(3-floor(log10(pop))) num2str(pop) blanks(2)]; -end -ekaRivi = [ekaRivi blanks(1) 'p']; % Added on 29.08.06 -disp(ekaRivi); -for ind = 1:ninds - rivi = [num2str(ind) ':' blanks(4-floor(log10(ind)))]; - if any(proportions(ind,:)>0) - for pop = 1:npops-1 - rivi = [rivi proportion2str(proportions(ind,pop)) blanks(2)]; - end - rivi = [rivi proportion2str(proportions(ind,npops)) ': ']; - rivi = [rivi ownNum2Str(uskottavuus(ind))]; - end - disp(rivi); - if fid ~= -1 - fprintf(fid,'%s \n',[rivi]); fprintf(fid,'\n'); - end -end -if fid ~= -1 - fclose(fid); -else - diary off -end - diff --git a/matlab/parallel/uipickfiles.m b/matlab/parallel/uipickfiles.m deleted file mode 100644 index b3120ab..0000000 --- a/matlab/parallel/uipickfiles.m +++ /dev/null @@ -1,803 +0,0 @@ -function out = uipickfiles(varargin) -%uipickfiles: GUI program to select file(s) and/or directories. -% -% Syntax: -% files = uipickfiles('PropertyName',PropertyValue,...) -% -% The current directory can be changed by operating in the file navigator: -% double-clicking on a directory in the list to move further down the tree, -% using the popup menu to move up the tree, typing a path in the box to -% move to any directory or right-clicking on the path box to revisit a -% previously-listed directory. -% -% Files can be added to the list by double-clicking or selecting files -% (non-contiguous selections are possible with the control key) and -% pressing the Add button. Files in the list can be removed or re-ordered. -% When finished, a press of the Done button will return the full paths to -% the selected files in a cell array, structure array or character array. -% If the Cancel button is pressed then zero is returned. -% -% The following optional property/value pairs can be specified as arguments -% to control the indicated behavior: -% -% Property Value -% ---------- ---------------------------------------------------------- -% FilterSpec String to specify starting directory and/or file filter. -% Ex: 'C:\bin' will start up in that directory. '*.txt' -% will list only files ending in '.txt'. 'c:\bin\*.txt' will -% do both. Default is to start up in the current directory -% and list all files. Can be changed with the GUI. -% -% REFilter String containing a regular expression used to filter the -% file list. Ex: '\.m$|\.mat$' will list files ending in -% '.m' and '.mat'. Default is empty string. Can be used -% with FilterSpec and both filters are applied. Can be -% changed with the GUI. -% -% Prompt String containing a prompt appearing in the title bar of -% the figure. Default is 'Select files'. -% -% NumFiles Scalar or vector specifying number of files that must be -% selected. A scalar specifies an exact value; a two-element -% vector can be used to specify a range, [min max]. The -% function will not return unless the specified number of -% files have been chosen. Default is [] which accepts any -% number of files. -% -% Output String specifying the data type of the output: 'cell', -% 'struct' or 'char'. Specifying 'cell' produces a cell -% array of strings, the strings containing the full paths of -% the chosen files. 'Struct' returns a structure array like -% the result of the dir function except that the 'name' field -% contains a full path instead of just the file name. 'Char' -% returns a character array of the full paths. This is most -% useful when you have just one file and want it in a string -% instead of a cell array containing just one string. The -% default is 'cell'. -% -% All properties and values are case-insensitive and need only be -% unambiguous. For example, -% -% files = uipickfiles('num',1,'out','ch') -% -% is valid usage. - -% Version: 1.0, 25 April 2006 -% Author: Douglas M. Schwarz -% Email: dmschwarz=ieee*org, dmschwarz=urgrad*rochester*edu -% Real_email = regexprep(Email,{'=','*'},{'@','.'}) - - -% Define properties and set default values. -prop.filterspec = '*'; -prop.refilter = ''; -prop.prompt = 'Select files'; -prop.numfiles = []; -prop.output = 'cell'; - -% Process inputs and set prop fields. -properties = fieldnames(prop); -arg_index = 1; -while arg_index <= nargin - arg = varargin{arg_index}; - if ischar(arg) - prop_index = find(strncmpi(arg,properties,length(arg))); - if length(prop_index) == 1 - prop.(properties{prop_index}) = varargin{arg_index + 1}; - else - error('Property ''%s'' does not exist or is ambiguous.',arg) - end - arg_index = arg_index + 2; - elseif isstruct(arg) - arg_fn = fieldnames(arg); - for i = 1:length(arg_fn) - prop_index = find(strncmpi(arg_fn{i},properties,... - length(arg_fn{i}))); - if length(prop_index) == 1 - prop.(properties{prop_index}) = arg.(arg_fn{i}); - else - error('Property ''%s'' does not exist or is ambiguous.',... - arg_fn{i}) - end - end - arg_index = arg_index + 1; - else - error(['Properties must be specified by property/value pairs',... - ' or structures.']) - end -end - -% Validate FilterSpec property. -if isempty(prop.filterspec) - prop.filterspec = '*'; -end -if ~ischar(prop.filterspec) - error('FilterSpec property must contain a string.') -end - -% Validate REFilter property. -if ~ischar(prop.refilter) - error('REFilter property must contain a string.') -end - -% Validate Prompt property. -if ~ischar(prop.prompt) - error('Prompt property must contain a string.') -end - -% Validate NumFiles property. -if numel(prop.numfiles) > 2 || any(prop.numfiles < 0) - error('NumFiles must be empty, a scalar or two-element vector.') -end -prop.numfiles = unique(prop.numfiles); -if isequal(prop.numfiles,1) - numstr = 'Select exactly 1 file.'; -elseif length(prop.numfiles) == 1 - numstr = sprintf('Select exactly %d files.',prop.numfiles); -else - numstr = sprintf('Select %d to %d files.',prop.numfiles); -end - -% Validate Output property. -legal_outputs = {'cell','struct','char'}; -out_idx = find(strncmpi(prop.output,legal_outputs,length(prop.output))); -if length(out_idx) == 1 - prop.output = legal_outputs{out_idx}; -else - error(['Value of ''Output'' property, ''%s'', is illegal or '... - 'ambiguous.'],prop.output) -end - - -% Initialize file lists. -[current_dir,f,e] = fileparts(prop.filterspec); -filter = [f,e]; -if isempty(current_dir) - current_dir = pwd; -end -if isempty(filter) - filter = '*'; -end -re_filter = prop.refilter; -full_filter = fullfile(current_dir,filter); -path_cell = path2cell(current_dir); -fdir = filtered_dir(full_filter,re_filter); -filenames = {fdir.name}'; -filenames = annotate_file_names(filenames,fdir); - -% Initialize some data. -file_picks = {}; -full_file_picks = {}; -dir_picks = dir(' '); % Create empty directory structure. -show_full_path = false; -nodupes = true; -history = {current_dir}; - -% Create figure. -gray = get(0,'DefaultUIControlBackgroundColor'); -fig = figure('Position',[0 0 740 445],... - 'Color',gray,... - 'WindowStyle','modal',... - 'Resize','off',... - 'NumberTitle','off',... - 'Name',prop.prompt,... - 'IntegerHandle','off',... - 'CloseRequestFcn',@cancel,... - 'CreateFcn',{@movegui,'center'}); - -% Create uicontrols. -uicontrol('Style','frame',... - 'Position',[255 260 110 70]) -uicontrol('Style','frame',... - 'Position',[275 135 110 100]) - -navlist = uicontrol('Style','listbox',... - 'Position',[10 10 250 320],... - 'String',filenames,... - 'Value',[],... - 'BackgroundColor','w',... - 'Callback',@clicknav,... - 'Max',2); -pickslist = uicontrol('Style','listbox',... - 'Position',[380 10 350 320],... - 'String',{},... - 'BackgroundColor','w',... - 'Callback',@clickpicks,... - 'Max',2); - -openbut = uicontrol('Style','pushbutton',... - 'Position',[270 300 80 20],... - 'String','Open',... - 'Enable','off',... - 'Callback',@open); -arrow = [2 2 2 2 2 2 2 2 1 2 2 2;... - 2 2 2 2 2 2 2 2 2 0 2 2;... - 2 2 2 2 2 2 2 2 2 2 0 2;... - 0 0 0 0 0 0 0 0 0 0 0 0;... - 2 2 2 2 2 2 2 2 2 2 0 2;... - 2 2 2 2 2 2 2 2 2 0 2 2;... - 2 2 2 2 2 2 2 2 1 2 2 2]; -arrow(arrow == 2) = NaN; -arrow_im = NaN*ones(16,76); -arrow_im(6:12,45:56) = arrow/2; -im = repmat(arrow_im,[1 1 3]); -addbut = uicontrol('Style','pushbutton',... - 'Position',[270 270 80 20],... - 'String','Add ',... - 'Enable','off',... - 'CData',im,... - 'Callback',@add); - -removebut = uicontrol('Style','pushbutton',... - 'Position',[290 205 80 20],... - 'String','Remove',... - 'Enable','off',... - 'Callback',@remove); -moveupbut = uicontrol('Style','pushbutton',... - 'Position',[290 175 80 20],... - 'String','Move Up',... - 'Enable','off',... - 'Callback',@moveup); -movedownbut = uicontrol('Style','pushbutton',... - 'Position',[290 145 80 20],... - 'String','Move Down',... - 'Enable','off',... - 'Callback',@movedown); - -uicontrol('Position',[10 380 250 16],... - 'Style','text',... - 'String','Current Directory',... - 'HorizontalAlignment','center') -dir_popup = uicontrol('Style','popupmenu',... - 'Position',[10 335 250 20],... - 'BackgroundColor','w',... - 'String',path_cell(end:-1:1),... - 'Value',1,... - 'Callback',@dirpopup); -hist_cm = uicontextmenu; -pathbox = uicontrol('Position',[10 360 250 20],... - 'Style','edit',... - 'BackgroundColor','w',... - 'String',current_dir,... - 'HorizontalAlignment','left',... - 'Callback',@change_path,... - 'UIContextMenu',hist_cm); -hist_menus = []; -hist_cb = @history_cb; -hist_menus = make_history_cm(hist_cb,hist_cm,hist_menus,history); - -uicontrol('Position',[10 425 80 16],... - 'Style','text',... - 'String','File Filter',... - 'HorizontalAlignment','left') -uicontrol('Position',[100 425 160 16],... - 'Style','text',... - 'String','Reg. Exp. Filter',... - 'HorizontalAlignment','left') -showallfiles = uicontrol('Position',[270 405 100 20],... - 'Style','checkbox',... - 'String','Show All Files',... - 'Value',0,... - 'HorizontalAlignment','left',... - 'Callback',@togglefilter); -filter_ed = uicontrol('Position',[10 405 80 20],... - 'Style','edit',... - 'BackgroundColor','w',... - 'String',filter,... - 'HorizontalAlignment','left',... - 'Callback',@setfilspec); -refilter_ed = uicontrol('Position',[100 405 160 20],... - 'Style','edit',... - 'BackgroundColor','w',... - 'String',re_filter,... - 'HorizontalAlignment','left',... - 'Callback',@setrefilter); - -viewfullpath = uicontrol('Style','checkbox',... - 'Position',[380 335 230 20],... - 'String','Show full paths',... - 'Value',show_full_path,... - 'HorizontalAlignment','left',... - 'Callback',@showfullpath); -remove_dupes = uicontrol('Style','checkbox',... - 'Position',[380 360 230 20],... - 'String','Remove duplicates (as per full path)',... - 'Value',nodupes,... - 'HorizontalAlignment','left',... - 'Callback',@removedupes); -uicontrol('Position',[380 405 350 20],... - 'Style','text',... - 'String','Selected Files',... - 'HorizontalAlignment','center') -uicontrol('Position',[280 80 80 30],'String','Done',... - 'Callback',@done); -uicontrol('Position',[280 30 80 30],'String','Cancel',... - 'Callback',@cancel); - -if ~isempty(prop.numfiles) - uicontrol('Position',[380 385 350 16],... - 'Style','text',... - 'String',numstr,... - 'ForegroundColor','r',... - 'HorizontalAlignment','center') -end - -set(fig,'HandleVisibility','off') - -uiwait(fig) - -% Compute desired output. -switch prop.output - case 'cell' - out = full_file_picks; - case 'struct' - out = dir_picks(:); - case 'char' - out = char(full_file_picks); - case 'cancel' - out = 0; -end - -% -------------------- Callback functions -------------------- - - function add(varargin) - values = get(navlist,'Value'); - for i = 1:length(values) - dir_pick = fdir(values(i)); - pick = dir_pick.name; - pick_full = fullfile(current_dir,pick); - dir_pick.name = pick_full; - if ~nodupes || ~any(strcmp(full_file_picks,pick_full)) - file_picks{end + 1} = pick; - full_file_picks{end + 1} = pick_full; - dir_picks(end + 1) = dir_pick; - end - end - if show_full_path - set(pickslist,'String',full_file_picks,'Value',[]); - else - set(pickslist,'String',file_picks,'Value',[]); - end - set([removebut,moveupbut,movedownbut],'Enable','off'); - end - - function remove(varargin) - values = get(pickslist,'Value'); - file_picks(values) = []; - full_file_picks(values) = []; - dir_picks(values) = []; - top = get(pickslist,'ListboxTop'); - num_above_top = sum(values < top); - top = top - num_above_top; - num_picks = length(file_picks); - new_value = min(min(values) - num_above_top,num_picks); - if num_picks == 0 - new_value = []; - set([removebut,moveupbut,movedownbut],'Enable','off') - end - if show_full_path - set(pickslist,'String',full_file_picks,'Value',new_value,... - 'ListboxTop',top) - else - set(pickslist,'String',file_picks,'Value',new_value,... - 'ListboxTop',top) - end - end - - function open(varargin) - values = get(navlist,'Value'); - if fdir(values).isdir - if strcmp(fdir(values).name,'.') - return - elseif strcmp(fdir(values).name,'..') - set(dir_popup,'Value',min(2,length(path_cell))) - dirpopup(); - return - end - current_dir = fullfile(current_dir,fdir(values).name); - history{end+1} = current_dir; - history = unique(history); - hist_menus = make_history_cm(hist_cb,hist_cm,hist_menus,... - history); - full_filter = fullfile(current_dir,filter); - path_cell = path2cell(current_dir); - fdir = filtered_dir(full_filter,re_filter); - filenames = {fdir.name}'; - filenames = annotate_file_names(filenames,fdir); - set(dir_popup,'String',path_cell(end:-1:1),'Value',1) - set(pathbox,'String',current_dir) - set(navlist,'ListboxTop',1,'Value',[],'String',filenames) - set(addbut,'Enable','off') - set(openbut,'Enable','off') - end - end - - function clicknav(varargin) - value = get(navlist,'Value'); - nval = length(value); - dbl_click_fcn = @add; - switch nval - case 0 - set([addbut,openbut],'Enable','off') - case 1 - set(addbut,'Enable','on'); - if fdir(value).isdir - set(openbut,'Enable','on') - dbl_click_fcn = @open; - else - set(openbut,'Enable','off') - end - otherwise - set(addbut,'Enable','on') - set(openbut,'Enable','off') - end - if strcmp(get(fig,'SelectionType'),'open') - dbl_click_fcn(); - end - end - - function clickpicks(varargin) - value = get(pickslist,'Value'); - if isempty(value) - set([removebut,moveupbut,movedownbut],'Enable','off') - else - set(removebut,'Enable','on') - if min(value) == 1 - set(moveupbut,'Enable','off') - else - set(moveupbut,'Enable','on') - end - if max(value) == length(file_picks) - set(movedownbut,'Enable','off') - else - set(movedownbut,'Enable','on') - end - end - if strcmp(get(fig,'SelectionType'),'open') - remove(); - end - end - - function dirpopup(varargin) - value = get(dir_popup,'Value'); - len = length(path_cell); - path_cell = path_cell(1:end-value+1); - if ispc && value == len - current_dir = ''; - full_filter = filter; - fdir = struct('name',getdrives,'date',datestr(now),... - 'bytes',0,'isdir',1); - else - current_dir = cell2path(path_cell); - history{end+1} = current_dir; - history = unique(history); - hist_menus = make_history_cm(hist_cb,hist_cm,hist_menus,... - history); - full_filter = fullfile(current_dir,filter); - fdir = filtered_dir(full_filter,re_filter); - end - filenames = {fdir.name}'; - filenames = annotate_file_names(filenames,fdir); - set(dir_popup,'String',path_cell(end:-1:1),'Value',1) - set(pathbox,'String',current_dir) - set(navlist,'String',filenames,'Value',[]) - set(addbut,'Enable','off') - end - - function change_path(varargin) - proposed_path = get(pathbox,'String'); - % Process any directories named '..'. - proposed_path_cell = path2cell(proposed_path); - ddots = strcmp(proposed_path_cell,'..'); - ddots(find(ddots) - 1) = true; - proposed_path_cell(ddots) = []; - proposed_path = cell2path(proposed_path_cell); - % Check for existance of directory. - if ~exist(proposed_path,'dir') - uiwait(errordlg(['Directory "',proposed_path,... - '" does not exist.'],'','modal')) - return - end - current_dir = proposed_path; - history{end+1} = current_dir; - history = unique(history); - hist_menus = make_history_cm(hist_cb,hist_cm,hist_menus,history); - full_filter = fullfile(current_dir,filter); - path_cell = path2cell(current_dir); - fdir = filtered_dir(full_filter,re_filter); - filenames = {fdir.name}'; - filenames = annotate_file_names(filenames,fdir); - set(dir_popup,'String',path_cell(end:-1:1),'Value',1) - set(pathbox,'String',current_dir) - set(navlist,'String',filenames,'Value',[]) - set(addbut,'Enable','off') - set(openbut,'Enable','off') - end - - function showfullpath(varargin) - show_full_path = get(viewfullpath,'Value'); - if show_full_path - set(pickslist,'String',full_file_picks) - else - set(pickslist,'String',file_picks) - end - end - - function removedupes(varargin) - nodupes = get(remove_dupes,'Value'); - if nodupes - num_picks = length(full_file_picks); - [unused,rev_order] = unique(full_file_picks(end:-1:1)); - order = sort(num_picks + 1 - rev_order); - full_file_picks = full_file_picks(order); - file_picks = file_picks(order); - if show_full_path - set(pickslist,'String',full_file_picks,'Value',[]) - else - set(pickslist,'String',file_picks,'Value',[]) - end - set([removebut,moveupbut,movedownbut],'Enable','off') - end - end - - function moveup(varargin) - value = get(pickslist,'Value'); - set(removebut,'Enable','on') - n = length(file_picks); - omega = 1:n; - index = zeros(1,n); - index(value - 1) = omega(value); - index(setdiff(omega,value - 1)) = omega(setdiff(omega,value)); - file_picks = file_picks(index); - full_file_picks = full_file_picks(index); - value = value - 1; - if show_full_path - set(pickslist,'String',full_file_picks,'Value',value) - else - set(pickslist,'String',file_picks,'Value',value) - end - if min(value) == 1 - set(moveupbut,'Enable','off') - end - set(movedownbut,'Enable','on') - end - - function movedown(varargin) - value = get(pickslist,'Value'); - set(removebut,'Enable','on') - n = length(file_picks); - omega = 1:n; - index = zeros(1,n); - index(value + 1) = omega(value); - index(setdiff(omega,value + 1)) = omega(setdiff(omega,value)); - file_picks = file_picks(index); - full_file_picks = full_file_picks(index); - value = value + 1; - if show_full_path - set(pickslist,'String',full_file_picks,'Value',value) - else - set(pickslist,'String',file_picks,'Value',value) - end - if max(value) == n - set(movedownbut,'Enable','off') - end - set(moveupbut,'Enable','on') - end - - function togglefilter(varargin) - value = get(showallfiles,'Value'); - if value - filter = '*'; - re_filter = ''; - set([filter_ed,refilter_ed],'Enable','off') - else - filter = get(filter_ed,'String'); - re_filter = get(refilter_ed,'String'); - set([filter_ed,refilter_ed],'Enable','on') - end - full_filter = fullfile(current_dir,filter); - fdir = filtered_dir(full_filter,re_filter); - filenames = {fdir.name}'; - filenames = annotate_file_names(filenames,fdir); - set(navlist,'String',filenames,'Value',[]) - set(addbut,'Enable','off') - end - - function setfilspec(varargin) - filter = get(filter_ed,'String'); - if isempty(filter) - filter = '*'; - set(filter_ed,'String',filter) - end - % Process file spec if a subdirectory was included. - [p,f,e] = fileparts(filter); - if ~isempty(p) - newpath = fullfile(current_dir,p,''); - set(pathbox,'String',newpath) - filter = [f,e]; - if isempty(filter) - filter = '*'; - end - set(filter_ed,'String',filter) - change_path(); - end - full_filter = fullfile(current_dir,filter); - fdir = filtered_dir(full_filter,re_filter); - filenames = {fdir.name}'; - filenames = annotate_file_names(filenames,fdir); - set(navlist,'String',filenames,'Value',[]) - set(addbut,'Enable','off') - end - - function setrefilter(varargin) - re_filter = get(refilter_ed,'String'); - fdir = filtered_dir(full_filter,re_filter); - filenames = {fdir.name}'; - filenames = annotate_file_names(filenames,fdir); - set(navlist,'String',filenames,'Value',[]) - set(addbut,'Enable','off') - end - - function done(varargin) - % Optional shortcut: click on a file and press 'Done'. -% if isempty(full_file_picks) && strcmp(get(addbut,'Enable'),'on') -% add(); -% end - numfiles = length(full_file_picks); - if ~isempty(prop.numfiles) - if numfiles < prop.numfiles(1) - msg = {'Too few files selected.',numstr}; - uiwait(errordlg(msg,'','modal')) - return - elseif numfiles > prop.numfiles(end) - msg = {'Too many files selected.',numstr}; - uiwait(errordlg(msg,'','modal')) - return - end - end - delete(fig) - end - - function cancel(varargin) - prop.output = 'cancel'; - delete(fig) - end - - function history_cb(varargin) - current_dir = history{varargin{3}}; - full_filter = fullfile(current_dir,filter); - path_cell = path2cell(current_dir); - fdir = filtered_dir(full_filter,re_filter); - filenames = {fdir.name}'; - filenames = annotate_file_names(filenames,fdir); - set(dir_popup,'String',path_cell(end:-1:1),'Value',1) - set(pathbox,'String',current_dir) - set(navlist,'ListboxTop',1,'Value',[],'String',filenames) - set(addbut,'Enable','off') - set(openbut,'Enable','off') - end -end - - -% -------------------- Subfunctions -------------------- - -function c = path2cell(p) -% Turns a path string into a cell array of path elements. -c = strread(p,'%s','delimiter','\\/'); -if ispc - c = [{'My Computer'};c]; -else - c = [{filesep};c(2:end)]; -end -end - - -function p = cell2path(c) -% Turns a cell array of path elements into a path string. -if ispc - p = fullfile(c{2:end},''); -else - p = fullfile(c{:},''); -end -end - - -function d = filtered_dir(full_filter,re_filter) -% Like dir, but applies filters and sorting. -p = fileparts(full_filter); -if isempty(p) && full_filter(1) == '/' - p = '/'; -end -if exist(full_filter,'dir') - c = cell(0,1); - dfiles = struct('name',c,'date',c,'bytes',c,'isdir',c); -else - dfiles = dir(full_filter); -end -if ~isempty(dfiles) - dfiles([dfiles.isdir]) = []; -end -ddir = dir(p); -ddir = ddir([ddir.isdir]); -% Additional regular expression filter. -if nargin > 1 && ~isempty(re_filter) - if ispc - no_match = cellfun('isempty',regexpi({dfiles.name},re_filter)); - else - no_match = cellfun('isempty',regexp({dfiles.name},re_filter)); - end - dfiles(no_match) = []; -end -% Set navigator style: -% 1 => mix file and directory names -% 2 => means list all files before all directories -% 3 => means list all directories before all files -% 4 => same as 2 except put . and .. directories first -if isunix - style = 4; -else - style = 4; -end -switch style - case 1 - d = [dfiles;ddir]; - [unused,index] = sort({d.name}); - d = d(index); - case 2 - [unused,index1] = sort({dfiles.name}); - [unused,index2] = sort({ddir.name}); - d = [dfiles(index1);ddir(index2)]; - case 3 - [unused,index1] = sort({dfiles.name}); - [unused,index2] = sort({ddir.name}); - d = [ddir(index2);dfiles(index1)]; - case 4 - [unused,index1] = sort({dfiles.name}); - dot1 = find(strcmp({ddir.name},'.')); - dot2 = find(strcmp({ddir.name},'..')); - ddot1 = ddir(dot1); - ddot2 = ddir(dot2); - ddir([dot1,dot2]) = []; - [unused,index2] = sort({ddir.name}); - d = [ddot1;ddot2;dfiles(index1);ddir(index2)]; -end -end - - -function drives = getdrives -% Returns a cell array of drive names on Windows. -letters = char('A':'Z'); -num_letters = length(letters); -drives = cell(1,num_letters); -for i = 1:num_letters - if exist([letters(i),':\'],'dir'); - drives{i} = [letters(i),':']; - end -end -drives(cellfun('isempty',drives)) = []; -end - - -function filenames = annotate_file_names(filenames,dir_listing) -% Adds a trailing filesep character to directory names. -fs = filesep; -for i = 1:length(filenames) - if dir_listing(i).isdir - filenames{i} = [filenames{i},fs]; - end -end -end - - -function hist_menus = make_history_cm(cb,hist_cm,hist_menus,history) -% Make context menu for history. -if ~isempty(hist_menus) - delete(hist_menus) -end -num_hist = length(history); -hist_menus = zeros(1,num_hist); -for i = 1:num_hist - hist_menus(i) = uimenu(hist_cm,'Label',history{i},... - 'Callback',{cb,i}); -end -end diff --git a/matlab/spatial/addPoints.m b/matlab/spatial/addPoints.m deleted file mode 100644 index b9c9ddf..0000000 --- a/matlab/spatial/addPoints.m +++ /dev/null @@ -1,86 +0,0 @@ -function uusiData=addPoints(data) -%Lisää koordinaatipisteiden joukkoon pisteitä, jotta jokainen datapiste -%kuuluisi äärelliseen voronoi soluun voronoi tessellaatiota -%muodostettaessa. Apupisteet lisätään muodostamalla hila -%koordinaattipisteiden päälle ja ottamalla voronoi tessellaatio hilasta. Ne -%hilan pisteet, joita vastaavien solujen sisällä ei ole yhtään -%koordinaattipistettä, jäävät apupisteiksi - -x = data(:,1); -y = data(:,2); - -xmax = max(x); -xmin = min(x); -ymax = max(y); -ymin = min(y); - -npist = size(unique(data, 'rows'),1); -nstep = ceil(npist^0.4) + 7; -xstep = (xmax-xmin)/(nstep-7); -ystep = (ymax-ymin)/(nstep-7); - -apuPisteet = zeros(nstep^2,2); - -for i=1:nstep - apuPisteet((i-1)*nstep+1 : i*nstep,1) = xmin + (i-4)*xstep; - apuPisteet((i-1)*nstep+1 : i*nstep,2) = ymin + ((1:nstep)-4)*ystep; -end - - - -[V,C] = voronoin(apuPisteet,{'Qt','Qbb','Qc','Qz'}); - -if 0 - figure - hold on - for i=1:length(C) - if isempty(find(C{i} == 1)) - X = V(C{i},:); - hull = convhull(X(:,1),X(:,2)); - plot(X(hull,1), X(hull,2)); - end - end - axis([-2 7 -2 8]); - plot(data(:,1), data(:,2), 'r*'); - plot(apuPisteet(:,1), apuPisteet(:,2), 'b+'); - - hold off -end -empty = zeros(nstep^2,1); - -for i = 1:length(C) - if isempty(find(C{i} == 1)) %Tutkitaan vain rajoitetut solut - vx = V(C{i},1); - vy = V(C{i},2); - IN = any(inpolygon(x,y,vx,vy)); - if IN == 0 - empty(i) = 1; - end - - end -end - -empty = find(empty == 1); -C = C(empty); - -apuPisteet = apuPisteet(empty, :); - -if 0 - figure - hold on - for i=1:length(C) - if isempty(find(C{i} == 1)) - X = V(C{i},:); - hull = convhull(X(:,1),X(:,2)); - plot(X(hull,1), X(hull,2)); - end - end - plot(data(:,1), data(:,2), 'r*'); - plot(apuPisteet(:,1), apuPisteet(:,2), 'b+'); - axis([-2 7 -2 8]); - hold off -end - -uusiData = [data; apuPisteet]; - - diff --git a/matlab/spatial/findCliques.m b/matlab/spatial/findCliques.m deleted file mode 100644 index 90effd6..0000000 --- a/matlab/spatial/findCliques.m +++ /dev/null @@ -1,509 +0,0 @@ -function [cliques, separators, G] = findCliques(M) -%Muuttaa graafin M kolmioituvaksi ja laskee siitä klikit ja -%separaattorit. -%Hyödynnetään Kevin Murphyn algoritmeja Graph Theory toolboxista. -%Päivitetty 12.8.2005 - -order=elim_order(M,ones(length(M))); -[G,cliques]=triangulate(M,order); -[jtree,root]=cliques_to_jtree(cliques,ones(length(M))); -ncliq=length(cliques); -separators=cell(ncliq-1,1); %n-solmuisessa puussa n-1 viivaa - -jono=zeros(length(ncliq)); -jono(1)=root; -i=1; -pointer=2; %Seuraava tyhjä paikka - -while ~isempty(find(jono~=0)) %Puun leveyssuuntainen läpikäynti - lapset=find(jtree(jono(i),:)~=0); - jtree(:,jono(i))=0; %Klikki käsitelty - jono(pointer:pointer+length(lapset)-1)=lapset; - for j=1:length(lapset) - ehdokas = myintersect(cliques{jono(i)},cliques{lapset(j)}); - kelpaa = 1; - for k = 1:(pointer+j-3) - % Tutkitaan, että separaattoriehdokasta ei vielä käsitelty - if isequal(ehdokas,separators{k}) - kelpaa = 0; - end - end - if kelpaa - separators{pointer+j-2} = ehdokas; - end - end - jono(i)=0; - pointer=pointer+length(lapset); - i=i+1; -end - -notEmpty=zeros(ncliq-1,1); -for i=1:ncliq-1 - if ~isempty(separators{i}) - notEmpty(i)=1; - end -end -notEmpty=find(notEmpty==1); -separators=separators(notEmpty); - - -%-------------------------------------------------------------------------- -%-------------------------------------------------------------------------- - -function order = elim_order(G, node_sizes) -% BEST_FIRST_ELIM_ORDER Greedily search for an optimal elimination order. -% order = best_first_elim_order(moral_graph, node_sizes) -% -% Find an order in which to eliminate nodes from the graph in such a way as to try and minimize the -% weight of the resulting triangulated graph. The weight of a graph is the sum of the weights of each -% of its cliques; the weight of a clique is the product of the weights of each of its members; the -% weight of a node is the number of values it can take on. -% -% Since this is an NP-hard problem, we use the following greedy heuristic: -% at each step, eliminate that node which will result in the addition of the least -% number of fill-in edges, breaking ties by choosing the node that induces the lighest clique. -% For details, see -% - Kjaerulff, "Triangulation of graphs -- algorithms giving small total state space", -% Univ. Aalborg tech report, 1990 (www.cs.auc.dk/~uk) -% - C. Huang and A. Darwiche, "Inference in Belief Networks: A procedural guide", -% Intl. J. Approx. Reasoning, 11, 1994 -% - -% Warning: This code is pretty old and could probably be made faster. - -n = length(G); -%if nargin < 3, stage = { 1:n }; end % no constraints - -% For long DBNs, it may be useful to eliminate all the nodes in slice t before slice t+1. -% This will ensure that the jtree has a repeating structure (at least away from both edges). -% This is why we have stages. -% See the discussion of splicing jtrees on p68 of -% Geoff Zweig's PhD thesis, Dept. Comp. Sci., UC Berkeley, 1998. -% This constraint can increase the clique size significantly. - -MG = G; % copy the original graph -uneliminated = ones(1,n); -order = zeros(1,n); -%t = 1; % Counts which time slice we are on -for i=1:n - U = find(uneliminated); - %valid = myintersect(U, stage{t}); - valid = U; - % Choose the best node from the set of valid candidates - min_fill = zeros(1,length(valid)); - min_weight = zeros(1,length(valid)); - for j=1:length(valid) - k = valid(j); - nbrs = myintersect(neighbors(G, k), U); - l = length(nbrs); - M = MG(nbrs,nbrs); - min_fill(j) = l^2 - sum(M(:)); % num. added edges - min_weight(j) = prod(node_sizes([k nbrs])); % weight of clique - end - lightest_nbrs = find(min_weight==min(min_weight)); - % break ties using min-fill heuristic - best_nbr_ndx = argmin(min_fill(lightest_nbrs)); - j = lightest_nbrs(best_nbr_ndx); % we will eliminate the j'th element of valid - %j1s = find(score1==min(score1)); - %j = j1s(argmin(score2(j1s))); - k = valid(j); - uneliminated(k) = 0; - order(i) = k; - ns = myintersect(neighbors(G, k), U); - if ~isempty(ns) - G(ns,ns) = 1; - G = setdiag(G,0); - end - %if ~any(logical(uneliminated(stage{t}))) % are we allowed to the next slice? - % t = t + 1; - %end -end - -%-------------------------------------------------------------------------- - -function [G, cliques, fill_ins] = triangulate(G, order) -% TRIANGULATE Ensure G is triangulated (chordal), i.e., every cycle of length > 3 has a chord. -% [G, cliques, fill_ins, cliques_containing_node] = triangulate(G, order) -% -% cliques{i} is the i'th maximal complete subgraph of the triangulated graph. -% fill_ins(i,j) = 1 iff we add a fill-in arc between i and j. -% -% To find the maximal cliques, we save each induced cluster (created by adding connecting -% neighbors) that is not a subset of any previously saved cluster. (A cluster is a complete, -% but not necessarily maximal, set of nodes.) - -MG = G; -n = length(G); -eliminated = zeros(1,n); -cliques = {}; -for i=1:n - u = order(i); - U = find(~eliminated); % uneliminated - nodes = myintersect(neighbors(G,u), U); % look up neighbors in the partially filled-in graph - nodes = myunion(nodes, u); % the clique will always contain at least u - G(nodes,nodes) = 1; % make them all connected to each other - G = setdiag(G,0); - eliminated(u) = 1; - - exclude = 0; - for c=1:length(cliques) - if mysubset(nodes,cliques{c}) % not maximal - exclude = 1; - break; - end - end - if ~exclude - cnum = length(cliques)+1; - cliques{cnum} = nodes; - end -end - -%fill_ins = sparse(triu(max(0, G - MG), 1)); -fill_ins=1; - -%-------------------------------------------------------------------------- - -function [jtree, root, B, w] = cliques_to_jtree(cliques, ns) -% MK_JTREE Make an optimal junction tree. -% [jtree, root, B, w] = mk_jtree(cliques, ns) -% -% A junction tree is a tree that satisfies the jtree property, which says: -% for each pair of cliques U,V with intersection S, all cliques on the path between U and V -% contain S. (This ensures that local propagation leads to global consistency.) -% -% We can create a junction tree by computing the maximal spanning tree of the junction graph. -% (The junction graph connects all cliques, and the weight of an edge (i,j) is -% |C(i) intersect C(j)|, where C(i) is the i'th clique.) -% -% The best jtree is the maximal spanning tree which minimizes the sum of the costs on each edge, -% where cost(i,j) = w(C(i)) + w(C(j)), and w(C) is the weight of clique C, -% which is the total number of values C can take on. -% -% For details, see -% - Jensen and Jensen, "Optimal Junction Trees", UAI 94. -% -% Input: -% cliques{i} = nodes in clique i -% ns(i) = number of values node i can take on -% Output: -% jtree(i,j) = 1 iff cliques i and j aer connected -% root = the clique that should be used as root -% B(i,j) = 1 iff node j occurs in clique i -% w(i) = weight of clique i - - - -num_cliques = length(cliques); -w = zeros(num_cliques, 1); -B = sparse(num_cliques, 1); -for i=1:num_cliques - B(i, cliques{i}) = 1; - w(i) = prod(ns(cliques{i})); -end - - -% C1(i,j) = length(intersect(cliques{i}, cliques{j})); -% The length of the intersection of two sets is the dot product of their bit vector representation. -C1 = B*B'; -C1 = setdiag(C1, 0); - -% C2(i,j) = w(i) + w(j) -num_cliques = length(w); -W = repmat(w, 1, num_cliques); -C2 = W + W'; -C2 = setdiag(C2, 0); - -jtree = sparse(minimum_spanning_tree(-C1, C2)); % Using -C1 gives *maximum* spanning tree - -% The root is arbitrary, but since the first pass is towards the root, -% we would like this to correspond to going forward in time in a DBN. -root = num_cliques; - -%-------------------------------------------------------------------------- - - -function C = myintersect(A,B) -% MYINTERSECT Intersection of two sets of positive integers (much faster than built-in intersect) -% C = myintersect(A,B) - -A = A(:)'; B = B(:)'; - -if isempty(A) - ma = 0; -else - ma = max(A); -end - -if isempty(B) - mb = 0; -else - mb = max(B); -end - -if ma==0 | mb==0 - C = []; -else - %bits = sparse(1, max(ma,mb)); - bits = zeros(1, max(ma,mb)); - bits(A) = 1; - C = B(logical(bits(B))); -end - -%sum( bitget( bitand( cliquesb(i), cliquesb(j) ), 1:52 ) ); - -%-------------------------------------------------------------------------- - -function ns = neighbors(adj_mat, i) -% NEIGHBORS Find the parents and children of a node in a graph. -% ns = neighbors(adj_mat, i) - -%ns = myunion(children(adj_mat, i), parents(adj_mat, i)); -ns = find(adj_mat(i,:)); - -%-------------------------------------------------------------------------- - -function C = myunion(A,B) -% MYUNION Union of two sets of positive integers (much faster than built-in union) -% C = myunion(A,B) - -if isempty(A) - ma = 0; -else - ma = max(A); -end - -if isempty(B) - mb = 0; -else - mb = max(B); -end - -if ma==0 & mb==0 - C = []; -elseif ma==0 & mb>0 - C = B; -elseif ma>0 & mb==0 - C = A; -else - %bits = sparse(1, max(ma,mb)); - bits = zeros(1, max(ma,mb)); - bits(A) = 1; - bits(B) = 1; - C = find(bits); -end - -%-------------------------------------------------------------------------- - - -function ps = parents(adj_mat, i) -% PARENTS Return the list of parents of node i -% ps = parents(adj_mat, i) - -ps = find(adj_mat(:,i))'; - -%-------------------------------------------------------------------------- - -function cs = children(adj_mat, i, t) -% CHILDREN Return the indices of a node's children in sorted order -% c = children(adj_mat, i, t) -% -% t is an optional argument: if present, dag is assumed to be a 2-slice DBN - -if nargin < 3 - cs = find(adj_mat(i,:)); -else - if t==1 - cs = find(adj_mat(i,:)); - else - ss = length(adj_mat)/2; - j = i+ss; - cs = find(adj_mat(j,:)) + (t-2)*ss; - end -end - -%-------------------------------------------------------------------------- - -function p=mysubset(small,large) -% MYSUBSET Is the small set of +ve integers a subset of the large set? -% p = mysubset(small, large) - -% Surprisingly, this is not built-in. - -if isempty(small) - p = 1; % isempty(large); -else - p = length(myintersect(small,large)) == length(small); -end - -%-------------------------------------------------------------------------- - -function A = minimum_spanning_tree(C1, C2) -% -% Find the minimum spanning tree using Prim's algorithm. -% C1(i,j) is the primary cost of connecting i to j. -% C2(i,j) is the (optional) secondary cost of connecting i to j, used to break ties. -% We assume that absent edges have 0 cost. -% To find the maximum spanning tree, used -1*C. -% See Aho, Hopcroft & Ullman 1983, "Data structures and algorithms", p 237. - -% Prim's is O(V^2). Kruskal's algorithm is O(E log E) and hence is more efficient -% for sparse graphs, but is implemented in terms of a priority queue. - -% We partition the nodes into those in U and those not in U. -% closest(i) is the vertex in U that is closest to i in V-U. -% lowcost(i) is the cost of the edge (i, closest(i)), or infinity is i has been used. -% In Aho, they say C(i,j) should be "some appropriate large value" if the edge is missing. -% We set it to infinity. -% However, since lowcost is initialized from C, we must distinguish absent edges from used nodes. - -n = length(C1); -if nargin==1, C2 = zeros(n); end -A = zeros(n); - -closest = ones(1,n); -used = zeros(1,n); % contains the members of U -used(1) = 1; % start with node 1 -C1(find(C1==0))=inf; -C2(find(C2==0))=inf; -lowcost1 = C1(1,:); -lowcost2 = C2(1,:); - -for i=2:n - ks = find(lowcost1==min(lowcost1)); - k = ks(argmin(lowcost2(ks))); - A(k, closest(k)) = 1; - A(closest(k), k) = 1; - lowcost1(k) = inf; - lowcost2(k) = inf; - used(k) = 1; - NU = find(used==0); - for ji=1:length(NU) - for j=NU(ji) - if C1(k,j) < lowcost1(j) - lowcost1(j) = C1(k,j); - lowcost2(j) = C2(k,j); - closest(j) = k; - end - end - end -end - -%-------------------------------------------------------------------------- - -function indices = argmin(v) -% ARGMIN Return as a subscript vector the location of the smallest element of a multidimensional array v. -% indices = argmin(v) -% -% Returns the first minimum in the case of ties. -% Example: -% X = [2 8 4; 7 3 9]; -% argmin(X) = [1 1], i.e., row 1 column 1 - -[m i] = min(v(:)); -indices = ind2subv(mysize(v), i); - -%-------------------------------------------------------------------------- - -function M = setdiag(M, v) -% SETDIAG Set the diagonal of a matrix to a specified scalar/vector. -% M = set_diag(M, v) - -n = length(M); -if length(v)==1 - v = repmat(v, 1, n); -end - -% e.g., for 3x3 matrix, elements are numbered -% 1 4 7 -% 2 5 8 -% 3 6 9 -% so diagnoal = [1 5 9] - - -J = 1:n+1:n^2; -M(J) = v; - -%------------------------------------------------------------------------- - -function sz = mysize(M) -% MYSIZE Like the built-in size, except it returns n if M is a vector of length n, and 1 if M is a scalar. -% sz = mysize(M) -% -% The behavior is best explained by examples -% - M = rand(1,1), mysize(M) = 1, size(M) = [1 1] -% - M = rand(2,1), mysize(M) = 2, size(M) = [2 1] -% - M = rand(1,2), mysize(M) = 2, size(M) = [1 2] -% - M = rand(2,2,1), mysize(M) = [2 2], size(M) = [2 2] -% - M = rand(1,2,1), mysize(M) = 2, size(M) = [1 2] - -if myisvector(M) - sz = length(M); -else - sz = size(M); -end - -%-------------------------------------------------------------------------- - -function sub = ind2subv(siz, ndx) -% IND2SUBV Like the built-in ind2sub, but returns the answer as a row vector. -% sub = ind2subv(siz, ndx) -% -% siz and ndx can be row or column vectors. -% sub will be of size length(ndx) * length(siz). -% -% Example -% ind2subv([2 2 2], 1:8) returns -% [1 1 1 -% 2 1 1 -% ... -% 2 2 2] -% That is, the leftmost digit toggle fastest. -% -% See also SUBV2IND - -n = length(siz); - -if n==0 - sub = ndx; - return; -end - -if all(siz==2) - sub = dec2bitv(ndx-1, n); - sub = sub(:,n:-1:1)+1; - return; -end - -cp = [1 cumprod(siz(:)')]; -ndx = ndx(:) - 1; -sub = zeros(length(ndx), n); -for i = n:-1:1 % i'th digit - sub(:,i) = floor(ndx/cp(i))+1; - ndx = rem(ndx,cp(i)); -end - -%%%%%%%%%% - -function bits = dec2bitv(d,n) -% DEC2BITV Convert a decimal integer to a bit vector. -% bits = dec2bitv(d,n) is just like the built-in dec2bin, except the answer is a vector, not a string. -% n is an optional minimum length on the bit vector. -% If d is a vector, each row of the output array will be a bit vector. - - -if (nargin<2) - n=1; % Need at least one digit even for 0. -end -d = d(:); - -[f,e]=log2(max(d)); % How many digits do we need to represent the numbers? -bits=rem(floor(d*pow2(1-max(n,e):0)),2); - - -%------------------------------------------------------------------------ - -function r = myisvector(V) -%Kuten isvector(V) - -A = size(V); -r = (length(A) == 2) & (min(A) == 1); diff --git a/matlab/spatial/handleIndiFastaCase.m b/matlab/spatial/handleIndiFastaCase.m deleted file mode 100644 index 6041906..0000000 --- a/matlab/spatial/handleIndiFastaCase.m +++ /dev/null @@ -1,307 +0,0 @@ -function handleIndiFastaCase(cc,dist,Z) -% specicially written to handle FASTA file format of individual clustering -% Lu Cheng, 15.12.2012 - -OUTPUT_FILE = 'baps6_output.baps'; - -teksti = 'Input upper bound to the number of populations (only one value): '; -npopstextExtra = inputdlg(teksti ,'Input maximum number of populations',1,{'20'}); -if isempty(npopstextExtra) % Painettu Cancel:ia - return -else - nMaxPops = str2num(npopstextExtra{1}); - nMaxPops = nMaxPops(1); -end - -initPart = cluster_own(Z,nMaxPops); - -roundTypes = [2*ones(1,nMaxPops) ... - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... - 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 ... - 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 1 1 1 1 ... - 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ... - 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ... - 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4]; - -[partition, logml, partitionSummary, logmldiff] = model_search_parallel(cc, initPart, dist, roundTypes); -cc.PARTITION = partition; -cc.npops = length(unique(partition)); -cc.logml = logml; -cc.partitionSummary = partitionSummary; -cc.logmldiff = logmldiff; - -if cc.npops==nMaxPops - choice = questdlg(sprintf('%d populations discovered, which is the same as input. We suggest you to set a larger number. Do you want to quit?', cc.npops),... - 'Yes''No','Yes'); - if strcmp(choice,'Yes') - return - end -end - -writeMixtureInfo(cc); - -popnames = cc.popnames; -pointers = cc.pointers; -vorPoints = cc.vorPoints; -vorCells = cc.vorCells; -coordinates = cc.coordinates; -heds = cc.heds; - -viewMixPartition(partition, popnames); - -if isequal(popnames, []) - names = pointers; -else - names = cell(size(pointers)); - indices = zeros(size(popnames(:,2))); - for i=1:length(popnames(:,2)); - indices(i) = popnames{i,2}; - end - for i = 1:length(pointers) - inds = pointers{i}; - namesInCell = []; - for j = 1:length(inds) - ind = inds(j); - I = find(indices > ind); - if isempty(I) - nameIndex = length(indices); - else - nameIndex = min(I) -1; - end - name = popnames{nameIndex}; - namesInCell = [namesInCell name]; - end - names{i} = namesInCell; - end -end -vorPlot(vorPoints, vorCells, partition, pointers, coordinates, names); - -talle = questdlg(['Do you want to save the mixture populations ' ... - 'so that you can use them later in admixture analysis or plot ' ... - 'additional images?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - %%waitALittle; % Hetki odotusta, jotta muistaa kysy?.. - [filename, pathname] = uiputfile('*.mat','Save results as'); - - if (filename == 0) & (pathname == 0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist(OUTPUT_FILE,'file') - copyfile(OUTPUT_FILE,[pathname filename '.txt']) - delete(OUTPUT_FILE) - end - end - - % added by Lu Cheng, 05.12.2012 - tmpFile = [pathname filename '.mapfile.txt']; - fid = fopen(tmpFile,'w+'); - fprintf(fid,'Name\tLatitude\tLongitude\tDescription\tLabel\n'); - if exist('heds','var') - for i=1:length(heds) - fprintf(fid,'%s\t%.10f\t%.10f\t%s_%d\t%d\n',heds{i},coordinates(i,1),coordinates(i,2),... - heds{i},partition(i),partition(i)); - end - else - for i=1:ninds - fprintf(fid,'%d\t%.10f\t%.10f\t%d_%d\t%d\n',i,coordinates(i,1),coordinates(i,2),... - i,partition(i),partition(i)); - end - end - fclose(fid); - -% save([pathname filename], 'c'); - format_type = 'FASTA'; - save([pathname filename], 'cc','dist','Z','format_type','-v7.3'); -else - if exist(OUTPUT_FILE,'file') - delete(OUTPUT_FILE) - end -end - - - -%%%%%%%%%%%%% -function writeMixtureInfo(c) - -outputFile = 'baps6_output.baps'; - -% output the semi-supervised clustering results to the outputFile -% modified by Lu Cheng, 28.03.2010 - -ninds = length(c.PARTITION); -npops = c.npops; -popnames = c.popnames; -logml = c.logml; -partition = c.PARTITION; -partitionSummary = c.partitionSummary; - -if isempty(popnames) - popnames = c.heds; -end - -if ~isempty(outputFile) - fid = fopen(outputFile,'w+'); -else - fid = -1; - %diary('baps5_semi_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%10s\n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); - fprintf(fid,'%20s\n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); - fprintf(fid,'%20s\n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); - fprintf(fid,'%20s\n\n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -end - -disp('Best Partition: '); -if (fid ~= -1) - fprintf(fid,'%s \n','Best Partition: '); -end -for m=1:npops - indsInM = find(partition==m); - - if isempty(indsInM) - continue; - end - - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - text = [text '}']; - - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',new_line); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',text); - end - end; -end - -names = true; - -logmldiff = c.logmldiff; -if npops == 1 - logmldiff = []; -else - disp(' '); - disp(' '); - disp('Changes in log(marginal likelihood) if indvidual i is moved to cluster j:'); - - if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to cluster j:'); fprintf(fid, '\n'); - end - - text = sprintf('%10s','ind'); - for ii = 1:npops - tmpstr = sprintf('\t%10s',num2str(ii)); - text = [text tmpstr]; - end - - disp(text); - if (fid ~= -1) - fprintf(fid, '%s \n', text); - end - - for ii = 1:ninds - text = sprintf('%10s',popnames{ii}); - for jj = 1:npops - tmpstr = sprintf('\t%10s',num2str(logmldiff(ii,jj),'%10.6f')); - text = [text tmpstr]; - end - - if ii<100 - disp(text); - elseif ii==101 - disp('.......................................'); - disp('..........see output file..............'); - end - if (fid ~= -1) - fprintf(fid, '%s \n', text); - end - text = []; - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n\n', ' '); - fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n'); -end - -partitionSummaryKaikki = partitionSummary; -partitionSummary =[]; -for i=1:size(partitionSummaryKaikki,3) - partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)]; -end -% [I,J] = find(partitionSummaryKaikki(:,2,:)>-1e49); -% partitionSummaryKaikki = partitionSummaryKaikki(I,:,:); - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1),'%20d') ' ' num2str(partitionSummary(part,2),'%20.6f')]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', line); - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - - -%%%%%%%%%% - - -%-------------------------------------------------------------- -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -% newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n ind); - if isempty(I) - nameIndex = length(indices); - else - nameIndex = min(I) -1; - end - name = popnames{nameIndex}; - namesInCell = [namesInCell name]; - end - names{i} = namesInCell; - end -end -viewMixPartition(partition, popnames); -vorPlot(vorPoints, vorCells, partition, pointers, coordinates, names); - -talle = questdlg(['Do you want to save the mixture populations ' ... - 'so that you can use them later in admixture analysis or plot ' ... - 'additional images?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - %%waitALittle; % Hetki odotusta, jotta muistaa kysy?.. - [filename, pathname] = uiputfile('*.mat','Save results as'); - - if (filename == 0) & (pathname == 0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist(OUTPUT_FILE,'file') - copyfile(OUTPUT_FILE,[pathname filename '.txt']) - delete(OUTPUT_FILE) - end - end - - % added by Lu Cheng, 05.12.2012 - tmpFile = [pathname filename '.mapfile.txt']; - fid = fopen(tmpFile,'w+'); - fprintf(fid,'GroupLabel\tLatitude\tLongitude\tDescription\tLabel\n'); - for i=1:nPregroup - fprintf(fid,'%d\t%.10f\t%.10f\t%d_%d\t%d\n',i,coordinates(i,1),coordinates(i,2),... - i,partition(i),partition(i)); - end - fclose(fid); - -% save([pathname filename], 'c'); - format_type = 'FASTA'; - save([pathname filename], 'cc','partition','pgDist','pgPart','format_type','-v7.3'); -else - if exist(OUTPUT_FILE,'file') - delete(OUTPUT_FILE) - end -end - - - -%%%%%%%%%%%%% -function writeMixtureInfo(c) - -outputFile = 'baps6_output.baps'; - -% output the semi-supervised clustering results to the outputFile -% modified by Lu Cheng, 28.03.2010 - -ninds = length(c.PARTITION); -npops = c.npops; -popnames = c.popnames; -logml = c.logml; -partition = c.PARTITION; -partitionSummary = c.partitionSummary; - -if isempty(popnames) - popnames = cell(c.nPregroup,1); - for i=1:c.nPregroup - popnames{i} = num2str(i); - end -end - -if ~isempty(outputFile) - fid = fopen(outputFile,'w+'); -else - fid = -1; - %diary('baps5_semi_output.baps'); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%10s\n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); - fprintf(fid,'%20s\n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); - fprintf(fid,'%20s\n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); - fprintf(fid,'%20s\n\n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -end - -disp('Best Partition: '); -if (fid ~= -1) - fprintf(fid,'%s \n','Best Partition: '); -end -for m=1:npops - indsInM = unique(c.groupPartition(partition==m)); - - if isempty(indsInM) - continue; - end - - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - text = [text '}']; - - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',new_line); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',text); - end - end; -end - -names = true; - -logmldiff = c.logmldiff; -if npops == 1 - logmldiff = []; -else - disp(' '); - disp(' '); - disp('Changes in log(marginal likelihood) if pregroup i is moved to cluster j:'); - - if (fid ~= -1) - fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); - fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to cluster j:'); fprintf(fid, '\n'); - end - - text = sprintf('%10s','ind'); - for ii = 1:npops - tmpstr = sprintf('\t%10s',num2str(ii)); - text = [text tmpstr]; - end - - disp(text); - if (fid ~= -1) - fprintf(fid, '%s \n', text); - end - - for ii = 1:c.nPregroup - text = sprintf('%10s',popnames{ii}); - for jj = 1:npops - tmpstr = sprintf('\t%10s',num2str(logmldiff(ii,jj),'%10.6f')); - text = [text tmpstr]; - end - - if ii<100 - disp(text); - elseif ii==101 - disp('.......................................'); - disp('..........see output file..............'); - end - if (fid ~= -1) - fprintf(fid, '%s \n', text); - end - text = []; - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n\n', ' '); - fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n'); -end - -partitionSummaryKaikki = partitionSummary; -partitionSummary =[]; -for i=1:size(partitionSummaryKaikki,3) - partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)]; -end -% [I,J] = find(partitionSummaryKaikki(:,2,:)>-1e49); -% partitionSummaryKaikki = partitionSummaryKaikki(I,:,:); - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1),'%20d') ' ' num2str(partitionSummary(part,2),'%20.6f')]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', line); - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - - -%%%%%%%%%% - - -%-------------------------------------------------------------- -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -% newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) && n1e-5) - % Tapahtui muutos - muutoksia = 1; - kivaluku = kivaluku+1; - updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts,... - adjprior, priorTerm); - logml = logml+maxMuutos; - - end - end - - elseif round==2 %Populaation yhdistäminen toiseen. - maxMuutos = 0; - for pop = 1:npops - [muutokset, diffInCounts] = laskeMuutokset2(pop, rowsFromInd, ... - data, adjprior, priorTerm); - [isoin, indeksi] = max(muutokset); - if isoin>maxMuutos - maxMuutos = isoin; - i1 = pop; - i2 = indeksi; - diffInCountsBest = diffInCounts; - end - end - - if maxMuutos>1e-5 - muutoksia = 1; - updateGlobalVariables2(i1,i2,rowsFromInd, diffInCountsBest, ... - adjprior, priorTerm); - logml = logml + maxMuutos; - end - - - elseif round==3 | round==4 %Populaation jakaminen osiin. - maxMuutos = 0; - ninds = size(data,1)/rowsFromInd; - for pop = 1:npops - inds2 = find(PARTITION_IN==pop); - ninds2 = length(inds2); - if ninds2>5 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - if round==3 - npops2 = min(20, floor(ninds2 / 5)); %Moneenko osaan jaetaan - elseif round==4 - npops2 = 2; - end - T2 = cluster_own(Z2, npops2); - muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, data, ... - adjprior, priorTerm, pop); - [isoin, indeksi] = max(muutokset(1:end)); - if isoin>maxMuutos - maxMuutos = isoin; - muuttuvaPop2 = rem(indeksi,npops2); - if muuttuvaPop2==0, muuttuvaPop2 = npops2; end - muuttuvat = inds2(find(T2==muuttuvaPop2)); - i2 = ceil(indeksi/npops2); - end - end - end - if maxMuutos>1e-5 - muutoksia = 1; - rows = computeRows(rowsFromInd, muuttuvat, length(muuttuvat)); - diffInCounts = computeDiffInCounts(rows, size(COUNTS_IN,1), ... - size(COUNTS_IN,2), data); - i1 = PARTITION_IN(muuttuvat(1)); - updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ... - adjprior, priorTerm, i2); - logml = logml + maxMuutos; - - end - - elseif round == 5 | round == 6 - pop=0; - muutettu = 0; - poplogml = POP_LOGML_IN; - partition = PARTITION_IN; - counts = COUNTS_IN; - sumcounts = SUMCOUNTS_IN; - - while (pop < npops & muutettu == 0) - pop = pop+1; - totalMuutos = 0; - inds = find(PARTITION_IN==pop); - if round == 5 - aputaulu = [inds rand(length(inds),1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - elseif round == 6 - inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm); - end - - i=0; - - while (length(inds)>0 & i 1e-5 - i=length(inds); - end - end - end - - if totalMuutos>1e-5 - muutettu=1; - muutoksia = 1; % Ulompi kirjanpito. - else - % Missään vaiheessa tila ei parantunut. - % Perutaan kaikki muutokset. - PARTITION_IN = partition; - SUMCOUNTS_IN = sumcounts; - POP_LOGML_IN = poplogml; - COUNTS_IN = counts; - logml = logml - totalMuutos; - end - end - clear partition; clear sumcounts; clear counts; clear poplogml; - end - end - - - if muutoksia == 0 - if vaihe==1 - vaihe = 2; - elseif vaihe==2 - vaihe = 3; - elseif vaihe==3 - vaihe = 4; - elseif vaihe==4; - vaihe = 5; - elseif vaihe==5 - ready = 1; - end - else - muutoksia = 0; - end - - if ready==0 - if vaihe==1 - roundTypes=[1]; - elseif vaihe==2 - roundTypes = [2]; - elseif vaihe==3 - roundTypes=[5]; - elseif vaihe==4 - roundTypes=[4 3 1]; - elseif vaihe - roundTypes=[6 2 3 4 1]; - end - end -end - -partition = PARTITION_IN; -counts = COUNTS_IN; -sumcounts = SUMCOUNTS_IN; - - - -%------------------------------------------------------------------------------------- - -function [sumcounts, counts, logml] = ... - initialCounts(partition, data, npops, rowsFromInd, noalle) - -nloci=size(data,2); -ninds = size(data,1)/rowsFromInd; - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -initializeGammaln(ninds, rowsFromInd, max(noalle)); - -logml = computeLogml(counts, sumcounts, noalle, data, rowsFromInd); - - -%----------------------------------------------------------------------- - - -function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd) -nloci = size(counts,2); -npops = size(counts,3); -adjnoalle = zeros(max(noalle),nloci); -for j=1:nloci - adjnoalle(1:noalle(j),j)=noalle(j); - if (noalle(j)0 - rows = computeRows(rowsFromInd, inds, ninds); - diffInCounts = computeDiffInCounts(rows, size(COUNTS_IN,1), size(COUNTS_IN,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)-diffInCounts; - SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)-diffInSumCounts; - new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)+diffInCounts; - SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)+diffInSumCounts; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML_IN(i2)'; - - COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; - COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - - muutokset(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - - -%------------------------------------------------------------------------------------ - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS_IN:issa), jotka ovat data:n -% riveillä rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS_IN; -global SUMCOUNTS_IN; -x = size(COUNTS_IN,1); -y = size(COUNTS_IN,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS_IN(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS_IN(pops,:)),2) - priorTerm; - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäärä. - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - - -%---------------------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end \ No newline at end of file diff --git a/matlab/spatial/initSpatialMultiMixture.m b/matlab/spatial/initSpatialMultiMixture.m deleted file mode 100644 index 4916db1..0000000 --- a/matlab/spatial/initSpatialMultiMixture.m +++ /dev/null @@ -1,18 +0,0 @@ -function [partition, counts, sumcounts] = initSpatialMultiMixture(initData, ... - npops, Z, rows, noalle, dist, adjprior, priorTerm, fixedK); -% Etsii spatial multimixturelle alkutilan baps 3.1:n ahneella algoritmilla. -% toimii! - -global PARTITION; global COUNTS; -global SUMCOUNTS; global POP_LOGML; - -c.data = initData; c.Z = Z; c.rows=rows; c.rowsFromInd=0; c.noalle=noalle; -c.dist = dist; c.adjprior = adjprior; c.priorTerm = priorTerm; - -indMix_fixK(c,npops,1,0); - -partition = PARTITION; counts = COUNTS; sumcounts = SUMCOUNTS; - - - - diff --git a/matlab/spatial/private/calcLogmlChanges.m b/matlab/spatial/private/calcLogmlChanges.m deleted file mode 100644 index 22f4eb8..0000000 --- a/matlab/spatial/private/calcLogmlChanges.m +++ /dev/null @@ -1,82 +0,0 @@ -function changes = calcLogmlChanges(inds, cqData, nCqCodes, spData, nSpCodes, locCliques, locSeparators, logml) -% compute the logml change if the given inds are moved to another cluster -% the input inds are supposed to come from the same cluster -% changes is a npops*1 vector -% Lu Cheng, 15.12.2012 - -global CQ_COUNTS; global SUM_CQ_COUNTS; -global SP_COUNTS; global SUM_SP_COUNTS; -global PARTITION; - -global LOC_CQ_COUNTS; -global LOC_SP_COUNTS; - -npops = size(CQ_COUNTS,3); -changes = zeros(npops,1); -indsToBeMoved = inds; - -if isempty(indsToBeMoved), return, end - -i1 = PARTITION(indsToBeMoved(1)); -[diffCqCounts diffCqSumCounts]= computeDiffInCounts(indsToBeMoved, cqData, nCqCodes); -[diffSpCounts diffSpSumCounts]= computeDiffInCounts(indsToBeMoved, spData, nSpCodes); - -diffLocCqCounts = computeDiffInCliqCounts(locCliques, indsToBeMoved); -diffLocSpCounts = computeDiffInCliqCounts(locSeparators, indsToBeMoved); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1) - diffCqCounts; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1) - diffSpCounts; - -SUM_CQ_COUNTS(:,i1) = SUM_CQ_COUNTS(:,i1) - diffCqSumCounts; -SUM_SP_COUNTS(:,i1) = SUM_SP_COUNTS(:,i1) - diffSpSumCounts; - -LOC_CQ_COUNTS(:,i1) = LOC_CQ_COUNTS(:,i1) - diffLocCqCounts; -LOC_SP_COUNTS(:,i1) = LOC_SP_COUNTS(:,i1) - diffLocSpCounts; - -% PARTITION(inds) = -1; -updateLogmlTable(i1); - -for i2 = 1:npops - if i2 ~= i1 - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2) + diffCqCounts; - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2) + diffSpCounts; - - SUM_CQ_COUNTS(:,i2) = SUM_CQ_COUNTS(:,i2) + diffCqSumCounts; - SUM_SP_COUNTS(:,i2) = SUM_SP_COUNTS(:,i2) + diffSpSumCounts; - - LOC_CQ_COUNTS(:,i2) = LOC_CQ_COUNTS(:,i2) + diffLocCqCounts; - LOC_SP_COUNTS(:,i2) = LOC_SP_COUNTS(:,i2) + diffLocSpCounts; - -% PARTITION(inds) = i2; - updateLogmlTable(i2); - logml_new = computeTotalLogml(); - changes(i2) = logml_new - logml; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2) - diffCqCounts; - SUM_CQ_COUNTS(:,i2) = SUM_CQ_COUNTS(:,i2) - diffCqSumCounts; - - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2) - diffSpCounts; - SUM_SP_COUNTS(:,i2) = SUM_SP_COUNTS(:,i2) - diffSpSumCounts; - - LOC_CQ_COUNTS(:,i2) = LOC_CQ_COUNTS(:,i2) - diffLocCqCounts; - LOC_SP_COUNTS(:,i2) = LOC_SP_COUNTS(:,i2) - diffLocSpCounts; - -% PARTITION(inds) = -1; - updateLogmlTable(i2); - end -end - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1) + diffCqCounts; -SUM_CQ_COUNTS(:,i1) = SUM_CQ_COUNTS(:,i1) + diffCqSumCounts; - -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1) + diffSpCounts; -SUM_SP_COUNTS(:,i1) = SUM_SP_COUNTS(:,i1) + diffSpSumCounts; - -LOC_CQ_COUNTS(:,i1) = LOC_CQ_COUNTS(:,i1) + diffLocCqCounts; -LOC_SP_COUNTS(:,i1) = LOC_SP_COUNTS(:,i1) + diffLocSpCounts; - -% PARTITION(inds) = i1; -updateLogmlTable(i1); - - -%--------------------------------------------------------------------- \ No newline at end of file diff --git a/matlab/spatial/private/clearGlobalVars.m b/matlab/spatial/private/clearGlobalVars.m deleted file mode 100644 index a6fd932..0000000 --- a/matlab/spatial/private/clearGlobalVars.m +++ /dev/null @@ -1,18 +0,0 @@ -function clearGlobalVars -% Lu Cheng, 15.12.2012 - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global LOGML_TABLE; LOGML_TABLE = []; -global ADDITION_DIFFERENCE; ADDITION_DIFFERENCE = []; -global REMOVAL_DIFFERENCE; REMOVAL_DIFFERENCE = []; -global JOIN_DIFFERENCE; JOIN_DIFFERENCE = []; -global CQ_COUNTS; CQ_COUNTS = []; -global SP_COUNTS; SP_COUNTS = []; -global SUM_CQ_COUNTS; SUM_CQ_COUNTS = []; -global SUM_SP_COUNTS; SUM_SP_COUNTS = []; -global CQ_PRIOR; CQ_PRIOR = []; -global SP_PRIOR; SP_PRIOR = []; -global LOC_SP_COUNTS; LOC_SP_COUNTS = []; -global LOC_CQ_COUNTS; LOC_CQ_COUNTS = []; \ No newline at end of file diff --git a/matlab/spatial/private/cluster_own.m b/matlab/spatial/private/cluster_own.m deleted file mode 100644 index 6d9dab6..0000000 --- a/matlab/spatial/private/cluster_own.m +++ /dev/null @@ -1,52 +0,0 @@ -function T = cluster_own(Z,nclust) -% search down the dendogram from the root, until nclust clusters are found -% comments added by Lu Cheng -% 04.01.2011 - -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - -% maximum number of clusters based on inconsistency -if m <= maxclust - T = (1:m)'; -elseif maxclust==1 - T = ones(m,1); -else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end -end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - diff --git a/matlab/spatial/private/computeCounts.m b/matlab/spatial/private/computeCounts.m deleted file mode 100644 index a03967f..0000000 --- a/matlab/spatial/private/computeCounts.m +++ /dev/null @@ -1,27 +0,0 @@ -function [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops) - -global PARTITION; -ncliq = size(cliques,1); -nsep = size(separators,1); - -cliqPartition = zeros(ncliq, size(cliques,2)); -sepPartition = zeros(nsep, size(separators, 2)); - -apuCliq = find(cliques > 0); -apuSep = find(separators > 0); - -cliqPartition(apuCliq) = PARTITION(cliques(apuCliq)); -sepPartition(apuSep) = PARTITION(separators(apuSep)); - - -cliqcounts = zeros(ncliq, npops); -for i = 1:npops - cliqcounts(:,i) = sum(cliqPartition == i, 2); -end - -sepcounts = zeros(nsep, npops); -for i = 1:npops - sepcounts(:,i) = sum(sepPartition == i, 2); -end - -%------------------------------------------------------------------------- \ No newline at end of file diff --git a/matlab/spatial/private/computeDiffInCliqCounts.m b/matlab/spatial/private/computeDiffInCliqCounts.m deleted file mode 100644 index 1a99b6c..0000000 --- a/matlab/spatial/private/computeDiffInCliqCounts.m +++ /dev/null @@ -1,19 +0,0 @@ -function diffInCliqCounts = computeDiffInCliqCounts(cliques, inds) -% Laskee muutoksen CLIQCOUNTS:ssa (tai SEPCOUNTS:ssa, jos syötteen? -% separators) kun yksilöt inds siirretään. -% diffInCliqcounts on ncliq*1 taulu, joka on CLIQCOUNTS:n sarakkeesta josta -% yksilöt inds siirretään ja lisättäv?sarakkeeseen, johon yksilöt -% siirretään. - -% taken from spatial model of Jukka Siren's code -% Lu Cheng -% 15.12.2012 - -ncliq = size(cliques,1); -diffInCliqCounts = zeros(ncliq,1); -ninds = length(inds); -for i = 1:ninds - ind = inds(i); - rivit = sum((cliques == ind),2); - diffInCliqCounts = diffInCliqCounts + rivit; -end \ No newline at end of file diff --git a/matlab/spatial/private/computeDiffInCounts.m b/matlab/spatial/private/computeDiffInCounts.m deleted file mode 100644 index df807fa..0000000 --- a/matlab/spatial/private/computeDiffInCounts.m +++ /dev/null @@ -1,7 +0,0 @@ -function [counts sumcounts] = computeDiffInCounts(rows, data, nLetters) -% calculate the counts of the given rows of the data (ninds*nLoci) -% nLetters is the maximum number of different symbols over all loci -% Lu Cheng, 25.05.2011 - -counts = histc(data(rows,:),1:nLetters,1); -sumcounts = sum(counts,1)'; diff --git a/matlab/spatial/private/computeTotalLogml.m b/matlab/spatial/private/computeTotalLogml.m deleted file mode 100644 index d12588e..0000000 --- a/matlab/spatial/private/computeTotalLogml.m +++ /dev/null @@ -1,42 +0,0 @@ -function logml = computeTotalLogml -% compute the log marginal likelihood of the data -% Lu Cheng, 15.12.2012 - -global LOGML_TABLE; -global LOC_CQ_COUNTS; -global LOC_SP_COUNTS; - - -notEmpty = any(LOC_CQ_COUNTS,1); -npops = length(find(notEmpty == 1)); - -% the following codes added by Lu Cheng, 15.12.2012 -% some lines might all be zero if some sequence is deleted -tmpIndsCq = find(any(LOC_CQ_COUNTS,2)); -tmpIndsSp = find(any(LOC_SP_COUNTS,2)); - -locCqCounts = LOC_CQ_COUNTS(tmpIndsCq,notEmpty); -locSpCounts = LOC_SP_COUNTS(tmpIndsSp,notEmpty); - -sumcliq=sum(locCqCounts, 2); -sumsep=sum(locSpCounts, 2); - -ncliq = length(tmpIndsCq); -nsep = length(tmpIndsSp); -cliqsizes = sum(locCqCounts, 2)'; -sepsizes = sum(locSpCounts, 2)'; -cliqsizes = min([cliqsizes; npops*ones(1,ncliq)])'; -sepsizes = min([sepsizes; npops*ones(1,nsep)])'; - -klikkitn = sum(sum(gammaln(locCqCounts + repmat(1./cliqsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./cliqsizes))) ... - - sum(gammaln(sumcliq + 1)); - -septn = sum(sum(gammaln(locSpCounts + repmat(1./sepsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./sepsizes))) ... - - sum(gammaln(sumsep + 1)); - -spatialPrior = (klikkitn - septn); - - -logml = sum(LOGML_TABLE) + spatialPrior; \ No newline at end of file diff --git a/matlab/spatial/private/initialCounts2.m b/matlab/spatial/private/initialCounts2.m deleted file mode 100644 index 03d80cf..0000000 --- a/matlab/spatial/private/initialCounts2.m +++ /dev/null @@ -1,17 +0,0 @@ -function [sumcounts, counts] = initialCounts2(partition, data, npops, nLetters) -% initialize counts and sumcounts for the initial partition -% npops: number of populations in the partition -% nLetters: the maximum number of different symbols over all loci -% Lu Cheng, 25.05.2011 - -[nSeq nLoci] = size(data); - -counts = zeros(nLetters,nLoci,npops); -sumcounts = zeros(nLoci,npops); - -for i=1:npops - inds = (partition==i); - counts(:,:,i) = histc(data(inds,:),1:nLetters,1); - sumcounts(:,i) = sum(counts(:,:,i),1); -end - diff --git a/matlab/spatial/private/model_search_parallel.m b/matlab/spatial/private/model_search_parallel.m deleted file mode 100644 index 14304a3..0000000 --- a/matlab/spatial/private/model_search_parallel.m +++ /dev/null @@ -1,366 +0,0 @@ -function [partition, logml, partitionSummary, logmldiff] = model_search_parallel(c, partition, orig_dist, roundTypes) -% This function clusters DNA alignment using "codon" model in Corander and Tang's -% paper: Bayesian analysis of population structure based on linked -% molecular information (2007), Mathematical Biosciences -% c: preprocessed data for the sequence alignment -% partition: initial partition of the individuals -% origdist: hamming distance between individuals, indexed by -% (1,2)(1,3)(14)...(2,3)(2,4).....(3,4)...(n-1,n) -% roundTypes: array of operation types - -% Lu Cheng -% 15.12.2012 - -interactive = false; - -global PARTITION; -global CQ_COUNTS;global SUM_CQ_COUNTS; -global SP_COUNTS;global SUM_SP_COUNTS; -global CQ_PRIOR; global SP_PRIOR; -global LOGML_TABLE; -global ADDITION_DIFFERENCE; -global REMOVAL_DIFFERENCE; -global JOIN_DIFFERENCE; - -global LOC_SP_COUNTS; -global LOC_CQ_COUNTS; - -clearGlobalVars; - -nPOPS = length(unique(partition)); - -% PRIOR VALUES: -CQ_PRIOR = c.cqPrior; -SP_PRIOR = c.spPrior; - -% Initialize PARTITION, **_COUNTS, SUM_**_COUNTS, alnMat -[sumCqCounts, cqCounts] = initialCounts2(partition, c.cqData, nPOPS, c.nMaxCqCodes); -[sumSpCounts, spCounts] = initialCounts2(partition, c.spData, nPOPS, c.nMaxSpCodes); - -CQ_COUNTS = cqCounts; SUM_CQ_COUNTS = sumCqCounts; -SP_COUNTS = spCounts; SUM_SP_COUNTS = sumSpCounts; - -PARTITION = partition; -[cliqcounts, sepcounts] = computeCounts(c.locCliques, c.locSeparators, nPOPS); - -LOC_CQ_COUNTS = cliqcounts; -LOC_SP_COUNTS = sepcounts; - -% alnMat = c.alnMat; -partitionSummary = -Inf*ones(30,2,nPOPS); % Tiedot 30 parhaasta partitiosta (npops ja logml) -partitionSummary(:,1,:) = zeros(30,1,nPOPS); -worstLogml = -Inf*ones(1, nPOPS); worstIndex = ones(1, nPOPS); - -clear partition cqCounts sumCqCounts spCounts sumSpCounts - -% Initialize LOGML_TABLE: -nINDS = c.nSeq; -LOGML_TABLE = zeros(nPOPS,1); -updateLogmlTable(1:nPOPS); - -REMOVAL_DIFFERENCE = zeros(nINDS,1); -REMOVAL_DIFFERENCE(:,:) = nan; -ADDITION_DIFFERENCE = zeros(nINDS,nPOPS); -ADDITION_DIFFERENCE(:,:) = nan; -JOIN_DIFFERENCE = zeros(nPOPS, nPOPS); -JOIN_DIFFERENCE(:,:) = nan; - -% ***********Doc:******************** -% REMOVAL_DIFFERENCE(ind) tells the change in logml if ind is removed from -% its cluster. nan, if the cluster has changed, since the value was last -% calculated. -% -% ADDITION_DIFFERENCE(ind, pop) tells the change in logml if ind is added -% to cluster pop. nan, if the cluster has changed since the value was last -% calculated. Always nan, if pop is ind's own cluster. -% -% JOIN_DIFFERENCE(pop1,pop2) = tells the change in logml if pop1 and pop2 -% are combined. nan, if either cluster has changed since the value was last -% calculated. -% ***********Doc end***************** - -logml = computeTotalLogml; - -disp('The beginning:'); -% disp(['Partition: ' num2str(PARTITION')]); -disp(['Nclusters: ' num2str(length(unique(PARTITION)))]); -disp(['Log(ml*prior): ' num2str(logml)]); -disp(' '); - - -nnotEmptyPops = length(unique(PARTITION)); -if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end -end - -% START SEARCH OF THE BEST PARTITION: - -vipu = zeros(1,14); -if interactive - roundTypes = input('Input steps: '); - if ischar(roundTypes), roundTypes = str2num(roundTypes); end -end -ready = 0; - - -while ready ~= 1 - -% disp(['Performing steps: ' num2str(roundTypes)]); - - for n = 1:length(roundTypes) - round = roundTypes(n); - moveCounter = 0; - - if round==1 && vipu(1)==0 % move an individual to another population - -% inds = randperm(nINDS); - inds = getMoveInds(orig_dist,nINDS); % get inds to be moved - - for ind = inds(:)' - update_difference_tables(ind, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml); - tmpDiff = REMOVAL_DIFFERENCE(ind) + ADDITION_DIFFERENCE(ind,:); - tmpDiff(PARTITION(ind)) = 0; - [maxChange, maxIndex] = max(tmpDiff); - if maxChange>1e-5 - updateGlobalVariables(ind, maxIndex, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators); -% fprintf('moving from %d to %d.\n',PARTITION(ind),maxIndex) - logml = computeTotalLogml(); - moveCounter = moveCounter+1; - vipu = zeros(1,14); - - nnotEmptyPops = length(unique(PARTITION)); - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - end - end - if moveCounter==0, vipu(1)=1; end - disp(['Step 1: ' num2str(moveCounter) ' individuals were moved.']); - - elseif round==2 && vipu(2)==0 % join two populations - - update_join_difference(c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml); - [maxChange, aux] = max(JOIN_DIFFERENCE(:)); - [i1, i2] = ind2sub([nPOPS,nPOPS],aux); - - if maxChange>1e-5 - tmpInds = find(PARTITION==i1); - updateGlobalVariables(tmpInds, i2, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators); - logml = computeTotalLogml; - - disp(['Step 2: Clusters ' num2str(i1) ' and ' num2str(i2) ' combined.']); - vipu = zeros(1,14); - - nnotEmptyPops = length(unique(PARTITION)); - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - else - disp('Step 2: no changes.'); - vipu(2)=1; - end - elseif ismember(round, 3:4) && vipu(round)==0 % Split a population, and move one subpopulation to another population - - pops = randperm(nPOPS); - - splitFlags = zeros(nPOPS,1); - for pop = pops(:)' - - maxChange = 0; - indsToBeMoved = []; - - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>4 - - if round==3 - dist3 = getDistance(inds2, orig_dist, nINDS); - npops2 = min(20, floor(ninds2 / 5)); %Moneenko osaan jaetaan - elseif round==4 - dist3 = getDistance(inds2, orig_dist, nINDS); - npops2 = 2; - end - - Z3 = linkage(dist3); - T3 = cluster_own(Z3, npops2); - - for i = 1:npops2 - indsX = inds2(T3==i); indsX = indsX'; - tmpChanges = calcLogmlChanges(indsX, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml); - [tmpMaxChange, tmpMaxPop] = max(tmpChanges); - if tmpMaxChange>maxChange - maxChange = tmpMaxChange; - % i1 = pop; - i2 = tmpMaxPop; - indsToBeMoved = indsX; - end - end - if maxChange>1e-5 - updateGlobalVariables(indsToBeMoved, i2, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators); - logml = computeTotalLogml; - splitFlags(pop)=1; - - nnotEmptyPops = length(unique(PARTITION)); - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - end - end - end - if any(splitFlags) - disp(['Step ' num2str(round) ': ' num2str(sum(splitFlags)) ' populations were split.']); - vipu = zeros(1,14); - else - disp(['Step ' num2str(round) ': no changes.']); - vipu(round)=1; - end - end - end - - if interactive - roundTypes = input('Input extra steps: '); - if ischar(roundTypes), roundTypes = str2num(roundTypes); end - else - roundTypes = []; - end - - if isempty(roundTypes) - ready = 1; - end -end - -%disp(' '); -disp('BEST PARTITION: '); -%disp(['Partition: ' num2str(PARTITION')]); -disp(['Nclusters: ' num2str(length(unique(PARTITION)))]); -disp(['Log(ml): ' num2str(logml)]); -disp(' '); - -nPOPS= rmEmptyPopulation(c.locCliques, c.locSeparators); -ADDITION_DIFFERENCE(:) = NaN; -REMOVAL_DIFFERENCE(:) = NaN; - -logmldiff = zeros(nINDS,nPOPS); % the change of logml if individual i is moved to group j -for i=1:nINDS - update_difference_tables(i, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml); - logmldiff(i,:) = REMOVAL_DIFFERENCE(i)+ADDITION_DIFFERENCE(i,:); - if all(isnan(logmldiff(i,:))) - keyboard - end -end -logmldiff(isnan(logmldiff))=0; -partition = PARTITION; - - - - -%---------------------------------------------------------------------------- - - -function [dist2, dind1, dind2] = getDistance(inds2, dist_orig, ninds) -% pick out the distrances between samples in "inds2" from "dist_orig" -% dist_orig specifies the distances of (1,2),(1,3),(1,4)......(ninds-1,ninds) -% Lu Cheng, 22.06.2011 - -if ~issorted(inds2) - error('inds2 is not in ascending order!'); -end - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -irow = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(irow, 1) = inds2(i); - apu(irow, 2) = inds2(j); - irow = irow+1; - end -end - -dind1 = apu(:,1); -dind2 = apu(:,2); - -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist_orig(apu); - - -%--------------------------------------------------------------- - - -function inds = getMoveInds(dist_orig, ninds) -% get individual indexs to be moved to another cluster -% we always take the 30% individuals of each cluster which are most distant -% to each other -% Lu Cheng, 25.05.2011 - -global PARTITION; - -pops = unique(PARTITION); -inds = []; - -for tmpPop = pops(:)' - tmpInds = find(PARTITION == tmpPop)'; - - if(length(tmpInds)<20) - inds = [inds tmpInds(:)']; %#ok - continue; - end - - [tmpDist, dind1, dind2] = getDistance(tmpInds,dist_orig,ninds); - tmpSDist = sort(tmpDist,'Descend'); - tmpInds2 = find(tmpDist>tmpSDist(round(length(tmpSDist)*0.3))); - tmpInds3 = union(unique(dind1(tmpInds2)), unique(dind2(tmpInds2))); - inds = [inds tmpInds3(:)']; %#ok -end - - - - - -% ------------------------------------------------------------ - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. -global PARTITION; - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end \ No newline at end of file diff --git a/matlab/spatial/private/model_search_pregroup.m b/matlab/spatial/private/model_search_pregroup.m deleted file mode 100644 index 2c57d1a..0000000 --- a/matlab/spatial/private/model_search_pregroup.m +++ /dev/null @@ -1,385 +0,0 @@ -function [partition, logml, partitionSummary, logmldiff] = model_search_pregroup(c, pgPart, pgDist, roundTypes, nMaxPops) -% This function clusters DNA alignment using "codon" model in Corander and Tang's -% paper: Bayesian analysis of population structure based on linked -% molecular information (2007), Mathematical Biosciences -% c: preprocessed data for the sequence alignment -% pgPart: partition which assign sequences to pregroups -% pgDist: distances between the pregroups -% (1,2)(1,3)(1,4)...(2,3)(2,4).....(3,4)...(n-1,n) -% roundTypes: array of operation types - -% Lu Cheng -% 21.03.2012 - -interactive = false; - -global PARTITION; -global CQ_COUNTS;global SUM_CQ_COUNTS; -global SP_COUNTS;global SUM_SP_COUNTS; -global CQ_PRIOR; global SP_PRIOR; -global LOGML_TABLE; -global ADDITION_DIFFERENCE; -global REMOVAL_DIFFERENCE; -global JOIN_DIFFERENCE; - -global LOC_SP_COUNTS; -global LOC_CQ_COUNTS; - -clearGlobalVars; - -nINDS = c.nSeq; -nPOPS = nMaxPops; - -% load pregroup information -nPregroup = length(unique(pgPart)); -if nPregroupworstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end -end - -% START SEARCH OF THE BEST PARTITION: - -vipu = zeros(1,14); -if interactive - roundTypes = input('Input steps: '); - if ischar(roundTypes), roundTypes = str2num(roundTypes); end -end -ready = 0; - - -while ready ~= 1 - -% disp(['Performing steps: ' num2str(roundTypes)]); - - for n = 1:length(roundTypes) - round = roundTypes(n); - moveCounter = 0; - - if round==1 && vipu(1)==0 % move an individual to another population - - pgInds = getMoveInds(pgPart,pgDist,nPregroup); % get pregroup inds to be moved - - for pgind = pgInds(:)' -% inds = cell2mat(pregroups(pgInds)); - tmpInds = pregroups{pgind}; - tmpChanges = calcLogmlChanges(tmpInds, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml); - - [maxChange, maxIndex] = max(tmpChanges); - if maxChange>1e-5 - updateGlobalVariables(tmpInds, maxIndex, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes,c.locCliques, c.locSeparators); -% fprintf('moving from %d to %d.\n',PARTITION(ind),maxIndex) - logml = computeTotalLogml(); - moveCounter = moveCounter+length(pgInds); - vipu = zeros(1,14); - - nnotEmptyPops = length(unique(PARTITION)); - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - end - end - if moveCounter==0, vipu(1)=1; end - disp(['Step 1: ' num2str(moveCounter) ' pregroups were moved.']); - - elseif round==2 && vipu(2)==0 % join two populations - - update_join_difference(c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml); - [maxChange, aux] = max(JOIN_DIFFERENCE(:)); - [i1, i2] = ind2sub([nPOPS,nPOPS],aux); - - if maxChange>1e-5 - tmpInds = find(PARTITION==i1); - updateGlobalVariables(tmpInds, i2, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators); - logml = computeTotalLogml; - - disp(['Step 2: Clusters ' num2str(i1) ' and ' num2str(i2) ' combined.']); - vipu = zeros(1,14); - - nnotEmptyPops = length(unique(PARTITION)); - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - else - disp('Step 2: no changes.'); - vipu(2)=1; - end - elseif ismember(round, 3:4) && vipu(round)==0 % Split a population, and move one subpopulation to another population - - pops = randperm(nPOPS); - - splitFlags = zeros(nPOPS,1); - for pop = pops(:)' - - maxChange = 0; - indsToBeMoved = []; - - inds2 = find(PARTITION==pop); - pgInds2 = unique(pgPart(inds2)); - nPgInds2 = length(unique(pgPart(inds2))); - if nPgInds2>4 - - if round==3 - dist3 = getDistance(pgInds2,pgDist,nPregroup); - npops2 = min(20, floor(nPgInds2 / 5)); - elseif round==4 - dist3 = getDistance(pgInds2,pgDist,nPregroup); - npops2 = 2; - end - - Z3 = linkage(dist3(:)','complete'); - T3 = cluster(Z3, 'maxclust', npops2); - - for i = 1:npops2 - indsX = pgInds2(T3==i); - indsX = cell2mat(pregroups(indsX)); - tmpChanges = calcLogmlChanges(indsX, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml); - [tmpMaxChange, tmpMaxPop] = max(tmpChanges); - if tmpMaxChange>maxChange - maxChange = tmpMaxChange; - % i1 = pop; - i2 = tmpMaxPop; - indsToBeMoved = indsX; - end - end - if maxChange>1e-5 - updateGlobalVariables(indsToBeMoved, i2, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators); - logml = computeTotalLogml; - splitFlags(pop)=1; - - nnotEmptyPops = length(unique(PARTITION)); - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - end - end - end - if any(splitFlags) - disp(['Step ' num2str(round) ': ' num2str(sum(splitFlags)) ' populations were split.']); - vipu = zeros(1,14); - else - disp(['Step ' num2str(round) ': no changes.']); - vipu(round)=1; - end - end - end - - if interactive - roundTypes = input('Input extra steps: '); - if ischar(roundTypes), roundTypes = str2num(roundTypes); end - else - roundTypes = []; - end - - if isempty(roundTypes) - ready = 1; - end -end - -% disp(' '); -disp('BEST PARTITION: '); -% disp(['Partition: ' num2str(PARTITION')]); -disp(['Nclusters: ' num2str(length(unique(PARTITION)))]); -disp(['Log(ml): ' num2str(logml)]); -disp(' '); - -nPOPS = rmEmptyPopulation(c.locCliques, c.locSeparators); - -logmldiff = zeros(nPregroup,nPOPS); % the change of logml if pregroup i is moved to group j -for i=1:nPregroup - tmpInds = pregroups{i}; - tmpChanges = calcLogmlChanges(tmpInds, c.cqData, c.nMaxCqCodes, ... - c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml); - logmldiff(i,:) = tmpChanges'; -end -logmldiff(isnan(logmldiff))=0; - -partition = zeros(nPregroup,1); -for i=1:nPregroup - partition(i)=unique(PARTITION(pgPart==i)); -end - -%---------------------------------------------------------------------------- - - -function [dist2, dind1, dind2] = getDistance(inds2, origDist, ninds) -% pick out the distrances between samples in "inds2" from "origDist" -% origDist specifies the distances of (1,2),(1,3),(1,4)......(ninds-1,ninds) -% Lu Cheng, 22.06.2011 - -if ~issorted(inds2) - error('inds2 is not in ascending order!'); -end - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -irow = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(irow, 1) = inds2(i); - apu(irow, 2) = inds2(j); - irow = irow+1; - end -end - -dind1 = apu(:,1); -dind2 = apu(:,2); - -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = origDist(apu); - -%--------------------------------------------------------------- - - -function inds = getMoveInds(pgPart, pgDist, nPregroup) -% get pregroup indexs to be moved to another cluster -% we always take the 35% pregroups of each cluster which are most distant -% to each other -% Lu Cheng, 22.06.2011 - -global PARTITION; - -pops = unique(PARTITION); -inds = []; - -for tmpPop = pops(:)' - tmpInds = unique(pgPart(PARTITION==tmpPop)); - - if(length(tmpInds)<20) - inds = [inds tmpInds(:)']; %#ok - continue; - end - - [tmpDist, dind1, dind2] = getDistance(tmpInds,pgDist,nPregroup); - tmpVal = quantile(tmpDist,0.65); - tmpInds2 = find(tmpDist>tmpVal); - tmpInds3 = union(unique(dind1(tmpInds2)), unique(dind2(tmpInds2))); - inds = [inds tmpInds3(:)']; %#ok -end - - -% ------------------------------------------------------------ - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. -global PARTITION; - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - - - diff --git a/matlab/spatial/private/preprocAln.m b/matlab/spatial/private/preprocAln.m deleted file mode 100644 index 517791b..0000000 --- a/matlab/spatial/private/preprocAln.m +++ /dev/null @@ -1,97 +0,0 @@ -function c = preprocAln(alnMat) -% This function preprocess the alignment matrix to cliques and separators -% Lu Cheng, 24.05.2011 - -[nSeq nLoci] = size(alnMat); - -alnCell = mat2cell(alnMat,nSeq,ones(1,nLoci)); - -arrUniqBase = cellfun(@unique,alnCell,'UniformOutput',false); % unique base at each loci -arrUniqBaseNum = cellfun(@length,arrUniqBase); - -arrCqNum = arrUniqBaseNum(1:end-2).*arrUniqBaseNum(2:end-1).*arrUniqBaseNum(3:end); -arrSpNum = arrUniqBaseNum(2:end-2).*arrUniqBaseNum(3:end-1); - -nMaxCqCodes = max(arrCqNum); -nMaxSpCodes = max(arrSpNum); - -cqCodes = cellfun(@myProd,arrUniqBase(1:end-2),arrUniqBase(2:end-1),arrUniqBase(3:end), ... - 'UniformOutput',false); -spCodes = cellfun(@myProd,arrUniqBase(2:end-2),arrUniqBase(3:end-1), ... - 'UniformOutput',false); - -cqData = zeros(nSeq,length(cqCodes)); -spData = zeros(nSeq,length(spCodes)); - -cqCounts = zeros(nMaxCqCodes,length(cqCodes)); -spCounts = zeros(nMaxSpCodes,length(spCodes)); - -cqPrior = ones(nMaxCqCodes,length(cqCodes)); -spPrior = ones(nMaxSpCodes,length(spCodes)); - -for i=1:nLoci-2 - - nCodeTmp = size(cqCodes{i},1); - for j=1:nCodeTmp - tmpInds = ismember(alnMat(:,i:i+2),cqCodes{i}(j,:),'rows'); - cqData(tmpInds,i) = j; - cqCounts(j,i) = sum(tmpInds); - end - - cqPrior(1:nCodeTmp,i) = 1/nCodeTmp; - - if i==1 - continue; - end - - k=i-1; - nCodeTmp = size(spCodes{k},1); - for j=1:nCodeTmp - tmpInds = ismember(alnMat(:,i:i+1),spCodes{k}(j,:),'rows'); - spData(tmpInds,k) = j; - spCounts(j,k) = sum(tmpInds); - end - - spPrior(1:nCodeTmp,k) = 1/nCodeTmp; -end - -c.nSeq = nSeq; -% c.alnMat = alnMat; - -c.arrUniqBase = arrUniqBase; -c.arrUniqBaseNum = arrUniqBaseNum; - -c.nMaxCqCodes = nMaxCqCodes; -c.nMaxSpCodes = nMaxSpCodes; - -c.cqCodes = cqCodes; -c.spCodes = spCodes; - -c.cqData = cqData; -c.spData = spData; - -c.cqCounts = cqCounts; -c.spCounts = spCounts; - -c.cqPrior = cqPrior; -c.spPrior = spPrior; - - -function y = myProd(varargin) -% calculate the cartesian product for the input -% Lu Cheng, 24.05.2011 - -if nargin==2 - set1 = varargin{1}; - set2 = varargin{2}; - [t1 t2] = meshgrid(set1,set2); - y = [t1(:) t2(:)]; -elseif nargin==3 - set1 = varargin{1}; - set2 = varargin{2}; - set3 = varargin{3}; - [t1 t2 t3] = meshgrid(set1,set2,set3); - y = [t1(:) t2(:) t3(:)]; -else - y = []; -end \ No newline at end of file diff --git a/matlab/spatial/private/rmEmptyPopulation.m b/matlab/spatial/private/rmEmptyPopulation.m deleted file mode 100644 index 93f3b95..0000000 --- a/matlab/spatial/private/rmEmptyPopulation.m +++ /dev/null @@ -1,41 +0,0 @@ -function [npops notEmpty] = rmEmptyPopulation(locCliques,locSeparators) -% remove empty populations from CQ_COUNTS and SUM_CQ_COUNTS, SP_COUNTS, -% SUM_SP_COUNTS -% update PARTITION -% Lu Cheng, 15.12.2012 - -global CQ_COUNTS; global SUM_CQ_COUNTS; -global SP_COUNTS; global SUM_SP_COUNTS; -global PARTITION; - -global LOGML_TABLE; -global ADDITION_DIFFERENCE; -global JOIN_DIFFERENCE; - -global LOC_CQ_COUNTS; -global LOC_SP_COUNTS; - -notEmpty = find(any(SUM_CQ_COUNTS,1) & any(SUM_SP_COUNTS,1)); - -CQ_COUNTS = CQ_COUNTS(:,:,notEmpty); -SP_COUNTS = SP_COUNTS(:,:,notEmpty); - -SUM_CQ_COUNTS = SUM_CQ_COUNTS(:,notEmpty); -SUM_SP_COUNTS = SUM_SP_COUNTS(:,notEmpty); - -LOGML_TABLE = LOGML_TABLE(notEmpty); -ADDITION_DIFFERENCE = ADDITION_DIFFERENCE(:,notEmpty); -JOIN_DIFFERENCE = JOIN_DIFFERENCE(notEmpty,notEmpty); - -for i=1:length(notEmpty) - apu = (PARTITION==notEmpty(i)); - PARTITION(apu)=i; -end - -npops = length(notEmpty); - -[cliqcounts, sepcounts] = computeCounts(locCliques, locSeparators, npops); - -LOC_CQ_COUNTS = cliqcounts; -LOC_SP_COUNTS = sepcounts; - diff --git a/matlab/spatial/private/updateGlobalVariables.m b/matlab/spatial/private/updateGlobalVariables.m deleted file mode 100644 index 154adf7..0000000 --- a/matlab/spatial/private/updateGlobalVariables.m +++ /dev/null @@ -1,61 +0,0 @@ -function updateGlobalVariables(inds, i2, cqData, nCqCodes, spData, nSpCodes, locCliques, locSeparators) -% this function moves the samples specified by "inds" to cluser i2 -% then update all the global variables, "inds" are supposed to come from the -% same cluster -% Lu Cheng, 15.12.2012 - -global PARTITION; -global CQ_COUNTS; global SUM_CQ_COUNTS; -global SP_COUNTS; global SUM_SP_COUNTS; -global ADDITION_DIFFERENCE; -global REMOVAL_DIFFERENCE; -global JOIN_DIFFERENCE; - -global LOC_SP_COUNTS; -global LOC_CQ_COUNTS; - -i1 = PARTITION(inds(1)); -PARTITION(inds)=i2; - -[diffCqCounts diffCqSumCounts]= computeDiffInCounts(inds, cqData, nCqCodes); -[diffSpCounts diffSpSumCounts]= computeDiffInCounts(inds, spData, nSpCodes); - -diffLocCqCounts = computeDiffInCliqCounts(locCliques, inds); -diffLocSpCounts = computeDiffInCliqCounts(locSeparators, inds); - -CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1) - diffCqCounts; -SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1) - diffSpCounts; - -SUM_CQ_COUNTS(:,i1) = SUM_CQ_COUNTS(:,i1) - diffCqSumCounts; -SUM_SP_COUNTS(:,i1) = SUM_SP_COUNTS(:,i1) - diffSpSumCounts; - -LOC_CQ_COUNTS(:,i1) = LOC_CQ_COUNTS(:,i1) - diffLocCqCounts; -LOC_SP_COUNTS(:,i1) = LOC_SP_COUNTS(:,i1) - diffLocSpCounts; - -CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2) + diffCqCounts; -SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2) + diffSpCounts; - -SUM_CQ_COUNTS(:,i2) = SUM_CQ_COUNTS(:,i2) + diffCqSumCounts; -SUM_SP_COUNTS(:,i2) = SUM_SP_COUNTS(:,i2) + diffSpSumCounts; - -LOC_CQ_COUNTS(:,i2) = LOC_CQ_COUNTS(:,i2) + diffLocCqCounts; -LOC_SP_COUNTS(:,i2) = LOC_SP_COUNTS(:,i2) + diffLocSpCounts; - -updateLogmlTable([i1 i2]); - -REMOVAL_DIFFERENCE(PARTITION==i1) = nan; -REMOVAL_DIFFERENCE(PARTITION==i2) = nan; -ADDITION_DIFFERENCE(:,[i1 i2]) = nan; - -JOIN_DIFFERENCE(:,i2) = nan; -JOIN_DIFFERENCE(i2,:) = nan; - -if ~any(PARTITION==i1) - % i1 became empty - JOIN_DIFFERENCE(:,i1) = 0; - JOIN_DIFFERENCE(i1,:) = 0; - JOIN_DIFFERENCE(i1,i1) = nan; -else - JOIN_DIFFERENCE(:,i1) = nan; - JOIN_DIFFERENCE(i1,:) = nan; -end \ No newline at end of file diff --git a/matlab/spatial/private/updateLogmlTable.m b/matlab/spatial/private/updateLogmlTable.m deleted file mode 100644 index 65d7fd5..0000000 --- a/matlab/spatial/private/updateLogmlTable.m +++ /dev/null @@ -1,37 +0,0 @@ -function updateLogmlTable(pops) -% Updates global variables LOGML_TABLE, npops*1 array, logml values for -% each population given in "pops" -% After the updates, the values are based on the current values of the -% global variables CQ_COUNTS, SUM_CQ_COUNTS, SP_COUNTS, SUM_SP_COUNTS -% Lu Cheng, 25.05.2011 - -global CQ_COUNTS; global SUM_CQ_COUNTS; -global SP_COUNTS; global SUM_SP_COUNTS; -global CQ_PRIOR; global SP_PRIOR; - -global LOGML_TABLE; - -tmpN = length(pops); -tmpCqPrior = repmat(CQ_PRIOR,[1 1 tmpN]); -tmpSpPrior = repmat(SP_PRIOR,[1 1 tmpN]); - -term1 = 0-gammaln(1+SUM_CQ_COUNTS(:,pops)); -term2 = sum(gammaln(tmpCqPrior+CQ_COUNTS(:,:,pops))-gammaln(tmpCqPrior) , 1); -if length(pops) > 1 - term2 = squeeze(term2); -else - term2 = term2'; -end - -term3 = 0-gammaln(1+SUM_SP_COUNTS(:,pops)); -term4 = sum(gammaln(tmpSpPrior+SP_COUNTS(:,:,pops))-gammaln(tmpSpPrior) , 1); - -if length(pops) > 1 - term4 = squeeze(term4); -else - term4 = term4'; -end - -LOGML_TABLE(pops) = sum(term1+term2) - sum(term3+term4); - -%---------------------------------------------------------------------- \ No newline at end of file diff --git a/matlab/spatial/private/update_difference_tables.m b/matlab/spatial/private/update_difference_tables.m deleted file mode 100644 index 2094684..0000000 --- a/matlab/spatial/private/update_difference_tables.m +++ /dev/null @@ -1,92 +0,0 @@ -function update_difference_tables(ind, cqData, nCqLetter, ... - spData, nSpLetter, locCliques, locSeparators,logml) -% update ADDITION_DIFFERENCE and REMOVAL_DIFFERENCE -% Lu Cheng, 15.12.2012 - -global CQ_COUNTS; global SUM_CQ_COUNTS; -global SP_COUNTS; global SUM_SP_COUNTS; -global PARTITION; -global ADDITION_DIFFERENCE; -global REMOVAL_DIFFERENCE; - -global LOC_CQ_COUNTS; -global LOC_SP_COUNTS; - -rem_old = REMOVAL_DIFFERENCE; -add_old = ADDITION_DIFFERENCE; - -[diffCqCounts diffCqSumCounts] = computeDiffInCounts(ind, cqData, nCqLetter); -[diffSpCounts diffSpSumCounts] = computeDiffInCounts(ind, spData, nSpLetter); -diffLocCqCounts = computeDiffInCliqCounts(locCliques, ind); -diffLocSpCounts = computeDiffInCliqCounts(locSeparators, ind); - -i1 = PARTITION(ind); - -if isnan(rem_old(ind)) - % Update removal difference for the individual: - % note that we did NOT add the removed item to other clusters - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1) - diffCqCounts; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1) - diffSpCounts; - - SUM_CQ_COUNTS(:,i1) = SUM_CQ_COUNTS(:,i1) - diffCqSumCounts; - SUM_SP_COUNTS(:,i1) = SUM_SP_COUNTS(:,i1) - diffSpSumCounts; - - LOC_CQ_COUNTS(:,i1) = LOC_CQ_COUNTS(:,i1) - diffLocCqCounts; - LOC_SP_COUNTS(:,i1) = LOC_SP_COUNTS(:,i1) - diffLocSpCounts; - -% PARTITION(ind) = -1; - updateLogmlTable(i1); - logml_new = computeTotalLogml(); - rem_old(ind) = logml_new-logml; - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1) + diffCqCounts; - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1) + diffSpCounts; - - SUM_CQ_COUNTS(:,i1) = SUM_CQ_COUNTS(:,i1) + diffCqSumCounts; - SUM_SP_COUNTS(:,i1) = SUM_SP_COUNTS(:,i1) + diffSpSumCounts; - - LOC_CQ_COUNTS(:,i1) = LOC_CQ_COUNTS(:,i1) + diffLocCqCounts; - LOC_SP_COUNTS(:,i1) = LOC_SP_COUNTS(:,i1) + diffLocSpCounts; - -% PARTITION(ind) = i1; - updateLogmlTable(i1); -end - -new_pops = isnan(add_old(ind,:)); -new_pops(i1) = 0; % Own cluster needs never be calculated. -new_pops = find(new_pops); - -for i2 = new_pops(:)' - % Update addition differences for the individual: - % note that we did NOT remove the item - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2) + diffCqCounts; - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2) + diffSpCounts; - - SUM_CQ_COUNTS(:,i2) = SUM_CQ_COUNTS(:,i2) + diffCqSumCounts; - SUM_SP_COUNTS(:,i2) = SUM_SP_COUNTS(:,i2) + diffSpSumCounts; - - LOC_CQ_COUNTS(:,i2) = LOC_CQ_COUNTS(:,i2) + diffLocCqCounts; - LOC_SP_COUNTS(:,i2) = LOC_SP_COUNTS(:,i2) + diffLocSpCounts; - -% PARTITION(ind) = i2; - updateLogmlTable(i2); - logml_new = computeTotalLogml(); - add_old(ind,i2) = logml_new - logml; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2) - diffCqCounts; - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2) - diffSpCounts; - - SUM_CQ_COUNTS(:,i2) = SUM_CQ_COUNTS(:,i2) - diffCqSumCounts; - SUM_SP_COUNTS(:,i2) = SUM_SP_COUNTS(:,i2) - diffSpSumCounts; - - LOC_CQ_COUNTS(:,i2) = LOC_CQ_COUNTS(:,i2) - diffLocCqCounts; - LOC_SP_COUNTS(:,i2) = LOC_SP_COUNTS(:,i2) - diffLocSpCounts; - -% PARTITION(ind) = i1; - updateLogmlTable(i2); -end - -REMOVAL_DIFFERENCE = rem_old; -ADDITION_DIFFERENCE = add_old; - -%--------------------------------------------------------------------- \ No newline at end of file diff --git a/matlab/spatial/private/update_join_difference.m b/matlab/spatial/private/update_join_difference.m deleted file mode 100644 index 41e8449..0000000 --- a/matlab/spatial/private/update_join_difference.m +++ /dev/null @@ -1,83 +0,0 @@ -function update_join_difference(cqData, nCqCodes, spData, nSpCodes, locCliques, locSeparators, logml) -% update JOIN_DIFFERENCE -% Lu Cheng, 15.12.2012 - -global CQ_COUNTS; global SUM_CQ_COUNTS; -global SP_COUNTS; global SUM_SP_COUNTS; -global PARTITION; -global JOIN_DIFFERENCE; - -global LOC_CQ_COUNTS; -global LOC_SP_COUNTS; - -npops = size(CQ_COUNTS,3); - -for i1 = 1:npops-1 - indsToBeMoved = find(PARTITION==i1); - if isempty(indsToBeMoved) - % Cluster i1 is empty - JOIN_DIFFERENCE(i1,(i1+1):npops) = 0; - JOIN_DIFFERENCE((i1+1):npops,i1) = 0; - else - [diffCqCounts diffCqSumCounts] = computeDiffInCounts(indsToBeMoved, cqData, nCqCodes); - [diffSpCounts diffSpSumCounts] = computeDiffInCounts(indsToBeMoved, spData, nSpCodes); - diffLocCqCounts = computeDiffInCliqCounts(locCliques, indsToBeMoved); - diffLocSpCounts = computeDiffInCliqCounts(locSeparators, indsToBeMoved); - - unknown_pops = find(isnan(JOIN_DIFFERENCE(i1,(i1+1):end))); - unknown_pops = unknown_pops+i1; - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1) - diffCqCounts; - SUM_CQ_COUNTS(:,i1) = SUM_CQ_COUNTS(:,i1) - diffCqSumCounts; - - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1) - diffSpCounts; - SUM_SP_COUNTS(:,i1) = SUM_SP_COUNTS(:,i1) - diffSpSumCounts; - - LOC_CQ_COUNTS(:,i1) = LOC_CQ_COUNTS(:,i1) - diffLocCqCounts; - LOC_SP_COUNTS(:,i1) = LOC_SP_COUNTS(:,i1) - diffLocSpCounts; - -% PARTITION(indsToBeMoved) = -1; - updateLogmlTable(i1); - - for i2 = unknown_pops - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2) + diffCqCounts; - SUM_CQ_COUNTS(:,i2) = SUM_CQ_COUNTS(:,i2) + diffCqSumCounts; - - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2) + diffSpCounts; - SUM_SP_COUNTS(:,i2) = SUM_SP_COUNTS(:,i2) + diffSpSumCounts; - - LOC_CQ_COUNTS(:,i2) = LOC_CQ_COUNTS(:,i2) + diffLocCqCounts; - LOC_SP_COUNTS(:,i2) = LOC_SP_COUNTS(:,i2) + diffLocSpCounts; - -% PARTITION(indsToBeMoved) = i2; - updateLogmlTable(i2); - logml_new = computeTotalLogml(); - JOIN_DIFFERENCE(i1,i2) = logml_new-logml; - JOIN_DIFFERENCE(i2,i1) = logml_new-logml; - - CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2) - diffCqCounts; - SUM_CQ_COUNTS(:,i2) = SUM_CQ_COUNTS(:,i2) - diffCqSumCounts; - - SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2) - diffSpCounts; - SUM_SP_COUNTS(:,i2) = SUM_SP_COUNTS(:,i2) - diffSpSumCounts; - - LOC_CQ_COUNTS(:,i2) = LOC_CQ_COUNTS(:,i2) - diffLocCqCounts; - LOC_SP_COUNTS(:,i2) = LOC_SP_COUNTS(:,i2) - diffLocSpCounts; - -% PARTITION(indsToBeMoved) = -1; - updateLogmlTable(i2); - end - - CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1) + diffCqCounts; - SUM_CQ_COUNTS(:,i1) = SUM_CQ_COUNTS(:,i1) + diffCqSumCounts; - - SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1) + diffSpCounts; - SUM_SP_COUNTS(:,i1) = SUM_SP_COUNTS(:,i1) + diffSpSumCounts; - - LOC_CQ_COUNTS(:,i1) = LOC_CQ_COUNTS(:,i1) + diffLocCqCounts; - LOC_SP_COUNTS(:,i1) = LOC_SP_COUNTS(:,i1) + diffLocSpCounts; - -% PARTITION(indsToBeMoved) = i1; - updateLogmlTable(i1); - end -end \ No newline at end of file diff --git a/matlab/spatial/spatialMix.m b/matlab/spatial/spatialMix.m deleted file mode 100644 index fec57eb..0000000 --- a/matlab/spatial/spatialMix.m +++ /dev/null @@ -1,1512 +0,0 @@ -function [logml, npops, partitionSummary]=spatialMix(c,npopsTaulu) -% Greedy search algorithm with unknown number of classes for spatial -% clustering. - -logml = 1; - -global PARTITION; global COUNTS; -global SUMCOUNTS; -global SEPCOUNTS; global CLIQCOUNTS; -global LOGDIFF; -clearGlobalVars; - -data = c.data; rowsFromInd = c.rowsFromInd; -noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; -cliques = c.cliques; separators = c.separators; rows = c.rows; - -if isfield(c, 'dist') - dist = c.dist; Z = c.Z; -end - -clear c; - - -if nargin < 2; - npopstext = []; - ready = false; - teksti = 'Input upper bound to the number of populations (possibly multiple values): '; - while ready == false - npopstextExtra = inputdlg(teksti ,... - 'Input maximum number of populations',1,{'20'}); - drawnow - if isempty(npopstextExtra) % Painettu Cancel:ia - return - end - npopstextExtra = npopstextExtra{1}; - if length(npopstextExtra)>=255 - npopstextExtra = npopstextExtra(1:255); - npopstext = [npopstext ' ' npopstextExtra]; - teksti = 'The input field length limit (255 characters) was reached. Input more values: '; - else - npopstext = [npopstext ' ' npopstextExtra]; - ready = true; - end - end - clear ready; clear teksti; - if isempty(npopstext) | length(npopstext)==1 - return - else - npopsTaulu = str2num(npopstext); - ykkoset = find(npopsTaulu==1); - npopsTaulu(ykkoset) = []; % Mikäli ykkösiä annettu ylärajaksi, ne poistetaan. - if isempty(npopsTaulu) - return - end - clear ykkoset; - end -end - -nruns = length(npopsTaulu); - -maxnpops = max(npopsTaulu); -logmlBest = -1e50; -partitionSummary = -1e50*ones(30,2,maxnpops); % Tiedot 30 parhaasta partitiosta (npops ja logml) -partitionSummary(:,1,:) = zeros(30,1,maxnpops); -worstLogml = -1e50*ones(1, maxnpops); worstIndex = ones(1, maxnpops); - - -initData = data; -data = initData(:, 1:end-1); - -for run = 1:nruns - npops = npopsTaulu(run); - dispLine; - disp(['Run ' num2str(run) '/' num2str(nruns) ... - ', maximum number of populations ' num2str(npops) '.']); - disp(' '); - disp('Computing initial partition'); - - [initialPartition, counts, sumcounts] = initSpatialMultiMixture(initData, ... - npops, Z, rows, noalle, dist, adjprior, priorTerm,0); - - PARTITION = initialPartition; - [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops); - - COUNTS = zeros(max(noalle), size(data,2),npops); SUMCOUNTS = zeros(npops,size(data,2)); - COUNTS(:,:,1:size(counts,3)) = counts; SUMCOUNTS(1:size(sumcounts,1),:) = sumcounts; - CLIQCOUNTS = cliqcounts; SEPCOUNTS = sepcounts; - - ninds = length(PARTITION); - %maxsize=max([max(noalle) npops]); - %initializeGammaln(ninds, rowsFromInd, maxsize); - logml = computeLogml(adjprior, priorTerm); - - LOGDIFF = repmat(-Inf,ninds,npops); - - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - - clear initialPartition; clear counts; clear sumcounts; - clear cliqcounts; clear sepcounts; - - % PARHAAN MIXTURE-PARTITION ETSIMINEN - roundTypes = [1]; %Ykkösvaiheen sykli kahteen kertaan. - nRoundTypes = 7; - kokeiltu = zeros(nRoundTypes, 1); - ready = 0; vaihe = 1; - - - disp(['Mixture analysis started with initial ' num2str(nnotEmptyPops) ' populations.']); - - while ready ~= 1 - muutoksia = 0; - - disp(['Performing steps: ' num2str(roundTypes)]); - - for n = 1:length(roundTypes) - - round = roundTypes(n); - - if kokeiltu(round) == 1 %Askelta kokeiltu viime muutoksen jälkeen - - elseif round==0 | round==1 %Yksilön siirtäminen toiseen populaatioon. - - inds = randperm(ninds); - muutosNyt = 0; - for ind = inds - i1 = PARTITION(ind); - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm, logml, cliques, separators); - - if round==1, [maxMuutos, i2] = max(muutokset); - elseif round==0, [maxMuutos, i2] = arvoSeuraavaTila(muutokset, logml); - end - - if (i1~=i2 & maxMuutos>1e-5) - % Tapahtui muutos - muutoksia = 1; - if muutosNyt == 0 - disp('action 1'); - muutosNyt = 1; - kokeiltu = zeros(nRoundTypes,1); - end - updateGlobalVariables(ind, i2, diffInCounts,... - cliques, separators, adjprior, priorTerm); - logml = logml+maxMuutos; - - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - end - end - if muutosNyt == 0 - kokeiltu(round) = 1; - end - - - elseif round==2 %Populaation yhdistäminen toiseen. - maxMuutos = 0; - - for pop = 1:npops - [muutokset, diffInCounts] = laskeMuutokset2(pop, rows, ... - data, adjprior, priorTerm, logml, cliques, separators); - [isoin, indeksi] = max(muutokset); - if isoin>maxMuutos - maxMuutos = isoin; - i1 = pop; - i2 = indeksi; - diffInCountsBest = diffInCounts; - end - end - - - - if maxMuutos > 1e-5 - disp('action 2'); - muutoksia = 1; - kokeiltu = zeros(nRoundTypes,1); - updateGlobalVariables2(i1,i2, diffInCountsBest, ... - cliques, separators, adjprior, priorTerm); - logml=logml + maxMuutos; - - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - else - kokeiltu(round) = 1; - end - - - elseif round==3 | round==4 %Populaation jakaminen osiin. - maxMuutos = 0; - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>5 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - if round==3 - npops2 = min(20, floor(ninds2 / 5)); %Moneenko osaan jaetaan - elseif round==4 - npops2 = 2; - end - T2 = cluster_own(Z2, npops2); - - muutokset = laskeMuutokset3(T2, inds2, rows, data, ... - adjprior, priorTerm, pop, logml, cliques, separators); - [isoin, indeksi] = max(muutokset(1:end)); - if isoin>maxMuutos - maxMuutos = isoin; - muuttuvaPop2 = rem(indeksi,npops2); - if muuttuvaPop2==0, muuttuvaPop2 = npops2; end - muuttuvat = inds2(find(T2==muuttuvaPop2)); - i2 = ceil(indeksi/npops2); - end - end - end - if maxMuutos>1e-5 - muutoksia = 1; - if round==3 - disp('action 3'); - else - disp('action 4'); - end - kokeiltu = zeros(nRoundTypes,1); - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit lisa]; - end - diffInCounts = computeDiffInCounts(rivit, size(COUNTS,1), ... - size(COUNTS,2), data); - i1 = PARTITION(muuttuvat(1)); - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2, cliques, separators); - logml = logml + maxMuutos; - - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); end - end - else - kokeiltu(round)=1; - end - - elseif round == 5 | round == 6 - j=0; - muutettu = 0; - %poplogml = POP_LOGML; - partition = PARTITION; - counts = COUNTS; - sumcounts = SUMCOUNTS; - cliqcounts = CLIQCOUNTS; - sepcounts = SEPCOUNTS; - logdiff = LOGDIFF; - - pops = randperm(npops); - while (j < npops & muutettu == 0) - j = j+1; - pop = pops(j); - totalMuutos = 0; - inds = find(PARTITION==pop); - if round == 5 - aputaulu = [inds rand(length(inds),1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - elseif round == 6 - inds = returnInOrder(inds, pop, ... - rows, data, adjprior, priorTerm); - end - - i=0; - - while (length(inds) > 0 & i < length(inds)) - i = i+1; - ind =inds(i); - - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm, logml, cliques, separators); - muutokset(pop) = -1e50; % Varmasti ei suurin!!! - [maxMuutos, i2] = max(muutokset); - - updateGlobalVariables(ind, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm); - - totalMuutos = totalMuutos+maxMuutos; - logml = logml+maxMuutos; - if round == 6 - % Lopetetaan heti kun muutos on positiivinen. - if totalMuutos > 1e-5 - i=length(inds); - end - end - end - - if totalMuutos>1e-5 - if round == 5 - disp('action 5'); - elseif round == 6 - disp('action 6'); - end - kokeiltu = zeros(nRoundTypes,1); - muutettu=1; - muutoksia = 1; % Ulompi kirjanpito. - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - else - % Missään vaiheessa tila ei parantunut. - % Perutaan kaikki muutokset. - PARTITION = partition; - SUMCOUNTS = sumcounts; - %POP_LOGML = poplogml; - COUNTS = counts; - logml = logml - totalMuutos; - CLIQCOUNTS = cliqcounts; - SEPCOUNTS = sepcounts; - LOGDIFF = logdiff; - kokeiltu(round)=1; - end - end - clear partition; clear sumcounts; clear counts; - clear cliqcounts; clear sepcounts; - - elseif round == 7 - emptyPop = findEmptyPop(npops); - j = 0; - pops = randperm(npops); - muutoksiaNyt = 0; - if emptyPop == -1 - j = npops; - end - while (j < npops) - j = j +1; - pop = pops(j); - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2 > 5 - partition = PARTITION; - sumcounts = SUMCOUNTS; - counts = COUNTS; - cliqcounts = CLIQCOUNTS; - sepcounts = SEPCOUNTS; - oldLogml = logml; - logdiff = LOGDIFF; - - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - T2 = cluster_own(Z2, 2); - muuttuvat = inds2(find(T2==1)); - - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit lisa]; - end - diffInCounts = computeDiffInCounts(rivit, size(COUNTS,1), ... - size(COUNTS,2), data); - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, emptyPop, cliques, separators); - - logml = computeLogml(adjprior, priorTerm); - - muutettu = 1; - while (muutettu == 1) - muutettu = 0; - % Siirretään yksilöitä populaatioiden välillä - muutokset = laskeMuutokset5(inds2, rows, data, ... - adjprior, priorTerm, logml, cliques, separators, pop, emptyPop); - - [maxMuutos, indeksi] = max(muutokset); - muuttuva = inds2(indeksi); - - if (PARTITION(muuttuva) == pop) - i2 = emptyPop; - else - i2 = pop; - end - - if maxMuutos > 1e-5 - rivit = rows(muuttuva,1):rows(muuttuva,2); - diffInCounts = computeDiffInCounts(rivit, size(COUNTS,1), ... - size(COUNTS,2), data); - updateGlobalVariables3(muuttuva, diffInCounts, ... - adjprior, priorTerm, i2, cliques, separators); - muutettu = 1; - logml = logml + maxMuutos; - end - - end - - if logml > oldLogml + 1e-5 - muutoksia = 1; - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); end - end - if muutoksiaNyt == 0 - disp('action 7'); - muutoksiaNyt = 1; - end - kokeiltu = zeros(nRoundTypes,1); - j = npops; % Lopetetaan, koska muutos syntyi - else - %palutetaan vanhat arvot - PARTITION = partition; - SUMCOUNTS = sumcounts; - COUNTS = counts; - CLIQCOUNTS = cliqcounts; - SEPCOUNTS = sepcounts; - LOGDIFF = logdiff; - logml = oldLogml; - end - - end - - end - - if muutoksiaNyt == 0 - kokeiltu(round)=1; - end - - end - end - - - if muutoksia == 0 - if vaihe==1 - vaihe = 2; - elseif vaihe==2 - vaihe = 3; - elseif vaihe==3 - vaihe = 4; - elseif vaihe==4; - vaihe = 5; - elseif vaihe==5 - ready = 1; - end - else - muutoksia = 0; - end - - if ready==0 - if vaihe==1 - roundTypes=[1]; - elseif vaihe==2 - roundTypes=[2 1]; - elseif vaihe==3 - roundTypes=[5 5 7]; - elseif vaihe==4 - roundTypes=[4 3 1]; - elseif vaihe==5 - roundTypes=[6 2 7 3 4 1]; - end - end - end - - % TALLENNETAAN - - npops = poistaTyhjatPopulaatiot(npops); - %POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - disp(['Found partition with ' num2str(npops) ' populations.']); - disp(['Log(ml) = ' num2str(logml)]); - disp(' '); - - if logml>logmlBest - % Päivitetään parasta löydettyä partitiota. - logmlBest = logml; - npopsBest = npops; - partitionBest = PARTITION; - countsBest = COUNTS; - sumCountsBest = SUMCOUNTS; - %pop_logmlBest = POP_LOGML; - cliqCountsBest = CLIQCOUNTS; - sepCountsBest = SEPCOUNTS; - logdiffbest = LOGDIFF; - end -end - -logml = logmlBest; -npops = npopsBest; -PARTITION = partitionBest; -COUNTS = countsBest; -SUMCOUNTS = sumCountsBest; -%POP_LOGML = pop_logmlBest; -CLIQCOUNTS = cliqCountsBest; -SEPCOUNTS = sepCountsBest; -LOGDIFF = logdiffbest; - -%------------------------------------------------------------------------------------- - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -%global POP_LOGML; POP_LOGML = []; -global SEPCOUNTS; SEPCOUNTS = []; -global CLIQCOUNTS; CLIQCOUNTS = []; -global LOGDIFF; LOGDIFF = []; - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, että annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ssä ei vielä ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyistä partitiota vastaava nclusters:in arvo. Muutoin ei tehdä mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole vielä kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - -%-------------------------------------------------------------------------- - - -function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -% Suorittaa yksilön seuraavan tilan arvonnan - -y = logml + muutokset; % siirron jälkeiset logml:t -y = y - max(y); -y = exp(y); -summa = sum(y); -y = y/summa; -y = cumsum(y); - -i2 = rand_disc(y); % uusi kori -suurin = muutokset(i2); - - -%-------------------------------------------------------------------------------------- - - -function svar=rand_disc(CDF) -%returns an index of a value from a discrete distribution using inversion method -slump=rand; -har=find(CDF>slump); -svar=har(1); - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksilö ind -% siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global CLIQCOUNTS; -global SEPCOUNTS; -global LOGDIFF; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -%global POP_LOGML; -global CLIQCOUNTS; -global SEPCOUNTS; -global LOGDIFF; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -diffInCliqCounts = CLIQCOUNTS(:,i1); -diffInSepCounts = SEPCOUNTS(:,i1); - - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = 0; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2, cliques, separators); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; global CLIQCOUNTS; -global SUMCOUNTS; global SEPCOUNTS; -global LOGDIFF; -%global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, muuttuvat); -diffInSepCounts = computeDiffInCliqCounts(separators, muuttuvat); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, globalRows, data, ... - adjprior, priorTerm) -% Palauttaa yksilöt järjestyksessä siten, että ensimmäisenä on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind =inds(i); - rows = globalRows(i,1):globalRows(i,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset(ind, globalRows, ... - data, adjprior, priorTerm, logml, cliques, separators) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi -% muutos logml:ssä, mikäli yksilöt inds siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättävä -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - -global COUNTS; global SUMCOUNTS; -global PARTITION; %global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; -global LOGDIFF; - -npops = size(COUNTS,3); -muutokset = LOGDIFF(ind,:); - -counts = COUNTS; -sumcounts = SUMCOUNTS; - -[emptyPop, pops] = findEmptyPop(npops); - -i1 = PARTITION(ind); -muutokset(i1) = 0; - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -i2 = sort(i2); -laskematta = find(muutokset==-Inf); -i2 = intersect(i2,laskematta); - -rows = globalRows(ind,1):globalRows(ind,2); -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - -% Asetetaan muillekin tyhjille populaatioille sama muutos, kuin -% emptyPop:lle - -if emptyPop > 0 - empties = mysetdiff((1:npops), [i2 i1]); - muutokset(empties) = muutokset(emptyPop); -end - -COUNTS = counts; -SUMCOUNTS = sumcounts; -LOGDIFF(ind,:) = muutokset; - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2(i1, globalRows, ... - data, adjprior, priorTerm, logml, cliques, separators); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi -% muutos logml:ssä, mikäli korin i1 kaikki yksilöt siirretään -% koriin i. -% Laskee muutokset vain yhdelle tyhjälle populaatiolle, muille tulee -% muutokseksi 0. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -[emptyPop, pops] = findEmptyPop(npops); - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -inds = find(PARTITION == i1); -ninds = length(inds); - -rows = []; -for i = 1:ninds - rows = [rows globalRows(inds(i),1):globalRows(inds(i),2)]; -end -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); -diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); -diffInSepCounts = computeDiffInCliqCounts(separators, inds); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i1) = 0; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = diffInCliqCounts; -SEPCOUNTS(:,i1) = diffInSepCounts; - - - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... - data, adjprior, priorTerm, i1, logml, cliques, separators) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mikä olisi muutos logml:ssä, jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. -% Laskee vain yhden tyhjän populaation, muita kohden muutokseksi jää 0. - - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = []; - for i = 1:ninds - ind = inds(i); - rows = [rows; (globalRows(ind,1):globalRows(ind,2))']; - end - diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); - diffInSepCounts = computeDiffInCliqCounts(separators, inds); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - - [emptyPop, pops] = findEmptyPop(npops); - i2 = [pops(find(pops~=i1))]; - if emptyPop > 0 - i2 =[i2 emptyPop]; - end - - for i = i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(pop2,i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; - end - - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - end -end - -%-------------------------------------------------------------------------- -function muutokset = laskeMuutokset5(inds, globalRows, data, ... - adjprior, priorTerm, logml, cliques, separators, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mikä olisi -% muutos logml:ssä, mikäli yksilö i vaihtaisi koria i1:n ja i2:n välillä. - -global COUNTS; global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; global SEPCOUNTS; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -for i = 1:ninds - ind = inds(i); - - rows = globalRows(ind,1):globalRows(ind,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - if PARTITION(ind)==i1 - pop1 = i1; %mistä - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - - diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); - diffInSepCounts = computeDiffInCliqCounts(separators, ind); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) - diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) + diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) - diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) + diffInSepCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) + diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) - diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) + diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) - diffInSepCounts; -end - -%-------------------------------------------------------------------------- - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveillä rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - -%------------------------------------------------------------------------------------ - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; -global SEPCOUNTS; -global LOGDIFF; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); -CLIQCOUNTS = CLIQCOUNTS(:,notEmpty); -SEPCOUNTS = SEPCOUNTS(:,notEmpty); -LOGDIFF = LOGDIFF(:,notEmpty); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%---------------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - - -%----------------------------------------------------------------------------------- -% Laskee arvot cliqcounts:lle ja sepcounts:lle - -function [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops) - -global PARTITION; -ncliq = size(cliques,1); -nsep = size(separators,1); - -cliqPartition = zeros(ncliq, size(cliques,2)); -sepPartition = zeros(nsep, size(separators, 2)); - -apuCliq = find(cliques > 0); -apuSep = find(separators > 0); - -cliqPartition(apuCliq) = PARTITION(cliques(apuCliq)); -sepPartition(apuSep) = PARTITION(separators(apuSep)); - - -cliqcounts = zeros(ncliq, npops); -for i = 1:npops - cliqcounts(:,i) = sum(cliqPartition == i, 2); -end - - -sepcounts = zeros(nsep, npops); -for i = 1:npops - sepcounts(:,i) = sum(sepPartition == i, 2); -end - -%------------------------------------------------------------------------- - -function diffInCliqCounts = computeDiffInCliqCounts(cliques, inds) -% Laskee muutoksen CLIQCOUNTS:ssa (tai SEPCOUNTS:ssa, jos syötteenä -% separators) kun yksilöt inds siirretään. -% diffInCliqcounts on ncliq*1 taulu, joka on CLIQCOUNTS:n sarakkeesta josta -% yksilöt inds siirretään ja lisättävä sarakkeeseen, johon yksilöt -% siirretään. - -ncliq = size(cliques,1); -diffInCliqCounts = zeros(ncliq,1); -ninds = length(inds); -for i = 1:ninds - ind = inds(i); - rivit = sum((cliques == ind),2); - diffInCliqCounts = diffInCliqCounts + rivit; -end - -%----------------------------------------------------------------------- - -function [logml, spatialPrior] = computeLogml(adjprior,priorTerm) - -%global GAMMA_LN; -global CLIQCOUNTS; -global SEPCOUNTS; -global PARTITION; - -notEmpty = any(CLIQCOUNTS); -npops = length(find(notEmpty == 1)); -sumcliq=sum(CLIQCOUNTS, 2); -sumsep=sum(SEPCOUNTS, 2); -ncliq = size(CLIQCOUNTS, 1); -nsep = size(SEPCOUNTS, 1); - -cliqsizes = sum(CLIQCOUNTS, 2)'; -sepsizes = sum(SEPCOUNTS, 2)'; -cliqsizes = min([cliqsizes; npops*ones(1,ncliq)])'; -sepsizes = min([sepsizes; npops*ones(1,nsep)])'; - -klikkitn = sum(sum(gammaln(CLIQCOUNTS(:,notEmpty) + repmat(1./cliqsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./cliqsizes))) ... - - sum(gammaln(sumcliq + 1)); - -septn = sum(sum(gammaln(SEPCOUNTS(:,notEmpty) + repmat(1./sepsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./sepsizes))) ... - - sum(gammaln(sumsep + 1)); - - -%klikkitn = sum(sum(gammaln(CLIQCOUNTS + 1/npops))) ... -% - ncliq*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumcliq + 1)); -%septn = sum(sum(gammaln(SEPCOUNTS + 1/npops))) ... -% - nsep*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumsep + 1)); - -spatialPrior = (klikkitn - septn); - -%if spatialPrior > 0 -% keyboard -%end - - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = size(COUNTS,3); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 z]) + COUNTS) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS),2) - priorTerm; - -logml = sum(popLogml) + spatialPrior; - -%-------------------------------------------------------------------------- - - -function initializeGammaln(ninds, rowsFromInd, maxSize) -%Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j) -global GAMMA_LN; -GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxSize); -for i=1:(ninds+1)*rowsFromInd - for j=1:maxSize - GAMMA_LN(i,j)=gammaln((i-1) + 1/j); - end -end - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäärä. - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n= 1 - suurinYks = suurinYks+1; - end - if suurinYks<10 - mjono(7) = num2str(suurinYks); - mjono(6) = 'e'; - mjono(5) = palautaYks(abs(logml),suurinYks-1); - mjono(4) = '.'; - mjono(3) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(2) = '-'; - end - elseif suurinYks>=10 - mjono(6:7) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(logml),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(1) = '-'; - end - end -end - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:inä -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmillä -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - -%-------------------------------------------------------------------------- - -function C = mysetdiff(A,B) -% MYSETDIFF Set difference of two sets of positive integers (much faster than built-in setdiff) -% C = mysetdiff(A,B) -% C = A \ B = { things in A that are not in B } -% -% Original by Kevin Murphy, modified by Leon Peshkin - -if isempty(A) - C = []; - return; -elseif isempty(B) - C = A; - return; -else % both non-empty - bits = zeros(1, max(max(A), max(B))); - bits(A) = 1; - bits(B) = 0; - C = A(logical(bits(A))); -end - - -%-------------------------------------------------------------------------- - -function logml = checkLogml(priorTerm, adjprior, cliques, separators) -% tarkistaa logml:n - -global CLIQCOUNTS; -global SEPCOUNTS; -global PARTITION; - -npops = length(unique(PARTITION)); -[cliqcounts, sepcounts] = computeCounts(cliques, separators, npops); - -CLIQCOUNTS = cliqcounts; -SEPCOUNTS = sepcounts; - - -[logml, spatialPrior] = computeLogml(adjprior, priorTerm); - -disp(['logml: ' logml2String(logml) ', spatial prior: ' logml2String(spatialPrior)]); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhjiä -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; - -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - - -%-------------------------------------------------------------------------- - -function [sumcounts, counts, logml] = ... - initialCounts(partition, data, npops, rowsFromInd, noalle) - -nloci=size(data,2); -ninds = size(data,1)/rowsFromInd; - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); diff --git a/matlab/spatial/spatialMix_fixK.m b/matlab/spatial/spatialMix_fixK.m deleted file mode 100644 index 2fb1276..0000000 --- a/matlab/spatial/spatialMix_fixK.m +++ /dev/null @@ -1,1621 +0,0 @@ -function [logml, npops, partitionSummary]=spatialMix(c,npops,nruns) -% Greedy search algorithm with fixed number of classes for spatial -% clustering. - -logml = 1; npops=1; partitionSummary=1; - -global PARTITION; global COUNTS; -global SUMCOUNTS; -global SEPCOUNTS; global CLIQCOUNTS; -global LOGDIFF; -clearGlobalVars; - -data = c.data; rowsFromInd = c.rowsFromInd; -noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; -cliques = c.cliques; separators = c.separators; rows = c.rows; - -if isfield(c, 'dist') - dist = c.dist; Z = c.Z; -end - -clear c; - - -if nargin < 2; - npopstext = []; - teksti = {'Number of populations:', ... - 'Number of runs:'}; - def = {'20', '1'}; - - npopstextExtra = inputdlg(teksti ,... - 'Input parameters for the computation algorithm',1,def); - - if isempty(npopstextExtra) % cancel has been pressed - dispCancel - logml = 1; partitionSummary=1; npops=1; - return - end - npopstext = npopstextExtra{1}; - nrunstext = npopstextExtra{2}; - - clear teksti npopstextExtra; - if isempty(npopstext) - logml = 1; partitionSummary=1; npops=1; - return - else - npopsTable = str2num(npopstext); - npops = npopsTable(1); - - if npops==1 - logml = 1; partitionSummary=1; npops=1; - return - end - nrunsTable = str2num(nrunstext); - nruns = nrunsTable(1); - end -end - -maxnpops = npops; -logmlBest = -1e50; -partitionSummary = -1e50*ones(30,2,maxnpops); % Tiedot 30 parhaasta partitiosta (npops ja logml) -partitionSummary(:,1,:) = zeros(30,1,maxnpops); -worstLogml = -1e50*ones(1, maxnpops); worstIndex = ones(1, maxnpops); - -initData = data; -data = initData(:, 1:end-1); - -for run = 1:nruns - dispLine; - disp(['Run ' num2str(run) '/' num2str(nruns) ... - ', maximum number of populations ' num2str(npops) '.']); - disp(' '); - disp('Computing initial partition'); - - [initialPartition, counts, sumcounts] = initSpatialMultiMixture(initData, ... - npops, Z, rows, noalle, dist, adjprior, priorTerm,1); - - PARTITION = initialPartition; - [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops); - - COUNTS = counts; SUMCOUNTS = sumcounts; - CLIQCOUNTS = cliqcounts; SEPCOUNTS = sepcounts; - - ninds = length(PARTITION); - %maxsize=max([max(noalle) npops]); - %initializeGammaln(ninds, rowsFromInd, maxsize); - logml = computeLogml(adjprior, priorTerm); - - LOGDIFF = repmat(-Inf,ninds,npops); - - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - - clear initialPartition; clear counts; clear sumcounts; - clear cliqcounts; clear sepcounts; - - % PARHAAN MIXTURE-PARTITION ETSIMINEN - roundTypes = [1]; %Ykkösvaiheen sykli kahteen kertaan. - nRoundTypes = 7; - kokeiltu = zeros(nRoundTypes, 1); - ready = 0; vaihe = 1; - - - disp(['Mixture analysis started with initial ' num2str(nnotEmptyPops) ' populations.']); - - while ready ~= 1 - muutoksia = 0; - - disp(['Performing steps: ' num2str(roundTypes)]); - - for n = 1:length(roundTypes) - - round = roundTypes(n); - - if kokeiltu(round) == 1 %Askelta kokeiltu viime muutoksen jälkeen - - elseif round==0 | round==1 %Yksilön siirtäminen toiseen populaatioon. - - inds = randperm(ninds); - muutosNyt = 0; - for ind = inds - i1 = PARTITION(ind); - if length(find(PARTITION==i1))>1 - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm, logml, cliques, separators); - [maxMuutos, i2] = max(muutokset); - - if (i1~=i2 & maxMuutos>1e-5) - % Tapahtui muutos - muutoksia = 1; - if muutosNyt == 0 - disp('action 1'); - muutosNyt = 1; - kokeiltu = zeros(nRoundTypes,1); - end - updateGlobalVariables(ind, i2, diffInCounts,... - cliques, separators, adjprior, priorTerm); - logml = logml+maxMuutos; - - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - end - end - end - if muutosNyt == 0 - kokeiltu(round) = 1; - end - - - elseif round==2 %Populaation yhdistäminen toiseen. - maxMuutos = -1e50; - partition = PARTITION; - counts = COUNTS; - sumcounts = SUMCOUNTS; - cliqcounts = CLIQCOUNTS; - sepcounts = SEPCOUNTS; - logdiff = LOGDIFF; - - % Two populations are merged at first. - for pop = 1:npops - [muutokset, diffInCounts] = laskeMuutokset2(pop, rows, ... - data, adjprior, priorTerm, logml, cliques, separators); - muutokset(pop)=-1e50; - [isoin, indeksi] = max(muutokset); - if isoin>maxMuutos - maxMuutos = isoin; - i1 = pop; - i2 = indeksi; - diffInCountsBest = diffInCounts; - end - end - - totalMuutos = maxMuutos; - updateGlobalVariables2(i1,i2, diffInCountsBest, ... - cliques, separators, adjprior, priorTerm); - - % A new population is formed from a part of another - emptyPop = i1; - maxMuutos = -1e50; - ninds = size(rows,1); - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>1 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - npops2 = 2; %Moneenko osaan jaetaan - T2 = cluster_own(Z2, npops2); - muutokset = laskeMuutokset3(T2, inds2, rows, data, ... - adjprior, priorTerm, pop, logml+totalMuutos, cliques, separators); - isoin = muutokset(1,emptyPop); - if isoin>maxMuutos - maxMuutos = isoin; - muuttuvat = inds2(find(T2==1)); - end - end - end - - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit; lisa']; - %rivit = [rivit; rows(ind)']; - end - diffInCounts = computeDiffInCounts(rivit', size(COUNTS,1), ... - size(COUNTS,2), data); - pop = PARTITION(muuttuvat(1)); - inds2 = find(PARTITION == pop); - - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, emptyPop, cliques, separators); - totalMuutos = totalMuutos + maxMuutos; - - % Individuals are moved between the populations. - inds = randperm(ninds); - for ind = inds - i1 = PARTITION(ind); - if length(find(PARTITION==i1))>1 - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm, logml+totalMuutos, cliques, separators); - - [maxMuutos, i2] = max(muutokset); - if (i1~=i2 & maxMuutos>1e-5) - updateGlobalVariables(ind, i2, diffInCounts,... - cliques, separators, adjprior, priorTerm); - totalMuutos = totalMuutos+maxMuutos; - end - end - end - - if totalMuutos>1e-5 - muutoksia = 1; - kokeiltu = zeros(nRoundTypes,1); - logml = logml + totalMuutos; - nnotEmptyPops = length(unique(PARTITION)); - disp('Action 2'); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - - else - PARTITION = partition; - SUMCOUNTS = sumcounts; - COUNTS = counts; - CLIQCOUNTS = cliqcounts; - SEPCOUNTS = sepcounts; - LOGDIFF = logdiff; - kokeiltu(round) = 1; - end - - elseif round==3 | round==4 % Splitting a population - maxMuutos = 0; - for pop = 1:npops - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2>5 - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - if round==3 - npops2 = max(min(20, floor(ninds2/2)),2); %Moneenko osaan jaetaan - elseif round==4 - npops2 = 2; - end - T2 = cluster_own(Z2, npops2); - - muutokset = laskeMuutokset3(T2, inds2, rows, data, ...7 - adjprior, priorTerm, pop, logml, cliques, separators); - [isoin, indeksi] = max(muutokset(1:end)); - if isoin>maxMuutos - maxMuutos = isoin; - muuttuvaPop2 = rem(indeksi,npops2); - if muuttuvaPop2==0, muuttuvaPop2 = npops2; end - muuttuvat = inds2(find(T2==muuttuvaPop2)); - i2 = ceil(indeksi/npops2); - end - end - end - if maxMuutos>1e-5 - muutoksia = 1; - if round==3 - disp('action 3'); - else - disp('action 4'); - end - kokeiltu = zeros(nRoundTypes,1); - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit lisa]; - end - diffInCounts = computeDiffInCounts(rivit, size(COUNTS,1), ... - size(COUNTS,2), data); - i1 = PARTITION(muuttuvat(1)); - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2, cliques, separators); - logml = logml + maxMuutos; - - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); end - end - else - kokeiltu(round)=1; - end - - elseif round == 5 | round == 6 - j=0; - muutettu = 0; - %poplogml = POP_LOGML; - partition = PARTITION; - counts = COUNTS; - sumcounts = SUMCOUNTS; - cliqcounts = CLIQCOUNTS; - sepcounts = SEPCOUNTS; - logdiff = LOGDIFF; - - pops = randperm(npops); - while (j < npops & muutettu == 0) - j = j+1; - pop = pops(j); - totalMuutos = 0; - inds = find(PARTITION==pop); - if round == 5 - aputaulu = [inds rand(length(inds),1)]; - aputaulu = sortrows(aputaulu,2); - inds = aputaulu(:,1)'; - elseif round == 6 - inds = returnInOrder(inds, pop, ... - rows, data, adjprior, priorTerm); - end - - i=0; - - while (length(inds) > 0 & i < length(inds) - 1) % last ind is not removed from population - i = i+1; - ind =inds(i); - - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm, logml, cliques, separators); - muutokset(pop) = -1e50; % Varmasti ei suurin!!! - [maxMuutos, i2] = max(muutokset); - - updateGlobalVariables(ind, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm); - - totalMuutos = totalMuutos+maxMuutos; - logml = logml+maxMuutos; - if round == 6 - % Lopetetaan heti kun muutos on positiivinen. - if totalMuutos > 1e-5 - i=length(inds); - end - end - end - - if totalMuutos>1e-5 - if round == 5 - disp('action 5'); - elseif round == 6 - disp('action 6'); - end - kokeiltu = zeros(nRoundTypes,1); - muutettu=1; - muutoksia = 1; % Ulompi kirjanpito. - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); - end - end - else - % Missään vaiheessa tila ei parantunut. - % Perutaan kaikki muutokset. - PARTITION = partition; - SUMCOUNTS = sumcounts; - %POP_LOGML = poplogml; - COUNTS = counts; - logml = logml - totalMuutos; - CLIQCOUNTS = cliqcounts; - SEPCOUNTS = sepcounts; - LOGDIFF = logdiff; - kokeiltu(round)=1; - end - end - clear partition; clear sumcounts; clear counts; - clear cliqcounts; clear sepcounts; - - elseif round == 7 % A new population is formed and merged into another - emptyPop = npops + 1; - j = 0; - pops = randperm(npops); - muutoksiaNyt = 0; - while (j < npops) - j = j +1; - pop = pops(j); - inds2 = find(PARTITION==pop); - ninds2 = length(inds2); - if ninds2 > 5 - partition = PARTITION; - sumcounts = SUMCOUNTS; - counts = COUNTS; - cliqcounts = CLIQCOUNTS; - sepcounts = SEPCOUNTS; - oldLogml = logml; - logdiff = LOGDIFF; - - % a new population is formed temporarily - npops = npops + 1; - POP_LOGML(npops) = 0; - COUNTS(:,:,npops) = zeros(size(COUNTS(:,:,1))); - SUMCOUNTS(npops,:) = zeros(size(SUMCOUNTS(1,:))); - CLIQCOUNTS(:,npops) = zeros(size(CLIQCOUNTS(:,1))); - SEPCOUNTS(:,npops) = zeros(size(SEPCOUNTS(:,1))); - - dist2 = laskeOsaDist(inds2, dist, ninds); - Z2 = linkage(dist2'); - T2 = cluster_own(Z2, 2); - muuttuvat = inds2(find(T2==1)); - - rivit = []; - for i = 1:length(muuttuvat) - ind = muuttuvat(i); - lisa = rows(ind,1):rows(ind,2); - rivit = [rivit lisa]; - end - diffInCounts = computeDiffInCounts(rivit, size(COUNTS,1), ... - size(COUNTS,2), data); - updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, emptyPop, cliques, separators); - - logml = computeLogml(adjprior, priorTerm); - - % Individuals are moved between the populations. - inds = randperm(ninds); - for ind = inds - i1 = PARTITION(ind); - if length(find(PARTITION==i1))>1 - [muutokset, diffInCounts] = laskeMuutokset(ind, rows, ... - data, adjprior, priorTerm, logml, cliques, separators); - - [maxMuutos, i2] = max(muutokset); - if (i1~=i2 & maxMuutos>1e-5) - updateGlobalVariables(ind, i2, diffInCounts,... - cliques, separators, adjprior, priorTerm); - logml = logml + maxMuutos; - end - end - end - - % Merge two populations if the number of - % populations is too big. - if length(find(any(SUMCOUNTS,2))) == npops - maxMuutos = -1e50; - for pop = 1:npops - [muutokset, diffInCounts] = laskeMuutokset2(pop, rows, ... - data, adjprior, priorTerm, logml, cliques, separators); - muutokset(pop)=-1e50; - [isoin, indeksi] = max(muutokset); - if isoin>maxMuutos - maxMuutos = isoin; - i1 = pop; - i2 = indeksi; - diffInCountsBest = diffInCounts; - end - end - updateGlobalVariables2(i1,i2, diffInCountsBest, ... - cliques, separators, adjprior, priorTerm); - logml = logml + maxMuutos; - end - - if logml > oldLogml + 1e-5 - muutoksia = 1; - npops = poistaTyhjatPopulaatiot(npops); - nnotEmptyPops = length(unique(PARTITION)); - - if logml>worstLogml(nnotEmptyPops); - [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ... - partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops)); - if (added==1) - [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ... - min(partitionSummary(:,2,nnotEmptyPops)); end - end - if muutoksiaNyt == 0 - disp('action 7'); - muutoksiaNyt = 1; - end - kokeiltu = zeros(nRoundTypes,1); - j = npops; % Lopetetaan, koska muutos syntyi - else - %palutetaan vanhat arvot - PARTITION = partition; - SUMCOUNTS = sumcounts; - COUNTS = counts; - CLIQCOUNTS = cliqcounts; - SEPCOUNTS = sepcounts; - LOGDIFF = logdiff; - npops = npops-1; - logml = oldLogml; - if abs(logml-computeLogml(adjprior, priorTerm))>1e-5 - keyboard - end - end - - end - - end - - if muutoksiaNyt == 0 - kokeiltu(round)=1; - end - - end - - end - - - if muutoksia == 0 - if vaihe==1 - vaihe = 2; - elseif vaihe==2 - vaihe = 3; - elseif vaihe==3 - vaihe = 4; - elseif vaihe==4; - vaihe = 5; - elseif vaihe==5 - ready = 1; - end - else - muutoksia = 0; - end - - if ready==0 - if vaihe==1 - roundTypes=[1]; - elseif vaihe==2 - roundTypes=[2 1]; - elseif vaihe==3 - roundTypes=[5 5 7]; - elseif vaihe==4 - roundTypes=[4 3 1]; - elseif vaihe==5 - roundTypes=[6 2 7 3 4 1]; - end - end - end - - % TALLENNETAAN - - npops = poistaTyhjatPopulaatiot(npops); - %POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - disp(['Found partition with ' num2str(npops) ' populations.']); - disp(['Log(ml) = ' num2str(logml)]); - disp(' '); - - - - if logml>logmlBest - % Päivitetään parasta löydettyä partitiota. - logmlBest = logml; - npopsBest = npops; - partitionBest = PARTITION; - countsBest = COUNTS; - sumCountsBest = SUMCOUNTS; - %pop_logmlBest = POP_LOGML; - cliqCountsBest = CLIQCOUNTS; - sepCountsBest = SEPCOUNTS; - logdiffbest = LOGDIFF; - end -end - -logml = logmlBest; -npops = npopsBest; -PARTITION = partitionBest; -COUNTS = countsBest; -SUMCOUNTS = sumCountsBest; -%POP_LOGML = pop_logmlBest; -CLIQCOUNTS = cliqCountsBest; -SEPCOUNTS = sepCountsBest; -LOGDIFF = logdiffbest; - -%------------------------------------------------------------------------------------- - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -%global POP_LOGML; POP_LOGML = []; -global SEPCOUNTS; SEPCOUNTS = []; -global CLIQCOUNTS; CLIQCOUNTS = []; -global LOGDIFF; LOGDIFF = []; - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, että annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ssä ei vielä ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyistä partitiota vastaava nclusters:in arvo. Muutoin ei tehdä mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole vielä kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - -%-------------------------------------------------------------------------- - - -function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -% Suorittaa yksilön seuraavan tilan arvonnan - -y = logml + muutokset; % siirron jälkeiset logml:t -y = y - max(y); -y = exp(y); -summa = sum(y); -y = y/summa; -y = cumsum(y); - -i2 = rand_disc(y); % uusi kori -suurin = muutokset(i2); - - -%-------------------------------------------------------------------------------------- - - -function svar=rand_disc(CDF) -%returns an index of a value from a discrete distribution using inversion method -slump=rand; -har=find(CDF>slump); -svar=har(1); - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksilö ind -% siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global CLIQCOUNTS; -global SEPCOUNTS; -global LOGDIFF; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -%global POP_LOGML; -global CLIQCOUNTS; -global SEPCOUNTS; -global LOGDIFF; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -diffInCliqCounts = CLIQCOUNTS(:,i1); -diffInSepCounts = SEPCOUNTS(:,i1); - - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = 0; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2, cliques, separators); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; global CLIQCOUNTS; -global SUMCOUNTS; global SEPCOUNTS; -global LOGDIFF; -%global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, muuttuvat); -diffInSepCounts = computeDiffInCliqCounts(separators, muuttuvat); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -LOGDIFF(:,[i1 i2]) = -Inf; -inx = [find(PARTITION==i1); find(PARTITION==i2)]; -LOGDIFF(inx,:) = -Inf; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, globalRows, data, ... - adjprior, priorTerm) -% Palauttaa yksilöt järjestyksessä siten, että ensimmäisenä on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind =inds(i); - rows = globalRows(i,1):globalRows(i,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset(ind, globalRows, ... - data, adjprior, priorTerm, logml, cliques, separators) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi -% muutos logml:ssä, mikäli yksilöt inds siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättävä -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - -global COUNTS; global SUMCOUNTS; -global PARTITION; %global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; -global LOGDIFF; - -npops = size(COUNTS,3); -muutokset = LOGDIFF(ind,:); - -counts = COUNTS; -sumcounts = SUMCOUNTS; - -[emptyPop, pops] = findEmptyPop(npops); - -i1 = PARTITION(ind); -muutokset(i1) = 0; - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -i2 = sort(i2); -laskematta = find(muutokset==-Inf); -i2 = intersect(i2,laskematta); - -rows = globalRows(ind,1):globalRows(ind,2); -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - -% Asetetaan muillekin tyhjille populaatioille sama muutos, kuin -% emptyPop:lle - -if emptyPop > 0 - empties = mysetdiff((1:npops), [i2 i1]); - muutokset(empties) = muutokset(emptyPop); -end - -COUNTS = counts; -SUMCOUNTS = sumcounts; -LOGDIFF(ind,:) = muutokset; - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2(i1, globalRows, ... - data, adjprior, priorTerm, logml, cliques, separators); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi -% muutos logml:ssä, mikäli korin i1 kaikki yksilöt siirretään -% koriin i. -% Laskee muutokset vain yhdelle tyhjälle populaatiolle, muille tulee -% muutokseksi 0. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -[emptyPop, pops] = findEmptyPop(npops); - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -inds = find(PARTITION == i1); -ninds = length(inds); - -rows = []; -for i = 1:ninds - rows = [rows globalRows(inds(i),1):globalRows(inds(i),2)]; -end -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); -diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); -diffInSepCounts = computeDiffInCliqCounts(separators, inds); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i1) = 0; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = diffInCliqCounts; -SEPCOUNTS(:,i1) = diffInSepCounts; - - - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... - data, adjprior, priorTerm, i1, logml, cliques, separators) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mikä olisi muutos logml:ssä, jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. -% Laskee vain yhden tyhjän populaation, muita kohden muutokseksi jää 0. - - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = []; - for i = 1:ninds - ind = inds(i); - rows = [rows; (globalRows(ind,1):globalRows(ind,2))']; - end - diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); - diffInSepCounts = computeDiffInCliqCounts(separators, inds); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - - [emptyPop, pops] = findEmptyPop(npops); - i2 = [pops(find(pops~=i1))]; - if emptyPop > 0 - i2 =[i2 emptyPop]; - end - - for i = i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(pop2,i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; - end - - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - end -end - -%-------------------------------------------------------------------------- -function muutokset = laskeMuutokset5(inds, globalRows, data, ... - adjprior, priorTerm, logml, cliques, separators, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mikä olisi -% muutos logml:ssä, mikäli yksilö i vaihtaisi koria i1:n ja i2:n välillä. - -global COUNTS; global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; global SEPCOUNTS; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -for i = 1:ninds - ind = inds(i); - - rows = globalRows(ind,1):globalRows(ind,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - if PARTITION(ind)==i1 - pop1 = i1; %mistä - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - - diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); - diffInSepCounts = computeDiffInCliqCounts(separators, ind); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) - diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) + diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) - diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) + diffInSepCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) + diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) - diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) + diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) - diffInSepCounts; -end - -%-------------------------------------------------------------------------- - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveillä rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - -%------------------------------------------------------------------------------------ - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; -global SEPCOUNTS; -global LOGDIFF; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); -CLIQCOUNTS = CLIQCOUNTS(:,notEmpty); -SEPCOUNTS = SEPCOUNTS(:,notEmpty); -LOGDIFF = LOGDIFF(:,notEmpty); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%---------------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - - -%----------------------------------------------------------------------------------- -% Laskee arvot cliqcounts:lle ja sepcounts:lle - -function [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops) - -global PARTITION; -ncliq = size(cliques,1); -nsep = size(separators,1); - -cliqPartition = zeros(ncliq, size(cliques,2)); -sepPartition = zeros(nsep, size(separators, 2)); - -apuCliq = find(cliques > 0); -apuSep = find(separators > 0); - -cliqPartition(apuCliq) = PARTITION(cliques(apuCliq)); -sepPartition(apuSep) = PARTITION(separators(apuSep)); - - -cliqcounts = zeros(ncliq, npops); -for i = 1:npops - cliqcounts(:,i) = sum(cliqPartition == i, 2); -end - - -sepcounts = zeros(nsep, npops); -for i = 1:npops - sepcounts(:,i) = sum(sepPartition == i, 2); -end - -%------------------------------------------------------------------------- - -function diffInCliqCounts = computeDiffInCliqCounts(cliques, inds) -% Laskee muutoksen CLIQCOUNTS:ssa (tai SEPCOUNTS:ssa, jos syötteenä -% separators) kun yksilöt inds siirretään. -% diffInCliqcounts on ncliq*1 taulu, joka on CLIQCOUNTS:n sarakkeesta josta -% yksilöt inds siirretään ja lisättävä sarakkeeseen, johon yksilöt -% siirretään. - -ncliq = size(cliques,1); -diffInCliqCounts = zeros(ncliq,1); -ninds = length(inds); -for i = 1:ninds - ind = inds(i); - rivit = sum((cliques == ind),2); - diffInCliqCounts = diffInCliqCounts + rivit; -end - -%----------------------------------------------------------------------- - -function [logml, spatialPrior] = computeLogml(adjprior,priorTerm,cliqcounts,sepcounts) - -%global GAMMA_LN; -global CLIQCOUNTS; -global SEPCOUNTS; -global PARTITION; - -if nargin < 3 - - notEmpty = any(CLIQCOUNTS); - npops = length(find(notEmpty == 1)); - sumcliq=sum(CLIQCOUNTS, 2); - sumsep=sum(SEPCOUNTS, 2); - ncliq = size(CLIQCOUNTS, 1); - nsep = size(SEPCOUNTS, 1); - - cliqsizes = sum(CLIQCOUNTS, 2)'; - sepsizes = sum(SEPCOUNTS, 2)'; - cliqsizes = min([cliqsizes; npops*ones(1,ncliq)])'; - sepsizes = min([sepsizes; npops*ones(1,nsep)])'; - - klikkitn = sum(sum(gammaln(CLIQCOUNTS(:,notEmpty) + repmat(1./cliqsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./cliqsizes))) ... - - sum(gammaln(sumcliq + 1)); - - septn = sum(sum(gammaln(SEPCOUNTS(:,notEmpty) + repmat(1./sepsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./sepsizes))) ... - - sum(gammaln(sumsep + 1)); - -else - notEmpty = any(cliqcounts); - npops = length(find(notEmpty == 1)); - sumcliq=sum(cliqcounts, 2); - sumsep=sum(sepcounts, 2); - ncliq = size(cliqcounts, 1); - nsep = size(sepcounts, 1); - - cliqsizes = sum(cliqcounts, 2)'; - sepsizes = sum(sepcounts, 2)'; - cliqsizes = min([cliqsizes; npops*ones(1,ncliq)])'; - sepsizes = min([sepsizes; npops*ones(1,nsep)])'; - - klikkitn = sum(sum(gammaln(cliqcounts(:,notEmpty) + repmat(1./cliqsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./cliqsizes))) ... - - sum(gammaln(sumcliq + 1)); - - septn = sum(sum(gammaln(sepcounts(:,notEmpty) + repmat(1./sepsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./sepsizes))) ... - - sum(gammaln(sumsep + 1)); -end - - - -%klikkitn = sum(sum(gammaln(CLIQCOUNTS + 1/npops))) ... -% - ncliq*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumcliq + 1)); -%septn = sum(sum(gammaln(SEPCOUNTS + 1/npops))) ... -% - nsep*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumsep + 1)); - -spatialPrior = (klikkitn - septn); - -%if spatialPrior > 0 -% keyboard -%end - - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = size(COUNTS,3); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 z]) + COUNTS) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS),2) - priorTerm; - -logml = sum(popLogml) + spatialPrior; - -%-------------------------------------------------------------------------- - - -function initializeGammaln(ninds, rowsFromInd, maxSize) -%Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j) -global GAMMA_LN; -GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxSize); -for i=1:(ninds+1)*rowsFromInd - for j=1:maxSize - GAMMA_LN(i,j)=gammaln((i-1) + 1/j); - end -end - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäärä. - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n= 1 - suurinYks = suurinYks+1; - end - if suurinYks<10 - mjono(7) = num2str(suurinYks); - mjono(6) = 'e'; - mjono(5) = palautaYks(abs(logml),suurinYks-1); - mjono(4) = '.'; - mjono(3) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(2) = '-'; - end - elseif suurinYks>=10 - mjono(6:7) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(logml),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(logml),suurinYks); - if logml<0 - mjono(1) = '-'; - end - end -end - -function digit = palautaYks(num,yks) -% palauttaa luvun num 10^yks termin kertoimen -% string:inä -% yks täytyy olla kokonaisluku, joka on -% vähintään -1:n suuruinen. Pienemmillä -% luvuilla tapahtuu jokin pyöristysvirhe. - -if yks>=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - -%-------------------------------------------------------------------------- - -function C = mysetdiff(A,B) -% MYSETDIFF Set difference of two sets of positive integers (much faster than built-in setdiff) -% C = mysetdiff(A,B) -% C = A \ B = { things in A that are not in B } -% -% Original by Kevin Murphy, modified by Leon Peshkin - -if isempty(A) - C = []; - return; -elseif isempty(B) - C = A; - return; -else % both non-empty - bits = zeros(1, max(max(A), max(B))); - bits(A) = 1; - bits(B) = 0; - C = A(logical(bits(A))); -end - - -%-------------------------------------------------------------------------- - -function logml = checkLogml(priorTerm, adjprior, cliques, separators) -% tarkistaa logml:n - -global CLIQCOUNTS; -global SEPCOUNTS; -global PARTITION; - -npops = length(unique(PARTITION)); -[cliqcounts, sepcounts] = computeCounts(cliques, separators, npops); - -%CLIQCOUNTS = cliqcounts; -%SEPCOUNTS = sepcounts; - - -[logml, spatialPrior] = computeLogml(adjprior, priorTerm,cliqcounts,sepcounts); - -disp(['logml: ' logml2String(logml) ', spatial prior: ' logml2String(spatialPrior)]); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhjiä -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; - -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - - -%-------------------------------------------------------------------------- - -function [sumcounts, counts, logml] = ... - initialCounts(partition, data, npops, rowsFromInd, noalle) - -nloci=size(data,2); -ninds = size(data,1)/rowsFromInd; - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); diff --git a/matlab/spatial/spatialMixture.m b/matlab/spatial/spatialMixture.m deleted file mode 100644 index 79afd5a..0000000 --- a/matlab/spatial/spatialMixture.m +++ /dev/null @@ -1,2609 +0,0 @@ -function spatialMixture -%Vaihtuvalla populaatioiden määräll? priori 3:lla -base = findobj('Tag','base_figure'); % added by jing 21.11.06 - -% check whether fixed k mode is selected -h0 = findobj('Tag','fixk_menu'); -fixedK = get(h0, 'userdata'); - -if fixedK - if ~(fixKWarning == 1) % call function fixKWarning - return - end -end - -% output file name -OUTPUT_FILE = 'baps6_output.baps'; % also remember to update the file name in function WriteMixtureInfo - -% check whether partition compare mode is selected -h1 = findobj('Tag','partitioncompare_menu'); -partitionCompare = get(h1, 'userdata'); - -formatList = {'BAPS-format','FASTA-format', 'GenePop-format', 'Preprocessed data'}; -formatChoice = menu('Specify the format of your data: ','BAPS-format','FASTA-format', 'GenePop-format', 'Preprocessed data'); -if formatChoice==0 - return; -else - input_type = formatList{formatChoice}; -end - -switch input_type - -case 'BAPS-format' - %waitALittle; - setWindowOnTop(base,'false') - [filename1, pathname1] = uigetfile('*.txt', 'Load data in BAPS-format'); - if filename1==0 - return; - end - - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - data = load([pathname1 filename1]); - ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS - if (ninds==0) - disp('Incorrect Data-file.'); - return; - end - - %waitALittle; - setWindowOnTop(base,'false') - [filename2,pathname2]=uigetfile('*.txt', 'Load individual coordinates'); - if filename2==0 - return - end - - coordinates = load([pathname2 filename2]); - % viallinen = testaaKoordinaatit(ninds, coordinates); - [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates); % added by Lu Cheng, 05.12.2012 - if viallinen - disp('Incorrect coordinates'); - return - end - - inp = [filename1 ' & ' filename2]; - h0 = findobj('Tag','filename1_text'); - set(h0,'String',inp); - clear h0; clear inp; - clear filename1; clear filename2; clear pathname1; clear pathname2; - - input_pops = questdlg(['When using data which are in BAPS-format, '... - 'you can specify the sampling populations of the individuals by '... - 'giving two additional files: one containing the names of the '... - 'populations, the other containing the indices of the first '... - 'individuals of the populations. Do you wish to specify the '... - 'sampling populations?'], ... - 'Specify sampling populations?',... - 'Yes', 'No', 'No'); - if isequal(input_pops,'Yes') - %waitALittle; - setWindowOnTop(base,'false') - [namefile, namepath] = uigetfile('*.txt', 'Load population names'); - if namefile==0 - kysyToinen = 0; - else - kysyToinen = 1; - end - if kysyToinen==1 - %waitALittle; - setWindowOnTop(base,'false') - [indicesfile, indicespath] = uigetfile('*.txt', 'Load population indices'); - if indicesfile==0 - popnames = []; - else - popnames = initPopNames([namepath namefile],[indicespath indicesfile]); - end - else - popnames = []; - end - else - popnames = []; - end - - disp('Pre-processing the data. This may take several minutes.'); - - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rowsFromInd); - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - c.data = data; c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.popnames = popnames; c.Z = Z; - c.cliques = cliques; c.separators = separators; - c.vorPoints = vorPoints; c.vorCells = vorCells; - c.pointers = pointers; c.coordinates = coordinates; -% save(kokonimi,'c'); - save(kokonimi,'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 - clear c; - end; - -%%%%%%%%%%%%% added by Lu Cheng 11.11.2012 START %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -case 'FASTA-format' - - setWindowOnTop(base,'false') - [filename1, pathname1] = uigetfile({'*.fasta';'*.*'}, 'Load data in FASTA-format'); - if filename1==0 - return; - end - - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - %data = load([pathname1 filename1]); - [heds, seqs] = fastaread([pathname1 filename1]); - seqs = seqs(:); - alnMat = cell2mat(seqs); - nSeq = length(seqs); - clear seqs; - - cc = preprocAln(alnMat); - cc.heds = heds; - cc.nSeq = nSeq; - dist = seqpdist(alnMat,'method','p-distance'); - Z = linkage(dist,'complete'); - - ninds = nSeq; - - clear alnMat heds nSeq - -% [ninds,data,heds]=testFastaData([pathname1 filename1]); - - setWindowOnTop(base,'false') - [filename2,pathname2]=uigetfile('*.txt', 'Load individual coordinates'); - if filename2==0 - return - end - - coordinates = load([pathname2 filename2]); - %viallinen = testaaKoordinaatit(ninds, coordinates); - [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates); % added by Lu Cheng, 05.12.2012 - if viallinen - disp('Incorrect coordinates'); - return - end - - inp = [filename1 ' & ' filename2]; - h0 = findobj('Tag','filename1_text'); - set(h0,'String',inp); - clear h0; clear inp; - clear filename1; clear filename2; clear pathname1; clear pathname2; - - input_pops = questdlg(['When using data which are in FASTA-format, '... - 'you can specify the sampling populations of the individuals by '... - 'giving two additional files: one containing the names of the '... - 'populations, the other containing the indices of the first '... - 'individuals of the populations. Do you wish to specify the '... - 'sampling populations?'], ... - 'Specify sampling populations?',... - 'Yes', 'No', 'No'); -% input_pops = 'No'; - if isequal(input_pops,'Yes') - %waitALittle; - setWindowOnTop(base,'false') - [namefile, namepath] = uigetfile('*.txt', 'Load population names'); - if namefile==0 - kysyToinen = 0; - else - kysyToinen = 1; - end - if kysyToinen==1 - %waitALittle; - setWindowOnTop(base,'false') - [indicesfile, indicespath] = uigetfile('*.txt', 'Load population indices'); - if indicesfile==0 - popnames = []; - else - popnames = initPopNames([namepath namefile],[indicespath indicesfile]); - end - else - popnames = []; - end - else - popnames = []; - end - - disp('Pre-processing the data. This may take several minutes.'); - -% [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); -% [Z,dist] = newGetDistances(data,rowsFromInd); - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - - cc.locCliques = cliques; - cc.locSeparators = separators; - cc.popnames = popnames; - cc.vorPoints = vorPoints; - cc.vorCells = vorCells; - cc.pointers = pointers; - cc.coordinates = coordinates; - format_type = 'FASTA'; - - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - save(kokonimi,'cc','dist','Z','format_type','-v7.3'); % added by Lu Cheng, 08.06.2012 - end; - - handleIndiFastaCase(cc,dist,Z); - - return; -%%%%%%%%%%%%%add by Lu Cheng 11.11.2012 END %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -case 'GenePop-format' - %%waitALittle; - setWindowOnTop(base,'false') - [filename1, pathname1] = uigetfile('*.txt', 'Load data in GenePop-format'); - if filename1==0 - return; - end - - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - - kunnossa = testaaGenePopData([pathname1 filename1]); - if kunnossa==0 - return - end - [data,popnames]=lueGenePopData([pathname1 filename1]); - - %%waitALittle; - setWindowOnTop(base,'false') - [filename2,pathname2]=uigetfile('*.txt', 'Load individual coordinates'); - if filename2==0 - return - end - - ninds = max(data(:,end)); - coordinates = load([pathname2 filename2]); - %viallinen = testaaKoordinaatit(ninds, coordinates); - [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates); % added by Lu Cheng, 05.12.2012 - - if viallinen - disp('Incorrect coordinates'); - return - end - - inp = [filename1 ' & ' filename2]; - h0 = findobj('Tag','filename1_text'); - set(h0,'String',inp); - clear h0; clear inp; - clear filename1; clear filename2; clear pathname1; clear pathname2; - - disp('Pre-processing the data. This may take several minutes.'); - - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rowsFromInd); - - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %%waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - c.data = data; c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.popnames = popnames; c.Z = Z; - c.cliques = cliques; c.separators = separators; - c.vorPoints = vorPoints; c.vorCells = vorCells; - c.pointers = pointers; c.coordinates = coordinates; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - clear c; - end; - -case 'Preprocessed data' - %%waitALittle; -% setWindowOnTop(base,'false') - [filename, pathname] = uigetfile('*.mat', 'Load pre-processed data'); - if filename==0 - return; - end - display('---------------------------------------------------'); - display(['Reading preprocessed spatial data from: ',[pathname filename],'...']); - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - - struct_array = load([pathname filename]); - - if isfield(struct_array,'format_type') && strcmp(struct_array.format_type,'FASTA') - handleIndiFastaCase(struct_array.cc,struct_array.dist,struct_array.Z); - return; - end - - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'dist') - disp('Incorrect file format'); - return - end - elseif isfield(struct_array,'dist') %Mideva versio - c = struct_array; - else - disp('Incorrect file format'); - return; - end - data = double(c.data); rowsFromInd = c.rowsFromInd; alleleCodes = c.alleleCodes; - noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; - dist = c.dist; popnames = c.popnames; Z = c.Z; - if isfield(c, 'cliques') - cliques = c.cliques; separators = c.separators; - vorPoints = c.vorPoints; vorCells = c.vorCells; - pointers = c.pointers; coordinates = c.coordinates; - clear c; - else - load_coord = questdlg(['The data file did not contain ',... - 'coordinate information. Do you wish to load coordinates?'], ... - 'Load coordinates?',... - 'Yes','No','Yes'); - if isequal(load_coord, 'No') - return - end - %%waitALittle; - setWindowOnTop(base,'false') - [filename2,pathname2]=uigetfile('*.txt', 'Load individual coordinates'); - if filename2==0 - return - end - - ninds = max(data(:,end)); - coordinates = load([pathname2 filename2]); - %viallinen = testaaKoordinaatit(ninds, coordinates); - [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates); % added by Lu Cheng, 05.12.2012 - - if viallinen - disp('Incorrect coordinates'); - return - end - inp = [filename ' & ' filename2]; - h0 = findobj('Tag','filename1_text'); - set(h0,'String',inp); - clear h0; clear inp; - clear filename; clear filename2; clear pathname; clear pathname2; - - disp('Pre-processing the data. This may take several minutes.'); - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - %%waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - c.cliques = cliques; c.separators = separators; - c.vorPoints = vorPoints; c.vorCells = vorCells; - c.pointers = pointers; c.coordinates = coordinates; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - clear c; - end; - end - - otherwise - return; -end - -global PARTITION; global COUNTS; -global SUMCOUNTS; -global SEPCOUNTS; global CLIQCOUNTS; -clearGlobalVars; - -c.data=data; c.alleleCodes = alleleCodes; -c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -c.dist=dist; c.Z=Z; c.rowsFromInd = rowsFromInd; -c.cliques = cliques; c.separators = separators; - -ninds = length(unique(data(:,end))); -ekat = (1:rowsFromInd:ninds*rowsFromInd)'; -c.rows = [ekat ekat+rowsFromInd-1]; - -% partition compare -if ~isempty(partitionCompare) - nsamplingunits = size(c.rows,1); - data = data(:,1:end-1); - partitions = partitionCompare.partitions; - npartitions = size(partitions,2); - partitionLogml = zeros(1,npartitions); - for i = 1:npartitions - % number of unique partition lables - npops = length(unique(partitions(:,i))); - try - partitionInd = zeros(ninds*rowsFromInd,1); - partitionSample = partitions(:,i); - for j = 1:nsamplingunits - partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j); - end - [sumcounts, counts] = initialCounts(partitionInd, data, npops, c.rows, noalle, adjprior); - [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops, partitionSample); - partitionLogml(i) = ... - computeLogml(adjprior, priorTerm, cliqcounts, sepcounts, counts, sumcounts); - catch - disp('*** ERROR: unmatched data.'); - return - end - end - % return the logml result - partitionCompare.logmls = partitionLogml; - set(h1, 'userdata', partitionCompare); - return -end - -if fixedK - [logml, npops, partitionSummary]=spatialMix_fixK(c); -else - [logml, npops, partitionSummary]=spatialMix(c); -end - -if logml==1 - return -end - -data = data(:,1:end-1); - -h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outp = get(h0,'String'); -[varmuus,changesInLogml] = writeMixtureInfo(logml, rowsFromInd, data, adjprior, priorTerm, ... - outp,inp,partitionSummary, popnames, cliques, separators, fixedK); - - - -viewMixPartition(PARTITION, popnames); - -if isequal(popnames, []) - names = pointers; -else - names = cell(size(pointers)); - indices = zeros(size(popnames(:,2))); - for i=1:length(popnames(:,2)); - indices(i) = popnames{i,2}; - end - for i = 1:length(pointers) - inds = pointers{i}; - namesInCell = []; - for j = 1:length(inds) - ind = inds(j); - I = find(indices > ind); - if isempty(I) - nameIndex = length(indices); - else - nameIndex = min(I) -1; - end - name = popnames{nameIndex}; - namesInCell = [namesInCell name]; - end - names{i} = namesInCell; - end -end -vorPlot(vorPoints, vorCells, PARTITION, pointers, coordinates, names); - -%varmuus = 1 - laskeVarmuus(rowsFromInd, data, adjprior, priorTerm, ... -% logml, cliques, separators, ninds); - -%plotVarmuus(vorPoints, vorCells, pointers, varmuus, coordinates, ... -% PARTITION, names); - -talle = questdlg(['Do you want to save the mixture populations ' ... - 'so that you can use them later in admixture analysis or plot ' ... - 'additional images?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - %%waitALittle; % Hetki odotusta, jotta muistaa kysy?.. - [filename, pathname] = uiputfile('*.mat','Save results as'); - - if (filename == 0) & (pathname == 0) - % Cancel was pressed - return - else % copy 'baps4_output.baps' into the text file with the same name. - if exist(OUTPUT_FILE,'file') - copyfile(OUTPUT_FILE,[pathname filename '.txt']) - delete(OUTPUT_FILE) - end - end - - c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; - c.alleleCodes = alleleCodes; c.adjprior = adjprior; c.popnames = popnames; - c.rowsFromInd = rowsFromInd; c.data = data; c.npops = npops; - c.noalle = noalle; c.mixtureType = 'spatial'; - c.pointers = pointers; c.vorPoints = vorPoints; c.vorCells = vorCells; - c.coordinates = coordinates; c.varmuus = varmuus; c.names = names; - c.changesInLogml = changesInLogml; % added by jing - 22.11.2006 - c.logml = logml; - - % added by Lu Cheng, 05.12.2012 - tmpFile = [pathname filename '.mapfile.txt']; - fid = fopen(tmpFile,'w+'); - fprintf(fid,'Name\tLatitude\tLongitude\tDescription\tLabel\n'); - if exist('heds','var') - for i=1:length(heds) - fprintf(fid,'%s\t%.10f\t%.10f\t%s_%d\t%d\n',heds{i},coordinates(i,1),coordinates(i,2),... - heds{i},PARTITION(i),PARTITION(i)); - end - else - for i=1:ninds - fprintf(fid,'%d\t%.10f\t%.10f\t%d_%d\t%d\n',i,coordinates(i,1),coordinates(i,2),... - i,PARTITION(i),PARTITION(i)); - end - end - fclose(fid); - -% save([pathname filename], 'c'); - save([pathname filename], 'c','-v7.3'); % added by Lu Cheng, 08.06.2012 -else - if exist(OUTPUT_FILE,'file') - delete(OUTPUT_FILE) - end -end - -%-------------------------------------------------------------------------- -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; - -global SEPCOUNTS; SEPCOUNTS = []; -global CLIQCOUNTS; CLIQCOUNTS = []; - - -%------------------------------------------------------------------------------------- - - -function rows = computeRows(rowsFromInd, inds, ninds) -% On annettu yksilöt inds. Funktio palauttaa vektorin, joka -% sisältää niiden rivien numerot, jotka sisältävät yksilöiden -% dataa. - -rows = inds(:, ones(1,rowsFromInd)); -rows = rows*rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]); -rows = rows - miinus; -rows = reshape(rows', [1,rowsFromInd*ninds]); - - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - -%-------------------------------------------------------------------------- - - -function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -% Suorittaa yksilön seuraavan tilan arvonnan - -y = logml + muutokset; % siirron jälkeiset logml:t -y = y - max(y); -y = exp(y); -summa = sum(y); -y = y/summa; -y = cumsum(y); - -i2 = rand_disc(y); % uusi kori -suurin = muutokset(i2); - - -%-------------------------------------------------------------------------------------- - - -function svar=rand_disc(CDF) -%returns an index of a value from a discrete distribution using inversion method -slump=rand; -har=find(CDF>slump); -svar=har(1); - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ... - cliques, separators, adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksil?ind -% siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; -global CLIQCOUNTS; -global SEPCOUNTS; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, rowsFromInd, diffInCounts, ... - cliques, separators, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -%global POP_LOGML; -global CLIQCOUNTS; -global SEPCOUNTS; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -diffInCliqCounts = CLIQCOUNTS(:,i1); -diffInSepCounts = SEPCOUNTS(:,i1); - - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = 0; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ... - adjprior, priorTerm, i2, cliques, separators); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; global CLIQCOUNTS; -global SUMCOUNTS; global SEPCOUNTS; -%global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, muuttuvat); -diffInSepCounts = computeDiffInCliqCounts(separators, muuttuvat); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm) -% Palauttaa yksilöt järjestyksess?siten, ett?ensimmäisen?on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind = inds(i); - rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, ... - data, adjprior, priorTerm, logml, cliques, separators) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?ind siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - - -global COUNTS; global SUMCOUNTS; -global PARTITION; %global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -[emptyPop, pops] = findEmptyPop(npops); - -i1 = PARTITION(ind); -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - -% Asetetaan muillekin tyhjille populaatioille sama muutos, kuin -% emptyPop:lle - -if emptyPop > 0 - empties = mysetdiff((1:npops), [i2 i1]); - muutokset(empties) = muutokset(emptyPop); -end - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2(i1, rowsFromInd, ... - data, adjprior, priorTerm, logml, cliques, separators); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli korin i1 kaikki yksilöt siirretään -% koriin i. -% Laskee muutokset vain yhdelle tyhjälle populaatiolle, muille tulee -% muutokseksi 0. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -[emptyPop, pops] = findEmptyPop(npops); - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -inds = find(PARTITION == i1); -rows = computeRows(rowsFromInd, inds, length(inds)); - -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); -diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); -diffInSepCounts = computeDiffInCliqCounts(separators, inds); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i1) = 0; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = diffInCliqCounts; -SEPCOUNTS(:,i1) = diffInSepCounts; - - - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ... - data, adjprior, priorTerm, i1, logml, cliques, separators) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. -% Laskee vain yhden tyhjän populaation, muita kohden muutokseksi jää 0. - - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = computeRows(rowsFromInd, inds, ninds); - - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); - diffInSepCounts = computeDiffInCliqCounts(separators, inds); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - - [emptyPop, pops] = findEmptyPop(npops); - i2 = [pops(find(pops~=i1))]; - if emptyPop > 0 - i2 =[i2 emptyPop]; - end - - for i = i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(pop2,i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; - end - - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - end -end - -%-------------------------------------------------------------------------- - -function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ... - priorTerm, logml, cliques, separators, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?i vaihtaisi koria i1:n ja i2:n välill? - -global COUNTS; global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; global SEPCOUNTS; - -ninds = length(inds); -muutokset = zeros(ninds,1); -cliqsize = size(CLIQCOUNTS,2); -sepsize = size(SEPCOUNTS, 2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %mist? - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; - - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); - diffInSepCounts = computeDiffInCliqCounts(separators, ind); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) - diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) + diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) - diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) + diffInSepCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) + diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) - diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) + diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) - diffInSepCounts; - -end - -%-------------------------------------------------------------------------- - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - -%------------------------------------------------------------------------------------ - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; -global SEPCOUNTS; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); -CLIQCOUNTS = CLIQCOUNTS(:,notEmpty); -SEPCOUNTS = SEPCOUNTS(:,notEmpty); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%---------------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - - -%--------------------------------------------------------------------------------------- - - -function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -nrows = size(data,1); -ncols = size(data,2); -rowsFromInd = zeros(nind,1); -for i=1:nind - rowsFromInd(i) = length(find(data(:,end)==i)); -end -maxRowsFromInd = max(rowsFromInd); -a = -999; -emptyRow = repmat(a, 1, ncols); -lessThanMax = find(rowsFromInd < maxRowsFromInd); -missingRows = maxRowsFromInd*nind - nrows; -data = [data; zeros(missingRows, ncols)]; -pointer = 1; -for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); % Tält?yksilölt?puuttuvien lkm. - for j=1:miss - rowToBeAdded = emptyRow; - rowToBeAdded(end) = ind; - data(nrows+pointer, :) = rowToBeAdded; - pointer = pointer+1; - end -end -data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti -newData = data; -rowsFromInd = maxRowsFromInd; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%---------------------------------------------------------------------------------------- - - -function [Z, dist] = newGetDistances(data, rowsFromInd) - -ninds = max(data(:,end)); -nloci = size(data,2)-1; -riviLkm = nchoosek(ninds,2); - -empties = find(data<0); -data(empties)=0; -data = uint8(data); % max(noalle) oltava <256 - -pariTaulu = zeros(riviLkm,2); -aPointer=1; -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a; - pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)'; - aPointer = aPointer+ninds-a; -end - -eka = pariTaulu(:,ones(1,rowsFromInd)); -eka = eka * rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -eka = eka - miinus; - -toka = pariTaulu(:,ones(1,rowsFromInd)*2); -toka = toka * rowsFromInd; -toka = toka - miinus; - -%eka = uint16(eka); -%toka = uint16(toka); - -summa = zeros(riviLkm,1); -vertailuja = zeros(riviLkm,1); - -clear pariTaulu; clear miinus; - -x = zeros(size(eka)); x = uint8(x); -y = zeros(size(toka)); y = uint8(y); - -for j=1:nloci; - - for k=1:rowsFromInd - x(:,k) = data(eka(:,k),j); - y(:,k) = data(toka(:,k),j); - end - - for a=1:rowsFromInd - for b=1:rowsFromInd - vertailutNyt = double(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa+double(lisays); - end - end -end - -clear x; clear y; clear vertailutNyt; -nollat = find(vertailuja==0); -dist = zeros(length(vertailuja),1); -dist(nollat) = 1; -muut = find(vertailuja>0); -dist(muut) = summa(muut)./vertailuja(muut); -clear summa; clear vertailuja; - -Z = linkage(dist'); - - -%---------------------------------------------------------------------------------------- - - -function [Z, distances]=getDistances(data_matrix,nclusters) - -%finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance -%gives partition in 8-bit format -%allocates all alleles of a single individual into the same basket -%data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row, -%i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row -%missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -distances=zeros(nchoosek(n,2),1); -pointer=1; -for i=1:n-1 - i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci); - for j=i+1:n - d_ij=0; - j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci); - vertailuja = 0; - for k=1:size(i_data,1) - for l=1:size(j_data,1) - here_i=find(i_data(k,:)>=0); - here_j=find(j_data(l,:)>=0); - here_joint=intersect(here_i,here_j); - vertailuja = vertailuja + length(here_joint); - d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint))); - end - end - d_ij = d_ij / vertailuja; - distances(pointer)=d_ij; - pointer=pointer+1; - end -end - -Z=linkage(distances'); - - - -%---------------------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile, indexFile) -%Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto -% eivät olleet yht?pitki? - -popnames = []; -indices = load(indexFile); - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) & ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -if length(names) ~= length(indices) - disp('The number of population names must be equal to the number of '); - disp('entries in the file specifying indices of the first individuals of '); - disp('each population.'); - return; -end - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = indices(i); -end - - -%----------------------------------------------------------------------------------- -% Laskee arvot cliqcounts:lle ja sepcounts:lle - - -function [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops, PARTITION) - - -ncliq = size(cliques,1); -nsep = size(separators,1); - -cliqPartition = zeros(ncliq, size(cliques,2)); -sepPartition = zeros(nsep, size(separators, 2)); - -apuCliq = find(cliques > 0); -apuSep = find(separators > 0); - -cliqPartition(apuCliq) = PARTITION(cliques(apuCliq)); -sepPartition(apuSep) = PARTITION(separators(apuSep)); - - -cliqcounts = zeros(ncliq, npops); -for i = 1:npops - cliqcounts(:,i) = sum(cliqPartition == i, 2); -end - - -sepcounts = zeros(nsep, npops); -for i = 1:npops - sepcounts(:,i) = sum(sepPartition == i, 2); -end - -%------------------------------------------------------------------------- - -function diffInCliqCounts = computeDiffInCliqCounts(cliques, inds) -% Laskee muutoksen CLIQCOUNTS:ssa (tai SEPCOUNTS:ssa, jos syötteen? -% separators) kun yksilöt inds siirretään. -% diffInCliqcounts on ncliq*1 taulu, joka on CLIQCOUNTS:n sarakkeesta josta -% yksilöt inds siirretään ja lisättäv?sarakkeeseen, johon yksilöt -% siirretään. - -ncliq = size(cliques,1); -diffInCliqCounts = zeros(ncliq,1); -ninds = length(inds); -for i = 1:ninds - ind = inds(i); - rivit = sum((cliques == ind),2); - diffInCliqCounts = diffInCliqCounts + rivit; -end - - -%----------------------------------------------------------------------- - -function [logml, spatialPrior] = computeLogml(adjprior,priorTerm, ... - CLIQCOUNTS, SEPCOUNTS, ... - COUNTS, SUMCOUNTS) - -notEmpty = any(CLIQCOUNTS); -npops = length(find(notEmpty == 1)); -sumcliq=sum(CLIQCOUNTS, 2); -sumsep=sum(SEPCOUNTS, 2); -ncliq = size(CLIQCOUNTS, 1); -nsep = size(SEPCOUNTS, 1); - -cliqsizes = sum(CLIQCOUNTS, 2)'; -sepsizes = sum(SEPCOUNTS, 2)'; -cliqsizes = min([cliqsizes; npops*ones(1,ncliq)])'; -sepsizes = min([sepsizes; npops*ones(1,nsep)])'; - -klikkitn = sum(sum(gammaln(CLIQCOUNTS(:,notEmpty) + repmat(1./cliqsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./cliqsizes))) ... - - sum(gammaln(sumcliq + 1)); - -septn = sum(sum(gammaln(SEPCOUNTS(:,notEmpty) + repmat(1./sepsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./sepsizes))) ... - - sum(gammaln(sumsep + 1)); - - -%klikkitn = sum(sum(gammaln(CLIQCOUNTS + 1/npops))) ... -% - ncliq*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumcliq + 1)); -%septn = sum(sum(gammaln(SEPCOUNTS + 1/npops))) ... -% - nsep*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumsep + 1)); - -spatialPrior = (klikkitn - septn); - -%if spatialPrior > 0 -% keyboard -%end - - -x = size(COUNTS,1); -y = size(COUNTS,2); -z = size(COUNTS,3); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 z]) + COUNTS) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS),2) - priorTerm; - -logml = sum(popLogml) + spatialPrior; - -%-------------------------------------------------------------------------- - - -function initializeGammaln(ninds, rowsFromInd, maxSize) -%Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j) -global GAMMA_LN; -GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxSize); -for i=1:(ninds+1)*rowsFromInd - for j=1:maxSize - GAMMA_LN(i,j)=gammaln((i-1) + 1/j); - end -end - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - - -%---------------------------------------------------------------------------- - - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimmäinen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) | isequal(line2,-1) | isequal(line3,-1)) - disp('Incorrect file format 1168'); fclose(fid); - return -end -if (testaaPop(line1)==1 | testaaPop(line2)==1) - disp('Incorrect file format 1172'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi tällöin lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format 1180'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin nelj?täytyy sisältää pilkku. - disp('Incorrect file format 1185'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1195'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 & ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format 1206'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan jälkeen - if isequal(line4,-1) - disp('Incorrect file format 1212'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin täytyy sisältää pilkku. - disp('Incorrect file format 1217'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1228'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - -%------------------------------------------------------ - - -function [data, popnames] = lueGenePopData(tiedostonNimi) - -fid = fopen(tiedostonNimi); -line = fgetl(fid); %ensimmäinen rivi -line = fgetl(fid); %toinen rivi -count = rivinSisaltamienMjonojenLkm(line); - -line = fgetl(fid); -lokusRiveja = 1; -while (testaaPop(line)==0) - lokusRiveja = lokusRiveja+1; - line = fgetl(fid); -end - -if lokusRiveja>1 - nloci = lokusRiveja; -else - nloci = count; -end - -popnames = cell(10,2); -data = zeros(100, nloci+1); -nimienLkm=0; -ninds=0; -poimiNimi=1; -digitFormat = -1; -while line ~= -1 - line = fgetl(fid); - - if poimiNimi==1 - %Edellinen rivi oli 'pop' - nimienLkm = nimienLkm+1; - ninds = ninds+1; - if nimienLkm>size(popnames,1); - popnames = [popnames; cell(10,2)]; - end - nimi = lueNimi(line); - if digitFormat == -1 - digitFormat = selvitaDigitFormat(line); - divider = 10^digitFormat; - end - popnames{nimienLkm, 1} = {nimi}; %Näin se on greedyMix:issäkin?!? - popnames{nimienLkm, 2} = ninds; - poimiNimi=0; - - data = addAlleles(data, ninds, line, divider); - - elseif testaaPop(line) - poimiNimi = 1; - - elseif line ~= -1 - ninds = ninds+1; - data = addAlleles(data, ninds, line, divider); - end -end - -data = data(1:ninds*2,:); -popnames = popnames(1:nimienLkm,:); -fclose(fid); - - -%------------------------------------------------------- - -function nimi = lueNimi(line) -%Palauttaa line:n alusta sen osan, joka on ennen pilkkua. -n = 1; -merkki = line(n); -nimi = ''; -while ~isequal(merkki,',') - nimi = [nimi merkki]; - n = n+1; - merkki = line(n); -end - -%------------------------------------------------------- - -function df = selvitaDigitFormat(line) -% line on ensimmäinen pop-sanan jälkeinen rivi -% Genepop-formaatissa olevasta datasta. funktio selvittää -% rivin muodon perusteella, ovatko datan alleelit annettu -% 2 vai 3 numeron avulla. - -n = 1; -merkki = line(n); -while ~isequal(merkki,',') - n = n+1; - merkki = line(n); -end - -while ~any(merkki == '0123456789'); - n = n+1; - merkki = line(n); -end -numeroja = 0; -while any(merkki == '0123456789'); - numeroja = numeroja+1; - n = n+1; - merkki = line(n); -end - -df = numeroja/2; - - -%------------------------------------------------------ - - -function count = rivinSisaltamienMjonojenLkm(line) -% Palauttaa line:n sisältämien mjonojen lukumäärän. -% Mjonojen väliss?täytyy olla välilyönti. -count = 0; -pit = length(line); -tila = 0; %0, jos odotetaan välilyöntej? 1 jos odotetaan muita merkkej? -for i=1:pit - merkki = line(i); - if (isspace(merkki) & tila==0) - %Ei tehd?mitään. - elseif (isspace(merkki) & tila==1) - tila = 0; - elseif (~isspace(merkki) & tila==0) - tila = 1; - count = count+1; - elseif (~isspace(merkki) & tila==1) - %Ei tehd?mitään - end -end - -%------------------------------------------------------- - -function pal = testaaPop(rivi) -% pal=1, mikäli rivi alkaa jollain seuraavista -% kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa -% tapauksissa pal=0. - -if length(rivi)<3 - pal = 0; - return -end -if (all(rivi(1:3)=='Pop') | ... - all(rivi(1:3)=='pop') | ... - all(rivi(1:3)=='POP')) - pal = 1; - return -else - pal = 0; - return -end - - -%-------------------------------------------------------- - - -function data = addAlleles(data, ind, line, divider) -% Lisaa BAPS-formaatissa olevaan datataulukkoon -% yksilöä ind vastaavat rivit. Yksilön alleelit -% luetaan genepop-formaatissa olevasta rivist? -% line. Jos data on 3 digit formaatissa on divider=1000. -% Jos data on 2 digit formaatissa on divider=100. - -nloci = size(data,2)-1; -if size(data,1) < 2*ind - data = [data; zeros(100,nloci+1)]; -end - -k=1; -merkki=line(k); -while ~isequal(merkki,',') - k=k+1; - merkki=line(k); -end -line = line(k+1:end); -clear k; clear merkki; - -alleeliTaulu = sscanf(line,'%d'); - -if length(alleeliTaulu)~=nloci - disp('Incorrect data format.'); -end - -for j=1:nloci - ekaAlleeli = floor(alleeliTaulu(j)/divider); - if ekaAlleeli==0 ekaAlleeli=-999; end; - tokaAlleeli = rem(alleeliTaulu(j),divider); - if tokaAlleeli==0 tokaAlleeli=-999; end - - data(2*ind-1,j) = ekaAlleeli; - data(2*ind,j) = tokaAlleeli; -end - -data(2*ind-1,end) = ind; -data(2*ind,end) = ind; - -%------------------------------------------------------------------- - -function [varmuus,changesInLogml] = writeMixtureInfo(logml, rowsFromInd, data, adjprior, ... - priorTerm, outPutFile, inputFile, partitionSummary, popnames, ... - cliques, separators, fixedK) - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global LOGDIFF; - -OUTPUT_FILE = 'baps6_output.baps'; - -ninds = size(data,1)/rowsFromInd; -npops = size(COUNTS,3); -names = (size(popnames,1) == ninds); %Tarkistetaan ett?nimet viittaavat yksilöihin - -if length(outPutFile)>0 - fid = fopen(outPutFile,'a'); -else - fid = -1; - diary(OUTPUT_FILE); % save in text anyway. -end - -dispLine; -disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -disp(['Data file: ' inputFile]); -disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp(['Best Partition: ']); -if (fid ~= -1) - fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',[new_line]); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',[text]); - fprintf(fid,'\n'); - end - end -end - -disp(' '); -disp(' '); -disp('Changes in log(marginal likelihood) if indvidual i is moved to group j:'); -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if indvidual i is moved to group j:']); fprintf(fid, '\n'); -end - -if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku ' ind' blanks(6+erotus)]; -else - ekarivi = ' ind '; -end - -for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; -end -disp(ekarivi); -if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); -end - -ninds = size(data,1)/rowsFromInd; -varmuus = zeros(ninds,1); - -changesInLogml = LOGDIFF'; -for ind = 1:ninds - %[muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, data, ... - % adjprior, priorTerm, logml, cliques, separators); - %changesInLogml(:,ind) = muutokset; - muutokset = changesInLogml(:,ind); - - if sum(exp(muutokset))>0 - varmuus(ind) = 1 - 1/sum(exp(muutokset)); - else - varmuus(ind) = 0; - end - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - end -end - -disp(' '); disp(' '); -disp('KL-divergence matrix in PHYLIP format:'); -dist_mat = zeros(npops, npops); -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); -end - -maxnoalle = size(COUNTS,1); -nloci = size(COUNTS,2); -d = zeros(maxnoalle, nloci, npops); -prior = adjprior; -prior(find(prior==1))=0; -nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. -prior(1,nollia)=1; -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end -% ekarivi = blanks(7); -% for pop = 1:npops -% ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; -% end -ekarivi = num2str(npops); -disp(ekarivi); - -if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); -end - -for pop1 = 1:npops - rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - dist_mat(pop1,pop2) = div; - end -% disp(rivi); -% if (fid ~= -1) -% fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); -% end -end - -dist_mat = dist_mat + dist_mat'; % make it symmetric -for pop1 = 1:npops - rivi = ['Cluster_' num2str(pop1) ' ']; - for pop2 = 1:npops - rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n'); -end - -partitionSummaryKaikki = partitionSummary; -partitionSummary =[]; -for i=1:size(partitionSummaryKaikki,3) - partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)]; -end -[I,J] = find(partitionSummaryKaikki(:,2,:)>-1e49); -partitionSummaryKaikki = partitionSummaryKaikki(I,:,:); - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end -end - -if ~fixedK - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end - end - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - - -%-------------------------------------------------------------------------- - - -function ninds = testaaOnkoKunnollinenBapsData(data) -%Tarkastaa onko viimeisess?sarakkeessa kaikki -%luvut 1,2,...,n johonkin n:ään asti. -%Tarkastaa lisäksi, ett?on vähintään 2 saraketta. -if size(data,1)<2 - ninds = 0; return; -end -lastCol = data(:,end); -ninds = max(lastCol); -if ~isequal((1:ninds)',unique(lastCol)) - ninds = 0; return; -end - - -%-------------------------------------------------------------------------- - - -function [ninds, data, heds] = testFastaData(inFile) -% added by Lu Cheng, 11.11.2012 -if ~exist(inFile,'file') - error('Fasta file %s does not exist!\n',inFile); -end - -[heds, seqs]=fastaread(inFile); -ninds = length(seqs); - -data = cell2mat(seqs(:)); -newData = ones(size(data))*-9; -newData(ismember(data,'Aa'))=1; -newData(ismember(data,'Cc'))=2; -newData(ismember(data,'Gg'))=3; -newData(ismember(data,'Tt'))=4; -data = [newData (1:ninds)']; - - - -%-------------------------------------------------------------------------- - -function [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates) -%Laskee yksilöiden luonnolliset naapurit koordinaateista. -%Naapurit lasketaan lisäämäll?koordinaatteihin pisteit? -%jotta kutakin yksilöä vastaisi rajoitettu voronoi-solu -%Puuttuvat koordinaatit (0,0) tulevat erakkopisteiksi -% -%Määrittää lisäksi yksilöit?vastaavat voronoi tesselaation solut. -%vorPoints:ssa on solujen kulmapisteet ja vorCells:ss?kunkin solun -%kulmapisteiden indeksit. Pointers{i} sisältää solussa i olevien yksilöiden -%indeksit. - - - -ninds = length(coordinates); -[I,J] = find(coordinates>0 | coordinates <0); %Käsitellään vain yksilöit? joilta koordinaatit -I = unique(I); %olemassa -ncoords = length(I); -new_coordinates = addPoints(coordinates(I,:)); %Ympäröidään yksilöt apupisteill? - -apuData = [new_coordinates(1:ncoords,:) (1:ncoords)']; -apuData = sortrows(apuData,[1 2]); -erot = [diff(apuData(:,1)) diff(apuData(:,2))]; -empties = find(erot(:,1)==0 & erot(:,2)==0); -samat = cell(length(empties),1); -pointer = 0; - -for i = 1:length(empties) - if i == 1 | empties(i) - empties(i-1) > 1 %Tutkitaan onko eri pisteess?kuin edellinen - pointer = pointer+1; - samat{pointer} = [apuData(empties(i),3) apuData(empties(i)+1,3)]; - else - samat{pointer} = [samat{pointer} apuData(empties(i)+1,3)]; - end -end - -samat = samat(1:pointer); -erot = []; apuData = []; empties = []; - -%tri = delaunay(new_coordinates(:,1), new_coordinates(:,2), {'Qt','Qbb','Qc','Qz'}); %Apupisteiden takia ok. -tri = delaunay(new_coordinates(:,1), new_coordinates(:,2)); -%[rivi,sarake] = find(tri>ncoords); %Jätetään huomiotta apupisteet -%tri(rivi,:) = []; -pituus = tri(:,1); -pituus = length(pituus); -parit = zeros(6*pituus,2); -for i = 1:pituus %Muodostetaan kolmikoista parit - j = 6*(i-1)+1; - parit(j,:) = tri(i,1:2); - parit(j+1,:) = tri(i,1:2:3); - parit(j+2,:) = tri(i,2:3); - parit(j+3:j+5,:) = [parit(j:j+2,2) parit(j:j+2,1)]; -end -parit = unique(parit,'rows'); -[rivi,sarake] = find(parit>ncoords); %Jätetään huomiotta apupisteet -parit(rivi,:) = []; -parit = I(parit); %Otetaan poistetut takaisin mukaan -graph = sparse(parit(:,1),parit(:,2),1, ninds, ninds); - - -%Kopioidaan samassa pisteess?olevien yksilöiden naapurustot -%silt? jolle ne laitettu. - -for i = 1:length(samat); - taulu = I(samat{i}); - [rivi,sarake] = find(graph(taulu,:)>0); - if length(rivi) > 0 - kopioitava = graph(taulu(rivi(1)),:); - for j = 1:length(taulu); - graph(taulu(j),:) = kopioitava; - graph(:,taulu(j)) = kopioitava'; - end - end -end - -%Asetetaan samassa pisteess?olevat yksilöt toistensa naapureiksi - -for i = 1:length(samat) - for j = I(samat{i}) - for k = I(samat{i}) - if k ~= j - graph(j,k) = 1; - end - end - end -end - -%Laskee maksimin klikkien ja separaattorien koolle -%Määritetään myös klikit ja separaattorit - -[ncliq, nsep, cliq, sep] = laskeKlikit(graph, ninds, ninds); - -sumcliq = sum(ncliq); -sumsep = sum(nsep); -maxCliqSize = max(find(sumcliq > 0)); -maxSepSize = max(find(sumsep > 0)); - -cliques = zeros(length(cliq), maxCliqSize); -separators = zeros(length(sep), maxSepSize); - -nollia = zeros(1, length(cliq)); -for i = 1:length(cliq); - klikki = cliq{i}; - if length(klikki)>1 - cliques(i, 1:length(klikki)) = klikki; - else - nollia(i)=1; - end -end -cliques(find(nollia==1), :) = []; - - -for i = 1:length(sep); - klikki = sep{i}; - separators(i, 1:length(klikki)) = klikki; -end - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%Määritetään yksilöit?vastaavat voronoi tesselaation solut - - - -[vorPoints, vorCells] = voronoin(new_coordinates, {'Qbb', 'Qz'}); - - -bounded = ones(length(vorCells),1); -for i=1:length(vorCells) - if (isempty(vorCells{i})) | (length(find(vorCells{i}==1))>0) - bounded(i)=0; - end -end - - - -vorCells = vorCells(bounded == 1); - -pointers = cell(length(vorCells),1); -empties = zeros(1,length(vorCells)); -X = coordinates(:,1); -Y = coordinates(:,2); - -for i=1:length(pointers) - vx = vorPoints(vorCells{i},1); - vy = vorPoints(vorCells{i},2); - IN = inpolygon(X,Y,vx,vy); - if any(IN)==0 - empties(i) = 1; - else - pointers{i} = find(IN ==1); - end -end - - -%figure -%hold on -% -%for i = 1:length(vorCells) -% vx = vorPoints(vorCells{i},1); -% vy = vorPoints(vorCells{i},2); -% k = convhull(vx,vy); -% if any(pointers{i}) -% patch(vx(k), vy(k),'y'); -% else -% plot(vx(k), vy(k)); -% end -%end - -%plot(coordinates(:,1), coordinates(:,2), 'r*'); -%plot(new_coordinates(ninds+1:end,1), new_coordinates(ninds+1:end,2), 'b+'); -%axis([-2 7 -2 8]); - -vorCells = vorCells(find(empties == 0)); -pointers = pointers(find(empties == 0)); - - - - -%-------------------------------------------------------------------------- - -function [ncliques, nseparators, cliques, separators] = ... - laskeKlikit(M, maxCliqSize,maxSepSize) -%Laskee samankokoisten klikkien määrän verkosta M -%ncliques(i)=kokoa i olevien klikkien määr? -%nseparators vastaavasti - -ncliques=zeros(1,maxCliqSize); -nseparators=zeros(1,maxSepSize); - -if isequal(M,[]) - return; -end - -[cliques,separators]=findCliques(M); - -for i=1:length(cliques) - ncliques(length(cliques{i}))=ncliques(length(cliques{i}))+1; -end - -%cliqmax=max(find(ncliques~=0)); -%ncliques=ncliques(1:cliqmax); - -for i=1:length(separators) - nseparators(length(separators{i}))=nseparators(length(separators{i}))+1; -end - -%sepmax=max(find(nseparators~=0)); -%nseparators=nseparators(1:sepmax); - -%-------------------------------------------------------------------------- - -function C = mysetdiff(A,B) -% MYSETDIFF Set difference of two sets of positive integers (much faster than built-in setdiff) -% C = mysetdiff(A,B) -% C = A \ B = { things in A that are not in B } -% -% Original by Kevin Murphy, modified by Leon Peshkin - -if isempty(A) - C = []; - return; -elseif isempty(B) - C = A; - return; -else % both non-empty - bits = zeros(1, max(max(A), max(B))); - bits(A) = 1; - bits(B) = 0; - C = A(logical(bits(A))); -end - - -%-------------------------------------------------------------------------- - -function logml = checkLogml(priorTerm, adjprior, cliques, separators) -% tarkistaa logml:n - -global CLIQCOUNTS; -global SEPCOUNTS; -global PARTITION; - -npops = length(unique(PARTITION)); -[cliqcounts, sepcounts] = computeCounts(cliques, separators, npops); - -CLIQCOUNTS = cliqcounts; -SEPCOUNTS = sepcounts; - - -[logml, spatialPrior] = computeLogml(adjprior, priorTerm); - -disp(['logml: ' logml2String(logml) ', spatial prior: ' logml2String(spatialPrior)]); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - -function [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates) -% Testaa onko koordinaatit kunnollisia. -% modified by Lu Cheng, 05.12.2012 - -viallinen = 1; -if ~isnumeric(coordinates) - warning('Coordinates are not numerical!'); - return; -end - -oikeanKokoinen = (size(coordinates,1) == ninds) & (size(coordinates,2) == 2); -if ~oikeanKokoinen - warning('Wrong coordinates dimension!'); - return; -end - -posstr = cellfun(@(x) sprintf('%.10f',x),num2cell(coordinates),'UniformOutput',false); -posstr = cellfun(@(x) regexprep(x,'0+$',''),posstr,'UniformOutput',false); - -uni1 = unique(posstr(:,1)); -uni2 = unique(posstr(:,2)); -posstr_new = posstr; - -if length(uni1)==ninds && length(uni2)==ninds - viallinen = 0; - return; -else - ans = questdlg('Input coordinates are not unique. Do you want to make them unique?','coordinates NOT unique', 'Yes','No','Yes'); - if strcmp(ans,'No') - warning('Coordinates are not unique!'); - return; - end -end - -for i=1:length(uni1) - tmpinds = find(ismember(posstr(:,1),uni1(i))); - tmpNinds = length(tmpinds); - - if tmpNinds==1 - continue; - end - - assert(tmpNinds<100); - tmparr = round(linspace(0,99,tmpNinds)); - tmparr = tmparr(randperm(tmpNinds)); - - for j=1:tmpNinds - posstr_new{tmpinds(j),1}=sprintf('%s%02d',posstr{tmpinds(j),1},tmparr(j)); - end -end - -for i=1:length(uni2) - tmpinds = find(ismember(posstr(:,2),uni2(i))); - tmpNinds = length(tmpinds); - - if tmpNinds==1 - continue; - end - - assert(tmpNinds<100); - tmparr = round(linspace(0,99,tmpNinds)); - tmparr = tmparr(randperm(tmpNinds)); - - for j=1:tmpNinds - posstr_new{tmpinds(j),2}=sprintf('%s%02d',posstr{tmpinds(j),2},tmparr(j)); - end -end - -coordinates = cellfun(@str2double,posstr_new); -uni1 = unique(coordinates(:,1)); -uni2 = unique(coordinates(:,2)); -if length(uni1)==ninds && length(uni2)==ninds - viallinen = 0; -else - warning('Can not make coordinates unique!'); -end - -%-------------------------------------------------------------------------- - -function varmuus = laskeVarmuus(rowsFromInd, data, adjprior, priorTerm, ... - logml, cliques, separators, ninds); - -varmuus = zeros(ninds,1); - -for ind=1:ninds - muutokset = laskeMuutokset(ind, rowsFromInd, data, adjprior, ... - priorTerm, logml, cliques, separators); - varmuus(ind) = 1/sum(exp(muutokset)); - -end - - -% ------------------------------------------------------------------------- -function [sumcounts, counts] = ... - initialCounts(partition, data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -ninds = size(rows, 1); - -koot = rows(:,1) - rows(:,2) + 1; -maxSize = max(koot); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -%initializeGammaln(ninds, maxSize, max(noalle)); - -% logml = laskeLoggis(counts, sumcounts, adjprior); - diff --git a/matlab/spatial/spatialPopMixture.m b/matlab/spatial/spatialPopMixture.m deleted file mode 100644 index ad9ec13..0000000 --- a/matlab/spatial/spatialPopMixture.m +++ /dev/null @@ -1,2645 +0,0 @@ -function spatialPopMixture() -%Vaihtuvalla populaatioiden määräll? priori 3:lla -%Samassa pisteess?olevien yksilöiden täytyy olla samasta populaatiosta. -%Toiminta pitkälti samanlainen kuin greedyPopMixiss? - -base = findobj('Tag','base_figure'); % added by Lu Cheng, 11.11.2012 - -% check whether fixed k mode is selected -h0 = findobj('Tag','fixk_menu'); -fixedK = get(h0, 'userdata'); - -if fixedK - if ~(fixKWarning == 1) % call function fixKWarning - return - end -end - -% output file name -OUTPUT_FILE = 'baps5_output.baps'; % also remember to update the file name in function WriteMixtureInfo - -% check whether partition compare mode is selected -h1 = findobj('Tag','partitioncompare_menu'); -partitionCompare = get(h1, 'userdata'); - -formatList = {'BAPS-format','FASTA-format', 'GenePop-format', 'Preprocessed data'}; -formatChoice = menu('Specify the format of your data: ','BAPS-format','FASTA-format', 'GenePop-format', 'Preprocessed data'); -if formatChoice==0 - return; -else - input_type = formatList{formatChoice}; -end - -% input_type = questdlg('Specify the format of your data: ',... -% 'Specify Data Format', ... -% 'BAPS-format', 'GenePop-format', 'Preprocessed data', ... -% 'BAPS-format'); - -switch input_type - -case 'BAPS-format' - waitALittle; - [filename1, pathname1] = uigetfile('*.txt', 'Load data in BAPS-format'); - if filename1==0 - return; - end - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname1 filename1]); - end - - data = load([pathname1 filename1]); - ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS - if (ninds==0) - disp('Incorrect Data-file.'); - return; - end - - waitALittle; - [filename2,pathname2]=uigetfile('*.txt', 'Load group coordinates'); - if filename2==0 - return - end - - coordinates = load([pathname2 filename2]); - %viallinen = testaaKoordinaatit(ninds, coordinates); - [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates); % added by Lu Cheng, 05.12.2012 - if viallinen - disp('Incorrect coordinates'); - return - end - - inp = [filename1 ' & ' filename2]; - h0 = findobj('Tag','filename1_text'); - set(h0,'String',inp); - clear h0; clear inp; - clear filename1; clear filename2; clear pathname1; clear pathname2; - - load_names = questdlg('Do you wish to specify the names of the groups?',... - 'Input group names?','Yes','No','Yes'); - if isequal(load_names,'Yes') - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load group names'); - popnames = initPopNames([pathname filename]); - if (size(popnames,1)~=ninds) - disp('Incorrect name-file.'); - popnames = []; - end - else - popnames = []; - end - - disp('Pre-processing the data. This may take several minutes.'); - - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - [data, rows, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rows); - - rowsFromInd = 0; % Ei tiedet? - - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - c.data = data; c.rows = rows; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.popnames = popnames; c.Z = Z; - c.cliques = cliques; c.separators = separators; - c.vorPoints = vorPoints; c.rowsFromInd = rowsFromInd; - c.vorCells = vorCells; c.pointers = pointers; - c.coordinates = coordinates; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - clear c; - end; - -%%%%%%%%%%%%% added by Lu Cheng 11.11.2012 START %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -case 'FASTA-format' - -% setWindowOnTop(base,'false') - [filename1, pathname1] = uigetfile({'*.fasta';'*.*'}, 'Load data in FASTA-format'); - if filename1==0 - return; - end - - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - %data = load([pathname1 filename1]); - [~, seqs] = fastaread([pathname1 filename1]); - seqs = seqs(:); - alnMat = cell2mat(seqs); - nSeq = length(seqs); - clear seqs; - - cc = preprocAln(alnMat); - - setWindowOnTop(base,'false') - [filename1_1,pathname1_1]=uigetfile('*.txt', 'Load group partition'); - if filename1_1==0 - return - end - groupPartition = load([pathname1_1 filename1_1]); - if nSeq~=length(groupPartition) - warning('Number of individuals inconsistent in %s (%d) and %s (%d).',... - filename1,nSeq,filename1_1,length(groupPartition)); - return; - else - nPregroup = length(unique(groupPartition)); - ninds = nPregroup; - assert(nPregroup==max(groupPartition)); - - cc.nPregroup = nPregroup; - cc.groupPartition = groupPartition; - - disp('Calculating distance matrix. This may take several minutes.'); - - pgdist = nchoosek(nPregroup,2); - tmpIndK=1; - for i=1:nPregroup - for j=i+1:nPregroup - tmpIndsI = find(groupPartition==i); - tmpIndsJ = find(groupPartition==j); - tmpNI = length(tmpIndsI); - tmpNJ = length(tmpIndsJ); - tmpSum=0; - for k=tmpIndsJ(:)' - tmp = alnMat(tmpIndsI,:)~=repmat(alnMat(k,:),tmpNI,1); - tmpSum = tmpSum+sum(tmp(:)); - end - - pgdist(tmpIndK)=tmpSum/tmpNI/tmpNJ; - tmpIndK = tmpIndK+1; - end - end - clear tmp* alnMat - end - - setWindowOnTop(base,'false') - [filename2,pathname2]=uigetfile('*.txt', 'Load group coordinates'); - if filename2==0 - return - end - - coordinates = load([pathname2 filename2]); - %viallinen = testaaKoordinaatit(ninds, coordinates); - [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates); % added by Lu Cheng, 05.12.2012 - if viallinen - disp('Incorrect coordinates'); - return - end - - inp = [filename1 ' & ' filename1_1 ' & ' filename2]; - h0 = findobj('Tag','filename1_text'); - set(h0,'String',inp); - clear h0; clear inp; - clear filename1; clear filename2; clear pathname1; clear pathname2; - clear filename1_1 pathname1_1 - - load_names = questdlg('Do you wish to specify the names of the groups?',... - 'Input group names?','Yes','No','Yes'); -% load_names = 'No'; - if isequal(load_names,'Yes') - waitALittle; - [filename, pathname] = uigetfile('*.txt', 'Load group names'); - popnames = initPopNames([pathname filename]); - if (size(popnames,1)~=ninds) - disp('Incorrect name-file.'); - popnames = []; - else - popnames = popnames(:,1); - - end - else - popnames = []; - end - - disp('Pre-processing the data. This may take several minutes.'); - - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - - cc.locCliques = cliques; - cc.locSeparators = separators; - cc.popnames = popnames; - cc.vorPoints = vorPoints; - cc.vorCells = vorCells; - cc.pointers = pointers; - cc.coordinates = coordinates; - format_type = 'FASTA'; - - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - save(kokonimi,'cc','pgdist','format_type','groupPartition','-v7.3'); - end - - handlePopFastaCase(cc,groupPartition,pgdist); - - return; - -%%%%%%%%%%%%%add by Lu Cheng 11.11.2012 END %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -case 'GenePop-format' - waitALittle; - [filename1, pathname1] = uigetfile('*.txt', 'Load data in GenePop-format'); - if filename1==0 - return; - end - - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname1 filename1]); - end - - kunnossa = testaaGenePopData([pathname1 filename1]); - if kunnossa==0 - return - end - [data,popnames]=lueGenePopData([pathname1 filename1]); - - waitALittle; - [filename2,pathname2]=uigetfile('*.txt', 'Load group coordinates'); - if filename2==0 - return - end - - ninds = max(data(:,end)); - coordinates = load([pathname2 filename2]); - %viallinen = testaaKoordinaatit(ninds, coordinates); - [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates); % added by Lu Cheng, 05.12.2012 - - if viallinen - disp('Incorrect coordinates'); - return - end - - inp = [filename1 ' & ' filename2]; - h0 = findobj('Tag','filename1_text'); - set(h0,'String',inp); - clear h0; clear inp; - clear filename1; clear filename2; clear pathname1; clear pathname2; - - disp('Pre-processing the data. This may take several minutes.'); - - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - [data, rows, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); - [Z,dist] = newGetDistances(data,rows); - - rowsFromInd = 2; %Tiedetään - - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - c.data = data; c.rows = rows; c.alleleCodes = alleleCodes; - c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; - c.dist = dist; c.popnames = popnames; c.Z = Z; - c.cliques = cliques; c.separators = separators; - c.vorPoints = vorPoints; c.rowsFromInd = rowsFromInd; - c.vorCells = vorCells; c.pointers = pointers; - c.coordinates = coordinates; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - clear c; - end; - -case 'Preprocessed data' - waitALittle; - [filename, pathname] = uigetfile('*.mat', 'Load pre-processed data'); - if filename==0 - return; - end - if ~isempty(partitionCompare) - fprintf(1,'Data: %s\n',[pathname filename]); - end - h0 = findobj('Tag','filename1_text'); - set(h0,'String',filename); clear h0; - - struct_array = load([pathname filename]); - - if isfield(struct_array,'format_type') && strcmp(struct_array.format_type,'FASTA') - handlePopFastaCase(struct_array.cc,struct_array.groupPartition,struct_array.pgdist); - return; - end - - if isfield(struct_array,'c') %Matlab versio - c = struct_array.c; - if ~isfield(c,'dist') - disp('Incorrect file format'); - return - end - elseif isfield(struct_array,'dist') %Mideva versio - c = struct_array; - else - disp('Incorrect file format'); - return; - end - data = double(c.data); rows = c.rows; alleleCodes = c.alleleCodes; - noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; - dist = c.dist; popnames = c.popnames; Z = c.Z; rowsFromInd = c.rowsFromInd; - - if isfield(c, 'cliques') - cliques = c.cliques; separators = c.separators; - vorPoints = c.vorPoints; vorCells = c.vorCells; - pointers = c.pointers; coordinates = c.coordinates; - clear c; - else - load_coord = questdlg(['The data file did not contain ',... - 'coordinate information. Do you wish to load coordinates?'], ... - 'Load coordinates?',... - 'Yes','No','Yes'); - if isequal(load_coord, 'No') - return - end - waitALittle; - [filename2,pathname2]=uigetfile('*.txt', 'Load group coordinates'); - if filename2==0 - return - end - - ninds = max(data(:,end)); - coordinates = load([pathname2 filename2]); - %viallinen = testaaKoordinaatit(ninds, coordinates); - [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates); % added by Lu Cheng, 05.12.2012 - - if viallinen - disp('Incorrect coordinates'); - return - end - inp = [filename ' & ' filename2]; - h0 = findobj('Tag','filename1_text'); - set(h0,'String',inp); - clear h0; clear inp; - clear filename; clear filename2; clear pathname; clear pathname2; - - disp('Pre-processing the data. This may take several minutes.'); - [cliques, separators, vorPoints, vorCells, pointers] = ... - handleCoords(coordinates); - save_preproc = questdlg('Do you wish to save pre-processed data?',... - 'Save pre-processed data?',... - 'Yes','No','Yes'); - if isequal(save_preproc,'Yes'); - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); - kokonimi = [pathname filename]; - c.cliques = cliques; c.separators = separators; - c.vorPoints = vorPoints; c.vorCells = vorCells; - c.pointers = pointers; c.coordinates = coordinates; -% save(kokonimi,'c'); - save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 - clear c; - end; - end - otherwise - return -end - -global PARTITION; global COUNTS; -global SUMCOUNTS; %global POP_LOGML; -global SEPCOUNTS; global CLIQCOUNTS; -clearGlobalVars; - -c.data=data; c.alleleCodes = alleleCodes; -c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; -c.dist=dist; c.Z=Z; c.rowsFromInd = rowsFromInd; -c.cliques = cliques; c.separators = separators; -c.rows = rows; - -% partition compare -if ~isempty(partitionCompare) - nsamplingunits = size(rows,1); - data = data(:,1:end-1); - partitions = partitionCompare.partitions; - npartitions = size(partitions,2); - partitionLogml = zeros(1,npartitions); - for i = 1:npartitions - % number of unique partition lables - npops = length(unique(partitions(:,i))); - try - partitionInd = zeros(rows(end),1); - partitionSample = partitions(:,i); - for j = 1:nsamplingunits - partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j); - end - [sumcounts, counts] = initialCounts(partitionInd, data, npops, noalle); - [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops, partitionSample); - partitionLogml(i) = ... - computeLogml(adjprior, priorTerm, cliqcounts, sepcounts, counts, sumcounts); - catch - disp('*** ERROR: unmatched data.'); - return - end - end - % return the logml result - partitionCompare.logmls = partitionLogml; - set(h1, 'userdata', partitionCompare); - return -end - -if fixedK - [logml, npops, partitionSummary]=spatialMix_fixK(c); -else - [logml, npops, partitionSummary]=spatialMix(c); -end - -if logml==1 - return -end - -lastCol = data(:,end); -data = data(:,1:end-1); - -h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); -h0 = findobj('Tag','filename2_text'); -outp = get(h0,'String'); -[varmuus,changesInLogml] = writeMixtureInfo(logml, rows, data, adjprior, priorTerm, ... - outp,inp,partitionSummary, popnames, cliques, separators, fixedK); - -%checkLogml(priorTerm, adjprior, cliques, separators); - -viewPopMixPartition(PARTITION, rows, popnames); - -if isequal(popnames, []) - names = pointers; -else - %Etsitään voronoi-soluja vastaavat nimet. - names = cell(size(pointers)); - indices = 1:length(popnames); - for i = 1:length(pointers) - inds = pointers{i}; - namesInCell = []; - for j = 1:length(inds) - ind = inds(j); - I = find(indices > ind); - if isempty(I) - nameIndex = indices(end); - else - nameIndex = min(I) -1; - end - name = popnames{nameIndex}; - namesInCell = [namesInCell name]; - end - names{i} = namesInCell; - end -end -vorPlot(vorPoints, vorCells, PARTITION, pointers, coordinates, names); - -talle = questdlg(['Do you want to save the mixture populations ' ... - 'so that you can use them later in admixture analysis or plot ' ... - 'additional images?'], ... - 'Save results?','Yes','No','Yes'); -if isequal(talle,'Yes') - waitALittle; - [filename, pathname] = uiputfile('*.mat','Save results as'); - - if (filename == 0) & (pathname == 0) - % Cancel was pressed - return - else % copy OUTPUT_FILE into the text file with the same name. - if exist(OUTPUT_FILE,'file') - copyfile(OUTPUT_FILE,[pathname filename '.txt']) - delete(OUTPUT_FILE) - end - end - - if rowsFromInd==0 - %Käytettiin BAPS-formaattia, eik?rowsFromInd ole tunnettu. - [popnames, rowsFromInd] = findOutRowsFromInd(popnames, rows); - end - - groupPartition = PARTITION; - - fiksaaPartitioYksiloTasolle(rows, rowsFromInd); - - c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS; - c.alleleCodes = alleleCodes; c.adjprior = adjprior; c.popnames = popnames; - c.rowsFromInd = rowsFromInd; c.data = data; c.npops = npops; - c.noalle = noalle; c.groupPartition = groupPartition; - c.pointers = pointers; c.vorPoints = vorPoints; c.vorCells = vorCells; - c.coordinates = coordinates; c.names = names; c.varmuus = varmuus; - c.rows = rows; c.mixtureType = 'spatialPop'; - c.logml = logml; c.changesInLogml = changesInLogml; - - % added by Lu Cheng, 05.12.2012 - tmpFile = [pathname filename '.mapfile.txt']; - fid = fopen(tmpFile,'w+'); - fprintf(fid,'GroupLabel\tLatitude\tLongitude\tDescription\tLabel\n'); - for i=1:max(lastCol) - fprintf(fid,'%d\t%.10f\t%.10f\t%d_%d\t%d\n',i,coordinates(i,1),coordinates(i,2),... - i,PARTITION(i),PARTITION(i)); - end - fclose(fid); - -% save([pathname filename], 'c'); - save([pathname filename], 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012 -else - if exist(OUTPUT_FILE,'file') - delete(OUTPUT_FILE) - end -end - - - - - -%------------------------------------------------------------------------------------- -%------------------------------------------------------------------------------------- - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -%global POP_LOGML; POP_LOGML = []; -global SEPCOUNTS; SEPCOUNTS = []; -global CLIQCOUNTS; CLIQCOUNTS = []; - -%------------------------------------------------------------------------------------- - - -function rows = computeRows(rowsFromInd, inds, ninds) -% On annettu yksilöt inds. Funktio palauttaa vektorin, joka -% sisältää niiden rivien numerot, jotka sisältävät yksilöiden -% dataa. - -rows = inds(:, ones(1,rowsFromInd)); -rows = rows*rowsFromInd; -miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]); -rows = rows - miinus; -rows = reshape(rows', [1,rowsFromInd*ninds]); - - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - - -%-------------------------------------------------------------------------- - - -function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -% Suorittaa yksilön seuraavan tilan arvonnan - -y = logml + muutokset; % siirron jälkeiset logml:t -y = y - max(y); -y = exp(y); -summa = sum(y); -y = y/summa; -y = cumsum(y); - -i2 = rand_disc(y); % uusi kori -suurin = muutokset(i2); - - -%-------------------------------------------------------------------------------------- - - -function svar=rand_disc(CDF) -%returns an index of a value from a discrete distribution using inversion method -slump=rand; -har=find(CDF>slump); -svar=har(1); - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksil?ind -% siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global CLIQCOUNTS; -global SEPCOUNTS; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2(i1, i2, diffInCounts, ... - cliques, separators, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -%global POP_LOGML; -global CLIQCOUNTS; -global SEPCOUNTS; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -diffInCliqCounts = CLIQCOUNTS(:,i1); -diffInSepCounts = SEPCOUNTS(:,i1); - - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = 0; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2, cliques, separators); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; global CLIQCOUNTS; -global SUMCOUNTS; global SEPCOUNTS; -%global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -diffInCliqCounts = computeDiffInCliqCounts(cliques, muuttuvat); -diffInSepCounts = computeDiffInCliqCounts(separators, muuttuvat); - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -CLIQCOUNTS(:,i2) = CLIQCOUNTS(:,i2) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; -SEPCOUNTS(:,i2) = SEPCOUNTS(:,i2) + diffInSepCounts; - -%POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------- - - -function inds = returnInOrder(inds, pop, globalRows, data, ... - adjprior, priorTerm) -% Palauttaa yksilöt järjestyksess?siten, ett?ensimmäisen?on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind =inds(i); - rows = globalRows(i,1):globalRows(i,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset(ind, globalRows, ... - data, adjprior, priorTerm, logml, cliques, separators) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksilöt inds siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. -% Huom! Laskee muutoksen vain yhdelle tyhjälle populaatiolle, muiille -% tyhjille tulee muutokseksi 0. - -global COUNTS; global SUMCOUNTS; -global PARTITION; %global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -counts = COUNTS; -sumcounts = SUMCOUNTS; - -[emptyPop, pops] = findEmptyPop(npops); - -i1 = PARTITION(ind); - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -i2 = sort(i2); - -rows = globalRows(ind,1):globalRows(ind,2); -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); -diffInSepCounts = computeDiffInCliqCounts(separators, ind); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; -SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - -% Asetetaan muillekin tyhjille populaatioille sama muutos, kuin -% emptyPop:lle - -if emptyPop > 0 - empties = mysetdiff((1:npops), [i2 i1]); - muutokset(empties) = muutokset(emptyPop); -end - -COUNTS = counts; -SUMCOUNTS = sumcounts; - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2(i1, globalRows, ... - data, adjprior, priorTerm, logml, cliques, separators); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli korin i1 kaikki yksilöt siirretään -% koriin i. -% Laskee muutokset vain yhdelle tyhjälle populaatiolle, muille tulee -% muutokseksi 0. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -[emptyPop, pops] = findEmptyPop(npops); - -i2 = [pops(find(pops~=i1))]; -if emptyPop > 0 - i2 =[i2 emptyPop]; -end - -inds = find(PARTITION == i1); -ninds = length(inds); - -rows = []; -for i = 1:ninds - rows = [rows globalRows(inds(i),1):globalRows(inds(i),2)]; -end -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); -diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); -diffInSepCounts = computeDiffInCliqCounts(separators, inds); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -CLIQCOUNTS(:,i1) = 0; -SEPCOUNTS(:,i1) = 0; - -for i=i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; -end - -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; -CLIQCOUNTS(:,i1) = diffInCliqCounts; -SEPCOUNTS(:,i1) = diffInSepCounts; - - - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... - data, adjprior, priorTerm, i1, logml, cliques, separators) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. -% Laskee vain yhden tyhjän populaation, muita kohden muutokseksi jää 0. - - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -global CLIQCOUNTS; global SEPCOUNTS; - -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = []; - for i = 1:ninds - ind = inds(i); - rows = [rows; (globalRows(ind,1):globalRows(ind,2))']; - end - diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - diffInCliqCounts = computeDiffInCliqCounts(cliques, inds); - diffInSepCounts = computeDiffInCliqCounts(separators, inds); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) - diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) - diffInSepCounts; - - [emptyPop, pops] = findEmptyPop(npops); - i2 = [pops(find(pops~=i1))]; - if emptyPop > 0 - i2 =[i2 emptyPop]; - end - - for i = i2 - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) + diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) + diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) + diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) + diffInSumCounts; - - muutokset(pop2,i) = computeLogml(adjprior, priorTerm) - logml; - - CLIQCOUNTS(:,i) = CLIQCOUNTS(:,i) - diffInCliqCounts; - SEPCOUNTS(:,i) = SEPCOUNTS(:,i) - diffInSepCounts; - COUNTS(:,:,i) = COUNTS(:,:,i) - diffInCounts; - SUMCOUNTS(i,:) = SUMCOUNTS(i,:) - diffInSumCounts; - end - - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - CLIQCOUNTS(:,i1) = CLIQCOUNTS(:,i1) + diffInCliqCounts; - SEPCOUNTS(:,i1) = SEPCOUNTS(:,i1) + diffInSepCounts; - end -end - -%-------------------------------------------------------------------------- -function muutokset = laskeMuutokset5(inds, globalRows, data, ... - adjprior, priorTerm, logml, cliques, separators, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?i vaihtaisi koria i1:n ja i2:n välill? - -global COUNTS; global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; global SEPCOUNTS; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -for i = 1:ninds - ind = inds(i); - - rows = globalRows(ind,1):globalRows(ind,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - if PARTITION(ind)==i1 - pop1 = i1; %mist? - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - - diffInCliqCounts = computeDiffInCliqCounts(cliques, ind); - diffInSepCounts = computeDiffInCliqCounts(separators, ind); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) - diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) + diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) - diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) + diffInSepCounts; - - muutokset(i) = computeLogml(adjprior, priorTerm) - logml; - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - - CLIQCOUNTS(:,pop1) = CLIQCOUNTS(:,pop1) + diffInCliqCounts; - CLIQCOUNTS(:,pop2) = CLIQCOUNTS(:,pop2) - diffInCliqCounts; - SEPCOUNTS(:,pop1) = SEPCOUNTS(:,pop1) + diffInSepCounts; - SEPCOUNTS(:,pop2) = SEPCOUNTS(:,pop2) - diffInSepCounts; -end - -%-------------------------------------------------------------------------- - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - - - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - -%------------------------------------------------------------------------------------ - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; -global CLIQCOUNTS; -global SEPCOUNTS; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); -CLIQCOUNTS = CLIQCOUNTS(:,notEmpty); -SEPCOUNTS = SEPCOUNTS(:,notEmpty); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%---------------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - - -%--------------------------------------------------------------------------------------- - - -function [newData, rows, alleleCodes, noalle, adjprior, priorTerm] = ... - handleData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako -% rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan -% tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio -% lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän -% rivej?kuin maksimimäär? -% Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin -% koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. -% Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -% koodit saavat arvoja välill?1,...,noalle(j). -% -% Muutettu vastaamaan greedyPopMixin handlePopDataa. - -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -%rows = cell(nind,1); -rows = zeros(nind,2); -for i=1:nind - rivit = find(data(:,end)==i)'; - rows(i,1) = min(rivit); - rows(i,2) = max(rivit); -end -newData = data; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%---------------------------------------------------------------------------------------- - -function [Z, dist] = newGetDistances(data, initRows) - -ninds = size(initRows,1); -nloci = size(data,2)-1; -riviLkm = nchoosek(ninds,2); - -empties = find(data<0); -data(empties)=0; -data = uint8(data); % max(noalle) oltava <256 - -pariTaulu = zeros(riviLkm,2); -aPointer=1; -for a=1:ninds-1 - pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a; - pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)'; - aPointer = aPointer+ninds-a; -end - -%eka = pariTaulu(:,ones(1,rowsFromInd)); -%eka = eka * rowsFromInd; -%miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -%eka = eka - miinus; - -koot = initRows(:,2) - initRows(:,1); -maxSize = max(koot) + 1; - -rows = zeros(ninds, maxSize); - -for i=1:ninds - apu = initRows(i,1):initRows(i,2); - rows(i, 1:length(apu)) = apu; -end -eka = zeros(riviLkm, maxSize); -toka = zeros(riviLkm, maxSize); - -for i = 1:riviLkm - eka(i, :) = rows(pariTaulu(i, 1), :); - toka(i, :) = rows(pariTaulu(i,2), :); -end - -%eka = uint16(eka); -%toka = uint16(toka); - -summa = zeros(riviLkm,1); -vertailuja = zeros(riviLkm,1); - -clear pariTaulu; clear miinus; - -x = zeros(size(eka)); x = uint8(x); -y = zeros(size(toka)); y = uint8(y); - -for j=1:nloci; - - for k=1:maxSize - I = find(eka(:,k)>0); - x(I,k) = data(eka(I,k),j); - I = find(toka(:,k)>0); - y(I,k) = data(toka(I,k),j); - end - - for a=1:maxSize - for b=1:maxSize - vertailutNyt = double(x(:,a)>0 & y(:,b)>0); - vertailuja = vertailuja + vertailutNyt; - lisays = (x(:,a)~=y(:,b) & vertailutNyt); - summa = summa+double(lisays); - end - end -end - -clear x; clear y; clear vertailutNyt; -nollat = find(vertailuja==0); -dist = zeros(length(vertailuja),1); -dist(nollat) = 1; -muut = find(vertailuja>0); -dist(muut) = summa(muut)./vertailuja(muut); -clear summa; clear vertailuja; - -Z = linkage(dist'); - -%---------------------------------------------------------------------------------------- - - -function [Z, distances]=getDistances(data_matrix,nclusters) - -%finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance -%gives partition in 8-bit format -%allocates all alleles of a single individual into the same basket -%data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row, -%i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row -%missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -distances=zeros(nchoosek(n,2),1); -pointer=1; -for i=1:n-1 - i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci); - for j=i+1:n - d_ij=0; - j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci); - vertailuja = 0; - for k=1:size(i_data,1) - for l=1:size(j_data,1) - here_i=find(i_data(k,:)>=0); - here_j=find(j_data(l,:)>=0); - here_joint=intersect(here_i,here_j); - vertailuja = vertailuja + length(here_joint); - d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint))); - end - end - d_ij = d_ij / vertailuja; - distances(pointer)=d_ij; - pointer=pointer+1; - end -end - -Z=linkage(distances'); - - - -%---------------------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile) - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) & ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = 0; -end - - -%----------------------------------------------------------------------------------- -% Laskee arvot cliqcounts:lle ja sepcounts:lle - -function [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops, PARTITION) - -ncliq = size(cliques,1); -nsep = size(separators,1); - -cliqPartition = zeros(ncliq, size(cliques,2)); -sepPartition = zeros(nsep, size(separators, 2)); - -apuCliq = find(cliques > 0); -apuSep = find(separators > 0); - -cliqPartition(apuCliq) = PARTITION(cliques(apuCliq)); -sepPartition(apuSep) = PARTITION(separators(apuSep)); - - -cliqcounts = zeros(ncliq, npops); -for i = 1:npops - cliqcounts(:,i) = sum(cliqPartition == i, 2); -end - - -sepcounts = zeros(nsep, npops); -for i = 1:npops - sepcounts(:,i) = sum(sepPartition == i, 2); -end - -%------------------------------------------------------------------------- - -function diffInCliqCounts = computeDiffInCliqCounts(cliques, inds) -% Laskee muutoksen CLIQCOUNTS:ssa (tai SEPCOUNTS:ssa, jos syötteen? -% separators) kun yksilöt inds siirretään. -% diffInCliqcounts on ncliq*1 taulu, joka on CLIQCOUNTS:n sarakkeesta josta -% yksilöt inds siirretään ja lisättäv?sarakkeeseen, johon yksilöt -% siirretään. - -ncliq = size(cliques,1); -diffInCliqCounts = zeros(ncliq,1); -ninds = length(inds); -for i = 1:ninds - ind = inds(i); - rivit = sum((cliques == ind),2); - diffInCliqCounts = diffInCliqCounts + rivit; -end - -%----------------------------------------------------------------------- - -function [logml, spatialPrior] = computeLogml(adjprior,priorTerm, ... - CLIQCOUNTS, SEPCOUNTS, ... - COUNTS, SUMCOUNTS) - -notEmpty = any(CLIQCOUNTS); -npops = length(find(notEmpty == 1)); -sumcliq=sum(CLIQCOUNTS, 2); -sumsep=sum(SEPCOUNTS, 2); -ncliq = size(CLIQCOUNTS, 1); -nsep = size(SEPCOUNTS, 1); - -cliqsizes = sum(CLIQCOUNTS, 2)'; -sepsizes = sum(SEPCOUNTS, 2)'; -cliqsizes = min([cliqsizes; npops*ones(1,ncliq)])'; -sepsizes = min([sepsizes; npops*ones(1,nsep)])'; - -klikkitn = sum(sum(gammaln(CLIQCOUNTS(:,notEmpty) + repmat(1./cliqsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./cliqsizes))) ... - - sum(gammaln(sumcliq + 1)); - -septn = sum(sum(gammaln(SEPCOUNTS(:,notEmpty) + repmat(1./sepsizes, [1 npops])))) ... - - sum(npops*(gammaln(1./sepsizes))) ... - - sum(gammaln(sumsep + 1)); - - -%klikkitn = sum(sum(gammaln(CLIQCOUNTS + 1/npops))) ... -% - ncliq*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumcliq + 1)); -%septn = sum(sum(gammaln(SEPCOUNTS + 1/npops))) ... -% - nsep*npops*(gammaln(1/npops)) ... -% - sum(gammaln(sumsep + 1)); - -spatialPrior = (klikkitn - septn); - -%if spatialPrior > 0 -% keyboard -%end - -x = size(COUNTS,1); -y = size(COUNTS,2); -z = size(COUNTS,3); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 z]) + COUNTS) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS),2) - priorTerm; - -logml = sum(popLogml) + spatialPrior; -%-------------------------------------------------------------------------- - - -function initializeGammaln(ninds, rowsFromInd, maxSize) -%Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j) -global GAMMA_LN; -GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxSize); -for i=1:(ninds+1)*rowsFromInd - for j=1:maxSize - GAMMA_LN(i,j)=gammaln((i-1) + 1/j); - end -end - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - - -%---------------------------------------------------------------------------- - - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimmäinen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) | isequal(line2,-1) | isequal(line3,-1)) - disp('Incorrect file format 1168'); fclose(fid); - return -end -if (testaaPop(line1)==1 || testaaPop(line2)==1) - disp('Incorrect file format 1172'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi tällöin lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format 1180'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin nelj?täytyy sisältää pilkku. - disp('Incorrect file format 1185'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1195'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 & ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format 1206'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan jälkeen - if isequal(line4,-1) - disp('Incorrect file format 1212'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin täytyy sisältää pilkku. - disp('Incorrect file format 1217'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format 1228'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - -%------------------------------------------------------ - - -function [data, popnames] = lueGenePopData(tiedostonNimi) - -fid = fopen(tiedostonNimi); -line = fgetl(fid); %ensimmäinen rivi -line = fgetl(fid); %toinen rivi -count = rivinSisaltamienMjonojenLkm(line); - -line = fgetl(fid); -lokusRiveja = 1; -while (testaaPop(line)==0) - lokusRiveja = lokusRiveja+1; - line = fgetl(fid); -end - -if lokusRiveja>1 - nloci = lokusRiveja; -else - nloci = count; -end - -popnames = cell(10,2); -data = zeros(100, nloci+1); -nimienLkm=0; -ninds=0; -poimiNimi=1; -digitFormat = -1; -while line ~= -1 - line = fgetl(fid); - - if poimiNimi==1 - %Edellinen rivi oli 'pop' - nimienLkm = nimienLkm+1; - ninds = ninds+1; - if nimienLkm>size(popnames,1); - popnames = [popnames; cell(10,2)]; - end - nimi = lueNimi(line); - if digitFormat == -1 - digitFormat = selvitaDigitFormat(line); - divider = 10^digitFormat; - end - popnames{nimienLkm, 1} = {nimi}; %Näin se on greedyMix:issäkin?!? - popnames{nimienLkm, 2} = ninds; - poimiNimi=0; - - data = addAlleles(data, ninds, line, divider); - - elseif testaaPop(line) - poimiNimi = 1; - - elseif line ~= -1 - ninds = ninds+1; - data = addAlleles(data, ninds, line, divider); - end -end - -data = data(1:ninds*2,:); -popnames = popnames(1:nimienLkm,:); -fclose(fid); - -npops = size(popnames,1); -ind = 1; -for pop = 1:npops - if pop0 - fid = fopen(outPutFile,'a'); -else - fid = -1; - diary(OUTPUT_FILE); % save in text anyway. -end - -dispLine; -disp('RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:'); -disp(['Data file: ' inputFile]); -disp(['Number of clustered groups: ' ownNum2Str(ninds)]); -disp(['Number of clusters in optimal partition: ' ownNum2Str(npops)]); -disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -disp(' '); -if (fid ~= -1) - fprintf(fid,'%s \n', ['RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clustered groups: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Number of clusters in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); - fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -end - -cluster_count = length(unique(PARTITION)); -disp(['Best Partition: ']); -if (fid ~= -1) - fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); -end -for m=1:cluster_count - indsInM = find(PARTITION==m); - length_of_beginning = 11 + floor(log10(m)); - cluster_size = length(indsInM); - - if names - text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; - for k = 2:cluster_size - text = [text ', ' char(popnames{indsInM(k)})]; - end; - else - text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; - for k = 2:cluster_size - text = [text ', ' num2str(indsInM(k))]; - end; - end - text = [text '}']; - while length(text)>58 - %Take one line and display it. - new_line = takeLine(text,58); - text = text(length(new_line)+1:end); - disp(new_line); - if (fid ~= -1) - fprintf(fid,'%s \n',[new_line]); - fprintf(fid,'\n'); - end - if length(text)>0 - text = [blanks(length_of_beginning) text]; - else - text = []; - end; - end; - if ~isempty(text) - disp(text); - if (fid ~= -1) - fprintf(fid,'%s \n',[text]); - fprintf(fid,'\n'); - end - end -end - -disp(' '); -disp(' '); -disp('Changes in log(marginal likelihood) if group i is moved to cluster j:'); -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if group i is moved to cluster j:']); fprintf(fid, '\n'); -end - -if names - nameSizes = zeros(ninds,1); - for i = 1:ninds - nimi = char(popnames{i}); - nameSizes(i) = length(nimi); - end - maxSize = max(nameSizes); - maxSize = max(maxSize, 5); - erotus = maxSize - 5; - alku = blanks(erotus); - ekarivi = [alku 'group' blanks(6+erotus)]; -else - ekarivi = 'group '; -end - -for i = 1:cluster_count - ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; -end -disp(ekarivi); -if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); -end - -varmuus = zeros(ninds,1); -changesInLogml = LOGDIFF'; - -for ind = 1:ninds - %[muutokset, diffInCounts] = laskeMuutokset(ind, globalRows, data, ... - % adjprior, priorTerm, logml, cliques, separators); - %changesInLogml(:,ind) = muutokset; - muutokset = changesInLogml(:,ind); - if sum(exp(muutokset))>0 - varmuus(ind) = 1 - 1/sum(exp(muutokset)); - else - varmuus(ind) = 0; - end - if names - nimi = char(popnames{ind}); - rivi = [blanks(maxSize - length(nimi)) nimi ':']; - else - rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; - end - for j = 1:npops - rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); - end -end - -disp(' '); disp(' '); -disp('KL-divergence matrix:'); -dist_mat = zeros(npops, npops); -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); -end - -maxnoalle = size(COUNTS,1); -nloci = size(COUNTS,2); -d = zeros(maxnoalle, nloci, npops); -prior = adjprior; -prior(find(prior==1))=0; -nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. -prior(1,nollia)=1; -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end -% ekarivi = blanks(7); -% for pop = 1:npops -% ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; -% end -ekarivi = num2str(npops); -disp(ekarivi); -if (fid ~= -1) - fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); -end - -for pop1 = 1:npops - rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; - for pop2 = 1:pop1-1 - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - % rivi = [rivi kldiv2str(div) ' ']; - dist_mat(pop1,pop2) = div; - end -% disp(rivi); -% if (fid ~= -1) -% fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); -% end -end - -dist_mat = dist_mat + dist_mat'; % make it symmetric -for pop1 = 1:npops - rivi = ['Cluster_' num2str(pop1) ' ']; - for pop2 = 1:npops - rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; - end - disp(rivi); - if (fid ~= -1) - fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); - end -end - -disp(' '); -disp(' '); -disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n'); -end - -partitionSummaryKaikki = partitionSummary; -partitionSummary =[]; -for i=1:size(partitionSummaryKaikki,3) - partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)]; -end -[I,J] = find(partitionSummaryKaikki(:,2,:)>-1e49); -partitionSummaryKaikki = partitionSummaryKaikki(I,:,:); -%keyboard - - -partitionSummary = sortrows(partitionSummary,2); -partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -if size(partitionSummary,1)>10 - vikaPartitio = 10; -else - vikaPartitio = size(partitionSummary,1); -end -for part = 1:vikaPartitio - line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end -end - -if ~fixedK - - disp(' '); - disp(' '); - disp('Probabilities for number of clusters'); - - if (fid ~= -1) - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); - fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n'); - end - - npopsTaulu = unique(partitionSummary(:,1)); - len = length(npopsTaulu); - probs = zeros(len,1); - partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); - sumtn = sum(exp(partitionSummary(:,2))); - for i=1:len - npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); - probs(i) = npopstn / sumtn; - end - for i=1:len - if probs(i)>1e-5 - line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; - disp(line); - if (fid ~= -1) - fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); - end - end - end -end - -if (fid ~= -1) - fclose(fid); -else - diary off -end - -%--------------------------------------------------------------- - - -function dispLine; -disp('---------------------------------------------------'); - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - - -%-------------------------------------------------------------------------- - - -function ninds = testaaOnkoKunnollinenBapsData(data) -%Tarkastaa onko viimeisess?sarakkeessa kaikki -%luvut 1,2,...,n johonkin n:ään asti. -%Tarkastaa lisäksi, ett?on vähintään 2 saraketta. -if size(data,1)<2 - ninds = 0; return; -end -lastCol = data(:,end); -ninds = max(lastCol); -if ~isequal((1:ninds)',unique(lastCol)) - ninds = 0; return; -end - - -%-------------------------------------------------------------------------- - - -function [ninds, data, heds] = testFastaData(inFile) -% added by Lu Cheng, 05.12.2012 -if ~exist(inFile,'file') - error('Fasta file %s does not exist!\n',inFile); -end - -[heds, seqs]=fastaread(inFile); -ninds = length(seqs); - -data = cell2mat(seqs(:)); -newData = ones(size(data))*-9; -newData(ismember(data,'Aa'))=1; -newData(ismember(data,'Cc'))=2; -newData(ismember(data,'Gg'))=3; -newData(ismember(data,'Tt'))=4; -data = [newData (1:ninds)']; - - -%-------------------------------------------------------------------------- - -function [cliques, separators, vorPoints, vorCells, pointers] ... - = handleCoords(coordinates) -%Laskee yksilöiden luonnolliset naapurit koordinaateista. -%Naapurit lasketaan lisäämäll?koordinaatteihin pisteit? -%jotta kutakin yksilöä vastaisi rajoitettu voronoi-solu -%Puuttuvat koordinaatit (negatiiviset) tulevat erakkopisteiksi -% -%Määrittää lisäksi yksilöit?vastaavat voronoi tesselaation solut. -%vorPoints:ssa on solujen kulmapisteet ja vorCells:ss?kunkin solun -%kulmapisteiden indeksit. Pointers{i} sisältää solussa i olevien yksilöiden -%indeksit. - - - -ninds = length(coordinates); -[I,J] = find(coordinates>0 | coordinates <0); %Käsitellään vain yksilöit? joilta koordinaatit -I = unique(I); %olemassa -ncoords = length(I); -puuttuvat = setdiff(1:ninds, I); -new_coordinates = addPoints(coordinates(I,:)); %Ympäröidään yksilöt apupisteill? - - -apuData = [new_coordinates(1:ncoords,:) (1:ncoords)']; -apuData = sortrows(apuData,[1 2]); -erot = [diff(apuData(:,1)) diff(apuData(:,2))]; -empties = find(erot(:,1)==0 & erot(:,2)==0); -samat = cell(length(empties),1); -pointer = 0; - -for i = 1:length(empties) - if i == 1 | empties(i) - empties(i-1) > 1 %Tutkitaan onko eri pisteess?kuin edellinen - pointer = pointer+1; - samat{pointer} = [apuData(empties(i),3) apuData(empties(i)+1,3)]; - else - samat{pointer} = [samat{pointer} apuData(empties(i)+1,3)]; - end -end - -samat = samat(1:pointer); - -erot = []; apuData = []; empties = []; - -%tri = delaunay(new_coordinates(:,1), new_coordinates(:,2), {'Qt','Qbb','Qc','Qz'}); %Apupisteiden takia ok. -tri = delaunay(new_coordinates(:,1), new_coordinates(:,2)); -%[rivi,sarake] = find(tri>ncoords); %Jätetään huomiotta apupisteet -%tri(rivi,:) = []; -pituus = tri(:,1); -pituus = length(pituus); -parit = zeros(6*pituus,2); -for i = 1:pituus %Muodostetaan kolmikoista parit - j = 6*(i-1)+1; - parit(j,:) = tri(i,1:2); - parit(j+1,:) = tri(i,1:2:3); - parit(j+2,:) = tri(i,2:3); - parit(j+3:j+5,:) = [parit(j:j+2,2) parit(j:j+2,1)]; -end -parit = unique(parit,'rows'); -[rivi,sarake] = find(parit>ncoords); %Jätetään huomiotta apupisteet -parit(rivi,:) = []; -parit = I(parit); %Otetaan poistetut takaisin mukaan -graph = sparse(parit(:,1),parit(:,2),1, ninds, ninds); - - -%Kopioidaan samassa pisteess?olevien yksilöiden naapurustot -%silt? jolle ne laitettu. - - for i = 1:length(samat); - taulu = I(samat{i}); - [rivi,sarake] = find(graph(taulu,:)>0); - if length(rivi) > 0 - kopioitava = graph(taulu(rivi(1)),:); - for j = 1:length(taulu); - graph(taulu(j),:) = kopioitava; - graph(:,taulu(j)) = kopioitava'; - end - end - end - - %Asetetaan samassa pisteess?olevat yksilöt toistensa naapureiksi - - for i = 1:length(samat) - for j = I(samat{i}) - for k = I(samat{i}) - if k ~= j - graph(j,k) = 1; - end - end - end - end - -%Laskee maksimin klikkien ja separaattorien koolle -%Määritetään myös klikit ja separaattorit - -[ncliq, nsep, cliq, sep] = laskeKlikit(graph, ninds, ninds); - -sumcliq = sum(ncliq); -sumsep = sum(nsep); -maxCliqSize = max(find(sumcliq > 0)); -maxSepSize = max(find(sumsep > 0)); - -cliques = zeros(length(cliq), maxCliqSize); -separators = zeros(length(sep), maxSepSize); - -nollia = zeros(1, length(cliq)); -for i = 1:length(cliq); - klikki = cliq{i}; - if length(klikki)>1 - cliques(i, 1:length(klikki)) = klikki; - else - nollia(i)=1; - end -end -cliques(find(nollia==1), :) = []; - -for i = 1:length(sep); - klikki = sep{i}; - separators(i, 1:length(klikki)) = klikki; -end - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%Määritetään yksilöit?vastaavat voronoi tesselaation solut - -[vorPoints, vorCells] = voronoin(new_coordinates, {'Qbb', 'Qz'}); - -bounded = ones(length(vorCells),1); -for i=1:length(vorCells) - if isempty(vorCells{i}) || length(find(vorCells{i}==1))>0 - bounded(i)=0; - end -end - - - -vorCells = vorCells(find(bounded == 1)); -pointers = cell(length(vorCells),1); -empties = zeros(1,length(vorCells)); -X = coordinates(:,1); -Y = coordinates(:,2); - -for i=1:length(pointers) - vx = vorPoints(vorCells{i},1); - vy = vorPoints(vorCells{i},2); - IN = inpolygon(X,Y,vx,vy); - if any(IN)==0 - empties(i) = 1; - else - pointers{i} = find(IN ==1)'; - end -end - -vorCells = vorCells(find(empties == 0)); -pointers = pointers(find(empties == 0)); - -%-------------------------------------------------------------------------- - -function [ncliques, nseparators, cliques, separators] = ... - laskeKlikit(M, maxCliqSize,maxSepSize) -%Laskee samankokoisten klikkien määrän verkosta M -%ncliques(i)=kokoa i olevien klikkien määr? -%nseparators vastaavasti - -ncliques=zeros(1,maxCliqSize); -nseparators=zeros(1,maxSepSize); - -if isequal(M,[]) - return; -end - -[cliques,separators]=findCliques(M); - -for i=1:length(cliques) - ncliques(length(cliques{i}))=ncliques(length(cliques{i}))+1; -end - -%cliqmax=max(find(ncliques~=0)); -%ncliques=ncliques(1:cliqmax); - -for i=1:length(separators) - nseparators(length(separators{i}))=nseparators(length(separators{i}))+1; -end - -%sepmax=max(find(nseparators~=0)); -%nseparators=nseparators(1:sepmax); - -%-------------------------------------------------------------------------- - -function C = mysetdiff(A,B) -% MYSETDIFF Set difference of two sets of positive integers (much faster than built-in setdiff) -% C = mysetdiff(A,B) -% C = A \ B = { things in A that are not in B } -% -% Original by Kevin Murphy, modified by Leon Peshkin - -if isempty(A) - C = []; - return; -elseif isempty(B) - C = A; - return; -else % both non-empty - bits = zeros(1, max(max(A), max(B))); - bits(A) = 1; - bits(B) = 0; - C = A(logical(bits(A))); -end - - -%-------------------------------------------------------------------------- - -function logml = checkLogml(priorTerm, adjprior, cliques, separators) -% tarkistaa logml:n - -global CLIQCOUNTS; -global SEPCOUNTS; -global PARTITION; - -npops = length(unique(PARTITION)); -[cliqcounts, sepcounts] = computeCounts(cliques, separators, npops); - -CLIQCOUNTS = cliqcounts; -SEPCOUNTS = sepcounts; - - -[logml, spatialPrior] = computeLogml(adjprior, priorTerm); - -disp(['logml: ' logml2String(logml) ', spatial prior: ' logml2String(spatialPrior)]); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; - -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end - -%-------------------------------------------------------------------------- - -% function viallinen = testaaKoordinaatit(ninds, coordinates) -% % Testaa onko koordinaatit kunnollisia. -% -% viallinen = 1; -% if ~isnumeric(coordinates) -% return -% end -% -% oikeanKokoinen = (size(coordinates,1) == ninds) & (size(coordinates,2) == 2); -% if oikeanKokoinen -% viallinen = 0; -% end - -function [viallinen coordinates] = testaaKoordinaatit(ninds, coordinates) -% Testaa onko koordinaatit kunnollisia. -% modified by Lu Cheng, 05.12.2012 - -viallinen = 1; -if ~isnumeric(coordinates) - warning('Coordinates are not numerical!'); - return; -end - -oikeanKokoinen = (size(coordinates,1) == ninds) & (size(coordinates,2) == 2); -if ~oikeanKokoinen - warning('Wrong coordinates dimension!'); - return; -end - -posstr = cellfun(@(x) sprintf('%.10f',x),num2cell(coordinates),'UniformOutput',false); -posstr = cellfun(@(x) regexprep(x,'0+$',''),posstr,'UniformOutput',false); - -uni1 = unique(posstr(:,1)); -uni2 = unique(posstr(:,2)); -posstr_new = posstr; - -if length(uni1)==ninds && length(uni2)==ninds - viallinen = 0; - return; -else - ans = questdlg('Input coordinates are not unique. Do you want to make them unique?','coordinates NOT unique', 'Yes','No','Yes'); - if strcmp(ans,'No') - warning('Coordinates are not unique!'); - return; - end -end - -for i=1:length(uni1) - tmpinds = find(ismember(posstr(:,1),uni1(i))); - tmpNinds = length(tmpinds); - - if tmpNinds==1 - continue; - end - - assert(tmpNinds<100); - tmparr = round(linspace(0,99,tmpNinds)); - tmparr = tmparr(randperm(tmpNinds)); - - for j=1:tmpNinds - posstr_new{tmpinds(j),1}=sprintf('%s%02d',posstr{tmpinds(j),1},tmparr(j)); - end -end - -for i=1:length(uni2) - tmpinds = find(ismember(posstr(:,2),uni2(i))); - tmpNinds = length(tmpinds); - - if tmpNinds==1 - continue; - end - - assert(tmpNinds<100); - tmparr = round(linspace(0,99,tmpNinds)); - tmparr = tmparr(randperm(tmpNinds)); - - for j=1:tmpNinds - posstr_new{tmpinds(j),2}=sprintf('%s%02d',posstr{tmpinds(j),2},tmparr(j)); - end -end - -coordinates = cellfun(@str2double,posstr_new); -uni1 = unique(coordinates(:,1)); -uni2 = unique(coordinates(:,2)); -if length(uni1)==ninds && length(uni2)==ninds - viallinen = 0; -else - warning('Can not make coordinates unique!'); -end - -%-------------------------------------------------------------------------- - - -function [sumcounts, counts] = ... - initialCounts(partition, data, npops, noalle) - -nloci=size(data,2); -% ninds = size(rows, 1); - -%koot = rows(:,1) - rows(:,2) + 1; -%maxSize = max(koot); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - - -%-------------------------------------------------------------------------- - -function [popnames2, rowsFromInd] = findOutRowsFromInd(popnames, rows) - -ploidisuus = questdlg('Specify the type of individuals in the data: ',... - 'Individual type?', 'Haploid', 'Diploid', 'Tetraploid', ... - 'Diploid'); - -switch ploidisuus -case 'Haploid' - rowsFromInd = 1; -case 'Diploid' - rowsFromInd = 2; -case 'Tetraploid' - rowsFromInd = 4; -end - -if ~isempty(popnames) - for i = 1:size(rows,1) - popnames2{i,1} = popnames{i,1}; - rivi = rows(i,1):rows(i,2); - popnames2{i,2} = (rivi(rowsFromInd))/rowsFromInd; - end -else - popnames2 = []; -end - -%-------------------------------------------------------------------------- - -function fiksaaPartitioYksiloTasolle(rows, rowsFromInd) - -global PARTITION; -totalRows = 0; -for ind = 1:size(rows,1) - totalRows = totalRows + (rows(ind,2)-rows(ind,1)+1); -end -partitio2 = zeros(totalRows/rowsFromInd,1); - -for ind = 1:size(rows,1) - kaikkiRivit = rows(ind,1):rows(ind,2); - for riviNumero = rowsFromInd:rowsFromInd:length(kaikkiRivit) - %for riviNumero = rowsFromInd:rowsFromInd:length(rows{ind}) - %rivi = rows{ind}(riviNumero); - rivi = kaikkiRivit(riviNumero); - partitio2(rivi/rowsFromInd) = PARTITION(ind); - end -end -PARTITION = partitio2;