Skip to content

Commit

Permalink
fix: avoid Penr statistic calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
edkerk committed Oct 19, 2024
1 parent 3ac5563 commit 61f9b12
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 18 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/gene-essentiality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ jobs:
ihuman = readYAMLmodel('model/Human-GEM.yml');
taskStruct = parseTaskList('data/metabolicTasks/metabolicTasks_Essential.txt');
[~, eGenes] = evalc('estimateEssentialGenes(ihuman, ''Hart2015_RNAseq.txt'', taskStruct);');
output = transpose(evaluateHart2015Essentiality(eGenes));
output = evaluateHart2015Essentiality(eGenes);
fid = fopen('data/testResults/gene-essential.csv','w');
fprintf(fid,[repmat('%s,',1,13) '%s\n'],output{:,1});
fprintf(fid,['%s,%d,%d,%d,%d' repmat(',%.4g',1,9) '\n'],output{:,2:end});
fprintf(fid,[repmat('%s,',1,13) '%s\n'],transpose(output{1,:}));
fprintf(fid,['%s,%d,%d,%d,%d' repmat(',%.4g',1,9) '\n'],transpose(output{2:end,:});
fclose(fid);
disp(evaluateHart2015Essentiality(eGenes));" | awk 'NR>9 && !/^\.+/') &&
disp(output);" | awk 'NR>9 && !/^\.+/') &&
PARSED_RESULTS="${TEST_RESULTS//'%'/'%25'}" &&
PARSED_RESULTS="${PARSED_RESULTS//$'\n'/'<br>'}" &&
PARSED_RESULTS="${PARSED_RESULTS//$'\r'/'<br>'}" &&
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,16 @@
% Table S2 from the Hart2015 datasets:
% In this study, essential genes are, by definition, a subset of fitness genes
x = readtable('Hart2015_TableS2.xlsx');

% remove duplicated rows (MARCH1 and MARCH2 genes)
ind1 = find(ismember(x.Gene,'MARCH1'),1,'last');
ind2 = find(ismember(x.Gene,'MARCH2'),1,'last');
x([ind1;ind2],:) = [];

% extract gene list and cell types
genes = x.Gene;
celltypes = regexprep(upper(x.Properties.VariableNames(3:7)'),'BF_','');

% for each cell type, determine the "fitness" genes, defined as those with
% a 5% FDRs were chosen as thresholds (the values are extracted from the
% supporting information of the Hart 2015 paper. Genes observed in 3 or more
Expand All @@ -63,7 +63,7 @@
% also get the genes that were essential in all 5 cell lines ("all")
celltypes(end+1) = {'all'};
fitness_mat(:,end+1) = all(fitness_mat == 1,2);

% put Hart2015 essentiality data into data structure for comparison
expdata = {};
expdata.genes = genes;
Expand Down Expand Up @@ -94,30 +94,31 @@
% calculate true and false positives and negatives
[TP,TN,FP,FN,Penr] = deal(nan(numel(tissues),1)); % initialize variables
for i = 1:numel(tissues)

[~,tissue_ind] = ismember(tissues(i), expdata.tissues);
if tissue_ind == 0
continue % a few tissues are missing from the DepMap dataset
end

modelGenes = eGenes.refModel.genes;
if strcmpi(tissues{i},'all')
modelEssential = modelGenes(sum(modelPred,2) == size(modelPred,2));
else
modelEssential = modelGenes(modelPred(:,i));
end
modelNonEssential = setdiff(modelGenes,modelEssential);

expGenes = expdata.genes(~isnan(expdata.essential(:,tissue_ind)));
expEssential = expdata.genes(expdata.essential(:,tissue_ind) == 1);
expNonEssential = setdiff(expGenes,expEssential);

TP(i) = sum(ismember(modelEssential, expEssential)); % true positives
TN(i) = sum(ismember(modelNonEssential, expNonEssential)); % true negatives
FP(i) = sum(ismember(modelEssential, expNonEssential)); % false positives
FN(i) = sum(ismember(modelNonEssential, expEssential)); % false negatives
FN(i) = sum(ismember(modelNonEssential, expEssential)); % false negatives

Penr(i) = EnrichmentTest(intersect(modelGenes,expGenes), intersect(modelEssential,expGenes), intersect(expEssential,modelGenes));
% Requires Statistics and Machine Learning Toolbox
%Penr(i) = EnrichmentTest(intersect(modelGenes,expGenes), intersect(modelEssential,expGenes), intersect(expEssential,modelGenes));
end

% calculate some metrics
Expand All @@ -126,13 +127,15 @@
accuracy = (TP + TN)./(TP + TN + FP + FN);
F1 = 2*TP./(2*TP + FP + FN);
MCC = ((TP.*TN) - (FP.*FN))./sqrt((TP+FP).*(TP+FN).*(TN+FP).*(TN+FN)); % Matthews correlation coefficient
PenrAdj = adjust_pvalues(Penr,'Benjamini');
%PenrAdj = adjust_pvalues(Penr,'Benjamini');

% get results for cell types
results = [{'cellLine','TP','TN','FP','FN','accuracy','sensitivity','specificity','F1','MCC','Penr','logPenr','PenrAdj','logPenrAdj'};
[tissues, num2cell([TP, TN, FP, FN, accuracy, sensitivity, specificity, F1, MCC, Penr, -log10(Penr), PenrAdj, -log10(PenrAdj)])]];

results = [{'cellLine','TP','TN','FP','FN','accuracy','sensitivity','specificity','F1','MCC'};
[tissues, num2cell([TP, TN, FP, FN, accuracy, sensitivity, specificity, F1, MCC])]];

% Including Penr results
%results = [{'cellLine','TP','TN','FP','FN','accuracy','sensitivity','specificity','F1','MCC','Penr','logPenr','PenrAdj','logPenrAdj'};
% [tissues, num2cell([TP, TN, FP, FN, accuracy, sensitivity, specificity, F1, MCC, Penr, -log10(Penr), PenrAdj, -log10(PenrAdj)])]];
end

function [penr,pdep] = EnrichmentTest(pop,sample,successes)
Expand Down

0 comments on commit 61f9b12

Please sign in to comment.