Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove code for ignoring ambiguous spectra from MSFReader.cpp #3111

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
83 changes: 1 addition & 82 deletions pwiz_tools/BiblioSpec/src/MSFReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,37 +310,6 @@ namespace BiblioSpec

void MSFReader::collectPsms() {
sqlite3_stmt* statement;
map<int, double> alts; // peptide id --> alt score, for breaking ties when q-values are identical
vector<string> altScoreNames;
altScoreNames.push_back("XCorr");
altScoreNames.push_back("IonScore");

if (tableExists(msfFile_, "TargetPsms")) {
for (vector<string>::const_iterator i = altScoreNames.begin(); i != altScoreNames.end(); ++i) {
if (!columnExists(msfFile_, "TargetPsms", *i)) {
continue;
}
statement = getStmt("SELECT PeptideID, " + *i + " FROM TargetPsms");
while (hasNext(&statement)) {
alts[sqlite3_column_int(statement, 0)] = sqlite3_column_double(statement, 1);
}
break;
}
} else if (tableExists(msfFile_, "PeptideScores") && tableExists(msfFile_, "ProcessingNodeScores")) {
for (vector<string>::const_iterator i = altScoreNames.begin(); i != altScoreNames.end(); ++i) {
statement = getStmt(
"SELECT PeptideID, ScoreValue "
"FROM PeptideScores JOIN ProcessingNodeScores ON PeptideScores.ScoreID = ProcessingNodeScores.ScoreID "
"WHERE ScoreName = '" + *i + "'");
while (hasNext(&statement)) {
alts[sqlite3_column_int(statement, 0)] = sqlite3_column_double(statement, 1);
}
if (!alts.empty()) {
break;
}
}
}

int resultCount, pepConfidence, protConfidence;
PSM_SCORE_TYPE scoreType;
getScoreInfo(&statement, &resultCount, &scoreType, &pepConfidence, &protConfidence);
Expand All @@ -349,7 +318,6 @@ namespace BiblioSpec
ProgressIndicator progress(resultCount);

initFileNameMap();
map<string, ProcessedMsfSpectrum> processedSpectra;
ModSet modSet = ModSet(msfFile_, !versionLess(2, 2) || filtered_);
map<int, int> fileIdMap = getFileIds();

Expand Down Expand Up @@ -390,56 +358,7 @@ namespace BiblioSpec
continue;
}

auto altIter = alts.find(peptideId);
double altScore = (altIter != alts.end()) ? altIter->second : -std::numeric_limits<double>::max();

// check if we already processed a peptide that references this spectrum
auto processedSpectraSearch = processedSpectra.find(specId);
if (processedSpectraSearch != processedSpectra.end()) {
ProcessedMsfSpectrum& processed = processedSpectraSearch->second;
// not an ambiguous spectrum (yet)
if (!processed.ambiguous) {
if (qvalue > processed.qvalue || (qvalue == processed.qvalue && altScore < processed.altScore)) { // worse than other score, skip this
Verbosity::debug("Peptide %d (%s) had a worse score than another peptide (%s) "
"referencing spectrum %d (ignoring this peptide).",
peptideId, sequence.c_str(), processed.psm->unmodSeq.c_str(), specId.c_str());
continue;
} else if (qvalue == processed.qvalue && altScore == processed.altScore) { // equal, discard other and skip this
Verbosity::debug("Peptide %d (%s) had the same score as another peptide (%s) "
"referencing spectrum %d (ignoring both peptides).",
peptideId, sequence.c_str(), processed.psm->unmodSeq.c_str(), specId.c_str());

removeFromFileMap(processed.psm);
delete processed.psm;

processed.psm = NULL;
processed.ambiguous = true;
continue;
} else { // better than other score, discard other
Verbosity::debug("Peptide %d (%s) had a better score than another peptide (%s) "
"referencing spectrum %d (ignoring other peptide).",
peptideId, sequence.c_str(), processed.psm->unmodSeq.c_str(), specId.c_str());
removeFromFileMap(processed.psm);
curPSM_ = processed.psm;
curPSM_->mods.clear();
processed.qvalue = qvalue;
processed.altScore = altScore;
}
} else { // ambiguous spectrum, check if score is better
Verbosity::debug("Peptide %d (%s) with score %f references same spectrum as other peptides "
"that had score %f.", peptideId, sequence.c_str(), qvalue, processed.qvalue);
if (qvalue < processed.qvalue || (qvalue == processed.qvalue && altScore > processed.altScore)) {
curPSM_ = new PSM();
processedSpectraSearch->second = ProcessedMsfSpectrum(curPSM_, qvalue, altScore);
} else {
continue;
}
}
} else {
// unseen spectrum
curPSM_ = new PSM();
processedSpectra[specId] = ProcessedMsfSpectrum(curPSM_, qvalue, altScore);
}
curPSM_ = new PSM();

if (findItr->second->charge > 0)
curPSM_->charge = findItr->second->charge;
Expand Down
1 change: 1 addition & 0 deletions pwiz_tools/BiblioSpec/tests/Jamfile.jam
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ blib-test-build tinyPlink-proxl : -o : output/tinyPlink-proxl.blib : tinyPlink-p
blib-test-build tinyPeptideProphet-proxl : -o : output/tinyPeptideProphet-proxl.blib : tinyPeptideProphet-proxl.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tinyPeptideProphet.proxl.xml ;
blib-test-build tinyMerox-proxl : -o : output/tinyMerox-proxl.blib : tinyMerox-proxl.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tinyMerox.proxl.xml ;
blib-test-build tiny-msf : --unicode -o : output/tiny-msf.blib : tiny-msf.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny.msf ;
blib-test-build tiny-msf-keep : --unicode -o -K : output/tiny-msf-keep.blib : tiny-msf-keep.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny.msf ;
blib-test-build tiny-v2-msf : -o : output/tiny-v2-msf.blib : tiny-v2-msf.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny-v2.msf ;
blib-test-build tiny-v2-filtered-pdResult : -o : output/tiny-v2-filtered-pdResult.blib : tiny-v2-filtered-pdResult.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny-v2-filtered.pdResult ;
blib-test-build md_special_filtered-pdResult : --unicode -o : output/md_special_filtered-pdResult.blib : md_special_filtered-pdResult.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/md_special_filtered.pdResult ;
Expand Down
50 changes: 50 additions & 0 deletions pwiz_tools/BiblioSpec/tests/reference/tiny-msf-keep.check
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
libLSID numSpecs majorVersion minorVersion
urn:lsid:proteome.gs.washington.edu:spectral_library:bibliospec:redundant:tiny-msf-keep.blib 6 1 10
id RefSpectraID position mass
1 1 1 4.008491
2 1 2 4.008491
3 1 7 4.008491
4 2 3 4.008491
5 2 4 4.008491
6 2 7 4.008491
7 3 1 4.008491
8 3 2 4.008491
9 3 7 4.008491
10 4 1 4.008491
11 4 3 4.008491
12 4 7 4.008491
13 5 1 15.994915
14 5 8 15.994915
15 5 10 4.008491
id peptideSeq precursorMZ precursorCharge peptideModSeq prevAA nextAA copies numPeaks ionMobility collisionalCrossSectionSqA ionMobilityHighEnergyOffset ionMobilityType retentionTime startTime endTime totalIonCurrent moleculeName chemicalFormula precursorAdduct inchiKey otherKeys fileID SpecIDinFile score scoreType
1 KKLVPLK 419.81906737 2 K[+4.0]K[+4.0]LVPLK[+4.0] - - 1 90 0.0 0.0 0.0 0 0.32088 N/A N/A 3448.02592301 N/A N/A N/A N/A N/A 1 2 0.002 1
2 VPKKILK 419.81915892 2 VPK[+4.0]K[+4.0]ILK[+4.0] - - 1 95 0.0 0.0 0.0 0 1.12651166 N/A N/A 4332.04377079 N/A N/A N/A N/A N/A 1 4 0.006 1
3 KKLVPLK 419.81915892 2 K[+4.0]K[+4.0]LVPLK[+4.0] - - 1 95 0.0 0.0 0.0 0 1.12651166 N/A N/A 4332.04377079 N/A N/A N/A N/A N/A 1 4 0.009 1
4 KLKVIPK 419.81921996 2 K[+4.0]LK[+4.0]VIPK[+4.0] - - 1 100 0.0 0.0 0.0 0 2.24853 N/A N/A 4803.68319678 N/A N/A N/A N/A N/A 1 6 0.002 1
5 MASASGAMAK 481.22015990 2 M[+16.0]ASASGAM[+16.0]AK[+4.0] - - 1 120 0.0 0.0 0.0 0 2.85028666 N/A N/A 7404.39071369 N/A N/A N/A N/A N/A 1 9 0.01 1
6 RPVTPKK 413.76981200 2 RPVTPKK - - 1 175 0.0 0.0 0.0 0 2.87913166 N/A N/A 16087.57 N/A N/A N/A N/A N/A 2 10 0.002 1
id fileName idFileName cutoffScore
1 D:/Elite_MAM/MITO_03.raw /BiblioSpec/tests/inputs/试验_tiny.msf -1.0
2 X:/Test.raw /BiblioSpec/tests/inputs/试验_tiny.msf -1.0
id scoreType probabilityType
0 UNKNOWN NOT_A_PROBABILITY_VALUE
1 PERCOLATOR QVALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
2 PEPTIDE PROPHET SOMETHING PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
3 SPECTRUM MILL NOT_A_PROBABILITY_VALUE
4 IDPICKER FDR PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
5 MASCOT IONS SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
6 TANDEM EXPECTATION VALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
7 PROTEIN PILOT CONFIDENCE PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
8 SCAFFOLD SOMETHING PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
9 WATERS MSE PEPTIDE SCORE NOT_A_PROBABILITY_VALUE
10 OMSSA EXPECTATION SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
11 PROTEIN PROSPECTOR EXPECTATION SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
12 SEQUEST XCORR PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
13 MAXQUANT SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
14 MORPHEUS SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
15 MSGF+ SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
16 PEAKS CONFIDENCE SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
17 BYONIC SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
18 PEPTIDE SHAKER CONFIDENCE PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
19 GENERIC Q-VALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
20 HARDKLOR IDOTP PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
23 changes: 10 additions & 13 deletions pwiz_tools/BiblioSpec/tests/reference/tiny-msf.check
Original file line number Diff line number Diff line change
@@ -1,24 +1,20 @@
libLSID numSpecs majorVersion minorVersion
urn:lsid:proteome.gs.washington.edu:spectral_library:bibliospec:redundant:tiny-msf.blib 5 1 10
urn:lsid:proteome.gs.washington.edu:spectral_library:bibliospec:redundant:tiny-msf.blib 4 1 10
id RefSpectraID position mass
1 1 1 4.008491
2 1 2 4.008491
3 1 7 4.008491
4 2 3 4.008491
5 2 4 4.008491
4 2 1 4.008491
5 2 3 4.008491
6 2 7 4.008491
7 3 1 4.008491
8 3 3 4.008491
9 3 7 4.008491
10 4 1 15.994915
11 4 8 15.994915
12 4 10 4.008491
7 3 1 15.994915
8 3 8 15.994915
9 3 10 4.008491
id peptideSeq precursorMZ precursorCharge peptideModSeq prevAA nextAA copies numPeaks ionMobility collisionalCrossSectionSqA ionMobilityHighEnergyOffset ionMobilityType retentionTime startTime endTime totalIonCurrent moleculeName chemicalFormula precursorAdduct inchiKey otherKeys fileID SpecIDinFile score scoreType
1 KKLVPLK 419.81906737 2 K[+4.0]K[+4.0]LVPLK[+4.0] - - 1 90 0.0 0.0 0.0 0 0.32088 N/A N/A 3448.02592301 N/A N/A N/A N/A N/A 1 2 0.002 1
2 VPKKILK 419.81915892 2 VPK[+4.0]K[+4.0]ILK[+4.0] - - 1 95 0.0 0.0 0.0 0 1.12651166 N/A N/A 4332.04377079 N/A N/A N/A N/A N/A 1 4 0.006 1
3 KLKVIPK 419.81921996 2 K[+4.0]LK[+4.0]VIPK[+4.0] - - 1 100 0.0 0.0 0.0 0 2.24853 N/A N/A 4803.68319678 N/A N/A N/A N/A N/A 1 6 0.002 1
4 MASASGAMAK 481.22015990 2 M[+16.0]ASASGAM[+16.0]AK[+4.0] - - 1 120 0.0 0.0 0.0 0 2.85028666 N/A N/A 7404.39071369 N/A N/A N/A N/A N/A 1 9 0.01 1
5 RPVTPKK 413.76981200 2 RPVTPKK - - 1 175 0.0 0.0 0.0 0 2.87913166 N/A N/A 16087.57 N/A N/A N/A N/A N/A 2 10 0.002 1
2 KLKVIPK 419.81921996 2 K[+4.0]LK[+4.0]VIPK[+4.0] - - 1 100 0.0 0.0 0.0 0 2.24853 N/A N/A 4803.68319678 N/A N/A N/A N/A N/A 1 6 0.002 1
3 MASASGAMAK 481.22015990 2 M[+16.0]ASASGAM[+16.0]AK[+4.0] - - 1 120 0.0 0.0 0.0 0 2.85028666 N/A N/A 7404.39071369 N/A N/A N/A N/A N/A 1 9 0.01 1
4 RPVTPKK 413.76981200 2 RPVTPKK - - 1 175 0.0 0.0 0.0 0 2.87913166 N/A N/A 16087.57 N/A N/A N/A N/A N/A 2 10 0.002 1
id fileName idFileName cutoffScore
1 D:/Elite_MAM/MITO_03.raw /BiblioSpec/tests/inputs/试验_tiny.msf -1.0
2 X:/Test.raw /BiblioSpec/tests/inputs/试验_tiny.msf -1.0
Expand All @@ -43,3 +39,4 @@ id scoreType probabilityType
17 BYONIC SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
18 PEPTIDE SHAKER CONFIDENCE PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
19 GENERIC Q-VALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
20 HARDKLOR IDOTP PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
3 changes: 2 additions & 1 deletion pwiz_tools/Skyline/TestPerf/PerfThermoFAIMSTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ private void TestPopulateDocumentFromLibrary()
// Add all peptides
var filterMatchedPeptidesDlg = ShowDialog<FilterMatchedPeptidesDlg>(viewLibUI.AddAllPeptides);
var docBefore = WaitForProteinMetadataBackgroundLoaderCompletedUI();
using (new CheckDocumentState(1, 8433, 10882, 43484))
using (new CheckDocumentState(1, 8591, 11057, 44174))
{
RunDlg<MultiButtonMsgDlg>(filterMatchedPeptidesDlg.OkDialog, addLibraryPepsDlg =>
{
Expand All @@ -146,6 +146,7 @@ private void TestWizardBuildDocumentLibraryAndFinish(string documentFile)
RunUI(() =>
{
Assert.IsTrue(importPeptideSearchDlg.CurrentPage == ImportPeptideSearchDlg.Pages.spectra_page);
importPeptideSearchDlg.BuildPepSearchLibControl.IncludeAmbiguousMatches = true;
importPeptideSearchDlg.BuildPepSearchLibControl.AddSearchFiles(SearchFiles);
});
WaitForConditionUI(() => importPeptideSearchDlg.IsEarlyFinishButtonEnabled);
Expand Down
Loading