Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated PEP to use set seed (42) #2408

Merged
merged 7 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions MetaMorpheus/EngineLayer/FdrAnalysis/FdrInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,16 @@ public class FdrInfo
public double QValueNotch { get; set; }
public double PEP { get; set; }
public double PEP_QValue { get; set; }

/// <summary>
/// Initializes a new FdrInfo with QValue and PEP_QValue both starting at 2.
/// This avoids situations where q-values are never calculated for a given peptide,
/// yet that peptide is still reported in the final results.
/// </summary>
public FdrInfo()
{
    // Starting value given to both q-value properties until they are assigned elsewhere.
    const double uncalculatedQValue = 2;

    QValue = uncalculatedQValue;
    PEP_QValue = uncalculatedQValue;
}
}
}
25 changes: 23 additions & 2 deletions MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,27 @@ namespace EngineLayer
{
public class PepAnalysisEngine
{
private int _randomSeed = 42;

/// <summary>
/// Hyper-parameters used when training the machine learning model.
/// Each read of this property constructs a fresh Options instance.
/// </summary>
/// <value> Options object to be passed in to the FastTree constructor </value>
public Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options BGDTreeOptions
{
    get
    {
        var treeOptions = new Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options
        {
            // Input columns
            LabelColumnName = "Label",
            FeatureColumnName = "Features",

            // Ensemble size and tree shape
            NumberOfTrees = 400,
            NumberOfLeaves = 20,
            MinimumExampleCountPerLeaf = 10,
            LearningRate = 0.2,

            // Threading and seeding: both seeds come from the same _randomSeed field
            NumberOfThreads = 1,
            Seed = _randomSeed,
            FeatureSelectionSeed = _randomSeed,
            RandomStart = false
        };

        return treeOptions;
    }
}

private static readonly double AbsoluteProbabilityThatDistinguishesPeptides = 0.05;

//These two dictionaries contain the averages and standard deviations of hydrophobicities measured in 1 minute increments across each raw
Expand Down Expand Up @@ -105,10 +126,10 @@ public string ComputePEPValuesForAllPSMs()
}
}

MLContext mlContext = new MLContext();
MLContext mlContext = new MLContext(seed: _randomSeed);
TransformerChain<BinaryPredictionTransformer<Microsoft.ML.Calibrators.CalibratedModelParametersBase<Microsoft.ML.Trainers.FastTree.FastTreeBinaryModelParameters, Microsoft.ML.Calibrators.PlattCalibrator>>>[] trainedModels = new TransformerChain<BinaryPredictionTransformer<Microsoft.ML.Calibrators.CalibratedModelParametersBase<Microsoft.ML.Trainers.FastTree.FastTreeBinaryModelParameters, Microsoft.ML.Calibrators.PlattCalibrator>>>[numGroups];

var trainer = mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numberOfTrees: 400);
var trainer = mlContext.BinaryClassification.Trainers.FastTree(BGDTreeOptions);
var pipeline = mlContext.Transforms.Concatenate("Features", TrainingVariables)
.Append(trainer);

Expand Down
4 changes: 4 additions & 0 deletions MetaMorpheus/EngineLayer/PeptideSpectralMatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ protected PeptideSpectralMatch(SpectralMatch psm, List<(int Notch, IBioPolymerWi
: base(psm, bestMatchingPeptides)
{
}

/// <summary>
/// Builds a PeptideSpectralMatch from an existing SpectralMatch by handing it
/// straight to the base-class constructor; no additional work is done here.
/// </summary>
/// <param name="psm"> The spectral match forwarded to the base constructor </param>
public PeptideSpectralMatch(SpectralMatch psm)
    : base(psm) { }
#endregion


Expand Down
40 changes: 40 additions & 0 deletions MetaMorpheus/EngineLayer/SpectralMatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,46 @@ protected SpectralMatch(SpectralMatch psm, List<(int Notch, IBioPolymerWithSetMo
SpectralAngle = psm.SpectralAngle;
}

/// <summary>
/// Copy constructor: populates a new SpectralMatch from <paramref name="psm"/> by assigning
/// each member directly. Nothing is cloned in this constructor, so any member whose type is a
/// reference type ends up shared between the new object and <paramref name="psm"/>.
/// </summary>
/// <param name="psm"> The spectral match whose values are copied; it is dereferenced without a null check </param>
public SpectralMatch(SpectralMatch psm)
{
// The best-matching list is taken from psm as-is (same instance, not a new list).
_BestMatchingBioPolymersWithSetMods = psm._BestMatchingBioPolymersWithSetMods;
// BaseSequence and FullSequence are re-resolved from that list via PsmTsvWriter.Resolve
// rather than copied from psm, so these two lines must run after the assignment above.
BaseSequence = PsmTsvWriter.Resolve(_BestMatchingBioPolymersWithSetMods.Select(b => b.Pwsm.BaseSequence)).ResolvedValue;
FullSequence = PsmTsvWriter.Resolve(_BestMatchingBioPolymersWithSetMods.Select(b => b.Pwsm.FullSequence)).ResolvedValue;

// Everything below is a one-to-one assignment from the corresponding member of psm.
ModsChemicalFormula = psm.ModsChemicalFormula;
Notch = psm.Notch;
BioPolymerWithSetModsLength = psm.BioPolymerWithSetModsLength;
OneBasedStartResidue = psm.OneBasedStartResidue;
OneBasedEndResidue = psm.OneBasedEndResidue;
BioPolymerWithSetModsMonoisotopicMass = psm.BioPolymerWithSetModsMonoisotopicMass;
ParentLength = psm.ParentLength;
Accession = psm.Accession;
Organism = psm.Organism;
MatchedFragmentIons = psm.MatchedFragmentIons;
PsmCount = psm.PsmCount;
ModsIdentified = psm.ModsIdentified;
LocalizedScores = psm.LocalizedScores;
ScanNumber = psm.ScanNumber;
PrecursorScanNumber = psm.PrecursorScanNumber;
ScanRetentionTime = psm.ScanRetentionTime;
ScanExperimentalPeaks = psm.ScanExperimentalPeaks;
TotalIonCurrent = psm.TotalIonCurrent;
ScanPrecursorCharge = psm.ScanPrecursorCharge;
ScanPrecursorMonoisotopicPeakMz = psm.ScanPrecursorMonoisotopicPeakMz;
ScanPrecursorMass = psm.ScanPrecursorMass;
FullFilePath = psm.FullFilePath;
ScanIndex = psm.ScanIndex;
// NOTE(review): FdrInfo is assigned, not duplicated. If it is a class instance, FDR values
// written through the copy would also be visible on psm — confirm that is intended.
FdrInfo = psm.FdrInfo;
Score = psm.Score;
Xcorr = psm.Xcorr;
RunnerUpScore = psm.RunnerUpScore;
IsDecoy = psm.IsDecoy;
IsContaminant = psm.IsContaminant;
DigestionParams = psm.DigestionParams;
BioPolymersWithSetModsToMatchingFragments = psm.BioPolymersWithSetModsToMatchingFragments;
SpectralAngle = psm.SpectralAngle;
}

#endregion

#region FDR
Expand Down
31 changes: 29 additions & 2 deletions MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,37 @@
// this could cause weird PSM FDR issues

Status("Estimating PSM FDR...", Parameters.SearchTaskId);
new FdrAnalysisEngine(psms, Parameters.NumNotches, CommonParameters, this.FileSpecificParameters,
new List<string> { Parameters.SearchTaskId }, analysisType: analysisType, doPEP: doPep, outputFolder: Parameters.OutputFolder).Run();

List<int> psmsAboveQ = new List<int>();
List<int> peptidesAboveQ = new List<int>();
List<int> psmsAbovePepQ = new List<int>();
List<int> peptidesAbovePepQ = new List<int>();

for (int i = 0; i < 10; i++)
{
var tempPsms = psms
.Select(p => new PeptideSpectralMatch(p))
.Cast<SpectralMatch>()
.ToList();
new FdrAnalysisEngine(tempPsms, Parameters.NumNotches, CommonParameters, this.FileSpecificParameters,
new List<string> { Parameters.SearchTaskId }, analysisType: analysisType, doPEP: doPep, outputFolder: Parameters.OutputFolder).Run();
psmsAboveQ.Add(tempPsms.Count(p => p.FdrInfo.QValue <= 0.01));
peptidesAboveQ.Add(tempPsms.Count(p => p.PeptideFdrInfo.QValue <= 0.01));
psmsAbovePepQ.Add(tempPsms.Count(p => p.FdrInfo.PEP_QValue <= 0.01));
peptidesAbovePepQ.Add(tempPsms.Count(p => p.PeptideFdrInfo.PEP_QValue <= 0.01));
}

// write summary text
using(StreamWriter writer = new StreamWriter(@"C:\Users\Alex\PEP_NoRandomOrdering.tsv"))
{
writer.WriteLine("PSMs Q\tPeptides Q\tPSMs PEP-Q\tPeptides PEP-Q");
for(int i = 0; i < 10; i++)
{
writer.WriteLine(String.Join('\t', psmsAboveQ[i], peptidesAboveQ[i], psmsAbovePepQ[i], peptidesAbovePepQ[i]));
}
}
throw new Exception("PEP Seed Results written to file");
Status("Done estimating PSM FDR!", Parameters.SearchTaskId);

Check warning on line 170 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / macos-latest

Unreachable code detected

Check warning on line 170 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / windows-latest

Unreachable code detected

Check warning on line 170 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / macos-latest

Unreachable code detected

Check warning on line 170 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / windows-latest

Unreachable code detected
}

private void ProteinAnalysis()
Expand Down
Loading