Skip to content

Commit

Permalink
Corrected issue leading to out of memory error with human transcriptome.
Browse files Browse the repository at this point in the history
Transcriptome object now deleted after generating slicedTranscriptome,
immediately before building TRDesigner object.
  • Loading branch information
Rusty Nicovich committed Jan 6, 2020
1 parent 2f5ff9b commit 7028a03
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
# except
!*.m
!./README.md
!/MERFISH_Examples2/MERFISH_Examples2/codebookMusmusculusHypothalamus_v01.csv
!/MERFISH_Examples2/codebookMusmusculusHypothalamus_v01.csv
!/MERFISH_Examples2/readoutsHypothalamus.fasta
!/MERFISH_Examples2/Mus_musculus_proxy.fpkm_tracking
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ Readouts.fasta file
./MERFISH_anlaysis/MERFISH_Examples2/readoutsHypothalamus.fasta


Proxy bulk sequencing file (created with ./MERFISH_anlaysis/MERFISH_Examples2/libraryDesign/makeFPKMfileFromCodebook.m):
Proxy bulk sequencing file (created with ./mFISHcodebookPermutation/makeProxyFPKMfileFromRefSeqfile.py):

./MERFISH_anlaysis/MERFISH_Examples2/Mus_musculus_proxy.fpkm_tracking
./MERFISH_analysis/MERFISH_Examples2/Mus_musculus_proxy.fpkm_tracking


Requires all sub-folders of this repo be on MATLAB path to execute.
Expand Down
64 changes: 61 additions & 3 deletions probe_construction/MERFISHProbeDesign.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function MERFISHProbeDesign(varargin)
function MERFISHProbeDesign(varargin)



Expand Down Expand Up @@ -64,6 +64,7 @@ function MERFISHProbeDesign(varargin)
%-------------------------------------------------------------

% % Mouse
species = 'Mus musculus';
basePath = 'D:\Data\MERFISH\Musmusculus\'; % Base path where all required files can be found

libraryName = ['SMT-M-1002_v2_noIsoSpecificity_70-100Specificity'];
Expand Down Expand Up @@ -142,6 +143,8 @@ function MERFISHProbeDesign(varargin)

keepAllPossibleProbes = true;

debugMode = false;

elseif (nargin == 1) && isa(varargin{1}, 'probeDesign')

obj = varargin{1};
Expand Down Expand Up @@ -187,8 +190,10 @@ function MERFISHProbeDesign(varargin)
codebookPath = fullfile(basePath, obj.codebookPath);
else
codebookPath = obj.codebookPath;
end

end

species = obj.species;

transcriptomeHeaderType = obj.transcriptomeHeaderType;
transcriptomeIDType = obj.transcriptomeIDType;

Expand Down Expand Up @@ -227,6 +232,8 @@ function MERFISHProbeDesign(varargin)

keepAllPossibleProbes = obj.keepAllPossibleProbes;

debugMode = obj.debugMode;

else
error('MERFISHProbeDesign inputs incorrect.');

Expand Down Expand Up @@ -516,6 +523,7 @@ function MERFISHProbeDesign(varargin)
fprintf(logFID, '%s log file\n', libraryName);
fprintf(logFID, 'basePath : %s\n', basePath);
fprintf(logFID, '-----------------------------\n');
fprintf(logFID, 'species : %s\n', species);
fprintf(logFID, 'rawTranscriptomeFasta : %s\n', rawTranscriptomeFasta);
fprintf(logFID, 'fpkmPath : %s\n', fpkmPath);
fprintf(logFID, 'ncRNAPath : %s\n', ncRNAPath);
Expand Down Expand Up @@ -838,6 +846,27 @@ function MERFISHProbeDesign(varargin)
% Slice transcriptome
slicedTranscriptome = transcriptome.Slice('geneID', goodIDs);

fprintf(1, '%s - Sliced transcriptome\n', datestr(datetime));
fprintf(logFID, '%s - Sliced transcriptome\n', datestr(datetime));

% Having memory errors after this part.
% What is needed here?
% slicedTranscriptome
% OTrRNA15
% specificityTable
% isoSpecificityTable
%
% Needed in memory to pass to TRDesigner


% NOT NEEDED
% transcriptome
%
%
% Can clear transcriptome object from memory at this point
clear transcriptome;


%% Create Target Region Designer object
if ~exist(trDesignerPath)

Expand All @@ -864,6 +893,18 @@ function MERFISHProbeDesign(varargin)
%% ------------------------------------------------------------------------
% Create target regions for a specific set of probe properties
%%-------------------------------------------------------------------------



% Apparent that if trRegionsPath object exists and is correct, nothing upstream needs to be loaded.
% Problem is that TRDesigner object does not carry relevant info to validate loaded vs to-build object.
% right now trusting that if path exists w/ correct parameters,
% then rest is OK.
% If this could be done, then trDesignerPathOK check should be done first, then cascade backwards to transcriptomePathOK check,
% then loading only what is not complete.
% Internal hash to make?


if ~exist(trRegionsPath)
% Design target regions
% targetRegions = trDesigner.DesignTargetRegions(...
Expand Down Expand Up @@ -1283,6 +1324,13 @@ function MERFISHProbeDesign(varargin)
fprintf(logFID, '%s - Completed in %d s\n', datestr(datetime), toc(writeTimer));

if keepAllPossibleProbes
if obj.debugMode
assignin('base', 'allHeaders', allHeaders);
assignin('base', 'allSeqs', allSeqs);
end

allSeqs(cellfun(@isempty, allHeaders)) = [];
allHeaders(cellfun(@isempty, allHeaders)) = [];

allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2);

Expand Down Expand Up @@ -1451,6 +1499,16 @@ function MERFISHProbeDesign(varargin)
return;
end

if debugMode
% Debug mode transfers all variables in this workspace into base
% workspace
myVarList=who;

for indVar = 1:length(myVarList)
assignin('base',myVarList{indVar},eval(myVarList{indVar}))
end
end

display('Script completed!');
% Return to calling function
return;
Expand Down
44 changes: 43 additions & 1 deletion probe_construction/TRDesigner.m
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
% -------------------------------------------------------------------------
properties
verbose % A boolean that determines whether the class displays progress
debugMode % boolean for extra debug features
end

properties (SetAccess=protected)
Expand Down Expand Up @@ -67,6 +68,7 @@
% Define defaults
defaults = cell(0,3);
defaults(end+1,:) = {'verbose', 'boolean', true}; % Display progress of construction
defaults(end+1,:) = {'debugMode', 'boolean', false}; % Display progress of construction
defaults(end+1,:) = {'transcriptome', 'freeType', []};
defaults(end+1,:) = {'OTTables', 'freeType', []};
defaults(end+1,:) = {'OTTableNames', 'cell', {}};
Expand Down Expand Up @@ -1009,11 +1011,24 @@ function SetParallel(obj, p)
% -------------------------------------------------------------------------
% Loop over objects in parallel
% -------------------------------------------------------------------------


if obj.debugMode
fprintf(1, 'Assigning TRDesigner variables in main workspace.\n');
assignin('base', 'targetRegionsBuilding', targetRegions);
end

parfor (i=1:length(inds), obj.numPar) % Get out of memory errors in this section
% Maybe because portions
% continue to get added to
% localProps instead of
% pre-allocated?


if obj.debugMode
fprintf(1, 'Building region %d.\n', i);
end

% Compile region properties
regionProps = zeros(6,0);
for l=1:length(regionLength)
Expand All @@ -1030,10 +1045,18 @@ function SetParallel(obj, p)
end
end

if obj.debugMode
fprintf(1, 'Complete region props for %d.\n', i);
end

% Tile regions and return properties of selected regions
selectedRegionProps = TRDesigner.TileRegions(regionProps, ...
threePrimeSpace);

if obj.debugMode
fprintf(1, 'Tiled regions for region %d.\n', i);
end

% Build a new target region object
newTR = TargetRegions('id', ids{i}, ...
'geneName', geneNames{i}, ...
Expand All @@ -1045,14 +1068,33 @@ function SetParallel(obj, p)
'specificity',selectedRegionProps(5,:), ...
'isoSpecificity', selectedRegionProps(6,:));

if obj.debugMode
fprintf(1, 'Built target regions for region %d.\n', i);
end

% Append to growing list of target regions
targetRegions{i} = newTR;


if obj.debugMode
fprintf(1, 'Appended target regions for region %d.\n', i);
end
end

% -------------------------------------------------------------------------
% Flatten objects
% -------------------------------------------------------------------------
targetRegions = [targetRegions{:}];
if obj.debugMode
fprintf(1, 'Flattening target regions...');
end
try
targetRegions = [targetRegions{:}];
fprintf(1, 'Done!\n');
catch me
fprintf(1, 'Error!\n');
rethrow(me);
end


% -------------------------------------------------------------------------
% Display progress
Expand Down
80 changes: 73 additions & 7 deletions probe_construction/executeProbeDesign.m
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,78 @@
%


pd = probeDesign('MO4E4v2', 'mouse', 'C:\Users\ScanningLabAnalysis\Documents\MATLAB\MERFISH_analysis\MERFISH_Examples2\codebook.csv');
set(pd, 'regionGC', [0.43, 0.63], 'regionTm', [66,76], 'isoSpecificity', [0.75, 1], 'specificity', [0.75, 1], 'numProbesPerGene', 92)
set(pd, 'probeSpacing', -20);
set(pd, 'FPKMabundanceThreshold', 0)

set(pd, 'codebookPath', 'D:\Data\MERFISH\Musmusculus\MO4E4\M1_codebook_AIBSFormat.csv')
% pd = probeDesign('Human_40gene_smELT_test201911217', 'human', 'C:\Users\ScanningLabAnalysis\Documents\MATLAB\MERFISH_analysis\MERFISH_Examples2\SMT-H-1002c_codebook.csv');
% set(pd, 'regionGC', [0.43, 0.63], 'regionTm', [66,76], 'isoSpecificity', [0, 1], 'specificity', [0.75, 1], 'numProbesPerGene', 48);
% set(pd, 'probeSpacing', -20);
% set(pd, 'ncRNAPath', 'D:\Data\MERFISH\Homosapiens\Homo_sapiens.GRCh38.ncrna.fa')
% set(pd, 'FPKMabundanceThreshold', 0);
% set(pd, 'rawTranscriptomeFasta', 'D:\Data\MERFISH\Homosapiens\Homo_sapiens_GRCh38_latest_rna.fna');
% set(pd, 'fpkmPath', 'D:\Data\MERFISH\Homosapiens\Homo_sapiens_proxyRandomFPKM.fpkm_tracking');
% set(pd, 'versionMatch', true);
%
% pd.buildLibrary()


%try
% pd = probeDesign();
% logFilePath = 'D:\Data\MERFISH\Homosapiens\Human_MTG_Sequential_20191217_DHS\Human_MTG_Sequential_20191217_DHS.log';
% pd.matchLogFile(logFilePath);
%set(pd, 'libraryName', 'MTG_20191220seq_fromLog', 'species', 'Homo sapiens');
%pd.buildLibrary();
%display('Completed sequential');
%catch mError
% display('Error on MTG_20191220_fromLog');
%end

%try
% pd = probeDesign('Human_MTG_Barcoded_20191220', 'human', 'D:\Data\MERFISH\Homosapiens\Human_MTG_Panel1_barcoded2.csv');
% pd.buildLibrary();
% display('Completed barcoded');
%catch mError
% display('Error on MTG_20191220_fromLog2');
%end
%
% pd = probeDesign('Mouse_VISp_Barcoded_20200102', 'mouse', 'D:\Data\MERFISH\Musmusculus\Mus_musculus_VISp152JLC_barcoded.csv');
% set(pd, 'regionGC', [0.43, 0.63], 'regionTm', [66,76], 'isoSpecificity', [0, 1], 'specificity', [0.75, 1], 'numProbesPerGene', 92);
% set(pd, 'probeSpacing', -20);
% set(pd, 'ncRNAPath', 'D:\Data\MERFISH\Musmusculus\Mus_musculus.GRCm38.ncrna.fa')
% set(pd, 'FPKMabundanceThreshold', 0);
% set(pd, 'rawTranscriptomeFasta','D:\Data\MERFISH\Musmusculus\Mus_musculus.GRCm38.cdna.all.fa');
% set(pd, 'fpkmPath', 'D:\Data\MERFISH\Musmusculus\Mus_musculus_proxyRandomFPKM.fpkm_tracking');
%
% pd.buildLibrary()


%try
% pd = probeDesign();
% pd.matchLogFile('D:\Data\MERFISH\Homosapiens\MTG_20191221seq_fromLog\MTG_20191221seq_fromLog.log');
% set(pd, 'libraryName', 'MTG_20191221seq_fromLog_20200104', 'species', 'Homo sapiens');
% pd.buildLibrary();
%display('Completed sequential');
%catch mError
% display('Build error in executeProbeDesign');
% rethrow(mError)
%end


pd = probeDesign('Mouse_VISp_Barcoded_20200105', 'mouse', 'D:\Data\MERFISH\Musmusculus\Mus_musculus_VISp152JLC_barcoded.csv');
set(pd, 'regionGC', [0.43, 0.63], 'regionTm', [66,76], 'isoSpecificity', [0, 1], 'specificity', [0.75, 1], 'numProbesPerGene', 92);
set(pd, 'probeSpacing', -20);
set(pd, 'ncRNAPath', 'D:\Data\MERFISH\Musmusculus\Mus_musculus.GRCm38.ncrna.fa')
set(pd, 'FPKMabundanceThreshold', 0);
set(pd, 'rawTranscriptomeFasta','D:\Data\MERFISH\Musmusculus\Mus_musculus.GRCm38.cdna.all.fa');
set(pd, 'fpkmPath', 'D:\Data\MERFISH\Musmusculus\Mus_musculus_proxyRandomFPKM.fpkm_tracking');

pd.buildLibrary()












pd.buildLibrary();
11 changes: 7 additions & 4 deletions probe_construction/probeDesign.m
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@
cutPrimersMaxHomologyCross {mustBeNumeric, mustBeNonnegative} = 8;

% Indicate if this is allowed to ignore sequence version number in matching
versionMatch {mustBeNumericOrLogical} = false;
versionMatch {mustBeNumericOrLogical} = true;

doubleHeadedsmELT {mustBeNumericOrLogical} = false;
doubleHeadedsmELT {mustBeNumericOrLogical} = true;

keepAllPossibleProbes {mustBeNumericOrLogical} = true;

debugMode {mustBeNumericOrLogical} = false;

%---------------------------------------------------------------
% Generated paths that could be reused if existing, things match
%
Expand Down Expand Up @@ -181,7 +183,8 @@ function matchLogFile(obj, filePath)
'transcriptomePath', ...
'specificityTablePath', ...
'isoSpecificityTablePath', ...
'trDesignerPath'}
'trDesignerPath', ...
'species'}

if all(isstrprop(ret{2}, 'digit') | (ret{2} == '.') | (ret{2} == 'e') | (ret{2} == '-'))
obj.(ret{1}) = str2double(ret{2});
Expand Down Expand Up @@ -232,7 +235,7 @@ function matchLogFile(obj, filePath)
obj.versionMatch = (ret{2} == '1');

case 'doubleHeadedsmELT'
assignin('base', 'line2', ret)
% assignin('base', 'line2', ret)
obj.doubleHeadedsmELT = (ret{2} == '1');

end
Expand Down

0 comments on commit 7028a03

Please sign in to comment.