diff --git a/+kgmlm/+fittingTools/HMCstep_diag.m b/+kgmlm/+fittingTools/HMCstep_diag.m index a9ecef1..caff7de 100644 --- a/+kgmlm/+fittingTools/HMCstep_diag.m +++ b/+kgmlm/+fittingTools/HMCstep_diag.m @@ -39,6 +39,7 @@ nlpost = inf; % divergent trajectory break; end + %% move positions [w, errs] = paramStep(w, p, M, HMC_state); if(errs) @@ -48,7 +49,6 @@ [nlpost, ndW, ~, results] = nlpostFunction(w); - %% move momentums [p, errs] = momentumStep(p, -ndW, HMC_state); if(errs)% divergent trajectory @@ -56,6 +56,7 @@ break; end + %% check current divergence lp_momentum = logProbMomentum(p, M); H_s = -nlpost + lp_momentum; @@ -73,21 +74,16 @@ error('HMC accept probability is nan!'); end catch ee %#ok - %p_accept = 1e-14; - - log_p_accept = nan;%log(p_accept); - w_new = w_init; - results = results_init; - accepted = false; - divergent = true; - -% msgText = getReport(ee,'extended'); -% fprintf('HMC reaching inf/nan values with step size %.4f: %s\n\tAuto-rejecting sample and setting p_accept = %e.\n\tError Message: %s\n',ees,errorMessageStr,p_accept,msgText); -% fprintf('>>end error message<<\n'); - -% fprintf('\t\t>>>HMC sampler reaching numerically unstable values (infinite/nan): rejecting sample early<<<\n'); - + log_p_accept = nan; + w_new = w_init; + results = results_init; + accepted = false; + divergent = true; + %msgText = getReport(ee,'extended'); + %fprintf('HMC reaching inf/nan values with step size %.4f: %s\n\tAuto-rejecting sample and setting p_accept = %e.\n\tError Message: %s\n',ees,errorMessageStr,p_accept,msgText); + %fprintf('>>end error message<<\n'); + %fprintf('\t\t>>>HMC sampler reaching numerically unstable values (infinite/nan): rejecting sample early<<<\n'); return; end @@ -108,16 +104,16 @@ end end - +%% gets initial momentum function [vv] = generateMomentum(M) vv = (randn(numel(M),1).*sqrt(M)); end + %% gets the probability of a momentum term function [lp] = logProbMomentum(mm,M) lp = -1/2*sum(M.\mm.^2); end - %% complete parameter step function [w,errs] = paramStep(w, p, M, HMC_state) w(:) = w + HMC_state.stepSize.e*(M.\p(:)); diff --git a/+kgmlm/+fittingTools/adjustHMCstepSize.m b/+kgmlm/+fittingTools/adjustHMCstepSize.m index 2fff801..14d535c 100644 --- a/+kgmlm/+fittingTools/adjustHMCstepSize.m +++ b/+kgmlm/+fittingTools/adjustHMCstepSize.m @@ -19,7 +19,7 @@ ps.delta = ps.delta(min(numel(ps.delta), ww)); tt = ss - sample_1 + 1; - [stepSizeState.x_t,stepSizeState.x_bar_t,stepSizeState.H_sum] = dualAverageStepSizeUpdate_internal(ps, log_p_accept_new, stepSizeState.H_sum, stepSizeState.x_bar_t, tt); + [stepSizeState.x_t, stepSizeState.x_bar_t, stepSizeState.H_sum] = dualAverageStepSizeUpdate_internal(ps, log_p_accept_new, stepSizeState.H_sum, stepSizeState.x_bar_t, tt, log(stepSizeSettings.max_step_size)); stepSizeState.e_bar = exp(stepSizeState.x_bar_t); if(ss == stepSizeSettings.schedule(ww,2)) @@ -32,8 +32,8 @@ end -stepSizeState.e = min(stepSizeSettings.max_step_size, stepSizeState.e ); -stepSizeState.e_bar = min(stepSizeSettings.max_step_size, stepSizeState.e_bar ); +% stepSizeState.e = min(stepSizeSettings.max_step_size, stepSizeState.e ); +% stepSizeState.e_bar = min(stepSizeSettings.max_step_size, stepSizeState.e_bar ); HMC_state.stepSize = stepSizeState; @@ -50,7 +50,7 @@ end %% -function [x_t, x_bar_t, H_sum] = dualAverageStepSizeUpdate_internal(ps, log_h, H_sum, x_bar_t, tt) +function [x_t, x_bar_t, H_sum] = dualAverageStepSizeUpdate_internal(ps, log_h, H_sum, x_bar_t, tt, max_x) if(tt == 1 || isnan(x_bar_t) || isinf(x_bar_t)) %reset estimation x_t = x_bar_t; @@ -72,6 +72,10 @@ H_tt = ps.delta - a_tt; H_sum = aa_H * H_tt + (1 - aa_H) * H_sum; x_t = ps.mu - sqrt(tt)/ps.gamma * H_sum; + if(nargin >= 6) + x_t = min(max_x, x_t); + end + aa_x =tt^(-ps.kappa); x_bar_t = aa_x * x_t + (1 - aa_x)*x_bar_t; end \ No newline at end of file diff --git a/+kgmlm/+utils/logMeanExp.m b/+kgmlm/+utils/logMeanExp.m index 4f7e733..27b3706 100644 --- a/+kgmlm/+utils/logMeanExp.m +++ b/+kgmlm/+utils/logMeanExp.m @@ -4,10 +4,7 @@ end -if(strcmpi(dim, 'all')) - NE = numel(log_x); -else - NE = size(log_x, dim); -end +NE = sum(~isnan(log_x), dim); +NE(NE == 0) = nan; log_m = -log(NE) + kgmlm.utils.logSumExp(log_x,dim); diff --git a/+kgmlm/+utils/logSumExp.m b/+kgmlm/+utils/logSumExp.m index 3e8f76a..884db15 100644 --- a/+kgmlm/+utils/logSumExp.m +++ b/+kgmlm/+utils/logSumExp.m @@ -3,23 +3,27 @@ dim = 2; end +if(size(log_x, dim) == 1) + log_m = log_x; + return; +end -cs = max(log_x, [], dim); +cs = max(log_x, [], dim, 'omitnan'); log_x = log_x - cs; if(isnumeric(dim) && dim == 1 && isa(log_x,'single')) %converts to double for numerical saftey without converting the entire matrix of log_x - just one column at a time log_m = cs; for ii = 1:numel(log_m) - log_m(ii) = log_m(ii) + log(sum(exp(double(log_x(:,ii))))); + log_m(ii) = log_m(ii) + log(sum(exp(double(log_x(:,ii))), 'omitnan')); end elseif(isnumeric(dim) && dim == 2 && isa(log_x,'single')) log_m = cs; for ii = 1:numel(log_m) - log_m(ii) = log_m(ii) + log(sum(exp(double(log_x(ii,:))))); + log_m(ii) = log_m(ii) + log(sum(exp(double(log_x(ii,:))), 'omitnan')); end else - log_m = cs +log(sum(exp(log_x),dim)); + log_m = cs +log( sum(exp(log_x),dim, 'omitnan')); end diff --git a/+kgmlm/+utils/truncatedPoiss.m b/+kgmlm/+utils/truncatedPoiss.m index 22ba0ee..71e958a 100644 --- a/+kgmlm/+utils/truncatedPoiss.m +++ b/+kgmlm/+utils/truncatedPoiss.m @@ -1,4 +1,4 @@ -function [ll] = truncatedPoiss(rr, Y) +function [ll, p0, log_p0] = truncatedPoiss(rr, Y) ll = zeros(size(Y)); @@ -6,7 +6,12 @@ Y0 = Y == 0; rl = rr < -30; -ll(Y1 & rl) = rr(Y1 & rl); +ll(Y1 & rl) = rr(Y1 & rl); ll(Y1 & ~rl) = log(1-exp(-exp(rr(Y1 & ~rl)))); -ll(Y0) = -exp(rr(Y0)); \ No newline at end of file +ll(Y0) = -exp(rr(Y0)); + +if(nargout > 1) + log_p0 = -exp(rr); + p0 = exp(log_p0); +end diff --git a/+kgmlm/@GMLM/GMLM.m b/+kgmlm/@GMLM/GMLM.m index ffb8207..c99a874 100644 --- a/+kgmlm/@GMLM/GMLM.m +++ b/+kgmlm/@GMLM/GMLM.m @@ -1193,14 +1193,14 @@ function delete(obj) J = obj.dim_J; params_0 = params; - if(isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.scaleParams)) - params = obj.GMLMstructure.scaleParams(params_0); - end for jj = 1:J if(isfield(obj.GMLMstructure.Groups(jj), "scaleParams") && ~isempty(obj.GMLMstructure.Groups(jj).scaleParams)) params.Groups(jj) = obj.GMLMstructure.Groups(jj).scaleParams(params_0.Groups(jj)); end end + if(isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.GMLMstructure.scaleParams)) + params = obj.GMLMstructure.scaleParams(params); + end for jj = 1:obj.dim_J shared_regressors(jj).F = obj.getF(params, jj); @@ -1313,6 +1313,7 @@ function delete(obj) for pp = 1:size(obj.trials(mm).Y, 2) rr = log_like_per_trial(mm).log_rate(:, pp) + log(obj.bin_size); + log_like_per_trial(mm).log_like_0(:, pp) = kgmlm.utils.truncatedPoiss(rr, obj.trials(mm).Y(:, pp) ); @@ -1889,37 +1890,20 @@ function delete(obj) useAsync = true; params_0 = params; - scaled_WB = isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.scaleParams); - if(scaled_WB) - params = obj.GMLMstructure.scaleParams(params_0); - - if(isfield(opts, "dH")) - opts.dB = opts.dB | opts.dH; - opts.dW = opts.dW | opts.dH; - - if(nargin > 3) - if(opts.dW && isempty(results.dW)) - error("Invalid results struct."); - end - if(opts.dB && isempty(results.dB) && obj.dim_B > 0) - error("Invalid results struct."); - end - end - end - end J = obj.dim_J; scaled_VT = false(J,1); + scaleP = cell(J,1); for jj = 1:J if(isfield(obj.GMLMstructure.Groups(jj), "scaleParams") && ~isempty(obj.GMLMstructure.Groups(jj).scaleParams)) scaled_VT(jj) = true; - params.Groups(jj) = obj.GMLMstructure.Groups(jj).scaleParams(params_0.Groups(jj)); + [params.Groups(jj), scaleP{jj}] = obj.GMLMstructure.Groups(jj).scaleParams(params_0.Groups(jj)); if(isfield(opts.Groups(jj), "dH")) opts.Groups(jj).dV = opts.Groups(jj).dV | opts.Groups(jj).dH; opts.Groups(jj).dT(:) = opts.Groups(jj).dT(:) | opts.Groups(jj).dH; - if(nargin > 3) + if(nargin > 3 && ~isempty(results)) if(opts.Groups(jj).dV && isempty(results.Groups(jj).dV)) error("Invalid results struct."); end @@ -1932,6 +1916,24 @@ function delete(obj) end end end + scaled_WB = isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.GMLMstructure.scaleParams); + if(scaled_WB) + [params, scaleP_WB] = obj.GMLMstructure.scaleParams(params); + + if(isfield(opts, "dH")) + opts.dB = opts.dB | opts.dH; + opts.dW = opts.dW | opts.dH; + + if(nargin > 3 && ~isempty(results)) + if(opts.dW && isempty(results.dW)) + error("Invalid results struct."); + end + if(opts.dB && isempty(results.dB) && obj.dim_B > 0) + error("Invalid results struct."); + end + end + end + end if(runHost) if(nargin < 4 || isempty(results)) @@ -1963,11 +1965,11 @@ function delete(obj) if(scaled_WB) - results = obj.Groups(jj).scaleDerivatives(results, params_0, true); + results = obj.GMLMstructure.scaleDerivatives(results, params_0, true, scaleP_WB); end for jj = 1:J if(scaled_VT(jj)) - results.Groups(jj) = obj.GMLMstructure.Groups(jj).scaleDerivatives(results.Groups(jj), params_0.Groups(jj), false); + results.Groups(jj) = obj.GMLMstructure.Groups(jj).scaleDerivatives(results.Groups(jj), params_0.Groups(jj), false, scaleP{jj}); end end results.log_likelihood = sum(results.trialLL, 'all'); @@ -1984,38 +1986,21 @@ function delete(obj) useAsync = true; params_0 = params; - scaled_WB = isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.scaleParams); - if(scaled_WB) - params = obj.GMLMstructure.scaleParams(params_0); - - if(isfield(opts, "dH")) - opts.dB = opts.dB | opts.dH; - opts.dW = opts.dW | opts.dH; - - if(nargin > 3) - if(opts.dW && isempty(results.dW)) - error("Invalid results struct."); - end - if(opts.dB && isempty(results.dB) && obj.dim_B > 0) - error("Invalid results struct."); - end - end - end - end J = obj.dim_J; scaled_VT = false(J,1); + scaleP = cell(J,1); for jj = 1:J if(isfield(obj.GMLMstructure.Groups(jj), "scaleParams") && ~isempty(obj.GMLMstructure.Groups(jj).scaleParams)) scaled_VT(jj) = true; - params.Groups(jj) = obj.GMLMstructure.Groups(jj).scaleParams(params_0.Groups(jj)); + [params.Groups(jj), scaleP{jj}] = obj.GMLMstructure.Groups(jj).scaleParams(params_0.Groups(jj)); if(isfield(opts.Groups(jj), "dH")) opts.Groups(jj).dV = opts.Groups(jj).dV | opts.Groups(jj).dH; opts.Groups(jj).dT(:) = opts.Groups(jj).dT(:) | opts.Groups(jj).dH; - if(nargin > 3) + if(nargin > 3 && ~isempty(results)) if(opts.Groups(jj).dV && isempty(results.Groups(jj).dV)) error("Invalid results struct."); end @@ -2028,6 +2013,24 @@ function delete(obj) end end end + scaled_WB = isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.GMLMstructure.scaleParams); + if(scaled_WB) + [params, scaleP_WB] = obj.GMLMstructure.scaleParams(params); + + if(isfield(opts, "dH")) + opts.dB = opts.dB | opts.dH; + opts.dW = opts.dW | opts.dH; + + if(nargin > 3 && ~isempty(results)) + if(opts.dW && isempty(results.dW)) + error("Invalid results struct."); + end + if(opts.dB && isempty(results.dB) && obj.dim_B > 0) + error("Invalid results struct."); + end + end + end + end if(runHost) if(nargin < 4 || isempty(results)) @@ -2063,11 +2066,11 @@ function delete(obj) end if(scaled_WB) - results = obj.Groups(jj).scaleDerivatives(results, params_0, true); + results = obj.GMLMstructure.scaleDerivatives(results, params_0, true, scaleP_WB); end for jj = 1:J if(scaled_VT(jj)) - results.Groups(jj) = obj.GMLMstructure.Groups(jj).scaleDerivatives(results.Groups(jj), params_0.Groups(jj), true); + results.Groups(jj) = obj.GMLMstructure.Groups(jj).scaleDerivatives(results.Groups(jj), params_0.Groups(jj), true, scaleP{jj}); end end @@ -2173,10 +2176,13 @@ function delete(obj) [samples, summary, HMC_settings, paramStruct, M] = runHMC_simple(obj, params_init, settings, varargin); [samples, samples_file_format, summary, HMC_settings, paramStruct, M] = runHMC_simpleLowerRAM(obj, params_init, settings, varargin); + [paramStruct2] = saveSampleToFile(obj, samples_file, paramStruct, sample_idx, scaled_WB, scaled_VT, save_H, saveUnscaled); + [samples_file_format, totalParams] = getSampleFileFormat(obj, TotalSamples, dataType_samples, paramStruct, scaled_WB, scaled_VT, saveUnscaled) [HMC_settings] = setupHMCparams(obj, nWarmup, nSamples, debugSettings); [results] = computeLogLikelihood_host_v2(obj, params, opts, results); + [log_rate, xx, R] = computeLogRate_host_v2(obj, params); [] = setupComputeStructuresHost(obj, reset, order); end diff --git a/+kgmlm/@GMLM/computeLogLikelihood_host_v2.m b/+kgmlm/@GMLM/computeLogLikelihood_host_v2.m index 2add63c..6e93e1f 100644 --- a/+kgmlm/@GMLM/computeLogLikelihood_host_v2.m +++ b/+kgmlm/@GMLM/computeLogLikelihood_host_v2.m @@ -21,68 +21,8 @@ P = size(Y,2); isPop = P > 1; -%% linear term -if(isempty(X_lin)) - log_rate = zeros(T, P); -else - if(~iscell(X_lin) && isPop) - log_rate = repmat(X_lin * params.B, [1 1 P]); - elseif(isPop) - log_rate = zeros(T, P); - for pp = 1:P - log_rate(:, pp) = X_lin{pp} * params.B(:,pp); - end - else - log_rate = zeros(T, P); - for pp = 1:P - tt = LL_info.dim_N_neuron_ranges(pp):(LL_info.dim_N_neuron_ranges(pp+1)-1); - log_rate(tt,:) = X_lin{tt} * params.B(:,pp); - end - end -end - -%% add constants -if(isPop) - log_rate = log_rate + params.W(:)'; -else - for pp = 1:P - tt = LL_info.dim_N_neuron_ranges(pp):(LL_info.dim_N_neuron_ranges(pp+1)-1); - log_rate(tt) = log_rate(tt) + params.W(pp); - end -end - - -%% add group terms -R = zeros(J,1); -xx = struct("c", cell(J,1), "a", [], "m", []); -for jj = 1:J - R(jj) = size(params.Groups(jj).V,2); - - xx(jj).c = zeros(T*A(jj), R(jj), D(jj)); - for dd = 1:D(jj) - F = getF(dd, X_groups(jj).factorIdxs , X_groups(jj).dim_F, params.Groups(jj)); - if(X_groups(jj).isShared(dd)) - XF = X_groups(jj).X_local{dd} * F; - xx(jj).c(X_groups(jj).iX_shared(dd).cols, :, dd) = XF(X_groups(jj).iX_shared(dd).rows,:); - elseif(size(X_groups(jj).X_local{dd},1) == T && A(jj) > 1) - xx(jj).c(:, :, dd) = repmat(X_groups(jj).X_local{dd} * F, [A 1]); - else - xx(jj).c(:, :, dd) = X_groups(jj).X_local{dd} * F; - end - end - - xx(jj).m = prod(xx(jj).c,3); - xx(jj).a = squeeze(sum(reshape(xx(jj).m, [T, A(jj), R(jj)]),2)); - - if(isPop) - log_rate = log_rate + xx(jj).a * params.Groups(jj).V'; - else - for pp = 1:P - tt = LL_info.dim_N_neuron_ranges(pp):(LL_info.dim_N_neuron_ranges(pp+1)-1); - log_rate(tt) = log_rate(tt) + xx(jj).a(tt,:)*params.Groups(jj).V(pp,:)'; - end - end -end +%% get rates +[log_rate, xx, R] = obj.computeLogRate_host_v2(params); compute_dll = opts.dB || opts.dW; for jj = 1:J @@ -91,13 +31,10 @@ %% compute term-wise log-rate - -LL_info_c = obj.LL_info; - if(compute_dll) - [ll,dll] = LL_info_c.logLikeFun(log_rate, Y, dt); + [ll,dll] = LL_info.logLikeFun(log_rate, Y, dt); else - ll = LL_info_c.logLikeFun(log_rate, Y, dt); + ll = LL_info.logLikeFun(log_rate, Y, dt); end if(~isempty(opts.trial_weights) && compute_dll) @@ -207,19 +144,6 @@ end -function [F] = getF(dd, factor_idx, dim_F, params_group) - dim_R = size(params_group.V,2); - fis = sort(find(factor_idx == dd)); - F = nan(dim_F(dd), dim_R); - - for rr = 1:dim_R - F_r = params_group.T{fis(1)}(:,rr); - for ss = 2:numel(fis) - F_r = kron(params_group.T{fis(ss)}(:,rr), F_r); - end - F(:, rr) = F_r; - end -end function [dFdT] = getdFdT(ss, dd, factor_idx, dim_F, params_group) dim_R = size(params_group.V,2); diff --git a/+kgmlm/@GMLM/plotSamples.m b/+kgmlm/@GMLM/plotSamples.m index 93958f2..9ea8333 100644 --- a/+kgmlm/@GMLM/plotSamples.m +++ b/+kgmlm/@GMLM/plotSamples.m @@ -107,7 +107,7 @@ V_c = V_c ./ sqrt(sum(V_c.^2,1)); end plot(1:sample,V_c') - title(sprintf('%s, rank = %d',paramStruct.Groups(jj).name, size(paramStruct.Groups(jj).T{1},2))); + title(sprintf('%s, rank = %d',paramStruct.Groups(jj).name, size(paramStruct.Groups(jj).T{1},2)), "interpreter", "none"); set(gca,'tickdir','out','box','off'); for ss = 1:min(3, numel(samples.Groups(jj).T)) @@ -118,7 +118,7 @@ end plot(1:sample,T_c'); set(gca,'tickdir','out','box','off'); - title(sprintf('dim = %s',paramStruct.Groups(jj).dim_names(ss))); + title(sprintf('dim = %s',paramStruct.Groups(jj).dim_names(ss)), "interpreter", "none"); end if(numel(samples.Groups(jj).T) == 1 && size(paramStruct.Groups(jj).T{1},2) > 1 && jj >= N_main) @@ -139,7 +139,7 @@ end plot(1:sample,T_c') set(gca,'tickdir','out','box','off'); - title(sprintf('dim = %s, second component', paramStruct.Groups(jj).dim_names(1))); + title(sprintf('dim = %s, second component', paramStruct.Groups(jj).dim_names(1)), "interpreter", "none"); end % if(numel(samples.Groups(jj).T) >= 2 || (numel(samples.Groups(jj).T) == 1 && jj >= N_main)) diff --git a/+kgmlm/@GMLM/runHMC_simple.m b/+kgmlm/@GMLM/runHMC_simple.m index 610bb3d..40d4464 100644 --- a/+kgmlm/@GMLM/runHMC_simple.m +++ b/+kgmlm/@GMLM/runHMC_simple.m @@ -104,7 +104,7 @@ end end -scaled_WB = isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.scaleParams); +scaled_WB = isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.GMLMstructure.scaleParams); scaled_VT = false(J,1); for jj = 1:J @@ -210,8 +210,8 @@ fprintf("Done.\n") %% initialize HMC state -HMC_state.stepSize.e = HMC_settings.stepSize.e_0; -HMC_state.stepSize.e_bar = HMC_settings.stepSize.e_0; +HMC_state.stepSize.e = HMC_settings.stepSize.e_init; +HMC_state.stepSize.e_bar = HMC_settings.stepSize.e_init; HMC_state.stepSize.x_bar_t = 0; HMC_state.stepSize.x_t = 0; HMC_state.stepSize.H_sum = 0; @@ -221,23 +221,6 @@ resultStruct_empty = obj.getEmptyResultsStruct(optStruct_empty); resultStruct = obj.computeLogPosterior(paramStruct, optStruct); sample_idx = 1; -if(scaled_WB) - - params_0 = obj.GMLMstructure.scaleParams(paramStruct); - - samples.W(:, sample_idx) = params_0.W(:); - samples.B(:,:,sample_idx) = params_0.B(:,:); - - if(saveUnscaled) - samples.W_scaled(:, sample_idx) = paramStruct.W(:); - samples.B_scaled(:,:,sample_idx) = paramStruct.B(:,:); - end -else - samples.W(:, sample_idx) = paramStruct.W(:); - samples.B(:,:,sample_idx) = paramStruct.B(:,:); -end -samples.H(:,1) = paramStruct.H(:); -samples.H_gibbs(:,1) = paramStruct.H_gibbs(:); for jj = 1:J @@ -270,6 +253,23 @@ end metrics.Groups(jj).N(:, 1) = sqrt(metrics.Groups(jj).N(:, 1)); end +if(scaled_WB) + + params_0 = obj.GMLMstructure.scaleParams(paramStruct); + + samples.W(:, sample_idx) = params_0.W(:); + samples.B(:,:,sample_idx) = params_0.B(:,:); + + if(saveUnscaled) + samples.W_scaled(:, sample_idx) = paramStruct.W(:); + samples.B_scaled(:,:,sample_idx) = paramStruct.B(:,:); + end +else + samples.W(:, sample_idx) = paramStruct.W(:); + samples.B(:,:,sample_idx) = paramStruct.B(:,:); +end +samples.H(:,1) = paramStruct.H(:); +samples.H_gibbs(:,1) = paramStruct.H_gibbs(:); samples_block.idx(1) = 1; samples_block.trialLL(1, :, :) = resultStruct.trialLL; @@ -330,6 +330,7 @@ nlpostFunction = @(ww) obj.vectorizedNLPost_func(ww, paramStruct, optStruct, resultStruct); try HMC_state.e_scale = samples.e_scale(sample_idx); + %HMC_state.stepSize.e = 5e-3; [samples.accepted(sample_idx), samples.errors(sample_idx), w_new, samples.log_p_accept(sample_idx), resultStruct] = kgmlm.fittingTools.HMCstep_diag(w_init, M, nlpostFunction, HMC_state); if(samples.accepted(sample_idx)) paramStruct = obj.devectorizeParams(w_new, paramStruct, optStruct); @@ -349,26 +350,6 @@ %% store samples paramStruct2 = paramStruct; - if(scaled_WB) - params_0 = obj.GMLMstructure.scaleParams(paramStruct); - - samples.W(:, sample_idx) = params_0.W(:); - samples.B(:,:,sample_idx) = params_0.B(:,:); - - paramStruct2.W(:) = params_0.W(:); - paramStruct2.B(:) = params_0.B(:); - - if(saveUnscaled) - samples.W_scaled(:, sample_idx) = paramStruct.W(:); - samples.B_scaled(:,:,sample_idx) = paramStruct.B(:,:); - end - else - samples.W(:, sample_idx) = paramStruct.W(:); - samples.B(:,:,sample_idx) = paramStruct.B(:,:); - end - - samples.H(:, sample_idx) = paramStruct.H(:); - samples.H_gibbs(:, sample_idx) = paramStruct.H_gibbs(:); for jj = 1:J samples.Groups(jj).H(:,sample_idx) = paramStruct.Groups(jj).H; @@ -404,6 +385,26 @@ metrics.Groups(jj).N(:, sample_idx) = sqrt(metrics.Groups(jj).N(:, sample_idx)); end + if(scaled_WB) + params_0 = obj.GMLMstructure.scaleParams(paramStruct2); + + samples.W(:, sample_idx) = params_0.W(:); + samples.B(:,:,sample_idx) = params_0.B(:,:); + + paramStruct2.W(:) = params_0.W(:); + paramStruct2.B(:) = params_0.B(:); + + if(saveUnscaled) + samples.W_scaled(:, sample_idx) = paramStruct.W(:); + samples.B_scaled(:,:,sample_idx) = paramStruct.B(:,:); + end + else + samples.W(:, sample_idx) = paramStruct.W(:); + samples.B(:,:,sample_idx) = paramStruct.B(:,:); + end + + samples.H(:, sample_idx) = paramStruct.H(:); + samples.H_gibbs(:, sample_idx) = paramStruct.H_gibbs(:); samples.log_post(sample_idx) = resultStruct.log_post; samples.log_like(sample_idx) = resultStruct.log_likelihood; @@ -423,7 +424,7 @@ %% print any updates - if(sample_idx <= 50 || (sample_idx <= 500 && mod(sample_idx,20) == 0) || mod(sample_idx,50) == 0 || sample_idx == TotalSamples || (HMC_settings.verbose && mod(sample_idx,20) == 0)) + if(sample_idx <= 500 || (sample_idx <= 1000 && mod(sample_idx,20) == 0) || mod(sample_idx,50) == 0 || sample_idx == TotalSamples || (HMC_settings.verbose && mod(sample_idx,20) == 0)) if(sample_idx == TotalSamples) ww = (HMC_settings.nWarmup+1):sample_idx; else @@ -532,6 +533,8 @@ summary.HMC_state = HMC_state; summary.acceptRate = mean(samples.accepted(ss_all)); +summary.HMC_state.M = M; + fprintf("done.\n"); delete(trialLL_file.Filename); % delete the temporary storage file for trial log likelihoods diff --git a/+kgmlm/@GMLM/runHMC_simpleLowerRAM.m b/+kgmlm/@GMLM/runHMC_simpleLowerRAM.m index dbb9a06..4070d6b 100644 --- a/+kgmlm/@GMLM/runHMC_simpleLowerRAM.m +++ b/+kgmlm/@GMLM/runHMC_simpleLowerRAM.m @@ -139,7 +139,7 @@ end end - scaled_WB = isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.scaleParams); + scaled_WB = isfield(obj.GMLMstructure, "scaleParams") && ~isempty(obj.GMLMstructure.scaleParams); scaled_VT = false(J,1); for jj = 1:J @@ -153,131 +153,92 @@ samples_block.samplesBlockSize = min(HMC_settings.samplesBlockSize, TotalSamples); samples_block.idx = nan(samples_block.samplesBlockSize, 1); samples_block.trialLL = nan([samples_block.samplesBlockSize DT(1) DT(2)], dataType); + + s_type = zeros(1,1,dataType); + LL_space_bytes = TotalSamples * DT(1) * DT(2) * whos("s_type").bytes; + allocateFile = true; if(exist(HMC_settings.trialLLfile, "file")) if(isfield(HMC_settings, "delete_temp_file")) - continue_opt = HMC_settings.delete_temp_file; + end_opt = HMC_settings.delete_temp_file; else - continue_opt = input(sprintf("Temporary storage file already found (%s)! Overwrite and continue? (y/n)\n ", HMC_settings.trialLLfile), "s"); - continue_opt = startsWith(continue_opt, "y", "IgnoreCase", true); - end - if(continue_opt) - fprintf("Deleting temporary storage file and continuing...\n"); + end_opt = input(sprintf("Temporary storage file already found (%s)! Overwrite and continue? (y/n)\n ", HMC_settings.trialLLfile), "s"); + end_opt = startsWith(end_opt, "y", "IgnoreCase", true); + end + if(end_opt) + fileSize = dir(HMC_settings.trialLLfile).bytes; + + if(fileSize < LL_space_bytes) + fprintf("Deleting temporary storage file and continuing...\n"); + delete(HMC_settings.trialLLfile); + else + fprintf("Overwriting temporary storage file and continuing...\n"); + allocateFile = false; + end else error("Temporary file for storing trial log likelihood samples already exists!\nSpecify another filename or delete if not in use.\n\tfile: %s", HMC_settings.trialLLfile); end end - if(exist(HMC_settings.samplesFile, "file")) - if(isfield(HMC_settings, "delete_samples_file")) - continue_opt = HMC_settings.delete_samples_file; - else - continue_opt = input(sprintf("Samples storage file already found (%s)! Overwrite and continue? (y/n)\n ", HMC_settings.samplesFile), "s"); - continue_opt = startsWith(continue_opt, "y", "IgnoreCase", true); - end - if(continue_opt) - fprintf("Deleting samples storage file and continuing...\n"); - else - error("Temporary file for storing samples already exists!\nSpecify another filename or delete if not in use.\n\tfile: %s", HMC_settings.samplesFile); - end - end %makes space for trialLL without ever making the full matrix in RAM: this is a cludge around the compression that mafile puts in automatically - Z = zeros(TotalSamples,1,dataType); - fprintf("Preallocating HD space to store LLs for each trial (~%.3f gb)...\n", whos("Z").bytes * DT(1) * DT(2) / 1e9); - - obj.temp_storage_file = HMC_settings.trialLLfile; - fileID = fopen(HMC_settings.trialLLfile, "w"); - for ii = 1:DT(1) - for jj = 1:DT(2) - fwrite(fileID, Z, dataType); + if(allocateFile) + Z = zeros(TotalSamples,1,dataType); + fprintf("Preallocating HD space to store LLs for each trial (~%.3f gb)...\n", LL_space_bytes/1e9); + + obj.temp_storage_file = HMC_settings.trialLLfile; + fileID = fopen(HMC_settings.trialLLfile, "w"); + for ii = 1:DT(1) + for jj = 1:DT(2) + fwrite(fileID, Z, dataType); + end end + clear Z; + fclose(fileID); end - clear Z; - fclose(fileID); trialLL_file = memmapfile(HMC_settings.trialLLfile,... "Format",{dataType,[TotalSamples DT(1) DT(2)],"trialLL"}, ... "Writable", true); - samples_file_format = cell(0, 3); - ctr = 1; - totalParams = 0; - - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [numel(paramStruct.W) TotalSamples]; - samples_file_format{ctr, 3} = "W"; totalParams = totalParams + numel(paramStruct.W); - ctr = ctr + 1; - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [size(paramStruct.B) TotalSamples]; - samples_file_format{ctr, 3} = "B"; totalParams = totalParams + numel(paramStruct.B); - ctr = ctr + 1; - - if(saveUnscaled && scaled_WB) - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [numel(paramStruct.W) TotalSamples]; - samples_file_format{ctr, 3} = "W_scaled"; totalParams = totalParams + numel(paramStruct.W); - ctr = ctr + 1; - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [size(paramStruct.B) TotalSamples]; - samples_file_format{ctr, 3} = "B_scaled"; totalParams = totalParams + numel(paramStruct.B); - ctr = ctr + 1; - end - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [numel(paramStruct.H) TotalSamples]; - samples_file_format{ctr, 3} = "H"; totalParams = totalParams + numel(paramStruct.H); - ctr = ctr + 1; - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [numel(paramStruct.H_gibbs) TotalSamples]; - samples_file_format{ctr, 3} = "H_gibbs"; totalParams = totalParams + numel(paramStruct.H_gibbs); - ctr = ctr + 1; - - for jj = 1:J - - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [size(paramStruct.Groups(jj).V) TotalSamples]; - samples_file_format{ctr, 3} = sprintf("G%d_V", jj); - ctr = ctr + 1; totalParams = totalParams + numel(paramStruct.Groups(jj).V); - - for ss = 1:S(jj) - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [size(paramStruct.Groups(jj).T{ss}) TotalSamples]; - samples_file_format{ctr, 3} = sprintf("G%d_T_%d", jj, ss); - ctr = ctr + 1; totalParams = totalParams + numel(paramStruct.Groups(jj).T{ss}); - end - - if(saveUnscaled && scaled_VT(jj)) - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [size(paramStruct.Groups(jj).V) TotalSamples]; - samples_file_format{ctr, 3} = sprintf("G%d_V_scaled", jj); - ctr = ctr + 1; totalParams = totalParams + numel(paramStruct.Groups(jj).V); - - for ss = 1:S(jj) - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [size(paramStruct.Groups(jj).T{ss}) TotalSamples]; - samples_file_format{ctr, 3} = sprintf("G%d_T_%d_scaled", jj, ss); - ctr = ctr + 1; totalParams = totalParams + numel(paramStruct.Groups(jj).T{ss}); + [samples_file_format, totalParams] = getSampleFileFormat(obj, TotalSamples, dataType_samples, paramStruct, scaled_WB, scaled_VT, saveUnscaled); + + + s_type = zeros(1,1,dataType_samples); + params_space_bytes = TotalSamples * totalParams * whos("s_type").bytes; + allocateFile = true; + if(exist(HMC_settings.samplesFile, "file")) + if(isfield(HMC_settings, "delete_samples_file")) + end_opt = HMC_settings.delete_samples_file; + else + end_opt = input(sprintf("Samples storage file already found (%s)! Overwrite and continue? (y/n)\n ", HMC_settings.samplesFile), "s"); + end_opt = startsWith(end_opt, "y", "IgnoreCase", true); + end + if(end_opt) + fileSize = dir(HMC_settings.samplesFile).bytes; + + if(fileSize < params_space_bytes) + fprintf("Deleting samples storage file and continuing...\n"); + delete(HMC_settings.samplesFile); + else + fprintf("Overwriting samples storage file and continuing...\n"); + allocateFile = false; end + else + error("Temporary file for storing samples already exists!\nSpecify another filename or delete if not in use.\n\tfile: %s", HMC_settings.samplesFile); end - - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [numel(paramStruct.Groups(jj).H) TotalSamples]; - samples_file_format{ctr, 3} = sprintf("G%d_H", jj); - ctr = ctr + 1; totalParams = totalParams + numel(paramStruct.Groups(jj).H); - samples_file_format{ctr, 1} = dataType_samples; - samples_file_format{ctr, 2} = [numel(paramStruct.Groups(jj).H_gibbs) TotalSamples]; - samples_file_format{ctr, 3} = sprintf("G%d_H_gibbs", jj); - ctr = ctr + 1; totalParams = totalParams + numel(paramStruct.Groups(jj).H_gibbs); end - Z = zeros(TotalSamples, 1,dataType_samples); - fprintf("Preallocating HD space to store samples (~%.3f gb)...\n", whos("Z").bytes * totalParams / 1e9); - fileID = fopen(HMC_settings.samplesFile, "w"); - for ii = 1:totalParams - fwrite(fileID, Z, dataType_samples); + if(allocateFile) + Z = zeros(TotalSamples, 1,dataType_samples); + fprintf("Preallocating HD space to store samples (~%.3f gb)...\n", params_space_bytes / 1e9); + fileID = fopen(HMC_settings.samplesFile, "w"); + for ii = 1:totalParams + fwrite(fileID, Z, dataType_samples); + end + clear Z; + fclose(fileID); end - clear Z; - fclose(fileID); samples_file = memmapfile(HMC_settings.samplesFile,... "Format",samples_file_format, ... @@ -285,8 +246,8 @@ fprintf("Done.\n") %% initialize HMC state - HMC_state.stepSize.e = HMC_settings.stepSize.e_0; - HMC_state.stepSize.e_bar = HMC_settings.stepSize.e_0; + HMC_state.stepSize.e = HMC_settings.stepSize.e_init; + HMC_state.stepSize.e_bar = HMC_settings.stepSize.e_init; HMC_state.stepSize.x_bar_t = 0; HMC_state.stepSize.x_t = 0; HMC_state.stepSize.H_sum = 0; @@ -295,66 +256,7 @@ %% adds the initial point to the samples resultStruct = obj.computeLogPosterior(paramStruct, optStruct); sample_idx = 1; - paramStruct2 = paramStruct; - if(scaled_WB) - params_0 = obj.GMLMstructure.scaleParams(paramStruct); - - samples_file.Data.W(:, sample_idx) = params_0.W(:); - if(save_H.B) - samples_file.Data.B(:,:,sample_idx) = params_0.B(:,:); - end - paramStruct2.W(:) = params_0.W(:); - paramStruct2.B(:) = params_0.B(:); - - if(saveUnscaled) - samples_file.Data.W_scaled(:, sample_idx) = paramStruct.W(:); - if(save_H.B) - samples_file.Data.B_scaled(:,:,sample_idx) = paramStruct.B(:,:); - end - end - else - samples_file.Data.W(:, sample_idx) = paramStruct.W(:); - if(save_H.B) - samples_file.Data.B(:,:,sample_idx) = paramStruct.B(:,:); - end - end - if(save_H.H) - samples_file.Data.H(:,1) = paramStruct.H(:); - end - if(save_H.H_gibbs) - samples_file.Data.H_gibbs(:,1) = paramStruct.H_gibbs(:); - end - - for jj = 1:J - if(save_H.Groups(jj).H) - samples_file.Data.(sprintf("G%d_H", jj))(:,1) = paramStruct.Groups(jj).H; - end - if(save_H.Groups(jj).H_gibbs) - samples_file.Data.(sprintf("G%d_H_gibbs", jj))(:,1) = paramStruct.Groups(jj).H_gibbs; - end - if(scaled_VT(jj)) - params_0 = obj.GMLMstructure.Groups(jj).scaleParams(paramStruct.Groups(jj)); - - samples_file.Data.(sprintf("G%d_V", jj))(:,:,sample_idx) = params_0.V; - paramStruct2.Groups(jj).V(:) = params_0.V(:); - - if(saveUnscaled) - samples_file.Data.(sprintf("G%d_V_scaled", jj))(:,:,sample_idx) = paramStruct.Groups(jj).V; - end - for ss = 1:S(jj) - samples_file.Data.(sprintf("G%d_T_%d", jj, ss))(:,:,sample_idx) = params_0.T{ss}; - paramStruct2.Groups(jj).T{ss}(:) = params_0.T{ss}(:); - if(saveUnscaled) - samples_file.Data.(sprintf("G%d_T_%d_scaled", jj, ss))(:,:,sample_idx) = paramStruct.Groups(jj).T{ss}; - end - end - else - samples_file.Data.(sprintf("G%d_V", jj))(:,:,sample_idx) = paramStruct.Groups(jj).V; - for ss = 1:S(jj) - samples_file.Data.(sprintf("G%d_T_%d", jj, ss))(:,:,sample_idx) = paramStruct.Groups(jj).T{ss}; - end - end - end + paramStruct2 = obj.saveSampleToFile(samples_file, paramStruct, sample_idx, scaled_WB, scaled_VT, save_H, saveUnscaled); samples_block.idx(1) = 1; samples_block.trialLL(1, :, :) = resultStruct.trialLL; @@ -400,10 +302,13 @@ end %% run sampler +err_ctr = 0; +lastSavePoint = nan; for sample_idx = start_idx:TotalSamples %% save partial progress if requested if(isfield(HMC_settings, "savePartialProgressN") && HMC_settings.savePartialProgressN > 0 && mod(sample_idx, HMC_settings.savePartialProgressN) == 0 && sample_idx > start_idx) obj.destroy_temp_storage_file = false; + lastSavePoint = sample_idx; randomNumberState = rng(); save(HMC_settings.savePartialProgressFile, "-v7.3", "randomNumberState", "HMC_settings", "paramStruct2", "HMC_state", "paramStruct", "resultStruct", "samples_file_format", "samples_block", "samples", "scaled_WB", "scaled_VT", "sample_idx", "modelInfo"); @@ -447,76 +352,36 @@ % adjust step size: during warmup if(~samples.errors(sample_idx)) lpa = samples.log_p_accept(sample_idx); + err_ctr = 0; else lpa = nan; + err_ctr = err_ctr + 1; end HMC_state = kgmlm.fittingTools.adjustHMCstepSize(sample_idx, HMC_state, HMC_settings.stepSize, lpa); samples.e(:,sample_idx) = [HMC_state.stepSize.e; HMC_state.stepSize.e_bar]; - - - %% store samples - paramStruct2 = paramStruct; - if(scaled_WB) - params_0 = obj.GMLMstructure.scaleParams(paramStruct); - samples_file.Data.W(:, sample_idx) = params_0.W(:); - if(save_H.B) - samples_file.Data.B(:,:,sample_idx) = params_0.B(:,:); - end - paramStruct2.W(:) = params_0.W(:); - paramStruct2.B(:) = params_0.B(:); - - if(saveUnscaled) - samples_file.Data.W_scaled(:, sample_idx) = paramStruct.W(:); - if(save_H.B) - samples_file.Data.B_scaled(:,:,sample_idx) = paramStruct.B(:,:); - end - end - else - samples_file.Data.W(:, sample_idx) = paramStruct.W(:); - if(save_H.B) - samples_file.Data.B(:,:,sample_idx) = paramStruct.B(:,:); - end - end - - if(save_H.H) - samples_file.Data.H(:, sample_idx) = paramStruct.H(:); - end - if(save_H.H_gibbs) - samples_file.Data.H_gibbs(:, sample_idx) = paramStruct.H_gibbs(:); - end - - for jj = 1:J - if(save_H.Groups(jj).H) - samples_file.Data.(sprintf("G%d_H", jj))(:,sample_idx) = paramStruct.Groups(jj).H; - end - if(save_H.Groups(jj).H_gibbs) - samples_file.Data.(sprintf("G%d_H_gibbs", jj))(:,sample_idx) = paramStruct.Groups(jj).H_gibbs; - end - - if(scaled_VT(jj)) - params_0 = obj.GMLMstructure.Groups(jj).scaleParams(paramStruct.Groups(jj)); - - samples_file.Data.(sprintf("G%d_V", jj))(:,:,sample_idx) = params_0.V; - paramStruct2.Groups(jj).V(:) = params_0.V(:); - - if(saveUnscaled) - samples_file.Data.(sprintf("G%d_V_scaled", jj))(:,:,sample_idx) = paramStruct.Groups(jj).V; - end - for ss = 1:S(jj) - samples_file.Data.(sprintf("G%d_T_%d", jj, ss))(:,:,sample_idx) = params_0.T{ss}; - paramStruct2.Groups(jj).T{ss}(:) = params_0.T{ss}(:); - if(saveUnscaled) - samples_file.Data.(sprintf("G%d_T_%d_scaled", jj, ss))(:,:,sample_idx) = paramStruct.Groups(jj).T{ss}; - end + %% check for too many errors in a row: stop running to not waste GPU power + if(isfield(HMC_settings, "max_errors") && err_ctr >= HMC_settings.max_errors) + end_opt = input(sprintf("Many HMC errors or diveragences have occured in a row (%d)! End now? (y/n)\n ", err_ctr), "s"); + end_opt = startsWith(end_opt, "y", "IgnoreCase", true); + if(end_opt) + fprintf("Ending HMC run at sample %d.\n", sample_idx); + if(~isnan(lastSavePoint)) + fprintf("Progress last saved at sample %d.\n", lastSavePoint); + else + fprintf("No progresss saved!\n"); end + error("HMC encountered errors: M or step size not configured well."); else - samples.Groups(jj).V(:,:,sample_idx) = paramStruct.Groups(jj).V; - for ss = 1:S(jj) - samples.Groups(jj).T{ss}(:,:,sample_idx) = paramStruct.Groups(jj).T{ss}; - end + fprintf("Continuing anyway: will stop if errors persist.\n"); + err_ctr = 0; end end + + + %% store samples + + paramStruct2 = obj.saveSampleToFile(samples_file, paramStruct, sample_idx, scaled_WB, scaled_VT, save_H, saveUnscaled); samples.log_post(sample_idx) = resultStruct.log_post; samples.log_like(sample_idx) = resultStruct.log_likelihood; @@ -535,7 +400,7 @@ end %% print any updates - if(sample_idx <= 50 || (sample_idx <= 500 && mod(sample_idx,20) == 0) || mod(sample_idx,50) == 0 || sample_idx == TotalSamples || (HMC_settings.verbose && mod(sample_idx,20) == 0)) + if(sample_idx <= 500 || (sample_idx <= 1000 && mod(sample_idx,20) == 0) || mod(sample_idx,50) == 0 || sample_idx == TotalSamples || (HMC_settings.verbose && mod(sample_idx,20) == 0)) if(sample_idx == TotalSamples) ww = (HMC_settings.nWarmup+1):sample_idx; else @@ -578,6 +443,7 @@ clear vectorizedSamples; end end + end %% finish sampler @@ -645,6 +511,8 @@ summary.HMC_state = HMC_state; summary.acceptRate = mean(samples.accepted(ss_all)); +summary.HMC_state.M = M; + fprintf("done.\n"); delete(trialLL_file.Filename); % delete the temporary storage file for trial log likelihoods diff --git a/+kgmlm/@GMLM/setupHMCparams.m b/+kgmlm/@GMLM/setupHMCparams.m index 8ce8869..db3667b 100644 --- a/+kgmlm/@GMLM/setupHMCparams.m +++ b/+kgmlm/@GMLM/setupHMCparams.m @@ -31,9 +31,10 @@ mkdir(TempFolder); end - +HMC_settings.max_errors = 100; % max consecutive HMC errors or divergent trajectories before pausing run ("runHMC_simpleLowerRam" only) %step size paramters %for the dual-averging updates +HMC_settings.stepSize.e_init = 1e-4; HMC_settings.stepSize.e_0 = 1e-3; HMC_settings.stepSize.delta = [0.8; 0.9]; HMC_settings.stepSize.gamma = 0.05; @@ -52,7 +53,7 @@ if(~debugSettings) - nWarmup_min = 15e3; + nWarmup_min = 1e3; if(nWarmup < nWarmup_min) warning('Default HMC warmup schedule espected at least %d samples. Schedule will need to be modified.', nWarmup_min); end @@ -60,7 +61,7 @@ HMC_settings.M_est.first_sample = [25001]; %when to estimate cov matrix. At sample=samples(ii), will use first_sample(samples(ii)):sample HMC_settings.M_est.samples = [55000]; - HMC_settings.stepSize.schedule = [2 10000; + HMC_settings.stepSize.schedule = [11 10000; 10001 25000; 55001 60000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) @@ -70,7 +71,7 @@ HMC_settings.M_est.first_sample = [25001]; %when to estimate cov matrix. At sample=samples(ii), will use first_sample(samples(ii)):sample HMC_settings.M_est.samples = [45000]; - HMC_settings.stepSize.schedule = [2 10000; + HMC_settings.stepSize.schedule = [11 10000; 10001 25000; 45001 50000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) @@ -81,7 +82,7 @@ HMC_settings.M_est.first_sample = [15001]; %when to estimate cov matrix. At sample=samples(ii), will use first_sample(samples(ii)):sample HMC_settings.M_est.samples = [35000]; - HMC_settings.stepSize.schedule = [2 10000; + HMC_settings.stepSize.schedule = [11 10000; 10001 15000; 35001 40000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) @@ -91,7 +92,7 @@ HMC_settings.M_est.first_sample = [20001]; %when to estimate cov matrix. At sample=samples(ii), will use first_sample(samples(ii)):sample HMC_settings.M_est.samples = [32000]; - HMC_settings.stepSize.schedule = [2 10000; + HMC_settings.stepSize.schedule = [11 10000; 10001 20000; 32001 3500]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) @@ -112,7 +113,7 @@ HMC_settings.M_est.first_sample = []; HMC_settings.M_est.samples = []; - HMC_settings.stepSize.schedule = [2 15000; + HMC_settings.stepSize.schedule = [11 15000; 15001 25000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) HMC_settings.stepSize.scaleRanges = [25001 nSamples]; @@ -120,7 +121,7 @@ HMC_settings.M_est.first_sample = [10001]; %when to estimate cov matrix. At sample=samples(ii), will use first_sample(samples(ii)):sample HMC_settings.M_est.samples = [22000]; - HMC_settings.stepSize.schedule = [2 4000; + HMC_settings.stepSize.schedule = [11 4000; 4001 7000; 22001 25000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) @@ -131,20 +132,43 @@ HMC_settings.M_est.first_sample = []; HMC_settings.M_est.samples = []; - HMC_settings.stepSize.schedule = [2 10000; - 10001 18000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) + HMC_settings.stepSize.schedule = [2 7500; + 7501 15000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) + + HMC_settings.stepSize.scaleRanges = [15001 nSamples]; + elseif(nWarmup >= 15e3) + + HMC_settings.M_est.first_sample = []; + HMC_settings.M_est.samples = []; + + HMC_settings.stepSize.schedule = [11 7500; + 7501 14000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) + + HMC_settings.stepSize.scaleRanges = [14001 nSamples]; + elseif(nWarmup >= 10e3) + + HMC_settings.M_est.first_sample = []; + HMC_settings.M_est.samples = []; + + HMC_settings.stepSize.schedule = [11 4000; + 6001 9000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) + + HMC_settings.stepSize.scaleRanges = [4001 6000; + 9001 nSamples]; + elseif(nWarmup >= 4e3) + HMC_settings.M_est.first_sample = [ ]; %when to estimate cov matrix. At sample=samples(ii), will use first_sample(samples(ii)):sample + HMC_settings.M_est.samples = []; + + HMC_settings.stepSize.schedule = [11 (nWarmup - 2e3)]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) - HMC_settings.stepSize.scaleRanges = [18001 nSamples]; + HMC_settings.stepSize.scaleRanges = [(nWarmup - 2e3 + 1) nSamples]; else - HMC_settings.M_est.first_sample = [ 8001]; %when to estimate cov matrix. At sample=samples(ii), will use first_sample(samples(ii)):sample - HMC_settings.M_est.samples = [13000]; + HMC_settings.M_est.first_sample = [ ]; %when to estimate cov matrix. At sample=samples(ii), will use first_sample(samples(ii)):sample + HMC_settings.M_est.samples = []; - HMC_settings.stepSize.schedule = [2 5000; - 5001 8000; - 13001 15000]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) + HMC_settings.stepSize.schedule = [2 nWarmup]; %each row gives a range of trials to estimate step size (restarts estimation at each sample = schedule(ii,1)) - HMC_settings.stepSize.scaleRanges = [8001 13000; - nWarmup + [1 nSamples]]; + HMC_settings.stepSize.scaleRanges = [(nWarmup + 1) nSamples]; end @@ -199,5 +223,5 @@ if(isempty(HMC_settings.M_est.samples)) - warning("HMC settings will only use the initial mass matrix - no estimation of the posterior covariance.\n"); + warning("HMC settings will only use the initial mass matrix - no estimation of the posterior covariance."); end diff --git a/example/+DMC/+utils/gigrnd.m b/example/+DMC/+utils/gigrnd.m index 5ca3e8a..f9d2bcc 100644 --- a/example/+DMC/+utils/gigrnd.m +++ b/example/+DMC/+utils/gigrnd.m @@ -62,6 +62,7 @@ mode = omega / (sqrt((1-lambda)^2 + omega^2) + 1-lambda); end end % gig_mode + function X = ROU_shift(lambda, lambda_old, omega, alpha) %% Ratio-of-uniforms with shift by mode % shortcuts diff --git a/src/kcBase.cu b/src/kcBase.cu index b66417a..2c2e9f4 100644 --- a/src/kcBase.cu +++ b/src/kcBase.cu @@ -573,7 +573,14 @@ cublasStatus_t GPUData::GEMM(GPUData * C, const GPUData algo = CUBLAS_GEMM_DEFAULT; } - #if __CUDA_ARCH__ >= 700 + #if __CUDA_ARCH__ >= 800 + if(sizeof(FPTYPE) <= 4) { + algo = CUBLAS_GEMM_DEFAULT_TENSOR_OP; + } + else { + algo = CUBLAS_GEMM_DEFAULT; + } + #elif __CUDA_ARCH__ >= 700 if(sizeof(FPTYPE) <= 4) { algo = CUBLAS_GEMM_ALGO0_TENSOR_OP; if((cols_op_A <= MAX_DEFAULT && rows_op_A <= MAX_DEFAULT && cols_op_B <= MAX_DEFAULT && rows_op_B <= MAX_DEFAULT) || cols_op_B > MAX_COLS_ALGO0) { @@ -621,8 +628,28 @@ cublasStatus_t GPUData::GEMM(GPUData * C, const GPUData else if(depth == 1 && (algo != CUBLAS_GEMM_DEFAULT_TENSOR_OP && algo != CUBLAS_GEMM_DEFAULT) && cols_op_B > MAX_COLS) { if(multType != NULL) {multType[0] = 3;}; //{multType[0] = algo;}; //for largish size of C, call GEMM (single - run below) + + size_t stridedRuns = cols_op_B / MAX_COLS; + if(stridedRuns > 1) { + ce = cublasGEMMEXStridedBatched(handle, + op_A, + op_B, + rows_op_A, MAX_COLS, cols_op_A, + &alpha, + getData_gpu(), getLD_gpu(), + 0, + B->getData_gpu(), B->getLD_gpu(), + B->getLD_gpu() * MAX_COLS, + &beta, + C->getData_gpu(), C->getLD_gpu(), + C->getLD_gpu() * MAX_COLS, + stridedRuns, algo); + } + else { + stridedRuns = 0; + } - for(size_t cc = 0; cc < cols_op_B && ce == CUBLAS_STATUS_SUCCESS; cc += MAX_COLS) { + for(size_t cc = stridedRuns * MAX_COLS; cc < cols_op_B && ce == CUBLAS_STATUS_SUCCESS; cc += MAX_COLS) { size_t cols_op_B_c = (cols_op_B - cc > MAX_COLS) ? MAX_COLS : (cols_op_B - cc); ce = cublasGEMMEX(handle, op_A, diff --git a/src/kcBase.hpp b/src/kcBase.hpp index 88146bf..b89ddc5 100644 --- a/src/kcBase.hpp +++ b/src/kcBase.hpp @@ -428,6 +428,17 @@ class GPUGL_base { inline bool switchToDevice(const bool printOnly = false) { return checkCudaErrors(cudaSetDevice(dev), "error switching to device", printOnly); } + bool checkDeviceComputeCapability(const bool printOnly = false) { + cudaDeviceProp deviceProp; + checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev), "Error getting CUDA device properties (checkDeviceComputeCapability)", printOnly); + if(610 <= deviceProp.major*100 + deviceProp.minor*10) { + return true; + } + else { + checkCudaErrors(cudaErrorInvalidDevice, "CUDA compute capability error (requires 6.1 or greater)", printOnly); + return false; + } + } public: diff --git a/src/kcDefs.hpp b/src/kcDefs.hpp index 3381210..6eeeb64 100644 --- a/src/kcDefs.hpp +++ b/src/kcDefs.hpp @@ -28,9 +28,9 @@ namespace kCUDA { inline dim3 make_dim3(size_t x, size_t y = 1, size_t z = 1) { dim3 dims; - dims.x = x; - dims.y = y; - dims.z = z; + dims.x = static_cast(x); + dims.y = static_cast(y); + dims.z = static_cast(z); return dims; } diff --git a/src/kcGLM.cu b/src/kcGLM.cu index 1cba5b3..c11c1a7 100644 --- a/src/kcGLM.cu +++ b/src/kcGLM.cu @@ -40,7 +40,7 @@ GPUGLM::GPUGLM(const GPUGLM_structure_args * GLMstructure, cons //build each block dim_K_ = GLMstructure->dim_K; gpu_blocks.resize(blocks.size()); - for(int bb = 0; bb < blocks.size(); bb++) { + for(unsigned int bb = 0; bb < blocks.size(); bb++) { GPUGLM_computeBlock * block = new GPUGLM_computeBlock(GLMstructure, blocks[bb], max_trials, msg); gpu_blocks[bb] = block; } @@ -64,11 +64,11 @@ void GPUGLM::computeLogLikelihood(const GPUGLM_params * params, } //call bits of LL computation - for(int bb = 0; bb < gpu_blocks.size(); bb++) { + for(unsigned int bb = 0; bb < gpu_blocks.size(); bb++) { gpu_blocks[bb]->syncStreams(); gpu_blocks[bb]->computeLogLike(opts, isSparse[bb]); } - for(int bb = 0; bb < gpu_blocks.size(); bb++) { + for(unsigned int bb = 0; bb < gpu_blocks.size(); bb++) { gpu_blocks[bb]->syncStreams(); gpu_blocks[bb]->computeDerivatives(opts, isSparse[bb]); } diff --git a/src/kcGLM_computeBlock.cu b/src/kcGLM_computeBlock.cu index dcc6969..ef2dbf0 100644 --- a/src/kcGLM_computeBlock.cu +++ b/src/kcGLM_computeBlock.cu @@ -25,6 +25,7 @@ GPUGLM_computeBlock::GPUGLM_computeBlock(const GPUGLM_structure_argsdev_num; switchToDevice(); + this->checkDeviceComputeCapability(); size_t dim_M = block->trials.size(); if(dim_M == 0) { diff --git a/src/kcGLM_dataStructures.cu b/src/kcGLM_dataStructures.cu index 9be3eb6..4d4e2ef 100644 --- a/src/kcGLM_dataStructures.cu +++ b/src/kcGLM_dataStructures.cu @@ -37,7 +37,7 @@ GPUGLM_parameters_GPU::GPUGLM_parameters_GPU(const GPUGLM_structure_args if(GLMstructure->logLikeParams.size() > 0) { logLikeParams = new GPUData(ce, GPUData_HOST_STANDARD, stream, GLMstructure->logLikeParams.size()); checkCudaErrors(ce, "GPUGLM_parameters_GPU errors: could not allocate space for logLikeParams!" ); - for(int ii = 0; ii < GLMstructure->logLikeParams.size(); ii++) { + for(unsigned int ii = 0; ii < GLMstructure->logLikeParams.size(); ii++) { (*logLikeParams)[ii] = GLMstructure->logLikeParams[ii]; } ce = logLikeParams->copyHostToGPU(stream); @@ -97,7 +97,7 @@ __global__ void kernel_ParamsSparseRunSetup_GLM(GPUData_kernel rid unsigned int mm = trial_included[tr]; unsigned int start_all = ridx_t_all[mm]; unsigned int start_sp = ridx_st_sall[tr]; - for(int nn = 0; nn < dim_N[mm]; nn++) { + for(unsigned int nn = 0; nn < dim_N[mm]; nn++) { ridx_sa_all[nn + start_sp] = start_all + nn; } } @@ -291,7 +291,7 @@ void GPUGLM_results_GPU::addToHost(const GPUGLM_parameters_GPU * //adds local results to dest if(opts->compute_trialLL) { - for(int mm = 0; mm < max_trials(); mm++) { + for(unsigned int mm = 0; mm < max_trials(); mm++) { if(dataset->isInDataset_trial[mm] && (opts->trial_weights.empty() || opts->trial_weights[mm] != 0)) { (*(results_dest->trialLL))[mm] += (*trialLL)[mm]; } @@ -299,13 +299,13 @@ void GPUGLM_results_GPU::addToHost(const GPUGLM_parameters_GPU * } if(opts->compute_dK) { - for(int kk = 0; kk < dim_K(); kk++) { + for(unsigned int kk = 0; kk < dim_K(); kk++) { (*(results_dest->dK))[kk] += (*dK)[kk]; } } if(opts->compute_d2K) { - for(int kk = 0; kk < dim_K(); kk++) { - for(int bb = kk; bb < dim_K(); bb++) { + for(unsigned int kk = 0; kk < dim_K(); kk++) { + for(unsigned int bb = kk; bb < dim_K(); bb++) { (*(results_dest->d2K))(kk, bb) += (*d2K)(kk, bb); //for symmetric matrices if(bb != kk) { @@ -360,7 +360,7 @@ GPUGLM_dataset_GPU::GPUGLM_dataset_GPU(const GPUGLM_structure_args::GPUGLM_dataset_GPU(const GPUGLM_structure_argslogLikeSettings == ll_poissExp || GLMstructure->logLikeSettings == ll_poissSoftRec) { - for(int nn = 0; nn < (*dim_N)[mm]; nn++) { + for(unsigned int nn = 0; nn < (*dim_N)[mm]; nn++) { FPTYPE Y_c = (*(block->trials[mm]->Y))[nn]; nc += (Y_c >= 0) ? -lgamma(floor(Y_c) + 1.0) : 0; } @@ -398,8 +398,8 @@ GPUGLM_dataset_GPU::GPUGLM_dataset_GPU(const GPUGLM_structure_args::GPUGLM_dataset_GPU(const GPUGLM_structure_args= 0; +} + template GPUGMLM::GPUGMLM(const GPUGMLM_structure_args * GMLMstructure, const std::vector *> blocks, std::shared_ptr msg_) : isSimultaneousPopulation_(GMLMstructure->isSimultaneousPopulation) { msg = msg_; @@ -49,7 +74,7 @@ GPUGMLM::GPUGMLM(const GPUGMLM_structure_args * GMLMstructure, //build each block gpu_blocks.resize(blocks.size()); - for(int bb = 0; bb < blocks.size(); bb++) { + for(unsigned int bb = 0; bb < blocks.size(); bb++) { if(isSimultaneousPopulation()) { gpu_blocks[bb] = new GPUGMLMPop_computeBlock(GMLMstructure, blocks[bb], max_trials, msg); } @@ -80,13 +105,13 @@ void GPUGMLM::computeLogLikelihood_async(std::shared_ptrcomputeRateParts(opts.get(), isSparse[bb]); } - for(int bb = 0; bb < gpu_blocks.size(); bb++) { + for(unsigned int bb = 0; bb < gpu_blocks.size(); bb++) { gpu_blocks[bb]->computeLogLike(opts.get(), isSparse[bb]); } - for(int bb = 0; bb < gpu_blocks.size(); bb++) { + for(unsigned int bb = 0; bb < gpu_blocks.size(); bb++) { gpu_blocks[bb]->computeDerivatives(opts.get(), isSparse[bb]); } diff --git a/src/kcGMLM.hpp b/src/kcGMLM.hpp index 9d2e8c2..4f00aa2 100644 --- a/src/kcGMLM.hpp +++ b/src/kcGMLM.hpp @@ -26,6 +26,8 @@ namespace kCUDA { +int getValidGPU(); +bool gpuAvailable(); /************************************************************************************************************************************************************** *************************************************************************************************************************************************************** diff --git a/src/kcGMLMPop_computeBlock.cu b/src/kcGMLMPop_computeBlock.cu index 64cf23d..8da76f4 100644 --- a/src/kcGMLMPop_computeBlock.cu +++ b/src/kcGMLMPop_computeBlock.cu @@ -25,6 +25,7 @@ GPUGMLMPop_computeBlock::GPUGMLMPop_computeBlock(const GPUGMLM_structure this->msg = msg_; this->dev = block->dev_num; this->switchToDevice(); + this->checkDeviceComputeCapability(); dim_J = GMLMPopstructure->Groups.size(); size_t dim_M = block->trials.size(); @@ -36,7 +37,7 @@ GPUGMLMPop_computeBlock::GPUGMLMPop_computeBlock(const GPUGMLM_structure //setup the streams this->checkCudaErrors(cudaStreamCreate(&(stream)), "GPUGMLMPop_computeBlock errors: failed initializing stream!"); stream_Groups.resize(dim_J); - for(int jj = 0; jj < dim_J; jj++) { + for(unsigned int jj = 0; jj < dim_J; jj++) { this->checkCudaErrors(cudaStreamCreate(&(stream_Groups[jj])), "GPUGMLMPop_computeBlock errors: failed initializing group streams!"); } @@ -62,7 +63,7 @@ GPUGMLMPop_computeBlock::GPUGMLMPop_computeBlock(const GPUGMLM_structure cublasHandle_Groups.resize(dim_J); cublasWorkspaces.assign(dim_J, NULL); cublasWorkspaces_size.assign(dim_J, cublasWorkspace_size); - for(int jj = 0; jj < dim_J; jj++) { + for(unsigned int jj = 0; jj < dim_J; jj++) { this->checkCudaErrors(cublasCreate(&(cublasHandle_Groups[jj])), "GPUGMLMPop_computeBlock errors: CUBLAS groups initialization failed."); this->checkCudaErrors(cublasSetMathMode(cublasHandle_Groups[jj], mathMode), "GPUGMLMPop_computeBlock errors: set cublas group math mode failed."); this->checkCudaErrors(cublasSetPointerMode(cublasHandle_Groups[jj], CUBLAS_POINTER_MODE_HOST), "GPUGMLMPop_computeBlock errors: set cublas groups pointer mode failed."); @@ -76,7 +77,7 @@ GPUGMLMPop_computeBlock::GPUGMLMPop_computeBlock(const GPUGMLM_structure //setup cusparse handle cusparseHandle_Groups.resize(dim_J); - for(int jj = 0; jj < dim_J; jj++) { + for(unsigned int jj = 0; jj < dim_J; jj++) { this->checkCudaErrors(cusparseCreate( &(cusparseHandle_Groups[jj])), "GPUGMLMPop_computeBlock errors: cusparse groups initialization failed."); this->checkCudaErrors(cusparseSetPointerMode(cusparseHandle_Groups[jj], CUSPARSE_POINTER_MODE_HOST), "GPUGMLMPop_computeBlock errors: set cusparse groups pointer mode failed."); this->checkCudaErrors(cusparseSetStream( cusparseHandle_Groups[jj], stream_Groups[jj]), "GPUGMLMPop_computeBlock errors: set cusparse groups stream failed."); @@ -128,7 +129,7 @@ template bool GPUGMLMPop_computeBlock::loadParams(const GPUGMLM_params * params_host, const GPUGMLM_computeOptions * opts) { this->switchToDevice(); params->copyToGPU(params_host, dataset, stream, stream_Groups, opts); - for(int jj = 0; jj < params->dim_J(); jj++) { + for(unsigned int jj = 0; jj < params->dim_J(); jj++) { this->checkCudaErrors(results->set_dim_R(jj, params->dim_R(jj), stream), "GPUGMLMPop_computeBlock::loadParams errors: could not set results dim_R"); } bool isSparseRun = dataset->isSparseRun(params); @@ -136,7 +137,7 @@ bool GPUGMLMPop_computeBlock::loadParams(const GPUGMLM_params * results_set = true; //for each group, multiply coefficients by X*T -> XT - for(int jj = 0; jj < dim_J && jj < dataset->dim_J(); jj++) { + for(unsigned int jj = 0; jj < dim_J && jj < dataset->dim_J(); jj++) { dataset->Groups[jj]->multiplyCoefficients(isSparseRun, opts->update_weights, params->Groups[jj], stream_Groups[jj], cublasHandle_Groups[jj], params->paramsLoaded_event); } } @@ -154,7 +155,7 @@ void GPUGMLMPop_computeBlock::computeRateParts(const GPUGMLM_computeOpti this->switchToDevice(); //for each group - for(int jj = 0; jj < dataset->dim_J(); jj++ ) { + for(unsigned int jj = 0; jj < dataset->dim_J(); jj++ ) { dataset->Groups[jj]->getGroupRate(isSparseRun, params->Groups[jj], opts->Groups[jj], stream_Groups[jj], cublasHandle_Groups[jj]); } } @@ -566,7 +567,7 @@ void GPUGMLMPop_computeBlock::computeDerivatives(const GPUGMLM_computeOp // or kernel to sum up dLL->dW and GEMV for dB? //for each Group - for(int jj = 0; jj < dim_J; jj++) { + for(unsigned int jj = 0; jj < dim_J; jj++) { dataset->Groups[jj]->computeDerivatives(results->Groups[jj], isSparseRun, opts->update_weights, params->Groups[jj], opts->Groups[jj], stream_Groups[jj], cublasHandle_Groups[jj], cusparseHandle_Groups[jj], LL_event); } @@ -642,7 +643,7 @@ GPUGMLMPop_dataset_GPU::GPUGMLMPop_dataset_GPU(const GPUGMLM_structure_a this->checkCudaErrors(ce, "GPUGMLMPop_dataset_GPU errors: could not allocate normalizingConstants_trial on device!"); size_t X_lin_depth; - for(int mm = 0; mm < dim_M(); mm++) { + for(unsigned int mm = 0; mm < dim_M(); mm++) { if(mm == 0) { X_lin_depth = block->trials[mm]->X_lin->getSize(2); } @@ -674,9 +675,9 @@ GPUGMLMPop_dataset_GPU::GPUGMLMPop_dataset_GPU(const GPUGMLM_structure_a isInDataset_trial[block->trials[mm]->trial_idx] = true; - for(int pp = 0; pp < GMLMPopstructure->dim_P; pp++) { + for(unsigned int pp = 0; pp < GMLMPopstructure->dim_P; pp++) { FPTYPE nc = 0; // normalizing constant - for(int nn = 0; nn < (*dim_N)[mm]; nn++) { + for(unsigned int nn = 0; nn < (*dim_N)[mm]; nn++) { if(GMLMPopstructure->logLikeSettings == ll_poissExp || GMLMPopstructure->logLikeSettings == ll_poissSoftRec) { FPTYPE Y_c = (*(block->trials[mm]->Y))(nn, pp); nc += (Y_c >= 0) ? -lgamma(floor(Y_c) + 1.0) : 0; @@ -689,8 +690,8 @@ GPUGMLMPop_dataset_GPU::GPUGMLMPop_dataset_GPU(const GPUGMLM_structure_a this->checkCudaErrors(ce, "GPUGMLMPop_dataset_GPU errors: could not allocate id_a_trialM on device!"); size_t N_total_ctr = 0; - for(int mm = 0; mm < dim_M(); mm++) { - for(int nn = 0; nn < (*dim_N)[mm]; nn++) { + for(unsigned int mm = 0; mm < dim_M(); mm++) { + for(unsigned int nn = 0; nn < (*dim_N)[mm]; nn++) { (*id_a_trialM)[N_total_ctr + nn] = mm; } N_total_ctr += (*dim_N)[mm]; @@ -706,7 +707,7 @@ GPUGMLMPop_dataset_GPU::GPUGMLMPop_dataset_GPU(const GPUGMLM_structure_a this->checkCudaErrors(ce, "GPUGMLMPop_dataset_GPU errors: could not allocate X_lin on device!"); //copy each trial to GPU - for(int mm = 0; mm < dim_M(); mm++) { + for(unsigned int mm = 0; mm < dim_M(); mm++) { // spike counts cudaPos copyOffset = make_cudaPos((*ridx_t_all)[mm], 0, 0); this->checkCudaErrors(Y->copyTo(stream, block->trials[mm]->Y, true, copyOffset), "GPUGMLMPop_dataset_GPU errors: could not copy Y to device!"); @@ -749,7 +750,7 @@ GPUGMLMPop_dataset_GPU::GPUGMLMPop_dataset_GPU(const GPUGMLM_structure_a this->checkCudaErrors(ce, "GPUGMLMPop_dataset_GPU errors: could not allocate dW_trial on device!"); //setup the groups - for(int jj = 0; jj < dim_J(); jj++) { + for(unsigned int jj = 0; jj < dim_J(); jj++) { Groups[jj] = new GPUGMLMPop_dataset_Group_GPU(jj, GMLMPopstructure->Groups[jj], block->trials, this, stream, cusparseHandle_Groups[jj]); } } @@ -779,7 +780,7 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro std::vector dim_F_c; dim_F_c.assign(dim_D(), 1); size_t max_dim_F_dim_P = parent->dim_P(); - for(int ss = 0; ss < GMLMPopGroupStructure->dim_S(); ss++) { + for(unsigned int ss = 0; ss < GMLMPopGroupStructure->dim_S(); ss++) { dim_T_total *= GMLMPopGroupStructure->dim_T[ss]; dim_F_c[GMLMPopGroupStructure->factor_idx[ss]] *= GMLMPopGroupStructure->dim_T[ss]; @@ -802,7 +803,7 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro //allocated space for regressors and copy to GPU size_t max_dim_X_shared = parent->dim_N_total(); - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { (*isShared)[dd] = !(GMLMPopGroupStructure->X_shared[dd]->empty()); if((*isShared)[dd]) { @@ -824,7 +825,7 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro this->checkCudaErrors(X[dd]->copyTo(stream, GMLMPopGroupStructure->X_shared[dd], false), "GPUGMLMPop_dataset_Group_GPU errors: could not copy X[dd] shared to device!"); // copy each trial's data to GPU - for(int mm = 0; mm < trials.size(); mm++) { + for(unsigned int mm = 0; mm < trials.size(); mm++) { cudaPos copyOffset = make_cudaPos((*(parent->ridx_t_all))[mm], 0, 0); //get row for current trial this->checkCudaErrors(iX[dd]->copyTo(stream, trials[mm]->Groups[groupNum]->iX[dd], true, copyOffset), "GPUGMLMPop_dataset_Group_GPU errors: could not copy iX[dd] shared to device!"); } @@ -832,8 +833,8 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro //check if X_shared is the identity matrix if(X[dd]->getSize(0) == X[dd]->getSize(1)) { (*isSharedIdentity)[dd] = true; - for(int ii = 0; ii < X[dd]->getSize(0) && (*isSharedIdentity)[dd]; ii++) { - for(int jj = 0; jj < X[dd]->getSize(1) && (*isSharedIdentity)[dd]; jj++) { + for(unsigned int ii = 0; ii < X[dd]->getSize(0) && (*isSharedIdentity)[dd]; ii++) { + for(unsigned int jj = 0; jj < X[dd]->getSize(1) && (*isSharedIdentity)[dd]; jj++) { if(ii == jj) { (*isSharedIdentity)[dd] = 1 == (*(GMLMPopGroupStructure->X_shared[dd]))(ii,jj); } @@ -877,7 +878,7 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro iX[dd] = new GPUData(ce, GPUData_HOST_NONE, stream, 0, GMLMPopGroupStructure->dim_A); // copy each trial's data - for(int mm = 0; mm < trials.size(); mm++) { + for(unsigned int mm = 0; mm < trials.size(); mm++) { cudaPos copyOffset = make_cudaPos((*(parent->ridx_t_all))[mm], 0, 0); //get row for current trial this->checkCudaErrors(X[dd]->copyTo(stream, trials[mm]->Groups[groupNum]->X[dd], true, copyOffset), "GPUGMLMPop_dataset_Group_GPU errors: could not copy X[dd] local to device!"); } @@ -903,7 +904,7 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro // pitched memory for lambda_d: note arrangement is (dim_N_total*dim_A) x dim_R // this stacks the events to line up with X or S lambda_d.assign(dim_D(), NULL); - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { size_t depth = dim_A; if(!((*isShared)[dd]) && X[dd]->getSize(2) == 1) { depth = 1; @@ -927,7 +928,7 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro spi_buffer.assign(dim_D(), NULL); spi_buffer_size.assign(dim_D(), 0); - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { if((*isShared)[dd]) { //gets the rows and cols of the spm in the correct order //shorter algorithm is too slow for my level of patience, so we do this in a couple steps @@ -935,9 +936,9 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro size_t ctr = 0; std::vector row_ctr; row_ctr.resize(dim_X(dd)); - for(int mm = 0; mm < parent->dim_M(); mm++) { //for each trial - for(int aa = 0; aa < dim_A; aa++) { //for each event - for(int nn = 0; nn < trials[mm]->dim_N(msg); nn++) { //for each observation + for(unsigned int mm = 0; mm < parent->dim_M(); mm++) { //for each trial + for(unsigned int aa = 0; aa < dim_A; aa++) { //for each event + for(unsigned int nn = 0; nn < trials[mm]->dim_N(msg); nn++) { //for each observation //gets the entry in the input data int row = (*(trials[mm]->Groups[groupNum]->iX[dd]))(nn, aa); if(row >= 0 && row < dim_X(dd)) { //if valid row (invalid indices are 0's) @@ -952,7 +953,7 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro std::vector row_idx; row_idx.resize(dim_X(dd)); row_idx[0] = 0; - for(int xx = 1; xx < dim_X(dd); xx++) { + for(unsigned int xx = 1; xx < dim_X(dd); xx++) { row_idx[xx] = row_ctr[xx-1] + row_idx[xx-1]; } //goes back through the indices and adds them on @@ -962,9 +963,9 @@ GPUGMLMPop_dataset_Group_GPU::GPUGMLMPop_dataset_Group_GPU(const int gro this->checkCudaErrors(ce, "GPUGMLMPop_dataset_Group_GPU errors: could not allocate space for spi_cols[dd]!"); row_ctr.assign(dim_X(dd), 0); //reset row counter - for(int mm = 0; mm < parent->dim_M(); mm++) { //for each trial - for(int aa = 0; aa < dim_A; aa++) { //for each event - for(int nn = 0; nn < trials[mm]->dim_N(msg); nn++) { //for each observation + for(unsigned int mm = 0; mm < parent->dim_M(); mm++) { //for each trial + for(unsigned int aa = 0; aa < dim_A; aa++) { //for each event + for(unsigned int nn = 0; nn < trials[mm]->dim_N(msg); nn++) { //for each observation //gets the entry in the input data int row = (*(trials[mm]->Groups[groupNum]->iX[dd]))(nn, aa); if(row >= 0 && row < dim_X(dd)) { //if valid row @@ -1098,7 +1099,7 @@ GPUGMLMPop_dataset_Group_GPU::~GPUGMLMPop_dataset_Group_GPU() { cudaSafeFreeVector(spi_data, "GPUGMLMPop_dataset_Group_GPU errors: could not free spi_data"); cudaSafeFreeVector(spi_buffer, "GPUGMLMPop_dataset_Group_GPU errors: could not free spi_buffer"); //destroy any cusparse handles - for(int dd = 0; dd < spi_S.size(); dd++) { + for(unsigned int dd = 0; dd < spi_S.size(); dd++) { if(spi_S[dd] != NULL) { this->checkCudaErrors(cusparseDestroySpMat(*spi_S[dd]), "GPUGMLMPop_dataset_Group_GPU errors: CUSPARSE failed to destroy spi_S descr."); delete spi_S[dd]; @@ -1170,7 +1171,7 @@ void GPUGMLMPop_dataset_Group_GPU::multiplyCoefficients(const bool isSpa else if(update_weights) { this->checkCudaErrors(lambda_v->resize(stream, parent->lambda->getSize_max(0), -1, -1), "GPUGMLM_dataset_Group_GPU::multiplyCoefficients errors: could not set size for sparse runs."); } - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { GPUData * X_c = X[dd]; if(isSparseRun && update_weights) { this->checkCudaErrors(X_temp[dd]->resize( stream, parent->dim_N_temp, -1, -1), "GPUGMLMPop_dataset_Group_GPU::multiplyCoefficients errors: could not set size for sparse runs."); @@ -1364,13 +1365,13 @@ void GPUGMLMPop_dataset_Group_GPU::getGroupRate(const bool isSparseRun, grid_size.y = dim_R() / block_size.y + ((dim_R() % block_size.x == 0)? 0:1); bool compute_dT_any = false; - for(int ss = 0; ss < params->dim_S(); ss++) { + for(unsigned int ss = 0; ss < params->dim_S(); ss++) { if(opts->compute_dT[ss]) { compute_dT_any = true; break; } } - + switch( params->dim_S()) { case 1: kernel_getGroupRate_pop<<>>( lambda_v->device(), GPUData::assembleKernels(lambda_d), @@ -1436,9 +1437,9 @@ void GPUGMLMPop_dataset_Group_GPU::getGroupRate(const bool isSparseRun, parent->ridx_a_all_c->device(), dim_A); break; default: - this->checkCudaErrors(cudaErrorInvalidConfiguration, "GPUGMLMPop_dataset_Group_GPU::getGroupRate errors: kernel_getGroupRate launch failed - invalid tensor order"); + this->checkCudaErrors(cudaErrorInvalidConfiguration, "GPUGMLMPop_dataset_Group_GPU::getGroupRate_pop errors: kernel_getGroupRate_pop launch failed - invalid tensor order"); } - this->checkCudaErrors("GPUGMLMPop_dataset_Group_GPU::getGroupRate errors: kernel_getGroupRate launch failed"); + this->checkCudaErrors("GPUGMLMPop_dataset_Group_GPU::getGroupRate_pop errors: kernel_getGroupRate_pop launch failed"); // multiply lambda_v * V' -> lambda(:, :, groupNum) FPTYPE alpha = 1; @@ -1548,13 +1549,13 @@ void GPUGMLMPop_dataset_Group_GPU::computeDerivatives(GPUGMLM_results_Gr //check if computing any derivatives first std::vector compute_dF; compute_dF.assign(dim_D(), false); - for(int ss = 0; ss < params->dim_S(); ss++) { + for(unsigned int ss = 0; ss < params->dim_S(); ss++) { unsigned int dd = (*(params->factor_idx))[ss]; compute_dF[dd] = compute_dF[dd] || opts->compute_dT[ss]; } // compute lambda_v = dLL * V - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { if(compute_dF[dd]) { this->checkCudaErrors(parent->dLL->GEMM(lambda_v, params->V, cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N), "GPUGMLMPop_dataset_Group_GPU::computeDerivatives errors: dLL*V -> lambda_v failed"); break; @@ -1562,7 +1563,7 @@ void GPUGMLMPop_dataset_Group_GPU::computeDerivatives(GPUGMLM_results_Gr } //for each factor - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { if(compute_dF[dd]) { // lambda_d init setup in the kernel call in computeRateParts // two steps @@ -1584,7 +1585,7 @@ void GPUGMLMPop_dataset_Group_GPU::computeDerivatives(GPUGMLM_results_Gr FPTYPE alpha = 1; FPTYPE beta = 0; - for(int rr = 0; rr < params->dim_R(); rr++) { + for(unsigned int rr = 0; rr < params->dim_R(); rr++) { kernel_set_spi_S_pop<<>>(spi_data[dd]->device(), lambda_v->device(), spi_cols[dd]->device(), rr); this->checkCudaErrors("GPUGMLMPop_dataset_Group_GPU::computeDerivatives errors: kernel_set_spi_S launch failed"); @@ -1698,7 +1699,7 @@ void GPUGMLMPop_dataset_Group_GPU::computeDerivatives(GPUGMLM_results_Gr // matrix mults to get dT if((*(params->N_per_factor))[dd] > 1) { - for(int ss = 0; ss < params->dim_S(); ss++) { + for(unsigned int ss = 0; ss < params->dim_S(); ss++) { if((*(params->factor_idx))[ss] == dd && opts->compute_dT[ss]) { this->checkCudaErrors(params->dF_dT[ss]->GEMVs(results->dT[ss], results->dF[dd], cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N), "GPUGMLMPop_dataset_Group_GPU::computeDerivatives errors: dF_dT'*dF -> dT"); } diff --git a/src/kcGMLM_common.hpp b/src/kcGMLM_common.hpp index 9371a95..30c995a 100644 --- a/src/kcGMLM_common.hpp +++ b/src/kcGMLM_common.hpp @@ -30,7 +30,7 @@ class GPUGMLM_group_params { inline size_t dim_R(std::shared_ptr msg) const { size_t dim_R_c = V->getSize(1); - for(int ss = 0; ss < T.size(); ss++) { + for(unsigned int ss = 0; ss < T.size(); ss++) { if(T[ss]->getSize(1) != dim_R_c) { std::ostringstream output_stream; output_stream << "GPUGMLM_group_params errors: inconsistent ranks!"; @@ -41,7 +41,7 @@ class GPUGMLM_group_params { } inline int dim_R() const { int dim_R_c = V->getSize(1); - for(int ss = 0; ss < T.size(); ss++) { + for(unsigned int ss = 0; ss < T.size(); ss++) { if(T[ss]->getSize(1) != dim_R_c) { return -1; } @@ -102,7 +102,7 @@ class GPUGMLM_params { return B->getSize(0); } inline unsigned int dim_J() const { - return Groups.size(); + return static_cast(Groups.size()); } }; @@ -183,7 +183,7 @@ class GPUGMLM_group_results { output_stream << "GPUGMLM_group_results errors: inconsistent ranks!"; msg->callErrMsgTxt(output_stream); } - for(int ss = 0; ss < dT.size(); ss++) { + for(unsigned int ss = 0; ss < dT.size(); ss++) { if( dT[ss] == NULL || dT[ss]->getSize(1) != dim_R) { std::ostringstream output_stream; output_stream << "GPUGMLM_group_results errors: inconsistent ranks!"; @@ -307,7 +307,7 @@ class GPUGMLM_trial_Group_args { } size_t dim_N_c = 0; - for(int ii = 0; ii < X.size(); ii++) { + for(unsigned int ii = 0; ii < X.size(); ii++) { if(X[ii] != NULL && !(X[ii]->empty()) && iX[ii] != NULL && !(iX[ii]->empty())) { std::ostringstream output_stream; output_stream << "GPUGMLM_trial_Group_args errors: mode " << ii << " has both local and shared regressors."; @@ -345,7 +345,7 @@ class GPUGMLM_trial_Group_args { } size_t dim_N_c = 0; - for(int ii = 0; ii < X.size(); ii++) { + for(unsigned int ii = 0; ii < X.size(); ii++) { if(X[ii] != NULL && !(X[ii]->empty()) && iX[ii] != NULL && !(iX[ii]->empty())) { return 0; } @@ -411,7 +411,7 @@ class GPUGMLM_trial_Group_args { return -1; } inline bool validDimA(unsigned int dim_A_check) const { - for(int mode = 0; mode < dim_D(); mode++) { + for(unsigned int mode = 0; mode < dim_D(); mode++) { if(X[mode] != NULL && !(X[mode]->empty()) && iX[mode] != NULL && !(iX[mode]->empty())) { return false; // bad setup: both local and global regressors } @@ -474,7 +474,7 @@ class GPUGMLM_trial_args { if(X_lin != NULL && !(X_lin->empty()) && X_lin->getSize(0) != dim_N_c) { dim_N_c = 0; } - for(int jj = 0; jj < Groups.size(); jj++) { + for(unsigned int jj = 0; jj < Groups.size(); jj++) { if(Groups[jj] == NULL || Groups[jj]->dim_N() != dim_N_c) { return 0; } @@ -538,9 +538,9 @@ class GPUGMLM_structure_Group_args { inline size_t dim_D() const { size_t max_factor = X_shared.size(); - for(int dd = 0; dd < max_factor; dd++) { + for(unsigned int dd = 0; dd < max_factor; dd++) { bool found = false; - for(int ss = 0; ss < factor_idx.size() && !found; ss++) { + for(unsigned int ss = 0; ss < factor_idx.size() && !found; ss++) { if(factor_idx[ss] == dd) { found = true; } @@ -579,9 +579,9 @@ class GPUGMLM_structure_Group_args { msg->callErrMsgTxt(output_stream); } //search for each factor - for(int dd = 0; dd < max_factor; dd++) { + for(unsigned int dd = 0; dd < max_factor; dd++) { bool found = false; - for(int ss = 0; ss < factor_idx.size() && !found; ss++) { + for(unsigned int ss = 0; ss < factor_idx.size() && !found; ss++) { if(factor_idx[ss] == dd) { found = true; } @@ -604,7 +604,7 @@ class GPUGMLM_structure_Group_args { size_t ff = 0; if(factor < dim_D(msg)) { ff = 1; - for(int ss = 0; ss < factor_idx.size(); ss++) { + for(unsigned int ss = 0; ss < factor_idx.size(); ss++) { if(factor_idx[ss] == factor) { ff *= dim_T[ss]; } @@ -621,7 +621,7 @@ class GPUGMLM_structure_Group_args { size_t ff = 0; if(factor < dim_D()) { ff = 1; - for(int ss = 0; ss < factor_idx.size(); ss++) { + for(unsigned int ss = 0; ss < factor_idx.size(); ss++) { if(factor_idx[ss] == factor) { ff *= dim_T[ss]; } @@ -648,7 +648,7 @@ class GPUGMLM_structure_Group_args { if(trial->dim_N() != dim_N_target || dim_N_target == 0) { return -3; } - if(!(trial->validDimA(dim_A))) { + if(!(trial->validDimA(static_cast(dim_A)))) { return -4; } @@ -705,7 +705,7 @@ class GPUGMLM_structure_args { return -100003; } - for(int jj = 0; jj < Groups.size(); jj++) { + for(unsigned int jj = 0; jj < Groups.size(); jj++) { int vd = Groups[jj]->validateTrialStructure(trial->Groups[jj], trial->dim_N()); if(vd != 1) { return vd - jj*100; // group structure was invalid @@ -715,7 +715,7 @@ class GPUGMLM_structure_args { return 1; //passed basic tests (seg faults may still exist if the pointers are bad!) } int validateTrialStructure(const GPUGMLM_GPU_block_args * block) const { - for(int mm = 0; mm < block->trials.size(); mm++) { + for(unsigned int mm = 0; mm < block->trials.size(); mm++) { //checks each trial on a block int vd = validateTrialStructure(block->trials[mm]); if(vd != 1) { @@ -725,7 +725,7 @@ class GPUGMLM_structure_args { return 1; } int validateTrialStructure(const std::vector *> blocks) const { - for(int bb = 0; bb < blocks.size(); bb++) { + for(unsigned int bb = 0; bb < blocks.size(); bb++) { //checks each block int vd = validateTrialStructure(blocks[bb]); if(vd != 1) { @@ -735,7 +735,7 @@ class GPUGMLM_structure_args { return 1; } unsigned int dim_J() { - return Groups.size(); + return static_cast(Groups.size()); } }; diff --git a/src/kcGMLM_computeBlock.cu b/src/kcGMLM_computeBlock.cu index cc20fbf..9b782c1 100644 --- a/src/kcGMLM_computeBlock.cu +++ b/src/kcGMLM_computeBlock.cu @@ -25,6 +25,7 @@ GPUGMLM_computeBlock::GPUGMLM_computeBlock(const GPUGMLM_structure_args< this->msg = msg_; this->dev = block->dev_num; this->switchToDevice(); + this->checkDeviceComputeCapability(); dim_J = GMLMstructure->Groups.size(); size_t dim_M = block->trials.size(); @@ -40,7 +41,7 @@ GPUGMLM_computeBlock::GPUGMLM_computeBlock(const GPUGMLM_structure_args< #endif this->checkCudaErrors(cudaStreamCreate(&(stream)), "GPUGMLM_computeBlock errors: failed initializing stream!"); stream_Groups.resize(dim_J); - for(int jj = 0; jj < dim_J; jj++) { + for(unsigned int jj = 0; jj < dim_J; jj++) { this->checkCudaErrors(cudaStreamCreate(&(stream_Groups[jj])), "GPUGMLM_computeBlock errors: failed initializing group streams!"); } @@ -59,7 +60,7 @@ GPUGMLM_computeBlock::GPUGMLM_computeBlock(const GPUGMLM_structure_args< this->checkCudaErrors(cudaMallocPitch(reinterpret_cast(&(cublasWorkspace)), &cublasWorkspace_size, cublasWorkspace_size_0, 1), "GPUGMLM_computeBlock errors: allocating cublas workspace failed."); this->checkCudaErrors(cublasSetWorkspace(cublasHandle, cublasWorkspace, cublasWorkspace_size), "GPUGMLM_computeBlock errors: setting CUBLAS workspace failed."); }*/ - for(int jj = 0; jj < dim_J; jj++) { + for(unsigned int jj = 0; jj < dim_J; jj++) { this->checkCudaErrors(cublasCreate(&(cublasHandle_Groups[jj])), "GPUGMLM_computeBlock errors: CUBLAS groups initialization failed."); this->checkCudaErrors(cublasSetMathMode(cublasHandle_Groups[jj], mathMode), "GPUGMLM_computeBlock errors: set cublas group math mode failed."); this->checkCudaErrors(cublasSetPointerMode(cublasHandle_Groups[jj], CUBLAS_POINTER_MODE_HOST), "GPUGMLM_computeBlock errors: set cublas groups pointer mode failed."); @@ -73,7 +74,7 @@ GPUGMLM_computeBlock::GPUGMLM_computeBlock(const GPUGMLM_structure_args< //setup cusparse handle cusparseHandle_Groups.resize(dim_J); - for(int jj = 0; jj < dim_J; jj++) { + for(unsigned int jj = 0; jj < dim_J; jj++) { this->checkCudaErrors(cusparseCreate( &(cusparseHandle_Groups[jj])), "GPUGMLM_computeBlock errors: cusparse groups initialization failed."); this->checkCudaErrors(cusparseSetPointerMode(cusparseHandle_Groups[jj], CUSPARSE_POINTER_MODE_HOST), "GPUGMLM_computeBlock errors: set cusparse groups pointer mode failed."); this->checkCudaErrors(cusparseSetStream( cusparseHandle_Groups[jj], stream_Groups[jj]), "GPUGMLM_computeBlock errors: set cusparse groups stream failed."); @@ -127,7 +128,7 @@ template bool GPUGMLM_computeBlock::loadParams(const GPUGMLM_params * params_host, const GPUGMLM_computeOptions * opts) { this->switchToDevice(); params->copyToGPU(params_host, dataset, stream, stream_Groups, opts); - for(int jj = 0; jj < params->dim_J(); jj++) { + for(unsigned int jj = 0; jj < params->dim_J(); jj++) { this->checkCudaErrors(results->set_dim_R(jj, params->dim_R(jj), stream), "GPUGMLM_computeBlock::loadParams errors: could not set results dim_R"); } bool isSparseRun = dataset->isSparseRun(params); @@ -135,7 +136,7 @@ bool GPUGMLM_computeBlock::loadParams(const GPUGMLM_params * par results_set = true; //for each group, multiply coefficients by X*T -> XT - for(int jj = 0; jj < dim_J && jj < dataset->dim_J(); jj++) { + for(unsigned int jj = 0; jj < dim_J && jj < dataset->dim_J(); jj++) { dataset->Groups[jj]->multiplyCoefficients(isSparseRun, opts->update_weights, params->Groups[jj], stream_Groups[jj], cublasHandle_Groups[jj], params->paramsLoaded_event); } } @@ -154,7 +155,7 @@ void GPUGMLM_computeBlock::computeRateParts(const GPUGMLM_computeOptions this->switchToDevice(); //for each group - for(int jj = 0; jj < dataset->dim_J(); jj++ ) { + for(unsigned int jj = 0; jj < dataset->dim_J(); jj++ ) { dataset->Groups[jj]->getGroupRate(isSparseRun, params->Groups[jj], opts->Groups[jj], stream_Groups[jj]); } } @@ -454,7 +455,7 @@ void GPUGMLM_computeBlock::computeDerivatives(const GPUGMLM_computeOptio this->switchToDevice(); //for each Group - for(int jj = 0; jj < dim_J; jj++) { + for(unsigned int jj = 0; jj < dim_J; jj++) { dataset->Groups[jj]->computeDerivatives(results->Groups[jj], isSparseRun, opts->update_weights, params->Groups[jj], opts->Groups[jj], stream_Groups[jj], cublasHandle_Groups[jj], cusparseHandle_Groups[jj], LL_event); } @@ -476,7 +477,7 @@ void GPUGMLM_computeBlock::computeDerivatives(const GPUGMLM_computeOptio FPTYPE beta = 0; GPUData * X_lin_c = isSparseRun ? dataset->X_lin_temp : dataset->X_lin; - for(int pp = 0; pp < dataset->dim_P(); pp++) { + for(unsigned int pp = 0; pp < dataset->dim_P(); pp++) { if(dataset->dim_N_neuron_temp[pp] > 0) {// if there are any trials for this neuron in current GPU block unsigned int neuron_start_c = isSparseRun ? (*(dataset->ridx_sn_sall))[pp] : (*(dataset->ridx_n_all))[pp]; @@ -544,12 +545,12 @@ GPUGMLM_dataset_GPU::GPUGMLM_dataset_GPU(const GPUGMLM_structure_argscheckCudaErrors(ce, "GPUGMLM_dataset_GPU errors: could not allocate normalizingConstants_trial on this->device!"); id_t_neuron.assign(dim_M(), 0); - for(int pp = 0; pp < dim_P(); pp++) { + for(unsigned int pp = 0; pp < dim_P(); pp++) { //find each trial for neuron pp size_t dim_N_c = 0; (*ridx_n_all)[pp] = dim_N_total_c; (*ridx_n_tr)[pp] = tr_ctr; - for(int mm = 0; mm < dim_M(); mm++) { + for(unsigned int mm = 0; mm < dim_M(); mm++) { if(block->trials[mm]->neuron == pp) { // is neuron //save trial indices (*ridx_t_all)[tr_ctr] = dim_N_total_c; @@ -578,7 +579,7 @@ GPUGMLM_dataset_GPU::GPUGMLM_dataset_GPU(const GPUGMLM_structure_argstrials[mm]->neuron] = true; FPTYPE nc = 0; // normalizing constant - for(int nn = 0; nn < (*dim_N)[tr_ctr]; nn++) { + for(unsigned int nn = 0; nn < (*dim_N)[tr_ctr]; nn++) { if(GMLMstructure->logLikeSettings == ll_poissExp || GMLMstructure->logLikeSettings == ll_poissSoftRec) { FPTYPE Y_c = (*(block->trials[mm]->Y))[nn]; nc += (Y_c >= 0) ? -lgamma(floor(Y_c) + 1.0) : 0; @@ -600,8 +601,8 @@ GPUGMLM_dataset_GPU::GPUGMLM_dataset_GPU(const GPUGMLM_structure_argscheckCudaErrors(ce, "GPUGMLM_dataset_GPU errors: could not allocate id_a_neuron on this->device!"); size_t N_total_ctr = 0; - for(int mm = 0; mm < dim_M(); mm++) { - for(int nn = 0; nn < (*dim_N)[mm]; nn++) { + for(unsigned int mm = 0; mm < dim_M(); mm++) { + for(unsigned int nn = 0; nn < (*dim_N)[mm]; nn++) { (*id_a_trialM)[N_total_ctr + nn] = mm; (*id_a_neuron)[N_total_ctr + nn] = id_t_neuron[mm]; } @@ -618,7 +619,7 @@ GPUGMLM_dataset_GPU::GPUGMLM_dataset_GPU(const GPUGMLM_structure_argscheckCudaErrors(ce, "GPUGMLM_dataset_GPU errors: could not allocate X_lin on this->device!"); //copy each trial to GPU - for(int mm = 0; mm < dim_M(); mm++) { + for(unsigned int mm = 0; mm < dim_M(); mm++) { int mm_0 = trial_load_order[mm]; // spike counts @@ -670,7 +671,7 @@ GPUGMLM_dataset_GPU::GPUGMLM_dataset_GPU(const GPUGMLM_structure_argscheckCudaErrors(ce, "GPUGMLM_dataset_GPU errors: could not allocate dB_trial on this->device!"); //setup the groups - for(int jj = 0; jj < dim_J(); jj++) { + for(unsigned int jj = 0; jj < dim_J(); jj++) { Groups[jj] = new GPUGMLM_dataset_Group_GPU(jj, GMLMstructure->Groups[jj], block->trials, trial_load_order, this, stream, cusparseHandle_Groups[jj]); } } @@ -701,7 +702,7 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ std::vector dim_F_c; dim_F_c.assign(dim_D(), 1); size_t max_dim_F = 0; - for(int ss = 0; ss < GMLMGroupStructure->dim_S(); ss++) { + for(unsigned int ss = 0; ss < GMLMGroupStructure->dim_S(); ss++) { dim_T_total *= GMLMGroupStructure->dim_T[ss]; dim_F_c[GMLMGroupStructure->factor_idx[ss]] *= GMLMGroupStructure->dim_T[ss]; @@ -724,7 +725,7 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ //allocated space for regressors and copy to GPU size_t max_dim_X_shared = 0; - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { (*isShared)[dd] = !(GMLMGroupStructure->X_shared[dd]->empty()); if((*isShared)[dd]) { @@ -746,7 +747,7 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ this->checkCudaErrors(X[dd]->copyTo(stream, GMLMGroupStructure->X_shared[dd], false), "GPUGMLM_dataset_Group_GPU errors: could not copy X[dd] shared to this->device!"); // copy each trial's data to GPU - for(int mm = 0; mm < trials.size(); mm++) { + for(unsigned int mm = 0; mm < trials.size(); mm++) { int mm_0 = trial_load_order[mm]; cudaPos copyOffset = make_cudaPos((*(parent->ridx_t_all))[mm], 0, 0); //get row for current trial @@ -756,8 +757,8 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ //check if X_shared is the identity matrix if(X[dd]->getSize(0) == X[dd]->getSize(1)) { (*isSharedIdentity)[dd] = true; - for(int ii = 0; ii < X[dd]->getSize(0) && (*isSharedIdentity)[dd]; ii++) { - for(int jj = 0; jj < X[dd]->getSize(1) && (*isSharedIdentity)[dd]; jj++) { + for(unsigned int ii = 0; ii < X[dd]->getSize(0) && (*isSharedIdentity)[dd]; ii++) { + for(unsigned int jj = 0; jj < X[dd]->getSize(1) && (*isSharedIdentity)[dd]; jj++) { if(ii == jj) { (*isSharedIdentity)[dd] = 1 == (*(GMLMGroupStructure->X_shared[dd]))(ii,jj); } @@ -801,7 +802,7 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ iX[dd] = new GPUData(ce, GPUData_HOST_NONE, stream, 0, GMLMGroupStructure->dim_A); // copy each trial's data - for(int mm = 0; mm < trials.size(); mm++) { + for(unsigned int mm = 0; mm < trials.size(); mm++) { int mm_0 = trial_load_order[mm]; cudaPos copyOffset = make_cudaPos((*(parent->ridx_t_all))[mm], 0, 0); //get row for current trial @@ -832,7 +833,7 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ // pitched memory for lambda_d: note arrangement is (dim_N_total*dim_A) x dim_R // this stacks the events to line up with X or S lambda_d.assign(dim_D(), NULL); - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { size_t depth = dim_A; if(!((*isShared)[dd]) && X[dd]->getSize(2) == 1) { depth = 1; @@ -859,7 +860,7 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ spi_buffer.assign(dim_D(), NULL); spi_buffer_size.assign(dim_D(), 0); - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { if((*isShared)[dd]) { //gets the rows and cols of the spm in the correct order //shorter algorithm is too slow for my level of patience, so we do this in a couple steps @@ -867,10 +868,10 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ size_t ctr = 0; std::vector row_ctr; row_ctr.resize(dim_X(dd)); - for(int mm = 0; mm < parent->dim_M(); mm++) { //for each trial + for(unsigned int mm = 0; mm < parent->dim_M(); mm++) { //for each trial unsigned int mm_0 = trial_load_order[mm]; - for(int aa = 0; aa < dim_A; aa++) { //for each event - for(int nn = 0; nn < trials[mm_0]->dim_N(msg); nn++) { //for each observation + for(unsigned int aa = 0; aa < dim_A; aa++) { //for each event + for(unsigned int nn = 0; nn < trials[mm_0]->dim_N(msg); nn++) { //for each observation //gets the entry in the input data int row = (*(trials[mm_0]->Groups[groupNum]->iX[dd]))(nn, aa); if(row >= 0 && row < dim_X(dd)) { //if valid row (invalid indices are 0's) @@ -885,7 +886,7 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ std::vector row_idx; row_idx.resize(dim_X(dd)); row_idx[0] = 0; - for(int xx = 1; xx < dim_X(dd); xx++) { + for(unsigned int xx = 1; xx < dim_X(dd); xx++) { row_idx[xx] = row_ctr[xx-1] + row_idx[xx-1]; } //goes back through the indices and adds them on @@ -895,10 +896,10 @@ GPUGMLM_dataset_Group_GPU::GPUGMLM_dataset_Group_GPU(const int groupNum_ this->checkCudaErrors(ce, "GPUGMLM_dataset_Group_GPU errors: could not allocate space for spi_cols[dd]!"); row_ctr.assign(dim_X(dd), 0); //reset row counter - for(int mm = 0; mm < parent->dim_M(); mm++) { //for each trial + for(unsigned int mm = 0; mm < parent->dim_M(); mm++) { //for each trial unsigned int mm_0 = trial_load_order[mm]; - for(int aa = 0; aa < dim_A; aa++) { //for each event - for(int nn = 0; nn < trials[mm_0]->dim_N(msg); nn++) { //for each observation + for(unsigned int aa = 0; aa < dim_A; aa++) { //for each event + for(unsigned int nn = 0; nn < trials[mm_0]->dim_N(msg); nn++) { //for each observation //gets the entry in the input data int row = (*(trials[mm_0]->Groups[groupNum]->iX[dd]))(nn, aa); if(row >= 0 && row < dim_X(dd)) { //if valid row @@ -1035,7 +1036,7 @@ GPUGMLM_dataset_Group_GPU::~GPUGMLM_dataset_Group_GPU() { cudaSafeFreeVector(spi_data, "GPUGMLM_dataset_Group_GPU errors: could not free spi_data"); cudaSafeFreeVector(spi_buffer, "GPUGMLM_dataset_Group_GPU errors: could not free spi_buffer"); //destroy any cusparse handles - for(int dd = 0; dd < spi_S.size(); dd++) { + for(unsigned int dd = 0; dd < spi_S.size(); dd++) { if(spi_S[dd] != NULL) { this->checkCudaErrors(cusparseDestroySpMat(*spi_S[dd]), "GPUGMLM_dataset_Group_GPU errors: CUSPARSE failed to destroy spi_S descr."); delete spi_S[dd]; @@ -1100,7 +1101,7 @@ void GPUGMLM_dataset_Group_GPU::multiplyCoefficients(const bool isSparse this->checkCudaErrors(lambda_v->resize(stream, parent->lambda->getSize_max(0), -1, -1), "GPUGMLM_dataset_Group_GPU::multiplyCoefficients errors: could not set size for sparse runs."); } - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { GPUData * X_c = X[dd]; if(isSparseRun && update_weights) { @@ -1304,7 +1305,7 @@ void GPUGMLM_dataset_Group_GPU::getGroupRate(const bool isSparseRun, con grid_size.x = min(max_blocks_needed, blocks_to_use); bool compute_dT_any = false; - for(int ss = 0; ss < params->dim_S(); ss++) { + for(unsigned int ss = 0; ss < params->dim_S(); ss++) { if(opts->compute_dT[ss]) { compute_dT_any = true; break; @@ -1492,7 +1493,7 @@ void GPUGMLM_dataset_Group_GPU::computeDerivatives(GPUGMLM_results_Group //for each neuron FPTYPE alpha = 1; FPTYPE beta = 0; - for(int pp = 0; pp < parent->dim_P(); pp++) {// if there are any trials for this neuron in current GPU block + for(unsigned int pp = 0; pp < parent->dim_P(); pp++) {// if there are any trials for this neuron in current GPU block if(parent->dim_N_neuron_temp[pp] > 0) { unsigned int neuron_start_c = (*(parent->ridx_n_all))[pp]; if(isSparseRun) { @@ -1516,13 +1517,13 @@ void GPUGMLM_dataset_Group_GPU::computeDerivatives(GPUGMLM_results_Group //check if computing any derivatives first std::vector compute_dF; compute_dF.assign(dim_D(), false); - for(int ss = 0; ss < params->dim_S(); ss++) { + for(unsigned int ss = 0; ss < params->dim_S(); ss++) { unsigned int dd = (*(params->factor_idx))[ss]; compute_dF[dd] = compute_dF[dd] || opts->compute_dT[ss]; } //for each factor - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { if(compute_dF[dd]) { // lambda_t setup in the kernel call in computeRateParts //matrix mult of X'*(dLL .* lambda_t) @@ -1577,7 +1578,7 @@ void GPUGMLM_dataset_Group_GPU::computeDerivatives(GPUGMLM_results_Group FPTYPE alpha = 1; FPTYPE beta = 0; - for(int rr = 0; rr < params->dim_R(); rr++) { + for(unsigned int rr = 0; rr < params->dim_R(); rr++) { //I found - on a 1080ti at least - doing this series of SpMV ops was typically faster than a single SpMM (annoyingly) cusparseStatus_t cusparse_stat; cusparse_stat = cusparseDnVecSetValues(*(spi_lambda_d[dd]), lambda_d[dd]->getData_gpu() + rr*lambda_d[dd]->getLD_gpu()); @@ -1644,7 +1645,7 @@ void GPUGMLM_dataset_Group_GPU::computeDerivatives(GPUGMLM_results_Group // matrix mults to get dT if((*(params->N_per_factor))[dd] > 1) { - for(int ss = 0; ss < params->dim_S(); ss++) { + for(unsigned int ss = 0; ss < params->dim_S(); ss++) { if((*(params->factor_idx))[ss] == dd && opts->compute_dT[ss]) { this->checkCudaErrors(params->dF_dT[ss]->GEMVs(results->dT[ss], results->dF[dd], cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N), "GPUGMLM_dataset_Group_GPU::computeDerivatives errors: dF_dT'*dF -> dT"); } diff --git a/src/kcGMLM_dataStructures.cu b/src/kcGMLM_dataStructures.cu index 38d5ceb..3e077c0 100644 --- a/src/kcGMLM_dataStructures.cu +++ b/src/kcGMLM_dataStructures.cu @@ -38,7 +38,7 @@ GPUGMLM_parameters_GPU::GPUGMLM_parameters_GPU(const GPUGMLM_structure_a if(GMLMstructure->logLikeParams.size() > 0) { logLikeParams = new GPUData(ce, GPUData_HOST_STANDARD, stream, GMLMstructure->logLikeParams.size()); checkCudaErrors(ce, "GPUGMLM_parameters_GPU errors: could not allocate space for logLikeParams!" ); - for(int ii = 0; ii < GMLMstructure->logLikeParams.size(); ii++) { + for(unsigned int ii = 0; ii < GMLMstructure->logLikeParams.size(); ii++) { (*logLikeParams)[ii] = GMLMstructure->logLikeParams[ii]; } ce = logLikeParams->copyHostToGPU(stream); @@ -74,7 +74,7 @@ GPUGMLM_parameters_GPU::GPUGMLM_parameters_GPU(const GPUGMLM_structure_a //setup each group Groups.resize(GMLMstructure->Groups.size()); - for(int jj = 0; jj < GMLMstructure->Groups.size(); jj++) { + for(unsigned int jj = 0; jj < GMLMstructure->Groups.size(); jj++) { Groups[jj] = new GPUGMLM_parameters_Group_GPU(GMLMstructure->Groups[jj], this); } } @@ -115,7 +115,7 @@ GPUGMLM_parameters_Group_GPU::GPUGMLM_parameters_Group_GPU(const GPUGMLM } dim_F_max = 0; - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { if(GMLMGroupStructure->factor_idx[ss] >= dim_D()) { output_stream << "GPUGMLM_parameters_Group_GPU errors: invalid factor index!"; msg->callErrMsgTxt(output_stream); @@ -127,19 +127,19 @@ GPUGMLM_parameters_Group_GPU::GPUGMLM_parameters_Group_GPU(const GPUGMLM T[ss] = new GPUData(ce, GPUData_HOST_PAGELOCKED, stream, GMLMGroupStructure->dim_T[ss], GMLMGroupStructure->dim_R_max); checkCudaErrors(ce, "GPUGMLM_parameters_Group_GPU errors: could not allocate space for T[ss]!" ); } - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { int dd = (*factor_idx)[ss]; dF_dT[ss] = new GPUData(ce, GPUData_HOST_NONE, stream, GMLMGroupStructure->dim_T[ss], dim_F_c[dd], GMLMGroupStructure->dim_R_max); checkCudaErrors(ce, "GPUGMLM_parameters_Group_GPU errors: could not allocate space for dF_dT[ss]!" ); } - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { if((*N_per_factor)[dd] > 1) { F[dd] = new GPUData(ce, GPUData_HOST_NONE, stream, dim_F_c[dd], GMLMGroupStructure->dim_R_max); checkCudaErrors(ce, "GPUGMLM_parameters_Group_GPU errors: could not allocate space for F[dd]!" ); } else if((*N_per_factor)[dd] == 1) { //find the T for this factor if is unique - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { if((*factor_idx)[ss] == dd) { F[dd] = T[ss]; break; @@ -180,14 +180,14 @@ GPUGMLM_parameters_GPU::~GPUGMLM_parameters_GPU() { cudaSafeFree(W, "GPUGMLM_parameters_GPU errors: could not free W"); cudaSafeFree(B, "GPUGMLM_parameters_GPU errors: could not free B"); - for(int jj = 0; jj < Groups.size(); jj++) { + for(unsigned int jj = 0; jj < Groups.size(); jj++) { delete Groups[jj]; } } template GPUGMLM_parameters_Group_GPU::~GPUGMLM_parameters_Group_GPU() { switchToDevice(); - for(int dd = 0; dd < N_per_factor->size(); dd++) { + for(unsigned int dd = 0; dd < N_per_factor->size(); dd++) { if((*N_per_factor)[dd] > 1) { cudaSafeFree(F[dd], "GPUGMLM_parameters_Group_GPU errors: could not free F"); } @@ -217,7 +217,7 @@ __global__ void kernel_ParamsSparseRunSetup(GPUData_kernel ridx_sa unsigned int mm = trial_included[tr]; unsigned int start_all = ridx_t_all[mm]; unsigned int start_sp = ridx_st_sall[tr]; - for(int nn = 0; nn < dim_N[mm]; nn++) { + for(unsigned int nn = 0; nn < dim_N[mm]; nn++) { ridx_sa_all[nn + start_sp] = start_all + nn; } } @@ -364,7 +364,7 @@ void GPUGMLM_parameters_GPU::copyToGPU(const GPUGMLM_params * gm checkCudaErrors(B->copyTo(stream, gmlm_params->B, false), "GPUGMLM_parameters_GPU errors: could not copy B to device!"); //for each group - for(int jj = 0; jj < dim_J(); jj++) { + for(unsigned int jj = 0; jj < dim_J(); jj++) { Groups[jj]->copyToGPU(gmlm_params->Groups[jj], stream_Groups[jj], opts->Groups[jj]); } } @@ -399,7 +399,7 @@ void GPUGMLM_parameters_GPU::copyToGPU(const GPUGMLM_params * gm included = (*trial_weights_temp)[mm] != 0; } else { - for(int pp = 0; pp < dim_P(); pp++) { + for(unsigned int pp = 0; pp < dim_P(); pp++) { (*trial_weights_temp)(mm, pp) = (*(opts->trial_weights))((*(dataset->id_t_trial))[mm], pp); included = included || (*trial_weights_temp)(mm,pp) != 0; } @@ -504,7 +504,7 @@ void GPUGMLM_parameters_GPU::copyToGPU(const GPUGMLM_params * gm checkCudaErrors(B->copyTo(stream, gmlm_params->B, false), "GPUGMLMPop_parameters_GPU errors: could not copy B to device!"); //for each group - for(int jj = 0; jj < dim_J(); jj++) { + for(unsigned int jj = 0; jj < dim_J(); jj++) { Groups[jj]->copyToGPU(gmlm_params->Groups[jj], stream_Groups[jj], opts->Groups[jj]); } } @@ -524,7 +524,7 @@ void GPUGMLM_parameters_Group_GPU::copyToGPU(const GPUGMLM_group_params< output_stream << "GPUGMLM_parameters_Group_GPU errors: Invalid tensor coefficient group order. received dim_S = " << gmlm_group_params->dim_S() << ", expected dim_S = " << dim_S() << std::endl; msg->callErrMsgTxt(output_stream); } - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { if(gmlm_group_params->dim_T(ss, msg) != dim_T(ss)) { output_stream << "GPUGMLM_parameters_Group_GPU errors: Invalid tensor coefficient size. Received dim_T = " << gmlm_group_params->dim_T(ss, msg) << ", expected dim_T = " << dim_T(ss) << std::endl; msg->callErrMsgTxt(output_stream); @@ -537,7 +537,7 @@ void GPUGMLM_parameters_Group_GPU::copyToGPU(const GPUGMLM_group_params< msg->callErrMsgTxt(output_stream); } compute_dF->assign(false); - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { (*compute_dT)[ss] = opts->compute_dT[ss]; (*compute_dF)[(*factor_idx)[ss]] = (*compute_dF)[(*factor_idx)[ss]] || opts->compute_dT[ss]; } @@ -548,7 +548,7 @@ void GPUGMLM_parameters_Group_GPU::copyToGPU(const GPUGMLM_group_params< checkCudaErrors(V->copyTo(stream, gmlm_group_params->V, false), "GPUGMLM_parameters_Group_GPU errors: could not copy V to device!"); //copy each T - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { checkCudaErrors(T[ss]->copyTo(stream, gmlm_group_params->T[ss], false), "GPUGMLM_parameters_Group_GPU errors: could not copy T to device!"); } @@ -573,17 +573,17 @@ __global__ void kernel_assembleFactorFilter(GPUData_array_kernel::GPUGMLM_results_GPU(const GPUGMLM_structure_args Groups.size()); - for(int jj = 0; jj < dim_J(); jj++) { + for(unsigned int jj = 0; jj < dim_J(); jj++) { Groups[jj] = new GPUGMLM_results_Group_GPU(GMLMstructure->Groups[jj], this); } } @@ -685,7 +685,7 @@ GPUGMLM_results_Group_GPU::GPUGMLM_results_Group_GPU(const GPUGMLM_struc checkCudaErrors(ce, "GPUGMLM_results_Group_GPU errors: could not allocate space for dV!" ); dT.resize(GMLMGroupStructure->dim_T.size()); - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { dT[ss] = new GPUData(ce, GPUData_HOST_PAGELOCKED, stream, GMLMGroupStructure->dim_T[ss], GMLMGroupStructure->dim_R_max); checkCudaErrors(ce, "GPUGMLM_results_Group_GPU errors: could not allocate space for T[ss]!" ); } @@ -699,12 +699,12 @@ GPUGMLM_results_Group_GPU::GPUGMLM_results_Group_GPU(const GPUGMLM_struc dim_F_c.assign(dim_D(), 1); NF.assign(dim_D(), 0); - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { NF[GMLMGroupStructure->factor_idx[ss]]++; dim_F_c[GMLMGroupStructure->factor_idx[ss]] *= GMLMGroupStructure->dim_T[ss]; } - for(int dd = 0; dd < dim_D(); dd++) { + for(unsigned int dd = 0; dd < dim_D(); dd++) { if(NF[dd] > 1) { dF[dd] = new GPUData(ce, GPUData_HOST_NONE, stream, dim_F_c[dd], GMLMGroupStructure->dim_R_max); checkCudaErrors(ce, "GPUGMLM_results_Group_GPU errors: could not allocate space for dF[dd]!" ); @@ -712,7 +712,7 @@ GPUGMLM_results_Group_GPU::GPUGMLM_results_Group_GPU(const GPUGMLM_struc } else { dF_assigned[dd] = false; - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { if(GMLMGroupStructure->factor_idx[ss] == dd) { dF[dd] = dT[ss]; break; @@ -730,7 +730,7 @@ GPUGMLM_results_GPU::~GPUGMLM_results_GPU() { cudaSafeFree(dW, "GPUGMLM_results_GPU errors: could not free W"); cudaSafeFree(dB, "GPUGMLM_results_GPU errors: could not free B"); - for(int jj = 0; jj < Groups.size(); jj++) { + for(unsigned int jj = 0; jj < Groups.size(); jj++) { delete Groups[jj]; } } @@ -738,7 +738,7 @@ template GPUGMLM_results_Group_GPU::~GPUGMLM_results_Group_GPU() { switchToDevice(); cudaSafeFreeVector(dT, "GPUGMLM_results_Group_GPU errors: could not free dT"); - for(int dd = 0; dd < dF.size(); dd++) { + for(unsigned int dd = 0; dd < dF.size(); dd++) { if(dF_assigned[dd]) { cudaSafeFree(dF[dd], "GPUGMLM_results_Group_GPU errors: could not free dF"); } @@ -773,7 +773,7 @@ void GPUGMLM_results_GPU::gatherResults(const GPUGMLM_parameters_GPUcallErrMsgTxt(output_stream); } - for(int jj = 0; jj < Groups.size(); jj++) { + for(unsigned int jj = 0; jj < Groups.size(); jj++) { Groups[jj]->gatherResults(params->Groups[jj], opts->Groups[jj], stream_Groups[jj]); } } @@ -793,7 +793,7 @@ void GPUGMLM_results_Group_GPU::gatherResults(const GPUGMLM_parameters_G } //copy dT - for(int ss = 0; ss < dT.size(); ss++) { + for(unsigned int ss = 0; ss < dT.size(); ss++) { if(opts->compute_dT[ss]) { checkCudaErrors(dT[ss]->copyGPUToHost(stream),"GPUGMLM_results_Group_GPU::gatherResults errors: could not copy dT to host!"); } @@ -842,16 +842,16 @@ void GPUGMLM_results_GPU::addToHost(const GPUGMLM_parameters_GPU //adds local results to dest if(opts->compute_dW) { - for(int pp = 0; pp < dim_P(); pp++) { + for(unsigned int pp = 0; pp < dim_P(); pp++) { if(isSimultaneousPopulation || (*dim_N_neuron_temp)[pp] > 0) { (*(results_dest->dW))[pp] += (*dW)[pp]; } } } if(opts->compute_dB && dim_B() > 0) { - for(int pp = 0; pp < dim_P(); pp++) { + for(unsigned int pp = 0; pp < dim_P(); pp++) { if(isSimultaneousPopulation || (*dim_N_neuron_temp)[pp] > 0) { - for(int bb = 0; bb < dim_B(); bb++) { + for(unsigned int bb = 0; bb < dim_B(); bb++) { (*(results_dest->dB))(bb, pp) += (*dB)(bb, pp); } } @@ -860,10 +860,10 @@ void GPUGMLM_results_GPU::addToHost(const GPUGMLM_parameters_GPU //adds local results to dest if(opts->compute_trialLL) { - for(int mm = 0; mm < max_trials(); mm++) { + for(unsigned int mm = 0; mm < max_trials(); mm++) { if((*isInDataset_trial)[mm]) { int cols = isSimultaneousPopulation ? dim_P() : 1; - for(int pp = 0; pp < cols; pp++) { + for(unsigned int pp = 0; pp < cols; pp++) { FPTYPE weight = 1; if(!opts->trial_weights->empty()) { if(!isSimultaneousPopulation || opts->trial_weights->getSize(1) == 1) { @@ -881,7 +881,7 @@ void GPUGMLM_results_GPU::addToHost(const GPUGMLM_parameters_GPU } } - for(int jj = 0; jj < dim_J(); jj++) { + for(unsigned int jj = 0; jj < dim_J(); jj++) { Groups[jj]->addToHost(params->Groups[jj], results_dest->Groups[jj], opts->Groups[jj], dim_N_neuron_temp, isSimultaneousPopulation, reset); } } @@ -904,7 +904,7 @@ void GPUGMLM_results_Group_GPU::addToHost(const GPUGMLM_parameters_Group output_stream << "GPUGMLM_results_Group_GPU::addResults errors: results struct is the incorrect size!"; msg->callErrMsgTxt(output_stream); } - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { if(opts->compute_dT[ss]) { if(!(dT[ss]->isEqualSize(results_dest->dT[ss]))) { output_stream << "GPUGMLM_results_Group_GPU::addResults errors: results struct is the incorrect size!"; @@ -918,19 +918,19 @@ void GPUGMLM_results_Group_GPU::addToHost(const GPUGMLM_parameters_Group //adds on results //individual model if(opts->compute_dV) { - for(int pp = 0; pp < parent->dim_P(); pp++) { + for(unsigned int pp = 0; pp < parent->dim_P(); pp++) { if(isSimultaneousPopulation || (*dim_N_neuron_temp)[pp] > 0) { //if there is anything to add for this neuron - for(int rr = 0; rr < dim_R(); rr++) { + for(unsigned int rr = 0; rr < dim_R(); rr++) { (*(results_dest->dV))(pp, rr) += (*dV)(pp, rr); } } } } - for(int ss = 0; ss < dim_S(); ss++) { + for(unsigned int ss = 0; ss < dim_S(); ss++) { if(opts->compute_dT[ss]) { - for(int tt = 0; tt < dim_T(ss); tt++) { - for(int rr = 0; rr < dim_R(); rr++) { + for(unsigned int tt = 0; tt < dim_T(ss); tt++) { + for(unsigned int rr = 0; rr < dim_R(); rr++) { (*(results_dest->dT[ss]))(tt, rr) += (*(dT[ss]))(tt, rr); } }