diff --git a/nPYc/plotting/_multivariatePlotting.py b/nPYc/plotting/_multivariatePlotting.py index 4b2f68b9..79d05fca 100644 --- a/nPYc/plotting/_multivariatePlotting.py +++ b/nPYc/plotting/_multivariatePlotting.py @@ -66,7 +66,7 @@ def plotScree(R2, Q2=None, title='', xlabel='', ylabel='', savePath=None, figure def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerDict=None, components=None, hotelling_alpha=0.05, plotAssociation=None, title='', xlabel='', figures=None, savePath=None, figureFormat='png', dpi=72, - figureSize=(11, 7)): + figureSize=(11, 7), opacity=.4): """ Plot PCA scores for each pair of components in PCAmodel, coloured by values defined in classes, and with Hotelling's T2 ellipse (95%) @@ -85,6 +85,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD :param dict figures: If not ``None``, saves location of each figure for output in html report (see multivariateReport.py) """ + print("Plotting scores %s" % colourType) # Check inputs if not isinstance(pcaModel, ChemometricsPCA): raise TypeError('PCAmodel must be an instance of ChemometricsPCA') @@ -114,8 +115,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD # If colourDict check colour defined for every unique entry in class colourDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=colourDict, dictName="colourDict") - markerDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=markerDict, - dictName="markerDict", defaultVal="o") + markerDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=markerDict,dictName="markerDict", defaultVal="o") from matplotlib.patches import Ellipse @@ -183,7 +183,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD ax.scatter(values[classes.values == u, components[i]], values[classes.values == u, components[j]], c=colourDict[u], marker=markerDict[u], - label=u, alpha=.4) + label=u, alpha=opacity) else: colors_sns = {} @@ -207,7 +207,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD if classIX < 20: ax.scatter(values[classes.values == u, components[i]], values[classes.values == u, components[j]], c=c, label=u, - alpha=.4) # olors[classIX], label=u) + alpha=opacity) # olors[classIX], label=u) elif classIX == len(uniqnonan) - 1: ax.scatter(values[classes.values == u, components[i]], values[classes.values == u, components[j]], c='0', alpha=0, label='...') @@ -217,11 +217,13 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD else: ax.scatter(values[classes.values == u, components[i]], values[classes.values == u, components[j]], c=c, - label='_nolegend_', alpha=.4) # colors[classIX], label='_nolegend_') + label='_nolegend_', alpha=opacity) # colors[classIX], label='_nolegend_') classIX = classIX + 1 - colors_sns[u] = c + colors_sns[str(u)] = c + if plotAssociation is not None: + nonans = [i for i, x in enumerate(classes) if x in {'nan', 'NaN', 'NaT', '', 'NA'}] plotClasses = classes.copy() plotClasses[nonans] = 'NA' @@ -232,14 +234,15 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD 'PC' + str(components[j] + 1)]) # Association for component[i] - ax1 = sns.stripplot(x=plotTitle, y='PC' + str(components[i] + 1), data=tempdata, ax=ax1, - palette=colors_sns) + + ax1 = sns.stripplot(x=plotTitle, y='PC' + str(components[i] + 1), + data=tempdata, ax=ax1, palette=colors_sns) ax1.set(xticklabels=[]) ax1.set(xlabel='') # Association for component[j] - ax2 = sns.stripplot(x=plotTitle, y='PC' + str(components[j] + 1), data=tempdata, ax=ax2, - palette=colors_sns) + ax2 = sns.stripplot(x=plotTitle, y='PC' + str(components[j] + 1), + data=tempdata, ax=ax2, palette=colors_sns) ax2.set(xticklabels=[]) ax.legend(loc='upper left', bbox_to_anchor=(1, 1)) @@ -253,7 +256,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD ax.legend() cb = ax.scatter(values[plotnans == False, components[i]], values[plotnans == False, components[j]], - c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=.4) + c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=opacity) cbar = plt.colorbar(cb, ax=ax) cbar.set_label(title) @@ -263,14 +266,14 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD # Association for component[i] ax1.scatter(classes[plotnans == False], values[plotnans == False, components[i]], - c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=.4) + c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=opacity) ax1.scatter(numpy.ones([sum(plotnans), 1]) * xvalnan, values[plotnans, components[i]], c='#D3D3D3') ax1.set_ylabel('PC' + str(components[i] + 1)) ax1.set(xticklabels=[]) # Association for component[j] ax2.scatter(classes[plotnans == False], values[plotnans == False, components[j]], - c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=.4) + c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=opacity) ax2.scatter(numpy.ones([sum(plotnans), 1]) * xvalnan, values[plotnans, components[j]], c='#D3D3D3') ax2.set_xlabel(plotTitle) ax2.set_ylabel('PC' + str(components[j] + 1)) @@ -281,6 +284,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD xlabel = 'PC' + str(components[i] + 1) + ' (' + '{0:.2f}'.format( pcaModel.modelParameters['VarExpRatio'][components[i]] * 100) + '%)' if plotAssociation is not None: + ylabel = ylabel + ' significance: ' + '{0:.2f}'.format(plotAssociation[components[j]]) xlabel = xlabel + ' significance: ' + '{0:.2f}'.format(plotAssociation[components[i]]) ax.set_ylabel(ylabel) diff --git a/nPYc/plotting/_plotTIC.py b/nPYc/plotting/_plotTIC.py index 38d7e8da..8d0ed895 100644 --- a/nPYc/plotting/_plotTIC.py +++ b/nPYc/plotting/_plotTIC.py @@ -27,7 +27,7 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False, colourDict=None, markerDict=None, abbrDict=None, logy=False, title='', withExclusions=True, savePath=None, - figureFormat='png', dpi=72, figureSize=(11,7)): + figureFormat='png', dpi=72, figureSize=(11,7), opacity=.6): """ Visualise TIC for all or a subset of features coloured by either dilution value or detector voltage. With the option to shade by batch. @@ -159,7 +159,7 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False, marker=markerDict[u], s=30, c=colourDict[u], - alpha=.4, + alpha=opacity, label=u) if addViolin: @@ -189,7 +189,8 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False, c=msData.sampleMetadata[colourBy], cmap=cmap, vmin=mincol, - vmax=maxcol) + vmax=maxcol, + alpha=opacity) # Shade by automatically defined batches (if required) if addBatchShading: @@ -225,7 +226,7 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False, end = end.values[0] # Plot rectangle - rect = Rectangle((start, ymin), end-start, abs(ymin)+abs(ymax), color=colors[colIX], alpha=0.4, zorder=0)#,label='Batch %d' % (i)) + rect = Rectangle((start, ymin), end-start, abs(ymin)+abs(ymax), color=colors[colIX], alpha=opacity, zorder=0)#,label='Batch %d' % (i)) ax.add_patch(rect) colIX = colIX + 1 @@ -272,7 +273,7 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False, def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order', colourBy='Correction Batch', withExclusions=True, - destinationPath=None, autoOpen=True): + destinationPath=None, autoOpen=True, opacity=.6): """ Interactively visualise TIC or intensity for a given feature with plotly, provides tooltips to allow identification of samples. @@ -353,7 +354,7 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order', text=hovertext[plotnans == True], name='NA', hoverinfo='text', - showlegend=True + showlegend=True, opacity=opacity ) data.append(NaNplot) @@ -371,7 +372,8 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order', ), text=hovertext[plotnans == False], hoverinfo='text', - showlegend=False + showlegend=False, + opacity=opacity ) data.append(CLASSplot) @@ -390,14 +392,16 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order', text=hovertext[classes == i], name=i, hoverinfo='text', - showlegend=True + showlegend=True, opacity=opacity ) data.append(CLASSplot) # Overlay SR and LTR if columns present if ('SampleType' in msData.sampleMetadata.columns) & ('AssayRole' in msData.sampleMetadata.columns): - SRmask = ((msData.sampleMetadata['SampleType'].values == SampleType.StudyPool) & (msData.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)) - LTRmask = ((msData.sampleMetadata['SampleType'].values == SampleType.ExternalReference) & (msData.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)) + SRmask = ((msData.sampleMetadata['SampleType'].values == SampleType.StudyPool) & + (msData.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)) #SPmask + LTRmask = ((msData.sampleMetadata['SampleType'].values == SampleType.ExternalReference) & + (msData.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)) # ERmask SRplot = go.Scatter( x=msData.sampleMetadata.loc[SRmask, x], @@ -410,7 +414,7 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order', text=hovertext[SRmask], name='Study Reference', hoverinfo='text', - showlegend=True + showlegend=True, opacity=opacity ) data.append(SRplot) @@ -425,7 +429,7 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order', text=hovertext[LTRmask], name='Long-Term Reference', hoverinfo='text', - showlegend=True + showlegend=True, opacity=opacity ) data.append(LTRplot) diff --git a/nPYc/plotting/_plotting.py b/nPYc/plotting/_plotting.py index 49d84789..f900b8d9 100644 --- a/nPYc/plotting/_plotting.py +++ b/nPYc/plotting/_plotting.py @@ -12,7 +12,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='', histBins=100, color=None, logy=False, - logx=False, xlim=None, savePath=None, figureFormat='png', dpi=72, figureSize=(11, 7)): + logx=False, xlim=None, savePath=None, figureFormat='png', dpi=72, figureSize=(11, 7), opacity=.5): """ histogram(values, inclusionVector=None, quantiles=None, histBins=100, color=None, logy=False, logx=False, **kwargs) @@ -123,7 +123,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='', localValues[localValues == 0] = numpy.finfo(numpy.float64).epsneg ax.hist(localValues, - alpha=.4, + alpha=opacity, range=(minVal, maxVal), label=key, bins=nbins) @@ -141,7 +141,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='', label=label) else: ax.hist(values[mask], - alpha=.4, + alpha=opacity, label=label, bins=nbins) @@ -153,7 +153,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='', label=label) else: ax.hist(values[mask], - alpha=.4, + alpha=opacity, label=label, bins=nbins) @@ -164,7 +164,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='', label=label) else: ax.hist(values[mask], - alpha=.4, + alpha=opacity, label=label, bins=nbins) else: @@ -194,7 +194,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='', def plotLRTIC(msData, sampleMask=None, colourByDetectorVoltage=False, title='', label=False, savePath=None, - figureFormat='png', dpi=72, figureSize=(11, 7)): + figureFormat='png', dpi=72, figureSize=(11, 7), opacity=.4): """ Visualise TIC for linearity reference (LR) samples (either all or a subset) coloured by either dilution value or detector voltage. @@ -237,12 +237,12 @@ def plotLRTIC(msData, sampleMask=None, colourByDetectorVoltage=False, title='', cmap=plt.cm.get_cmap('bwr'), vmin=cMin, vmax=cMax, - edgecolors='grey') + edgecolors='grey', alpha=opacity) else: ax.scatter(runIX, tic, c=msData.sampleMetadata['Dilution'][LRmask], cmap=plt.cm.jet, - edgecolors='grey') + edgecolors='grey',alpha=opacity) # Add sample labels if label == True: @@ -276,7 +276,7 @@ def plotLRTIC(msData, sampleMask=None, colourByDetectorVoltage=False, title='', def plotCorrelationToLRbyFeature(msData, featureMask=None, title='', maxNo=5, savePath=None, figureFormat='png', dpi=72, - figureSize=(11, 7)): + figureSize=(11, 7), opacity=.4): """ Summary plots of correlation to dilution for a subset of features, separated by sample batch. Each figure includes a scatter plot of feature intensity vs dilution, TIC of LR and surrounding SP samples, and a heatmap of correlation to dilution for each LR batch subset, overall, and mean. @@ -344,7 +344,7 @@ def plotCorrelationToLRbyFeature(msData, featureMask=None, title='', maxNo=5, sa # Plot scatter of LR intensity coloured by dilution ax1.scatter(runIX, msData.intensityData[LRmask, feature], c=msData.sampleMetadata['Dilution'][LRmask], - cmap=plt.cm.jet) + cmap=plt.cm.jet, alpha=opacity) # Add a line where samples are not adjacent sampletime = [x - runIX[i - 1] for i, x in enumerate(runIX)] @@ -387,12 +387,15 @@ def plotCorrelationToLRbyFeature(msData, featureMask=None, title='', maxNo=5, sa def checkAndSetPlotAttributes(uniqKeys, attribDict, dictName, defaultVal=None): # check all the keys of attribDict are in uniqKeys + # putting this here to see if it's a useful refactor. It may not be. if attribDict: if not all(k in attribDict.keys() for k in uniqKeys): + print(dictName + " keys are " + attribDict.keys()) + print("Category keys are " + uniqKeys) raise ValueError( 'Check keys in ' + dictName + "; some aren't present in the categories list.") else: - # only do this if a default has been specified + # only set all the vals if a default has been specified if defaultVal: attribDict = {} for u in uniqKeys: diff --git a/nPYc/plotting/_violinPlot.py b/nPYc/plotting/_violinPlot.py index d62a2966..381703b4 100644 --- a/nPYc/plotting/_violinPlot.py +++ b/nPYc/plotting/_violinPlot.py @@ -31,9 +31,9 @@ def _violinPlotHelper(ax, values, sampleMasks, xlabel, ylabel, palette=None, yli sns.set_color_codes(palette='deep') if palette is not None: - sns.violinplot(data=localDFpre, scale='width', bw=.2, cut=0, ax=ax, palette=palette, dropna=True) + sns.violinplot(data=localDFpre, scale='width', bw=.2, cut=0, ax=ax, palette=palette) else: - sns.violinplot(data=localDFpre, scale='width', bw=.2, cut=0, ax=ax, dropna=True) + sns.violinplot(data=localDFpre, scale='width', bw=.2, cut=0, ax=ax) # ax formatting if ylimits: diff --git a/nPYc/reports/multivariateReport.py b/nPYc/reports/multivariateReport.py index 6316269e..9c4b91d8 100644 --- a/nPYc/reports/multivariateReport.py +++ b/nPYc/reports/multivariateReport.py @@ -326,6 +326,7 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio classes=data.sampleMetadata['SampleClass'], colourType='categorical', colourDict=data.Attributes['sampleTypeColours'], + markerDict=data.Attributes['sampleTypeMarkers'], title='SampleClass', figures=figuresQCscores, hotelling_alpha=hotellings_alpha,