Skip to content

Commit

Permalink
took dropna out of violinplot (deprecated in latest seaborn); fixed problem with seaborn palette plotting PCA (fig 11)
Browse files Browse the repository at this point in the history
  • Loading branch information
duibuqi committed Nov 22, 2023
1 parent 5a8a6ef commit 27aad73
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 39 deletions.
32 changes: 18 additions & 14 deletions nPYc/plotting/_multivariatePlotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def plotScree(R2, Q2=None, title='', xlabel='', ylabel='', savePath=None, figure
def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerDict=None, components=None,
hotelling_alpha=0.05,
plotAssociation=None, title='', xlabel='', figures=None, savePath=None, figureFormat='png', dpi=72,
figureSize=(11, 7)):
figureSize=(11, 7), opacity=.4):
"""
Plot PCA scores for each pair of components in PCAmodel, coloured by values defined in classes, and with Hotelling's T2 ellipse (95%)
Expand All @@ -85,6 +85,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD
:param dict figures: If not ``None``, saves location of each figure for output in html report (see multivariateReport.py)
"""

print("Plotting scores %s" % colourType)
# Check inputs
if not isinstance(pcaModel, ChemometricsPCA):
raise TypeError('PCAmodel must be an instance of ChemometricsPCA')
Expand Down Expand Up @@ -114,8 +115,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD
# If colourDict check colour defined for every unique entry in class
colourDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=colourDict, dictName="colourDict")

markerDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=markerDict,
dictName="markerDict", defaultVal="o")
markerDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=markerDict,dictName="markerDict", defaultVal="o")

from matplotlib.patches import Ellipse

Expand Down Expand Up @@ -183,7 +183,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD
ax.scatter(values[classes.values == u, components[i]],
values[classes.values == u, components[j]],
c=colourDict[u], marker=markerDict[u],
label=u, alpha=.4)
label=u, alpha=opacity)

else:
colors_sns = {}
Expand All @@ -207,7 +207,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD
if classIX < 20:
ax.scatter(values[classes.values == u, components[i]],
values[classes.values == u, components[j]], c=c, label=u,
alpha=.4) # olors[classIX], label=u)
alpha=opacity) # olors[classIX], label=u)
elif classIX == len(uniqnonan) - 1:
ax.scatter(values[classes.values == u, components[i]],
values[classes.values == u, components[j]], c='0', alpha=0, label='...')
Expand All @@ -217,11 +217,13 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD
else:
ax.scatter(values[classes.values == u, components[i]],
values[classes.values == u, components[j]], c=c,
label='_nolegend_', alpha=.4) # colors[classIX], label='_nolegend_')
label='_nolegend_', alpha=opacity) # colors[classIX], label='_nolegend_')
classIX = classIX + 1
colors_sns[u] = c
colors_sns[str(u)] = c


if plotAssociation is not None:

nonans = [i for i, x in enumerate(classes) if x in {'nan', 'NaN', 'NaT', '', 'NA'}]
plotClasses = classes.copy()
plotClasses[nonans] = 'NA'
Expand All @@ -232,14 +234,15 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD
'PC' + str(components[j] + 1)])

# Association for component[i]
ax1 = sns.stripplot(x=plotTitle, y='PC' + str(components[i] + 1), data=tempdata, ax=ax1,
palette=colors_sns)

ax1 = sns.stripplot(x=plotTitle, y='PC' + str(components[i] + 1),
data=tempdata, ax=ax1, palette=colors_sns)
ax1.set(xticklabels=[])
ax1.set(xlabel='')

# Association for component[j]
ax2 = sns.stripplot(x=plotTitle, y='PC' + str(components[j] + 1), data=tempdata, ax=ax2,
palette=colors_sns)
ax2 = sns.stripplot(x=plotTitle, y='PC' + str(components[j] + 1),
data=tempdata, ax=ax2, palette=colors_sns)
ax2.set(xticklabels=[])

ax.legend(loc='upper left', bbox_to_anchor=(1, 1))
Expand All @@ -253,7 +256,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD
ax.legend()

cb = ax.scatter(values[plotnans == False, components[i]], values[plotnans == False, components[j]],
c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=.4)
c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=opacity)
cbar = plt.colorbar(cb, ax=ax)
cbar.set_label(title)

Expand All @@ -263,14 +266,14 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD

# Association for component[i]
ax1.scatter(classes[plotnans == False], values[plotnans == False, components[i]],
c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=.4)
c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=opacity)
ax1.scatter(numpy.ones([sum(plotnans), 1]) * xvalnan, values[plotnans, components[i]], c='#D3D3D3')
ax1.set_ylabel('PC' + str(components[i] + 1))
ax1.set(xticklabels=[])

# Association for component[j]
ax2.scatter(classes[plotnans == False], values[plotnans == False, components[j]],
c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=.4)
c=classes[plotnans == False], cmap=plt.cm.rainbow, alpha=opacity)
ax2.scatter(numpy.ones([sum(plotnans), 1]) * xvalnan, values[plotnans, components[j]], c='#D3D3D3')
ax2.set_xlabel(plotTitle)
ax2.set_ylabel('PC' + str(components[j] + 1))
Expand All @@ -281,6 +284,7 @@ def plotScores(pcaModel, classes=None, colourType=None, colourDict=None, markerD
xlabel = 'PC' + str(components[i] + 1) + ' (' + '{0:.2f}'.format(
pcaModel.modelParameters['VarExpRatio'][components[i]] * 100) + '%)'
if plotAssociation is not None:

ylabel = ylabel + ' significance: ' + '{0:.2f}'.format(plotAssociation[components[j]])
xlabel = xlabel + ' significance: ' + '{0:.2f}'.format(plotAssociation[components[i]])
ax.set_ylabel(ylabel)
Expand Down
28 changes: 16 additions & 12 deletions nPYc/plotting/_plotTIC.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False,
colourDict=None, markerDict=None, abbrDict=None,
logy=False, title='',
withExclusions=True, savePath=None,
figureFormat='png', dpi=72, figureSize=(11,7)):
figureFormat='png', dpi=72, figureSize=(11,7), opacity=.6):
"""
Visualise TIC for all or a subset of features coloured by either dilution value or detector voltage.
With the option to shade by batch.
Expand Down Expand Up @@ -159,7 +159,7 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False,
marker=markerDict[u],
s=30,
c=colourDict[u],
alpha=.4,
alpha=opacity,
label=u)

if addViolin:
Expand Down Expand Up @@ -189,7 +189,8 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False,
c=msData.sampleMetadata[colourBy],
cmap=cmap,
vmin=mincol,
vmax=maxcol)
vmax=maxcol,
alpha=opacity)

# Shade by automatically defined batches (if required)
if addBatchShading:
Expand Down Expand Up @@ -225,7 +226,7 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False,
end = end.values[0]

# Plot rectangle
rect = Rectangle((start, ymin), end-start, abs(ymin)+abs(ymax), color=colors[colIX], alpha=0.4, zorder=0)#,label='Batch %d' % (i))
rect = Rectangle((start, ymin), end-start, abs(ymin)+abs(ymax), color=colors[colIX], alpha=opacity, zorder=0)#,label='Batch %d' % (i))
ax.add_patch(rect)
colIX = colIX + 1

Expand Down Expand Up @@ -272,7 +273,7 @@ def plotTIC(dataset, addViolin=True, addBatchShading=False,

def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order',
colourBy='Correction Batch', withExclusions=True,
destinationPath=None, autoOpen=True):
destinationPath=None, autoOpen=True, opacity=.6):
"""
Interactively visualise TIC or intensity for a given feature with plotly, provides tooltips to allow identification of samples.
Expand Down Expand Up @@ -353,7 +354,7 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order',
text=hovertext[plotnans == True],
name='NA',
hoverinfo='text',
showlegend=True
showlegend=True, opacity=opacity
)
data.append(NaNplot)

Expand All @@ -371,7 +372,8 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order',
),
text=hovertext[plotnans == False],
hoverinfo='text',
showlegend=False
showlegend=False,
opacity=opacity
)
data.append(CLASSplot)

Expand All @@ -390,14 +392,16 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order',
text=hovertext[classes == i],
name=i,
hoverinfo='text',
showlegend=True
showlegend=True, opacity=opacity
)
data.append(CLASSplot)

# Overlay SR and LTR if columns present
if ('SampleType' in msData.sampleMetadata.columns) & ('AssayRole' in msData.sampleMetadata.columns):
SRmask = ((msData.sampleMetadata['SampleType'].values == SampleType.StudyPool) & (msData.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference))
LTRmask = ((msData.sampleMetadata['SampleType'].values == SampleType.ExternalReference) & (msData.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference))
SRmask = ((msData.sampleMetadata['SampleType'].values == SampleType.StudyPool) &
(msData.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)) #SPmask
LTRmask = ((msData.sampleMetadata['SampleType'].values == SampleType.ExternalReference) &
(msData.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)) # ERmask

SRplot = go.Scatter(
x=msData.sampleMetadata.loc[SRmask, x],
Expand All @@ -410,7 +414,7 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order',
text=hovertext[SRmask],
name='Study Reference',
hoverinfo='text',
showlegend=True
showlegend=True, opacity=opacity
)
data.append(SRplot)

Expand All @@ -425,7 +429,7 @@ def plotTICinteractive(dataset, x='Run Order', y='TIC', labelBy='Run Order',
text=hovertext[LTRmask],
name='Long-Term Reference',
hoverinfo='text',
showlegend=True
showlegend=True, opacity=opacity
)
data.append(LTRplot)

Expand Down
25 changes: 14 additions & 11 deletions nPYc/plotting/_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='', histBins=100, color=None, logy=False,
logx=False, xlim=None, savePath=None, figureFormat='png', dpi=72, figureSize=(11, 7)):
logx=False, xlim=None, savePath=None, figureFormat='png', dpi=72, figureSize=(11, 7), opacity=.5):
"""
histogram(values, inclusionVector=None, quantiles=None, histBins=100, color=None, logy=False, logx=False, **kwargs)
Expand Down Expand Up @@ -123,7 +123,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='',
localValues[localValues == 0] = numpy.finfo(numpy.float64).epsneg

ax.hist(localValues,
alpha=.4,
alpha=opacity,
range=(minVal, maxVal),
label=key,
bins=nbins)
Expand All @@ -141,7 +141,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='',
label=label)
else:
ax.hist(values[mask],
alpha=.4,
alpha=opacity,
label=label,
bins=nbins)

Expand All @@ -153,7 +153,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='',
label=label)
else:
ax.hist(values[mask],
alpha=.4,
alpha=opacity,
label=label,
bins=nbins)

Expand All @@ -164,7 +164,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='',
label=label)
else:
ax.hist(values[mask],
alpha=.4,
alpha=opacity,
label=label,
bins=nbins)
else:
Expand Down Expand Up @@ -194,7 +194,7 @@ def histogram(values, inclusionVector=None, quantiles=None, title='', xlabel='',


def plotLRTIC(msData, sampleMask=None, colourByDetectorVoltage=False, title='', label=False, savePath=None,
figureFormat='png', dpi=72, figureSize=(11, 7)):
figureFormat='png', dpi=72, figureSize=(11, 7), opacity=.4):
"""
Visualise TIC for linearity reference (LR) samples (either all or a subset) coloured by either dilution value or detector voltage.
Expand Down Expand Up @@ -237,12 +237,12 @@ def plotLRTIC(msData, sampleMask=None, colourByDetectorVoltage=False, title='',
cmap=plt.cm.get_cmap('bwr'),
vmin=cMin,
vmax=cMax,
edgecolors='grey')
edgecolors='grey', alpha=opacity)
else:
ax.scatter(runIX, tic,
c=msData.sampleMetadata['Dilution'][LRmask],
cmap=plt.cm.jet,
edgecolors='grey')
edgecolors='grey',alpha=opacity)

# Add sample labels
if label == True:
Expand Down Expand Up @@ -276,7 +276,7 @@ def plotLRTIC(msData, sampleMask=None, colourByDetectorVoltage=False, title='',


def plotCorrelationToLRbyFeature(msData, featureMask=None, title='', maxNo=5, savePath=None, figureFormat='png', dpi=72,
figureSize=(11, 7)):
figureSize=(11, 7), opacity=.4):
"""
Summary plots of correlation to dilution for a subset of features, separated by sample batch. Each figure includes a scatter plot of feature intensity vs dilution, TIC of LR and surrounding SP samples, and a heatmap of correlation to dilution for each LR batch subset, overall, and mean.
Expand Down Expand Up @@ -344,7 +344,7 @@ def plotCorrelationToLRbyFeature(msData, featureMask=None, title='', maxNo=5, sa
# Plot scatter of LR intensity coloured by dilution
ax1.scatter(runIX, msData.intensityData[LRmask, feature],
c=msData.sampleMetadata['Dilution'][LRmask],
cmap=plt.cm.jet)
cmap=plt.cm.jet, alpha=opacity)

# Add a line where samples are not adjacent
sampletime = [x - runIX[i - 1] for i, x in enumerate(runIX)]
Expand Down Expand Up @@ -387,12 +387,15 @@ def plotCorrelationToLRbyFeature(msData, featureMask=None, title='', maxNo=5, sa

def checkAndSetPlotAttributes(uniqKeys, attribDict, dictName, defaultVal=None):
# check all the keys of attribDict are in uniqKeys
# putting this here to see if it's a useful refactor. It may not be.
if attribDict:
if not all(k in attribDict.keys() for k in uniqKeys):
print(dictName + " keys are " + attribDict.keys())
print("Category keys are " + uniqKeys)
raise ValueError(
'Check keys in ' + dictName + "; some aren't present in the categories list.")
else:
# only do this if a default has been specified
# only set all the vals if a default has been specified
if defaultVal:
attribDict = {}
for u in uniqKeys:
Expand Down
4 changes: 2 additions & 2 deletions nPYc/plotting/_violinPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def _violinPlotHelper(ax, values, sampleMasks, xlabel, ylabel, palette=None, yli
sns.set_color_codes(palette='deep')

if palette is not None:
sns.violinplot(data=localDFpre, scale='width', bw=.2, cut=0, ax=ax, palette=palette, dropna=True)
sns.violinplot(data=localDFpre, scale='width', bw=.2, cut=0, ax=ax, palette=palette)
else:
sns.violinplot(data=localDFpre, scale='width', bw=.2, cut=0, ax=ax, dropna=True)
sns.violinplot(data=localDFpre, scale='width', bw=.2, cut=0, ax=ax)

# ax formatting
if ylimits:
Expand Down
1 change: 1 addition & 0 deletions nPYc/reports/multivariateReport.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio
classes=data.sampleMetadata['SampleClass'],
colourType='categorical',
colourDict=data.Attributes['sampleTypeColours'],
markerDict=data.Attributes['sampleTypeMarkers'],
title='SampleClass',
figures=figuresQCscores,
hotelling_alpha=hotellings_alpha,
Expand Down

0 comments on commit 27aad73

Please sign in to comment.