Skip to content

Commit

Permalink
re-revisions
Browse files Browse the repository at this point in the history
  • Loading branch information
Jennifer Eng committed Feb 11, 2022
1 parent 1da66de commit 4001a8a
Show file tree
Hide file tree
Showing 9 changed files with 1,408 additions and 1,939 deletions.
171 changes: 99 additions & 72 deletions Normalization_testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -261,7 +261,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -477,7 +477,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -493,20 +493,9 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"25"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"len(ls_marker)"
]
Expand Down Expand Up @@ -765,7 +754,31 @@
"metadata": {},
"outputs": [],
"source": [
"df_select = df_file[df_file.norm.isin(['raw_combat','raw_standard','restore_scale','raw','restore_div','restore_local','raw_regress_out'])] #'raw_restore_combat'"
"rootdir"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# kBET rejection rate per normalization method, for a selected subset of methods.\n",
"ls_select = ['raw_combat','raw_standard','restore_scale','raw','raw_regress_out','raw_robust','log2_combat'] #'raw_restore_combat'\n",
"df_select = df_file[df_file.norm.isin(ls_select)]\n",
"# Display name for each method; a method missing here falls back to its cleaned-up norm string.\n",
"d_label = {'raw_standard':'z-score','raw_combat':'combat','raw_robust':'robust','raw_regress_out':'regress\\nout',\n",
"           'log2_combat':'log2\\ncombat','restore_scale':'RESTORE','raw':'raw'}\n",
"# Methods ordered by mean rejection rate. Labels are looked up per method instead of being a\n",
"# separate hard-coded list, so a label cannot end up under the wrong method if the ranking changes.\n",
"# NOTE(review): as before, this assumes the x positions follow this ascending order - confirm.\n",
"ls_norm = df_select.groupby('norm').mean_kbet.mean().sort_values().index.tolist()\n",
"labels = [d_label.get(item, item.replace('div','global').replace('raw_','').replace('_','\\n').replace('raw','none')) for item in ls_norm]\n",
"sns.set_style(\"white\")\n",
"fig, ax = plt.subplots(figsize=(4,3.4),dpi=300)\n",
"sns.lineplot(data=df_select.sort_values('mean_kbet'),x='norm',y='mean_kbet',ax=ax,err_style='bars')\n",
"ax.set_xticks(range(len(df_select.groupby('norm').mean_kbet)))\n",
"ax.set_xticklabels(labels,rotation=90)\n",
"ax.set_ylabel('Rejection Rate')\n",
"ax.set_xlabel('Normalization')\n",
"ax.set_title('kBET Evaluation of\\n Batch Correction')\n",
"# Tight layout is enabled on the figure here. The bare `plt.tight_layout` that used to follow\n",
"# was an attribute reference that was never called, so it did nothing and has been dropped.\n",
"fig.set_tight_layout(True)\n",
"fig.savefig(f'{rootdir}/filtered_data/figures/BatchEffect_select.png',dpi=300)"
]
},
{
Expand All @@ -776,7 +789,7 @@
"source": [
"%matplotlib inline\n",
"sns.set_style(\"whitegrid\")\n",
"fig, ax = plt.subplots(figsize=(7,4))\n",
"fig, ax = plt.subplots(figsize=(7,4),dpi=300)\n",
"sns.lineplot(data=df_file.sort_values('Norm'),x='norm',y='mean_kbet',ax=ax,err_style='bars')\n",
"labels = [item.replace('_out','').replace('div','global') for item in ls_index.tolist()]\n",
"ax.set_xticks(range(len(df_file.groupby('norm').mean_kbet)))\n",
Expand All @@ -786,18 +799,57 @@
"ax.set_title('kBET Evaluation of Batch Correction I')\n",
"fig.set_tight_layout(True)\n",
"plt.tight_layout\n",
"fig.savefig(f'{rootdir}/{s_date}/BatchEffectI.png',dpi=200)"
"fig.savefig(f'{rootdir}/{s_date}/BatchEffectI.png',dpi=300)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Horizontal box plot with overlaid strip plot of the Pearson correlations stored in\n",
"# Pearson_correlation0.6.csv, one row of the plot per normalization method.\n",
"sns.set_style(\"whitegrid\")\n",
"#df_pearson = pd.read_csv(f'Pearson_correlation_all.csv',index_col=0)\n",
"df_pearson = pd.read_csv('Pearson_correlation0.6.csv',index_col=0)\n",
"# Plot order (top to bottom) and display name of each method, kept in one mapping so the\n",
"# tick labels cannot drift out of sync with the row order.\n",
"d_label = {'raw_combat':'combat','log2_combat':'log2 combat','restore_scale':'RESTORE','raw_regress_out':'regress out',\n",
"           'raw':'raw','raw_robust':'robust','raw_standard':'z_score'}\n",
"#ls_order = df_pearson.mean(axis=1).sort_values(ascending=False).index.tolist()\n",
"ls_order = list(d_label)\n",
"# Methods are index rows in the csv; transpose so each method becomes one column (one box).\n",
"df_plot = df_pearson.loc[ls_order].T\n",
"fig,ax = plt.subplots(figsize=(3.5,3),dpi=200)\n",
"sns.boxplot(data=df_plot,ax=ax,orient='h',showfliers=False,palette='muted')\n",
"sns.stripplot(data=df_plot,ax=ax,orient='h',palette='dark')\n",
"#ax.set_title(f'Cluster Correlation \\n (resolution {resolution})',fontsize=16)\n",
"ax.set_title('Cluster Correlation',fontsize=16)\n",
"ax.set_xlabel('Pearson Correlation',fontsize=14)\n",
"ax.yaxis.set_label_position(\"right\")\n",
"ax.set_yticklabels([d_label[item] for item in ls_order],fontsize=14)\n",
"ax.yaxis.tick_right()\n",
"plt.tight_layout()\n",
"fig.savefig(f'{rootdir}/filtered_data/figures/PearsonCorrelation_all0.6.png',dpi=300)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"df_select = df_file[df_file.norm.isin(['raw_combat','raw_standard','restore_scale','raw','restore_div','restore_local','raw_regress_out'])] #'raw_restore_combat'\n",
"sns.set_style(\"white\")\n",
"fig, ax = plt.subplots(figsize=(4.2,3.2))\n",
"fig, ax = plt.subplots(figsize=(4.2,3.2),dpi=300)\n",
"sns.lineplot(data=df_select.sort_values('mean_kbet'),x='norm',y='mean_kbet',ax=ax,err_style='bars')\n",
"labels = [item.replace('div','global').replace('raw_','').replace('_','\\n').replace('raw','none') for item in df_select.groupby('norm').mean_kbet.mean().sort_values().index.tolist()]\n",
"ax.set_xticks(range(len(df_select.groupby('norm').mean_kbet)))\n",
Expand All @@ -807,7 +859,7 @@
"ax.set_title('kBET Evaluation of Batch Correction')\n",
"fig.set_tight_layout(True)\n",
"plt.tight_layout\n",
"fig.savefig(f'{rootdir}/{s_date}/BatchEffect_select.png',dpi=200)"
"fig.savefig(f'{rootdir}/{s_date}/BatchEffect_select.png',dpi=300)"
]
},
{
Expand Down Expand Up @@ -873,11 +925,11 @@
" #'20201207_JE-TMA-41-43-62_SampledMeanIntensity_raw.csv',\n",
" #'20201207_JE-TMA-41-43-62_SampledMeanIntensity_raw_restore.csv',\n",
" #'20201207_JE-TMA-41-43-62_SampledMeanIntensity_raw_combat.csv',\n",
" #'20201207_JE-TMA-41-43-62_SampledMeanIntensity_raw_standard.csv',\n",
" '20201207_JE-TMA-41-43-62_SampledMeanIntensity_raw_standard.csv',\n",
" #'20201207_JE-TMA-41-43-62_SampledMeanIntensity_restore_scale.csv',\n",
" #'20201207_JE-TMA-41-43-62_SampledMeanIntensity_restore_local.csv'\n",
" #'20201207_JE-TMA-41-43-62_SampledMeanIntensity_restore_div.csv'\n",
" '20201207_JE-TMA-41-43-62_SampledMeanIntensity_restore_div_arcsinh.csv'\n",
" #'20201207_JE-TMA-41-43-62_SampledMeanIntensity_restore_div_arcsinh.csv'\n",
" ]\n",
"#ls_index = [ '20201208_JE-TMA-41-43-62_SampledMeanIntensity_raw.csv', '20201208_JE-TMA-41-43-62_SampledMeanIntensity_log2.csv',\n",
"# '20201207_JE-TMA-41-43-62_SampledMeanIntensity_raw.csv', '20201207_JE-TMA-41-43-62_SampledMeanIntensity_log2.csv',\n",
Expand All @@ -886,7 +938,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -898,30 +950,12 @@
" 'CK14','CK5', 'CK17', 'CK19', 'CK7', 'Ecad', 'ER', 'HER2', 'Ki67', 'PCNA', 'pHH3', 'LamAC']\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(marker_genes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# normalize \n",
Expand Down Expand Up @@ -964,34 +998,25 @@
" #umap plot\n",
" s_type = s_index.split('SampledMeanIntensity_')[1].split('.')[0]\n",
" figname = f\"UmapBatch_{s_type}_{s_norm}.png\"\n",
" fig,ax = plt.subplots(figsize=(6,4))\n",
" fig,ax = plt.subplots(figsize=(4,4),dpi=300)\n",
" sc.pl.umap(adata, color='batch',title=f\"{s_type.replace('_',' ')} {s_norm}\",wspace=.25,ax=ax,save=figname)\n",
" fig,ax = plt.subplots(figsize=(6,4))\n",
" fig,ax = plt.subplots(figsize=(4,4),dpi=300)\n",
" figname = f'UmapScene_{s_type}_{s_norm}.png'\n",
" fig = sc.pl.umap(adata, color='scene',save=figname,ax=ax,title=f\"{s_type.replace('_',' ')} {s_norm}\")\n",
" X_pca = adata.obsm['X_pca'] \n",
" # kmeans \n",
" '''\n",
" k=12\n",
" kmeans = KMeans(n_clusters=k, random_state=0).fit(X_pca) \n",
" adata.obs[f'kmeans{k}'] = kmeans.labels_.astype(str)\n",
" figname=f'Umap_Kmeans_{s_type}_{s_norm}.png'\n",
" fig,ax = plt.subplots(figsize=(6,4))\n",
" sc.pl.umap(adata, color=f'kmeans{k}',save=figname,ax=ax)\n",
" figname=f'Matrixplot_kmeans_{s_type}_{s_norm}.png'\n",
" sc.pl.matrixplot(adata, var_names=marker_genes, groupby=f'kmeans{k}',log=True,dendrogram=True,save=figname)\n",
" '''\n",
" #leiden\n",
" sc.tl.leiden(adata,resolution=0.25)\n",
" fig,ax = plt.subplots(figsize=(6,4))\n",
" figname=f'leiden_{s_type}_{s_norm}.png'\n",
" sc.pl.umap(adata, color='leiden',ax=ax,save=figname)\n",
" fig,ax = plt.subplots(figsize=(8,4))\n",
" figname=f'Matrixplot_leiden_{s_type}_{s_norm}.png'\n",
" sc.pl.matrixplot(adata, var_names=marker_genes, groupby=f'leiden',\n",
" dendrogram=True,ax=ax,save=figname,standard_scale='var',colorbar_title='Relative\\nintensity')\n",
" df = pd.DataFrame(data=adata.raw.X,index=adata.obs.index,columns=adata.var.index)\n",
" df['leiden'] = adata.obs['leiden']\n",
" #matrixplot\n",
" g=sns.clustermap(df.groupby('leiden').mean(),z_score=1,cmap='viridis')\n",
" ordered_genes=df.groupby('leiden').mean().iloc[:,g.dendrogram_col.reordered_ind].columns.tolist()\n",
" plt.close(g.fig)\n",
" sc.tl.dendrogram(adata,optimal_ordering =True, groupby=f'leiden')\n",
" fig,ax = plt.subplots(figsize=(8,4),dpi=300)\n",
" figname=f'Matrixplot_leiden_{s_type}_{s_norm}.png'\n",
" sc.pl.matrixplot(adata, groupby=f'leiden',var_names=ordered_genes,\n",
" dendrogram=True,ax=ax,save=figname,standard_scale='var',colorbar_title='Relative\\nintensity')\n",
" break\n",
" break\n",
"sc.pl.pca_variance_ratio(adata, log=True)"
Expand Down Expand Up @@ -1031,7 +1056,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#stacked bar\n",
Expand All @@ -1042,13 +1069,13 @@
"df_prop = (df.groupby([f'leiden','slide_scene']).CD4.count())/(df.groupby(['slide_scene']).CD4.count())\n",
"df_prop = df_prop.unstack().fillna(value=0).T\n",
"\n",
"fig,ax=plt.subplots(figsize=(7,3.7), dpi=200)\n",
"fig,ax=plt.subplots(figsize=(7,3.7), dpi=300)\n",
"df_prop.columns = df_prop.columns.add_categories(['slide','scene'])\n",
"df_prop.index = [item.replace('JE-TMA-','') for item in df_prop.index]\n",
"df_prop['slide'] =[item.split('_')[0] for item in df_prop.index]\n",
"df_prop['scene'] =[item.split('_')[1] for item in df_prop.index]\n",
"df_prop.sort_values(['scene','slide']).plot(kind='bar',stacked=True,ax=ax,legend=True,cmap='tab20')\n",
"ax.legend(bbox_to_anchor=(1.02, 1.2), ncol=2)\n",
"df_prop.sort_values(['scene','slide']).plot(kind='bar',stacked=True,ax=ax,legend=True,width=0.8)\n",
"ax.legend(bbox_to_anchor=(1.02, 1.2), ncol=1)\n",
"ax.set_ylabel('Fraction Positive')\n",
"ax.set_title(f\"{s_trans.replace('_',' ')} {s_norm}\")\n",
"ax.grid(False)\n",
Expand Down Expand Up @@ -1810,9 +1837,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python3.9.5",
"display_name": "python3.9.5",
"language": "python",
"name": "python3.9.5"
"name": "base"
},
"language_info": {
"codemirror_mode": {
Expand Down
Loading

0 comments on commit 4001a8a

Please sign in to comment.